Author: remusr
Date: Sun Apr  6 11:58:31 2014
New Revision: 1585290

URL: http://svn.apache.org/r1585290
Log:
HIVE-5998: Add vectorized reader for Parquet files (reviewed by Brock)

Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java
    hive/trunk/ql/src/test/queries/clientpositive/vectorized_parquet.q
    hive/trunk/ql/src/test/results/clientpositive/vectorized_parquet.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java?rev=1585290&r1=1585289&r2=1585290&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java Sun Apr  6 11:58:31 2014
@@ -26,6 +26,7 @@ import java.util.Map;
 
 import org.apache.hadoop.hive.common.type.Decimal128;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
@@ -35,6 +36,7 @@ import org.apache.hadoop.hive.serde2.io.
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.io.BooleanWritable;
@@ -42,6 +44,7 @@ import org.apache.hadoop.io.FloatWritabl
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
 
 /**
  * This class is used as a static factory for VectorColumnAssign.
@@ -215,10 +218,31 @@ public class VectorColumnAssignFactory {
  public static VectorColumnAssign buildObjectAssign(VectorizedRowBatch outputBatch,
       int outColIndex, ObjectInspector objInspector) throws HiveException {
     PrimitiveObjectInspector poi = (PrimitiveObjectInspector) objInspector;
+    return buildObjectAssign(outputBatch, outColIndex, poi.getPrimitiveCategory());
+  }
+
+  public static VectorColumnAssign buildObjectAssign(VectorizedRowBatch outputBatch,
+      int outColIndex, PrimitiveCategory category) throws HiveException {
     VectorColumnAssign outVCA = null;
     ColumnVector destCol = outputBatch.cols[outColIndex];
-    if (destCol instanceof LongColumnVector) {
-      switch(poi.getPrimitiveCategory()) {
+    if (destCol == null) {
+      switch(category) {
+      case VOID:
+        outVCA = new VectorLongColumnAssign() {
+          // This is a dummy assigner
+          @Override
+          public void assignObjectValue(Object val, int destIndex) throws HiveException {
+            // This is no-op, there is no column to assign to and val is expected to be null
+            assert (val == null);
+          }
+        };
+        break;
+      default:
+        throw new HiveException("Incompatible (null) vector column and primitive category " +
+            category);
+      }
+    } else if (destCol instanceof LongColumnVector) {
+      switch(category) {
       case BOOLEAN:
         outVCA = new VectorLongColumnAssign() {
           @Override
@@ -320,11 +344,11 @@ public class VectorColumnAssignFactory {
         break;
       default:
        throw new HiveException("Incompatible Long vector column and primitive category " +
-            poi.getPrimitiveCategory());
+            category);
       }
     }
     else if (destCol instanceof DoubleColumnVector) {
-      switch(poi.getPrimitiveCategory()) {
+      switch(category) {
       case DOUBLE:
         outVCA = new VectorDoubleColumnAssign() {
           @Override
@@ -355,11 +379,26 @@ public class VectorColumnAssignFactory {
         break;
       default:
        throw new HiveException("Incompatible Double vector column and primitive category " +
-            poi.getPrimitiveCategory());
+            category);
       }
     }
     else if (destCol instanceof BytesColumnVector) {
-      switch(poi.getPrimitiveCategory()) {
+      switch(category) {
+      case BINARY:
+        outVCA = new VectorBytesColumnAssign() {
+          @Override
+          public void assignObjectValue(Object val, int destIndex) throws HiveException {
+            if (val == null) {
+              assignNull(destIndex);
+            }
+            else {
+              BinaryWritable bw = (BinaryWritable) val;
+              byte[] bytes = bw.getBytes();
+              assignBytes(bytes, 0, bytes.length, destIndex);
+            }
+          }
+        }.init(outputBatch, (BytesColumnVector) destCol);
+        break;
       case STRING:
         outVCA = new VectorBytesColumnAssign() {
           @Override
@@ -377,11 +416,11 @@ public class VectorColumnAssignFactory {
         break;
       default:
        throw new HiveException("Incompatible Bytes vector column and primitive category " +
-            poi.getPrimitiveCategory());
+            category);
       }
     }
     else if (destCol instanceof DecimalColumnVector) {
-      switch(poi.getPrimitiveCategory()) {
+      switch(category) {
       case DECIMAL:
         outVCA = new VectorDecimalColumnAssign() {
           @Override
@@ -398,7 +437,7 @@ public class VectorColumnAssignFactory {
           break;
         default:
          throw new HiveException("Incompatible Decimal vector column and primitive category " +
-              poi.getPrimitiveCategory());
+              category);
         }
     }
     else {
@@ -431,4 +470,39 @@ public class VectorColumnAssignFactory {
     }
     return vcas;
   }
-}
\ No newline at end of file
+
+  public static VectorColumnAssign[] buildAssigners(VectorizedRowBatch outputBatch,
+      Writable[] writables) throws HiveException {
+    VectorColumnAssign[] vcas = new VectorColumnAssign[outputBatch.numCols];
+    for (int i = 0; i < outputBatch.numCols; ++i) {
+      if (writables[i] == null) {
+        assert(outputBatch.cols[i] == null);
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.VOID);
+      } else if (writables[i] instanceof ByteWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.BYTE);
+      } else if (writables[i] instanceof ShortWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.SHORT);
+      } else if (writables[i] instanceof IntWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.INT);
+      } else if (writables[i] instanceof LongWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.LONG);
+      } else if (writables[i] instanceof FloatWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.FLOAT);
+      } else if (writables[i] instanceof DoubleWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.DOUBLE);
+      } else if (writables[i] instanceof Text) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.STRING);
+      } else if (writables[i] instanceof BinaryWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.BINARY);
+      } else if (writables[i] instanceof TimestampWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.TIMESTAMP);
+      } else if (writables[i] instanceof BooleanWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.BOOLEAN);
+      } else {
+        throw new HiveException("Unimplemented vector assigner for writable type " +
+           writables[i].getClass());
+      }
+    }
+    return vcas;
+  }
+}
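
For context, a minimal sketch (not part of this commit) of how the new buildAssigners(VectorizedRowBatch, Writable[]) overload is meant to be driven: assigners are chosen lazily from the concrete Writable types of the first row (a null entry selects the dummy VOID assigner for a missing column) and then reused for every subsequent row. The copyRowIntoBatch helper and the assumption that the batch's columns already match the writables are hypothetical; the pattern mirrors the VectorizedParquetRecordReader.next() loop added below.

import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssign;
import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssignFactory;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.io.Writable;

public class AssignerSketch {
  // Cached per-column assigners, built from the first row's Writable types.
  private VectorColumnAssign[] assigners;

  // Hypothetical helper: copies one row of Writables into the next slot of the batch.
  void copyRowIntoBatch(VectorizedRowBatch batch, Writable[] writables) throws HiveException {
    if (assigners == null) {
      // A null writable maps to the VOID (no-op) assigner for that column.
      assigners = VectorColumnAssignFactory.buildAssigners(batch, writables);
    }
    for (int i = 0; i < batch.numCols; ++i) {
      assigners[i].assignObjectValue(writables[i], batch.size);
    }
    ++batch.size;
  }
}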

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java?rev=1585290&r1=1585289&r2=1585290&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java Sun Apr  6 11:58:31 2014
@@ -97,6 +97,13 @@ public class VectorizedRowBatch implemen
   }
 
   /**
+   * Returns the maximum size of the batch (number of rows it can hold)
+   */
+  public int getMaxSize() {
+      return selected.length;
+  }
+
+  /**
    * Return count of qualifying rows.
    *
    * @return number of rows that have not been filtered out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java?rev=1585290&r1=1585289&r2=1585290&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java Sun Apr  6 11:58:31 2014
@@ -582,4 +582,17 @@ public class VectorizedRowBatchCtx {
     }
   }
 
+  public VectorColumnAssign[] buildObjectAssigners(VectorizedRowBatch outputBatch)
+        throws HiveException {
+    List<? extends StructField> fieldRefs = rowOI.getAllStructFieldRefs();
+    assert outputBatch.numCols == fieldRefs.size();
+    VectorColumnAssign[] assigners = new VectorColumnAssign[fieldRefs.size()];
+    for(int i = 0; i < assigners.length; ++i) {
+        StructField fieldRef = fieldRefs.get(i);
+        ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();
+        assigners[i] = VectorColumnAssignFactory.buildObjectAssign(
+                outputBatch, i, fieldOI);
+    }
+    return assigners;
+  }
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java?rev=1585290&r1=1585289&r2=1585290&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java Sun Apr  6 11:58:31 2014
@@ -14,7 +14,10 @@
 package org.apache.hadoop.hive.ql.io.parquet;
 
 import java.io.IOException;
-
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
 import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
 import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
 import org.apache.hadoop.io.ArrayWritable;
@@ -29,18 +32,25 @@ import parquet.hadoop.ParquetInputFormat
  * A Parquet InputFormat for Hive (with the deprecated package mapred)
  *
  */
-public class MapredParquetInputFormat extends FileInputFormat<Void, ArrayWritable> {
+public class MapredParquetInputFormat extends FileInputFormat<Void, ArrayWritable>
+    implements VectorizedInputFormatInterface {
+
+  private static final Log LOG = LogFactory.getLog(MapredParquetInputFormat.class);
 
   private final ParquetInputFormat<ArrayWritable> realInput;
 
+  private final transient VectorizedParquetInputFormat vectorizedSelf;
+
   public MapredParquetInputFormat() {
     this(new ParquetInputFormat<ArrayWritable>(DataWritableReadSupport.class));
   }
 
  protected MapredParquetInputFormat(final ParquetInputFormat<ArrayWritable> inputFormat) {
     this.realInput = inputFormat;
+    vectorizedSelf = new VectorizedParquetInputFormat(inputFormat);
   }
 
+  @SuppressWarnings({ "unchecked", "rawtypes" })
   @Override
  public org.apache.hadoop.mapred.RecordReader<Void, ArrayWritable> getRecordReader(
       final org.apache.hadoop.mapred.InputSplit split,
@@ -48,7 +58,19 @@ public class MapredParquetInputFormat ex
       final org.apache.hadoop.mapred.Reporter reporter
       ) throws IOException {
     try {
-      return (RecordReader<Void, ArrayWritable>) new ParquetRecordReaderWrapper(realInput, split, job, reporter);
+      if (Utilities.isVectorMode(job)) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Using vectorized record reader");
+        }
+        return (RecordReader) vectorizedSelf.getRecordReader(split, job, reporter);
+      }
+      else {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Using row-mode record reader");
+        }
+        return (RecordReader<Void, ArrayWritable>)
+          new ParquetRecordReaderWrapper(realInput, split, job, reporter);
+      }
     } catch (final InterruptedException e) {
       throw new RuntimeException("Cannot create a RecordReaderWrapper", e);
     }

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java?rev=1585290&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java Sun Apr  6 11:58:31 2014
@@ -0,0 +1,160 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet;
+
+import java.io.IOException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssign;
+import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssignFactory;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+import parquet.hadoop.ParquetInputFormat;
+
+/**
+ * Vectorized input format for Parquet files
+ */
+public class VectorizedParquetInputFormat extends FileInputFormat<NullWritable, VectorizedRowBatch>
+  implements VectorizedInputFormatInterface {
+
+  private static final Log LOG = LogFactory.getLog(VectorizedParquetInputFormat.class);
+
+  /**
+   * Vectorized record reader for vectorized Parquet input format
+   */
+  private static class VectorizedParquetRecordReader implements
+      RecordReader<NullWritable, VectorizedRowBatch> {
+    private static final Log LOG = LogFactory.getLog(VectorizedParquetRecordReader.class);
+
+    private final ParquetRecordReaderWrapper internalReader;
+      private VectorizedRowBatchCtx rbCtx;
+      private ArrayWritable internalValues;
+      private Void internalKey;
+      private VectorColumnAssign[] assigners;
+
+    public VectorizedParquetRecordReader(
+        ParquetInputFormat<ArrayWritable> realInput,
+        FileSplit split,
+        JobConf conf, Reporter reporter) throws IOException, InterruptedException {
+      internalReader = new ParquetRecordReaderWrapper(
+        realInput,
+        split,
+        conf,
+        reporter);
+      try {
+        rbCtx = new VectorizedRowBatchCtx();
+        rbCtx.init(conf, split);
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+      @Override
+      public NullWritable createKey() {
+        internalKey = internalReader.createKey();
+        return NullWritable.get();
+      }
+
+      @Override
+      public VectorizedRowBatch createValue() {
+        VectorizedRowBatch outputBatch = null;
+        try {
+          outputBatch = rbCtx.createVectorizedRowBatch();
+          internalValues = internalReader.createValue();
+        } catch (HiveException e) {
+          throw new RuntimeException("Error creating a batch", e);
+        }
+        return outputBatch;
+      }
+
+      @Override
+      public long getPos() throws IOException {
+        return internalReader.getPos();
+      }
+
+      @Override
+      public void close() throws IOException {
+        internalReader.close();
+      }
+
+      @Override
+      public float getProgress() throws IOException {
+        return internalReader.getProgress();
+      }
+
+    @Override
+    public boolean next(NullWritable key, VectorizedRowBatch outputBatch)
+        throws IOException {
+      assert(outputBatch.numCols == assigners.length);
+      outputBatch.reset();
+      int maxSize = outputBatch.getMaxSize();
+      try {
+        while (outputBatch.size < maxSize) {
+          if (false == internalReader.next(internalKey, internalValues)) {
+            outputBatch.endOfFile = true;
+            break;
+          }
+          Writable[] writables = internalValues.get();
+
+          if (null == assigners) {
+            // Normally we'd build the assigners from the rbCtx.rowOI, but with Parquet
+            // we have a discrepancy between the metadata type (Eg. tinyint -> BYTE) and
+            // the writable value (IntWritable). see Parquet's ETypeConverter class.
+            assigners = VectorColumnAssignFactory.buildAssigners(outputBatch, writables);
+          }
+
+          for(int i = 0; i < outputBatch.numCols; ++i) {
+            assigners[i].assignObjectValue(writables[i], outputBatch.size);
+          }
+          ++outputBatch.size;
+         }
+      } catch (HiveException e) {
+        throw new RuntimeException(e);
+      }
+      return outputBatch.size > 0;
+    }
+  }
+
+  private final ParquetInputFormat<ArrayWritable> realInput;
+
+  public VectorizedParquetInputFormat(ParquetInputFormat<ArrayWritable> realInput) {
+    this.realInput = realInput;
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public RecordReader<NullWritable, VectorizedRowBatch> getRecordReader(
+      InputSplit split, JobConf conf, Reporter reporter) throws IOException {
+    try {
+      return (RecordReader<NullWritable, VectorizedRowBatch>)
+        new VectorizedParquetRecordReader(realInput, (FileSplit) split, conf, reporter);
+    } catch (final InterruptedException e) {
+      throw new RuntimeException("Cannot create a VectorizedParquetRecordReader", e);
+    }
+  }
+
+}
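
A minimal usage sketch (not part of this commit, and assuming a FileSplit and JobConf are already set up) of how a caller would consume the new vectorized reader; the process() consumer is a hypothetical placeholder:

import java.io.IOException;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.parquet.VectorizedParquetInputFormat;
import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import parquet.hadoop.ParquetInputFormat;

public class VectorizedReadSketch {
  static void readAll(FileSplit split, JobConf conf) throws IOException {
    VectorizedParquetInputFormat format = new VectorizedParquetInputFormat(
        new ParquetInputFormat<ArrayWritable>(DataWritableReadSupport.class));
    RecordReader<NullWritable, VectorizedRowBatch> reader =
        format.getRecordReader(split, conf, Reporter.NULL);
    NullWritable key = reader.createKey();
    VectorizedRowBatch batch = reader.createValue();
    try {
      while (reader.next(key, batch)) {
        // Each call fills up to batch.getMaxSize() rows; batch.size holds the actual count.
        process(batch);
      }
    } finally {
      reader.close();
    }
  }

  private static void process(VectorizedRowBatch batch) {
    // hypothetical downstream consumer
  }
}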

Added: hive/trunk/ql/src/test/queries/clientpositive/vectorized_parquet.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorized_parquet.q?rev=1585290&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorized_parquet.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorized_parquet.q Sun Apr  6 11:58:31 2014
@@ -0,0 +1,44 @@
+create table if not exists alltypes_parquet (
+  cint int, 
+  ctinyint tinyint, 
+  csmallint smallint, 
+  cfloat float, 
+  cdouble double, 
+  cstring1 string) stored as parquet;
+  
+insert overwrite table alltypes_parquet 
+  select cint, 
+    ctinyint, 
+    csmallint, 
+    cfloat, 
+    cdouble, 
+    cstring1 
+  from alltypesorc;
+  
+SET hive.vectorized.execution.enabled=true;
+  
+explain select * 
+  from alltypes_parquet
+  where cint = 528534767 
+  limit 10;
+select * 
+  from alltypes_parquet
+  where cint = 528534767 
+  limit 10;
+
+explain select ctinyint, 
+  max(cint), 
+  min(csmallint), 
+  count(cstring1), 
+  avg(cfloat), 
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint;
+select ctinyint, 
+  max(cint), 
+  min(csmallint), 
+  count(cstring1), 
+  avg(cfloat), 
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint;

Added: hive/trunk/ql/src/test/results/clientpositive/vectorized_parquet.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vectorized_parquet.q.out?rev=1585290&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/vectorized_parquet.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/vectorized_parquet.q.out Sun Apr  6 11:58:31 2014
@@ -0,0 +1,358 @@
+PREHOOK: query: create table if not exists alltypes_parquet (
+  cint int, 
+  ctinyint tinyint, 
+  csmallint smallint, 
+  cfloat float, 
+  cdouble double, 
+  cstring1 string) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: create table if not exists alltypes_parquet (
+  cint int, 
+  ctinyint tinyint, 
+  csmallint smallint, 
+  cfloat float, 
+  cdouble double, 
+  cstring1 string) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alltypes_parquet
+PREHOOK: query: insert overwrite table alltypes_parquet 
+  select cint, 
+    ctinyint, 
+    csmallint, 
+    cfloat, 
+    cdouble, 
+    cstring1 
+  from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@alltypes_parquet
+POSTHOOK: query: insert overwrite table alltypes_parquet 
+  select cint, 
+    ctinyint, 
+    csmallint, 
+    cfloat, 
+    cdouble, 
+    cstring1 
+  from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@alltypes_parquet
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+PREHOOK: query: explain select * 
+  from alltypes_parquet
+  where cint = 528534767 
+  limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * 
+  from alltypes_parquet
+  where cint = 528534767 
+  limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypes_parquet
+            Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (cint = 528534767) (type: boolean)
+              Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+
+PREHOOK: query: select * 
+  from alltypes_parquet
+  where cint = 528534767 
+  limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypes_parquet
+#### A masked pattern was here ####
+POSTHOOK: query: select * 
+  from alltypes_parquet
+  where cint = 528534767 
+  limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypes_parquet
+#### A masked pattern was here ####
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+528534767      -50     -13326  -50.0   -13326.0        cvLH6Eat2yFsyy7p
+528534767      NULL    -4213   NULL    -4213.0 cvLH6Eat2yFsyy7p
+528534767      -28     -15813  -28.0   -15813.0        cvLH6Eat2yFsyy7p
+528534767      31      -9566   31.0    -9566.0 cvLH6Eat2yFsyy7p
+528534767      -34     15007   -34.0   15007.0 cvLH6Eat2yFsyy7p
+528534767      29      7021    29.0    7021.0  cvLH6Eat2yFsyy7p
+528534767      31      4963    31.0    4963.0  cvLH6Eat2yFsyy7p
+528534767      27      -7824   27.0    -7824.0 cvLH6Eat2yFsyy7p
+528534767      -11     -15431  -11.0   -15431.0        cvLH6Eat2yFsyy7p
+528534767      61      -15549  61.0    -15549.0        cvLH6Eat2yFsyy7p
+PREHOOK: query: explain select ctinyint, 
+  max(cint), 
+  min(csmallint), 
+  count(cstring1), 
+  avg(cfloat), 
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select ctinyint, 
+  max(cint), 
+  min(csmallint), 
+  count(cstring1), 
+  avg(cfloat), 
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypes_parquet
+            Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double)
+              outputColumnNames: ctinyint, cint, csmallint, cstring1, cfloat, cdouble
+              Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble)
+                keys: ctinyint (type: tinyint)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: tinyint)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: tinyint)
+                  Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,input:float>), _col5 (type: struct<count:bigint,sum:double,variance:double>)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
+          keys: KEY._col0 (type: tinyint)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+            Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+PREHOOK: query: select ctinyint, 
+  max(cint), 
+  min(csmallint), 
+  count(cstring1), 
+  avg(cfloat), 
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypes_parquet
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint, 
+  max(cint), 
+  min(csmallint), 
+  count(cstring1), 
+  avg(cfloat), 
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypes_parquet
+#### A masked pattern was here ####
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+NULL   1073418988      -16379  3115    NULL    305051.4870777435
+-64    626923679       -15920  21      -64.0   9254.456539277186
+-63    626923679       -12516  16      -63.0   9263.605837223322
+-62    626923679       -15992  24      -62.0   9004.593091474135
+-61    626923679       -15142  22      -61.0   9357.236187870849
+-60    626923679       -15792  24      -60.0   9892.656196775464
+-59    626923679       -15789  28      -59.0   9829.790704244733
+-58    626923679       -15169  20      -58.0   9549.096672008198
+-57    626923679       -14893  32      -57.0   8572.083461570477
+-56    626923679       -11999  33      -56.0   9490.842152672341
+-55    626923679       -13381  26      -55.0   9157.562103946742
+-54    626923679       -14815  23      -54.0   9614.154026896626
+-53    626923679       -15445  19      -53.0   9387.739325499799
+-52    626923679       -16369  30      -52.0   8625.06871423408
+-51    1073680599      -15734  1028    -51.0   9531.569305177045
+-50    626923679       -14320  27      -50.0   8548.827748002343
+-49    626923679       -14831  23      -49.0   9894.429191738676
+-48    626923679       -15462  26      -48.0   9913.883371354861
+-47    626923679       -16096  19      -47.0   9011.009178780589
+-46    626923679       -12427  21      -46.0   9182.943188188632
+-45    626923679       -15027  21      -45.0   8567.489593562543
+-44    626923679       -15667  21      -44.0   10334.01810499552
+-43    626923679       -15607  27      -43.0   8715.255026265124
+-42    626923679       -16025  14      -42.0   9692.646755759979
+-41    626923679       -12606  21      -41.0   9034.40949481481
+-40    626923679       -14678  23      -40.0   9883.334986561835
+-39    626923679       -15612  19      -39.0   9765.551806305297
+-38    626923679       -14914  28      -38.0   8767.375358291503
+-37    626923679       -14780  17      -37.0   10368.905538788269
+-36    626923679       -16208  23      -36.0   8773.547684436919
+-35    626923679       -16059  23      -35.0   10136.580492864763
+-34    626923679       -15450  29      -34.0   8708.243526705026
+-33    626923679       -12779  21      -33.0   8854.331159704514
+-32    626923679       -15866  25      -32.0   9535.546396775915
+-31    626923679       -15915  22      -31.0   9187.596784112568
+-30    626923679       -14863  23      -30.0   9193.941914019653
+-29    626923679       -14747  26      -29.0   9052.945656011721
+-28    626923679       -15813  20      -28.0   9616.869413270924
+-27    626923679       -14984  20      -27.0   8465.29660255097
+-26    626923679       -15686  15      -26.0   10874.523900405318
+-25    626923679       -15862  24      -25.0   9778.256724727018
+-24    626923679       -16311  26      -24.0   9386.736402961187
+-23    626923679       -16355  36      -23.345263230173213     
9401.831290253447
+-22    626923679       -14701  22      -22.0   8809.230165774987
+-21    626923679       -16017  27      -21.0   9480.349236669877
+-20    626923679       -16126  24      -20.0   9868.92268080106
+-19    626923679       -15935  25      -19.0   9967.22240685782
+-18    626923679       -14863  24      -18.0   9638.430684071413
+-17    626923679       -15922  19      -17.0   9944.104273894172
+-16    626923679       -15154  21      -16.0   8884.207393686478
+-15    626923679       -16036  24      -15.0   9450.506254395024
+-14    626923679       -13884  22      -14.0   10125.818731386042
+-13    626923679       -15446  30      -13.0   8907.942987576693
+-12    626923679       -16373  22      -12.0   10173.15707541171
+-11    626923679       -15659  32      -11.0   10453.738567408038
+-10    626923679       -15384  28      -10.0   8850.451610567823
+-9     626923679       -15329  31      -9.0    8999.391457373968
+-8     626923679       -14678  18      -8.0    9976.831992670684
+-7     626923679       -14584  23      -7.0    9946.605446407746
+-6     626923679       -15980  30      -6.0    10262.829252317424
+-5     626923679       -15780  24      -5.0    10599.227726422314
+-4     626923679       -16207  21      -4.0    9682.726604102581
+-3     626923679       -13632  16      -3.0    8836.215573422822
+-2     626923679       -16277  20      -2.0    10800.090249507177
+-1     626923679       -15441  36      -1.0486250072717667     
8786.246963933321
+0      626923679       -14254  24      0.0     10057.5018088718
+1      626923679       -14610  30      1.0     10016.486277900643
+2      626923679       -16227  25      2.0     10083.276127543355
+3      626923679       -16339  30      3.0     10483.526375885149
+4      626923679       -15999  29      4.0     9516.189702058042
+5      626923679       -16169  31      5.0     11114.001902469323
+6      626923679       -15948  30      6.0     9644.247255286113
+7      626923679       -15839  25      7.0     10077.151640330823
+8      1070764888      -15778  1034    8.0     9562.355155774725
+9      626923679       -13629  25      9.0     10157.217948808622
+10     626923679       -15887  26      10.0    9104.820520135108
+11     1072654057      -14696  1035    11.0    9531.018991371746
+12     626923679       -14642  18      12.0    9696.038286378725
+13     626923679       -14771  26      13.0    8128.265919972384
+14     626923679       -13367  28      14.0    9074.674998750581
+15     626923679       -16339  28      15.0    9770.473400901916
+16     626923679       -14001  26      16.0    10130.883606275334
+17     626923679       -16109  22      16.73235294865627       1353416.3383574807
+18     626923679       -15779  21      18.0    10820.004053788869
+19     626923679       -16049  21      19.0    9423.560227007669
+20     626923679       -15149  21      20.0    11161.893298093504
+21     626923679       -15931  23      21.0    9683.044864861204
+22     626923679       -16280  26      22.0    9693.155720861765
+23     626923679       -15514  24      23.0    8542.419116415425
+24     626923679       -15086  24      24.0    9661.203790645088
+25     626923679       -11349  23      25.0    8888.959012093468
+26     626923679       -14516  29      26.0    9123.125508880432
+27     626923679       -14965  24      27.0    9802.871860196345
+28     626923679       -14455  20      28.0    9283.289383115296
+29     626923679       -15892  16      29.0    9874.046501817154
+30     626923679       -14111  27      30.0    10066.520234676527
+31     626923679       -15960  24      31.0    10427.970184550613
+32     626923679       -14044  24      32.0    8376.464579403413
+33     626923679       -14642  29      40.61776386607777       1304429.5939037625
+34     626923679       -15059  28      34.0    8756.731536033676
+35     626923679       -16153  27      35.0    10351.008404963042
+36     626923679       -15912  20      36.0    9475.257975138164
+37     626923679       -12081  24      37.0    9017.860034890362
+38     626923679       -15248  29      38.0    9900.256257785535
+39     626923679       -14887  28      39.0    10513.343644635232
+40     626923679       -15861  22      40.0    9283.318678549174
+41     626923679       -13480  21      41.0    9016.291129937847
+42     626923679       -15834  28      42.0    10318.01399719996
+43     626923679       -15703  28      43.0    8757.796089055722
+44     626923679       -11185  16      44.0    9425.076634933797
+45     626923679       -15228  18      45.0    9459.968668643689
+46     626923679       -15187  22      46.0    9685.908173160062
+47     626923679       -16324  22      47.0    9822.220821743611
+48     626923679       -16372  29      48.0    10079.286173063345
+49     626923679       -15923  27      49.0    9850.111848934683
+50     626923679       -16236  21      50.0    9398.176197406601
+51     626923679       -15790  17      51.0    9220.075799194028
+52     626923679       -15450  20      52.0    9261.723648435052
+53     626923679       -16217  30      53.0    9895.247408969733
+54     626923679       -15245  16      54.0    9789.50878424882
+55     626923679       -15887  21      55.0    9826.38569192808
+56     626923679       -12631  21      56.0    8860.917133763547
+57     626923679       -15620  25      57.0    9413.99393840875
+58     626923679       -13627  20      58.0    9083.529665947459
+59     626923679       -16076  17      59.0    10117.44967077967
+60     626923679       -13606  23      60.0    8346.267436552042
+61     626923679       -15894  29      61.0    8785.714950987198
+62     626923679       -14307  17      62.0    9491.752726667326

