Author: rhbutani
Date: Mon Mar 10 18:22:14 2014
New Revision: 1576037

URL: http://svn.apache.org/r1576037
Log:
HIVE-6414: ParquetInputFormat provides data values that do not match the object inspectors (Justin Coffey via Xuefu)

Added:
    hive/branches/branch-0.13/data/files/parquet_types.txt
    
hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/
    
hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetByteInspector.java
    
hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetShortInspector.java
    hive/branches/branch-0.13/ql/src/test/queries/clientpositive/parquet_types.q
    
hive/branches/branch-0.13/ql/src/test/results/clientpositive/parquet_types.q.out
Modified:
    
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetByteInspector.java
    
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetShortInspector.java

Added: hive/branches/branch-0.13/data/files/parquet_types.txt
URL: 
http://svn.apache.org/viewvc/hive/branches/branch-0.13/data/files/parquet_types.txt?rev=1576037&view=auto
==============================================================================
--- hive/branches/branch-0.13/data/files/parquet_types.txt (added)
+++ hive/branches/branch-0.13/data/files/parquet_types.txt Mon Mar 10 18:22:14 2014
@@ -0,0 +1,21 @@
+100|1|1|1.0|0.0|abc
+101|2|2|1.1|0.3|def
+102|3|3|1.2|0.6|ghi
+103|1|4|1.3|0.9|jkl
+104|2|5|1.4|1.2|mno
+105|3|1|1.0|1.5|pqr
+106|1|2|1.1|1.8|stu
+107|2|3|1.2|2.1|vwx
+108|3|4|1.3|2.4|yza
+109|1|5|1.4|2.7|bcd
+110|2|1|1.0|3.0|efg
+111|3|2|1.1|3.3|hij
+112|1|3|1.2|3.6|klm
+113|2|4|1.3|3.9|nop
+114|3|5|1.4|4.2|qrs
+115|1|1|1.0|4.5|tuv
+116|2|2|1.1|4.8|wxy
+117|3|3|1.2|5.1|zab
+118|1|4|1.3|5.4|cde
+119|2|5|1.4|5.7|fgh
+120|3|1|1.0|6.0|ijk

Modified: 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetByteInspector.java
URL: 
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetByteInspector.java?rev=1576037&r1=1576036&r2=1576037&view=diff
==============================================================================
--- 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetByteInspector.java
 (original)
+++ 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetByteInspector.java
 Mon Mar 10 18:22:14 2014
@@ -35,6 +35,11 @@ public class ParquetByteInspector extend
   }
 
   @Override
+  public Object getPrimitiveJavaObject(final Object o) {
+    return o == null ? null : get(o);
+  }
+
+  @Override
   public Object create(final byte val) {
     return new ByteWritable(val);
   }
@@ -51,6 +56,7 @@ public class ParquetByteInspector extend
     if (o instanceof IntWritable) {
       return (byte) ((IntWritable) o).get();
     }
+
     return ((ByteWritable) o).get();
   }
 }

Modified: 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetShortInspector.java
URL: 
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetShortInspector.java?rev=1576037&r1=1576036&r2=1576037&view=diff
==============================================================================
--- 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetShortInspector.java
 (original)
+++ 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetShortInspector.java
 Mon Mar 10 18:22:14 2014
@@ -35,6 +35,11 @@ public class ParquetShortInspector exten
   }
 
   @Override
+  public Object getPrimitiveJavaObject(final Object o) {
+    return o == null ? null : get(o);
+  }
+
+  @Override
   public Object create(final short val) {
     return new ShortWritable(val);
   }
@@ -51,6 +56,7 @@ public class ParquetShortInspector exten
     if (o instanceof IntWritable) {
       return (short) ((IntWritable) o).get();
     }
+
     return ((ShortWritable) o).get();
   }
 }

Added: 
hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetByteInspector.java
URL: 
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetByteInspector.java?rev=1576037&view=auto
==============================================================================
--- 
hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetByteInspector.java
 (added)
+++ 
hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetByteInspector.java
 Mon Mar 10 18:22:14 2014
@@ -0,0 +1,55 @@
+package org.apache.hadoop.hive.ql.io.parquet.serde.primitive;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestParquetByteInspector {
+
+  private ParquetByteInspector inspector;
+
+  @Before
+  public void setUp() {
+    inspector = new ParquetByteInspector();
+  }
+
+  @Test
+  public void testByteWritable() {
+    ByteWritable obj = new ByteWritable((byte) 5);
+    assertEquals(obj, inspector.getPrimitiveWritableObject(obj));
+    assertEquals((byte) 5, inspector.getPrimitiveJavaObject(obj));
+  }
+
+  @Test
+  public void testIntWritable() {
+    IntWritable obj = new IntWritable(10);
+    assertEquals(new ByteWritable((byte) 10), inspector.getPrimitiveWritableObject(obj));
+    assertEquals((byte) 10, inspector.getPrimitiveJavaObject(obj));
+  }
+
+  @Test
+  public void testNull() {
+    assertNull(inspector.getPrimitiveWritableObject(null));
+    assertNull(inspector.getPrimitiveJavaObject(null));
+  }
+
+  @Test
+  public void testCreate() {
+    assertEquals(new ByteWritable((byte) 8), inspector.create((byte) 8));
+  }
+
+  @Test
+  public void testSet() {
+    ByteWritable obj = new ByteWritable();
+    assertEquals(new ByteWritable((byte) 12), inspector.set(obj, (byte) 12));
+  }
+
+  @Test
+  public void testGet() {
+    ByteWritable obj = new ByteWritable((byte) 15);
+    assertEquals((byte) 15, inspector.get(obj));
+  }
+}

Added: 
hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetShortInspector.java
URL: 
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetShortInspector.java?rev=1576037&view=auto
==============================================================================
--- 
hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetShortInspector.java
 (added)
+++ 
hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/TestParquetShortInspector.java
 Mon Mar 10 18:22:14 2014
@@ -0,0 +1,57 @@
+package org.apache.hadoop.hive.ql.io.parquet.serde.primitive;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestParquetShortInspector {
+
+  private ParquetShortInspector inspector;
+
+  @Before
+  public void setUp() {
+    inspector = new ParquetShortInspector();
+  }
+
+  @Test
+  public void testShortWritable() {
+    ShortWritable obj = new ShortWritable((short) 5);
+    assertEquals(obj, inspector.getPrimitiveWritableObject(obj));
+    assertEquals((short) 5, inspector.getPrimitiveJavaObject(obj));
+  }
+
+  @Test
+  public void testIntWritable() {
+    IntWritable obj = new IntWritable(10);
+    assertEquals(new ShortWritable((short) 10), inspector.getPrimitiveWritableObject(obj));
+    assertEquals((short) 10, inspector.getPrimitiveJavaObject(obj));
+  }
+
+  @Test
+  public void testNull() {
+    assertNull(inspector.getPrimitiveWritableObject(null));
+    assertNull(inspector.getPrimitiveJavaObject(null));
+  }
+
+  @Test
+  public void testCreate() {
+    assertEquals(new ShortWritable((short) 8), inspector.create((short) 8));
+  }
+
+  @Test
+  public void testSet() {
+    ShortWritable obj = new ShortWritable();
+    assertEquals(new ShortWritable((short) 12), inspector.set(obj, (short) 12));
+  }
+
+  @Test
+  public void testGet() {
+    ShortWritable obj = new ShortWritable((short) 15);
+    assertEquals((short) 15, inspector.get(obj));
+  }
+}

Added: 
hive/branches/branch-0.13/ql/src/test/queries/clientpositive/parquet_types.q
URL: 
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/queries/clientpositive/parquet_types.q?rev=1576037&view=auto
==============================================================================
--- 
hive/branches/branch-0.13/ql/src/test/queries/clientpositive/parquet_types.q 
(added)
+++ 
hive/branches/branch-0.13/ql/src/test/queries/clientpositive/parquet_types.q 
Mon Mar 10 18:22:14 2014
@@ -0,0 +1,38 @@
+DROP TABLE parquet_types_staging;
+DROP TABLE parquet_types;
+
+CREATE TABLE parquet_types_staging (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string
+) ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|';
+
+CREATE TABLE parquet_types (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string
+) STORED AS PARQUET;
+
+LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging;
+
+INSERT OVERWRITE TABLE parquet_types SELECT * FROM parquet_types_staging;
+
+SELECT * FROM parquet_types;
+
+SELECT ctinyint,
+  MAX(cint),
+  MIN(csmallint),
+  COUNT(cstring1),
+  AVG(cfloat),
+  STDDEV_POP(cdouble)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+;

Added: 
hive/branches/branch-0.13/ql/src/test/results/clientpositive/parquet_types.q.out
URL: 
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/results/clientpositive/parquet_types.q.out?rev=1576037&view=auto
==============================================================================
--- 
hive/branches/branch-0.13/ql/src/test/results/clientpositive/parquet_types.q.out
 (added)
+++ 
hive/branches/branch-0.13/ql/src/test/results/clientpositive/parquet_types.q.out
 Mon Mar 10 18:22:14 2014
@@ -0,0 +1,142 @@
+PREHOOK: query: DROP TABLE parquet_types_staging
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE parquet_types_staging
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE parquet_types
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE parquet_types
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE parquet_types_staging (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string
+) ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: CREATE TABLE parquet_types_staging (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string
+) ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_types_staging
+PREHOOK: query: CREATE TABLE parquet_types (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string
+) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: CREATE TABLE parquet_types (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string
+) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_types
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@parquet_types_staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@parquet_types_staging
+PREHOOK: query: INSERT OVERWRITE TABLE parquet_types SELECT * FROM parquet_types_staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types_staging
+PREHOOK: Output: default@parquet_types
+POSTHOOK: query: INSERT OVERWRITE TABLE parquet_types SELECT * FROM parquet_types_staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types_staging
+POSTHOOK: Output: default@parquet_types
+POSTHOOK: Lineage: parquet_types.cdouble SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cfloat SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+PREHOOK: query: SELECT * FROM parquet_types
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_types
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: Lineage: parquet_types.cdouble SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cfloat SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+100    1       1       1.0     0.0     abc
+101    2       2       1.1     0.3     def
+102    3       3       1.2     0.6     ghi
+103    1       4       1.3     0.9     jkl
+104    2       5       1.4     1.2     mno
+105    3       1       1.0     1.5     pqr
+106    1       2       1.1     1.8     stu
+107    2       3       1.2     2.1     vwx
+108    3       4       1.3     2.4     yza
+109    1       5       1.4     2.7     bcd
+110    2       1       1.0     3.0     efg
+111    3       2       1.1     3.3     hij
+112    1       3       1.2     3.6     klm
+113    2       4       1.3     3.9     nop
+114    3       5       1.4     4.2     qrs
+115    1       1       1.0     4.5     tuv
+116    2       2       1.1     4.8     wxy
+117    3       3       1.2     5.1     zab
+118    1       4       1.3     5.4     cde
+119    2       5       1.4     5.7     fgh
+120    3       1       1.0     6.0     ijk
+PREHOOK: query: SELECT ctinyint,
+  MAX(cint),
+  MIN(csmallint),
+  COUNT(cstring1),
+  AVG(cfloat),
+  STDDEV_POP(cdouble)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT ctinyint,
+  MAX(cint),
+  MIN(csmallint),
+  COUNT(cstring1),
+  AVG(cfloat),
+  STDDEV_POP(cdouble)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: Lineage: parquet_types.cdouble SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cfloat SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+1      118     1       7       1.1857142789023263      1.8000000000000003
+2      119     1       7       1.2142857142857142      1.8
+3      120     1       7       1.171428578240531       1.7999999999999996


Reply via email to