Author: brock
Date: Tue Feb 3 01:56:07 2015
New Revision: 1656618
URL: http://svn.apache.org/r1656618
Log:
HIVE-9303 - Parquet files are written with incorrect definition levels (Sergio
Pena via Brock)
Added:
hive/trunk/ql/src/test/queries/clientpositive/parquet_write_correct_definition_levels.q
hive/trunk/ql/src/test/results/clientpositive/parquet_write_correct_definition_levels.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java?rev=1656618&r1=1656617&r2=1656618&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
Tue Feb 3 01:56:07 2015
@@ -310,6 +310,10 @@ public class ParquetHiveSerDe extends Ab
}
private Writable createObject(final Object obj, final ObjectInspector
inspector) throws SerDeException {
+ if (obj == null) {
+ return null;
+ }
+
switch (inspector.getCategory()) {
case STRUCT:
return createStruct(obj, (StructObjectInspector) inspector);
Added:
hive/trunk/ql/src/test/queries/clientpositive/parquet_write_correct_definition_levels.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/parquet_write_correct_definition_levels.q?rev=1656618&view=auto
==============================================================================
---
hive/trunk/ql/src/test/queries/clientpositive/parquet_write_correct_definition_levels.q
(added)
+++
hive/trunk/ql/src/test/queries/clientpositive/parquet_write_correct_definition_levels.q
Tue Feb 3 01:56:07 2015
@@ -0,0 +1,18 @@
+CREATE TABLE text_tbl (a STRUCT<b:STRUCT<c:INT>>)
+STORED AS TEXTFILE;
+
+-- This inserts one NULL row
+INSERT OVERWRITE TABLE text_tbl
+SELECT IF(false, named_struct("b", named_struct("c", 1)), NULL)
+FROM src LIMIT 1;
+
+-- We test that parquet is written with a level 0 definition
+CREATE TABLE parq_tbl
+STORED AS PARQUET
+AS SELECT * FROM text_tbl;
+
+SELECT * FROM text_tbl;
+SELECT * FROM parq_tbl;
+
+DROP TABLE text_tbl;
+DROP TABLE parq_tbl;
\ No newline at end of file
Added:
hive/trunk/ql/src/test/results/clientpositive/parquet_write_correct_definition_levels.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/parquet_write_correct_definition_levels.q.out?rev=1656618&view=auto
==============================================================================
---
hive/trunk/ql/src/test/results/clientpositive/parquet_write_correct_definition_levels.q.out
(added)
+++
hive/trunk/ql/src/test/results/clientpositive/parquet_write_correct_definition_levels.q.out
Tue Feb 3 01:56:07 2015
@@ -0,0 +1,75 @@
+PREHOOK: query: CREATE TABLE text_tbl (a STRUCT<b:STRUCT<c:INT>>)
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@text_tbl
+POSTHOOK: query: CREATE TABLE text_tbl (a STRUCT<b:STRUCT<c:INT>>)
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@text_tbl
+PREHOOK: query: -- This inserts one NULL row
+INSERT OVERWRITE TABLE text_tbl
+SELECT IF(false, named_struct("b", named_struct("c", 1)), NULL)
+FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@text_tbl
+POSTHOOK: query: -- This inserts one NULL row
+INSERT OVERWRITE TABLE text_tbl
+SELECT IF(false, named_struct("b", named_struct("c", 1)), NULL)
+FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@text_tbl
+POSTHOOK: Lineage: text_tbl.a EXPRESSION []
+PREHOOK: query: -- We test that parquet is written with a level 0 definition
+CREATE TABLE parq_tbl
+STORED AS PARQUET
+AS SELECT * FROM text_tbl
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@text_tbl
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parq_tbl
+POSTHOOK: query: -- We test that parquet is written with a level 0 definition
+CREATE TABLE parq_tbl
+STORED AS PARQUET
+AS SELECT * FROM text_tbl
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@text_tbl
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parq_tbl
+PREHOOK: query: SELECT * FROM text_tbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@text_tbl
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM text_tbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@text_tbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: SELECT * FROM parq_tbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parq_tbl
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parq_tbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parq_tbl
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: DROP TABLE text_tbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@text_tbl
+PREHOOK: Output: default@text_tbl
+POSTHOOK: query: DROP TABLE text_tbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@text_tbl
+POSTHOOK: Output: default@text_tbl
+PREHOOK: query: DROP TABLE parq_tbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parq_tbl
+PREHOOK: Output: default@parq_tbl
+POSTHOOK: query: DROP TABLE parq_tbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parq_tbl
+POSTHOOK: Output: default@parq_tbl