[ https://issues.apache.org/jira/browse/HIVE-14171?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16356734#comment-16356734 ]

KaiXu commented on HIVE-14171:
------------------------------

Thanks [~colinma] for the information.
To [~vihangk1]: several TPC-DS queries (e.g. q22, q64, q75, q80, q85) hit
java.lang.OutOfMemoryError: Java heap space when it is set to false. The same
queries run fine against TXT-format tables with the same configuration.

java.lang.OutOfMemoryError: Java heap space
        at org.apache.hadoop.hive.serde2.WriteBuffers.nextBufferToWrite(WriteBuffers.java:246)
        at org.apache.hadoop.hive.serde2.WriteBuffers.write(WriteBuffers.java:222)
        at org.apache.hadoop.hive.serde2.WriteBuffers.write(WriteBuffers.java:207)
        at org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap.put(BytesBytesMultiHashMap.java:422)
        at org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer.putRow(MapJoinBytesTableContainer.java:395)
        at org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe.loadOptimized(MapJoinTableContainerSerDe.java:200)
        at org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe.load(MapJoinTableContainerSerDe.java:152)
        at org.apache.hadoop.hive.ql.exec.spark.HashTableLoader.load(HashTableLoader.java:169)
        at org.apache.hadoop.hive.ql.exec.spark.HashTableLoader.load(HashTableLoader.java:148)
        at org.apache.hadoop.hive.ql.exec.MapJoinOperator.loadHashTable(MapJoinOperator.java:315)
        at org.apache.hadoop.hive.ql.exec.MapJoinOperator$1.call(MapJoinOperator.java:187)
        at org.apache.hadoop.hive.ql.exec.MapJoinOperator$1.call(MapJoinOperator.java:183)
        at org.apache.hadoop.hive.ql.exec.mr.ObjectCache.retrieve(ObjectCache.java:60)
        at org.apache.hadoop.hive.ql.exec.mr.ObjectCache.retrieveAsync(ObjectCache.java:68)
        at org.apache.hadoop.hive.ql.exec.ObjectCacheWrapper.retrieveAsync(ObjectCacheWrapper.java:51)
        at org.apache.hadoop.hive.ql.exec.MapJoinOperator.initializeOp(MapJoinOperator.java:181)
        at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:366)
        at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:556)
        at org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:508)
        at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:376)
        at org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.init(SparkReduceRecordHandler.java:200)
        at org.apache.hadoop.hive.ql.exec.spark.HiveReduceFunction.call(HiveReduceFunction.java:46)
        at org.apache.hadoop.hive.ql.exec.spark.HiveReduceFunction.call(HiveReduceFunction.java:28)
        at org.apache.spark.api.java.JavaRDDLike$$anonfun$fn$7$1.apply(JavaRDDLike.scala:185)
        at org.apache.spark.api.java.JavaRDDLike$$anonfun$fn$7$1.apply(JavaRDDLike.scala:185)
        at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:785)
        at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:785)
        at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
        at org.apache.spark.rdd.UnionRDD.compute(UnionRDD.scala:105)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)

> Parquet: Simple vectorization throws NPEs
> -----------------------------------------
>
>                 Key: HIVE-14171
>                 URL: https://issues.apache.org/jira/browse/HIVE-14171
>             Project: Hive
>          Issue Type: Bug
>          Components: File Formats, Vectorization
>    Affects Versions: 2.2.0
>            Reporter: Gopal V
>            Priority: Major
>              Labels: Parquet
>
> {code}
> create temporary table cd_parquet stored as parquet as select * from customer_demographics;
> select count(1) from cd_parquet where cd_gender = 'F';
> {code}
> {code}
> Caused by: java.lang.NullPointerException
>       at org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper.next(ParquetRecordReaderWrapper.java:206)
>       at org.apache.hadoop.hive.ql.io.parquet.VectorizedParquetInputFormat$VectorizedParquetRecordReader.next(VectorizedParquetInputFormat.java:118)
>       at org.apache.hadoop.hive.ql.io.parquet.VectorizedParquetInputFormat$VectorizedParquetRecordReader.next(VectorizedParquetInputFormat.java:51)
>       at org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.doNext(HiveContextAwareRecordReader.java:350)
>       ... 17 more
> {code}
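
Not part of the quoted description, but since the quoted trace runs through VectorizedParquetInputFormat, a quick hedged check is to re-run the same query with vectorized execution toggled; if the NPE disappears when vectorization is off, it is specific to the vectorized Parquet read path. hive.vectorized.execution.enabled is the standard switch.

{code}
-- Hedged sketch, not from the original report: confirm the NPE comes from the
-- vectorized Parquet reader by toggling vectorized execution.
set hive.vectorized.execution.enabled=false;
select count(1) from cd_parquet where cd_gender = 'F';   -- non-vectorized path
set hive.vectorized.execution.enabled=true;
select count(1) from cd_parquet where cd_gender = 'F';   -- per the report, this path hits the NPE
{code}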



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
