[ https://issues.apache.org/jira/browse/HIVE-16780?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16027342#comment-16027342 ]
liyunzhang_intel edited comment on HIVE-16780 at 5/27/17 4:17 PM:
------------------------------------------------------------------
[~csun]: I found that if I disable "hive.optimize.index.filter", the case passes.
If I enable hive.optimize.index.filter, the case fails with this exception:
{code}
17/05/27 23:39:45 DEBUG Executor task launch worker-0 PerfLogger: </PERFLOG method=SparkInitializeOperators start=1495899585574 end=1495899585933 duration=359 from=org.apache.hadoop.hive.ql.exec.spark.SparkRecordHandler>
17/05/27 23:39:45 INFO Executor task launch worker-0 Utilities: PLAN PATH = hdfs://bdpe41:8020/tmp/hive/root/029a2d8a-c6e5-4ea9-adea-ef8fbea3cde2/hive_2017-05-27_23-39-06_464_5915518562441677640-1/-mr-10007/617d9dd6-9f9a-4786-8131-a7b98e8abc3e/map.xml
17/05/27 23:39:45 DEBUG Executor task launch worker-0 Utilities: Found plan in cache for name: map.xml
17/05/27 23:39:45 DEBUG Executor task launch worker-0 DFSClient: Connecting to datanode 10.239.47.162:50010
17/05/27 23:39:45 DEBUG Executor task launch worker-0 MapOperator: Processing alias(es) srcpart_hour for file hdfs://bdpe41:8020/user/hive/warehouse/srcpart_hour/000008_0
17/05/27 23:39:45 DEBUG Executor task launch worker-0 ObjectCache: Creating root_20170527233906_ac2934e1-2e58-4116-9f0d-35dee302d689_DynamicValueRegistry
17/05/27 23:39:45 ERROR Executor task launch worker-0 SparkMapRecordHandler: Error processing row: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"hr":"11","hour":"11"}
org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"hr":"11","hour":"11"}
    at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:562)
    at org.apache.hadoop.hive.ql.exec.spark.SparkMapRecordHandler.processRow(SparkMapRecordHandler.java:136)
    at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:48)
    at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:27)
    at org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList.hasNext(HiveBaseFunctionResultList.java:85)
    at scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:42)
    at scala.collection.Iterator$class.foreach(Iterator.scala:893)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
    at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)
    at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)
    at org.apache.spark.SparkContext$$anonfun$33.apply(SparkContext.scala:1974)
    at org.apache.spark.SparkContext$$anonfun$33.apply(SparkContext.scala:1974)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
    at org.apache.spark.scheduler.Task.run(Task.scala:85)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.IllegalStateException: Failed to retrieve dynamic value for RS_7_srcpart__col3_min
    at org.apache.hadoop.hive.ql.plan.DynamicValue.getValue(DynamicValue.java:126)
    at org.apache.hadoop.hive.ql.plan.DynamicValue.getWritableValue(DynamicValue.java:101)
    at org.apache.hadoop.hive.ql.exec.ExprNodeDynamicValueEvaluator._evaluate(ExprNodeDynamicValueEvaluator.java:51)
    at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
    at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator$DeferredExprObject.get(ExprNodeGenericFuncEvaluator.java:88)
    at org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan.evaluate(GenericUDFOPEqualOrGreaterThan.java:108)
    at org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween.evaluate(GenericUDFBetween.java:57)
    at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
    at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
    at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator$DeferredExprObject.get(ExprNodeGenericFuncEvaluator.java:88)
    at org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd.evaluate(GenericUDFOPAnd.java:63)
    at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
    at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
    at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator$DeferredExprObject.get(ExprNodeGenericFuncEvaluator.java:88)
    at org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd.evaluate(GenericUDFOPAnd.java:63)
    at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
    at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
    at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorHead._evaluate(ExprNodeEvaluatorHead.java:44)
    at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
    at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
    at org.apache.hadoop.hive.ql.exec.FilterOperator.process(FilterOperator.java:112)
    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
    at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130)
    at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:148)
    at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:547)
    ... 17 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.NullPointerException
    at org.apache.hadoop.hive.ql.exec.mr.ObjectCache.retrieve(ObjectCache.java:62)
    at org.apache.hadoop.hive.ql.exec.mr.ObjectCache.retrieve(ObjectCache.java:51)
    at org.apache.hadoop.hive.ql.exec.ObjectCacheWrapper.retrieve(ObjectCacheWrapper.java:40)
    at org.apache.hadoop.hive.ql.plan.DynamicValue.getValue(DynamicValue.java:119)
    ... 41 more
Caused by: java.lang.NullPointerException
    at org.apache.hadoop.hive.ql.exec.mr.ObjectCache.retrieve(ObjectCache.java:60)
    ... 44 more
17/05/27 23:39:45 ERROR Executor task launch worker-0 Executor: Exception in task 1.0 in stage 0.0 (TID 1)
java.lang.RuntimeException: Error processing row: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"hr":"11","hour":"11"}
    at org.apache.hadoop.hive.ql.exec.spark.SparkMapRecordHandler.processRow(SparkMapRecordHandler.java:149)
    at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:48)
    at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:27)
    at org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList.hasNext(HiveBaseFunctionResultList.java:85)
    at scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:42)
    at scala.collection.Iterator$class.foreach(Iterator.scala:893)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
    at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)
    at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)
    at org.apache.spark.SparkContext$$anonfun$33.apply(SparkContext.scala:1974)
    at org.apache.spark.SparkContext$$anonfun$33.apply(SparkContext.scala:1974)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
    at org.apache.spark.scheduler.Task.run(Task.scala:85)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"hr":"11","hour":"11"}
    at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:562)
    at org.apache.hadoop.hive.ql.exec.spark.SparkMapRecordHandler.processRow(SparkMapRecordHandler.java:136)
    ... 16 more
Caused by: java.lang.IllegalStateException: Failed to retrieve dynamic value for RS_7_srcpart__col3_min
    at org.apache.hadoop.hive.ql.plan.DynamicValue.getValue(DynamicValue.java:126)
    at org.apache.hadoop.hive.ql.plan.DynamicValue.getWritableValue(DynamicValue.java:101)
    at org.apache.hadoop.hive.ql.exec.ExprNodeDynamicValueEvaluator._evaluate(ExprNodeDynamicValueEvaluator.java:51)
    at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
    at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator$DeferredExprObject.get(ExprNodeGenericFuncEvaluator.java:88)
    at org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan.evaluate(GenericUDFOPEqualOrGreaterThan.java:108)
    at org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween.evaluate(GenericUDFBetween.java:57)
    at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
    at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
    at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator$DeferredExprObject.get(ExprNodeGenericFuncEvaluator.java:88)
    at org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd.evaluate(GenericUDFOPAnd.java:63)
    at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
    at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
    at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator$DeferredExprObject.get(ExprNodeGenericFuncEvaluator.java:88)
    at org.apache.hadoop.hive
{code}
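For reference, this is the only line I change in script.q (quoted below) to flip the result; it is just the toggle I used to isolate the setting, not a fix:
{code}
-- all other script.q settings unchanged
set hive.optimize.index.filter=false; -- case passes
set hive.optimize.index.filter=true;  -- case fails with the trace above
{code}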
Can you help verify whether this passes in your environment? In my environment the Hive version is commit 54dbca69c9ea630b9cccd5550bdb455b9bbc240c and Spark is 2.0.0.
was (Author: kellyzly):
The explain output of the above script is
{code}
STAGE PLANS:
Stage: Stage-2
Spark
DagName: root_20170526150838_2e0b1aeb-104a-4a50-8638-79dbf7bd0d28:4
Vertices:
Map 4
Map Operator Tree:
TableScan
alias: srcpart_date
filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE
Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: ds is not null (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE
Column stats: NONE
Spark HashTable Sink Operator
keys:
0 ds (type: string)
1 ds (type: string)
Position of Big Table: 0
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 42 Basic stats:
COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 42 Basic stats:
COMPLETE Column stats: NONE
Spark Partition Pruning Sink Operator
partition key expr: ds
tmp Path:
hdfs://bdpe41:8020/tmp/hive/root/574c5c5b-1a07-4c4f-9056-35482b189871/hive_2017-05-26_15-08-38_396_2133309677947979513-1/-mr-10004/1/4
Statistics: Num rows: 2 Data size: 42 Basic stats:
COMPLETE Column stats: NONE
target column name: ds
target work: Map 1
Local Work:
Map Reduce Local Work
Path -> Alias:
hdfs://bdpe41:8020/user/hive/warehouse/srcpart_date [srcpart_date]
Path -> Partition:
hdfs://bdpe41:8020/user/hive/warehouse/srcpart_date
Partition
base file name: srcpart_date
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucket_count -1
column.name.delimiter ,
columns ds,date
columns.comments
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location hdfs://bdpe41:8020/user/hive/warehouse/srcpart_date
name default.srcpart_date
numFiles 12
numRows 2
rawDataSize 42
serialization.ddl struct srcpart_date { string ds, string
date}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 44
transient_lastDdlTime 1495782474
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucket_count -1
column.name.delimiter ,
columns ds,date
columns.comments
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location
hdfs://bdpe41:8020/user/hive/warehouse/srcpart_date
name default.srcpart_date
numFiles 12
numRows 2
rawDataSize 42
serialization.ddl struct srcpart_date { string ds, string
date}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 44
transient_lastDdlTime 1495782474
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart_date
name: default.srcpart_date
Truncated Path -> Alias:
/srcpart_date [srcpart_date]
Stage: Stage-3
Spark
DagName: root_20170526150838_2e0b1aeb-104a-4a50-8638-79dbf7bd0d28:5
Vertices:
Map 4
Map Operator Tree:
TableScan
alias: srcpart_date
filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE
Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: ds is not null (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE
Column stats: NONE
Spark HashTable Sink Operator
keys:
0 ds (type: string)
1 ds (type: string)
Position of Big Table: 0
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 42 Basic stats:
COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 42 Basic stats:
COMPLETE Column stats: NONE
Spark Partition Pruning Sink Operator
partition key expr: ds
tmp Path:
hdfs://bdpe41:8020/tmp/hive/root/574c5c5b-1a07-4c4f-9056-35482b189871/hive_2017-05-26_15-08-38_396_2133309677947979513-1/-mr-10004/1/4
Statistics: Num rows: 2 Data size: 42 Basic stats:
COMPLETE Column stats: NONE
target column name: ds
target work: Map 1
Local Work:
Map Reduce Local Work
Path -> Alias:
hdfs://bdpe41:8020/user/hive/warehouse/srcpart_date [srcpart_date]
Path -> Partition:
hdfs://bdpe41:8020/user/hive/warehouse/srcpart_date
Partition
base file name: srcpart_date
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucket_count -1
column.name.delimiter ,
columns ds,date
columns.comments
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location hdfs://bdpe41:8020/user/hive/warehouse/srcpart_date
name default.srcpart_date
numFiles 12
numRows 2
rawDataSize 42
serialization.ddl struct srcpart_date { string ds, string
date}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 44
transient_lastDdlTime 1495782474
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucket_count -1
column.name.delimiter ,
columns ds,date
columns.comments
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location
hdfs://bdpe41:8020/user/hive/warehouse/srcpart_date
name default.srcpart_date
numFiles 12
numRows 2
rawDataSize 42
serialization.ddl struct srcpart_date { string ds, string
date}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 44
transient_lastDdlTime 1495782474
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart_date
name: default.srcpart_date
Truncated Path -> Alias:
/srcpart_date [srcpart_date]
Stage: Stage-1
Spark
Edges:
Reducer 2 <- Map 6 (GROUP, 1)
Reducer 3 <- Map 7 (GROUP, 1)
DagName: root_20170526150838_2e0b1aeb-104a-4a50-8638-79dbf7bd0d28:3
Vertices:
Map 6
Map Operator Tree:
TableScan
alias: srcpart
Statistics: Num rows: 1 Data size: 23248 Basic stats: PARTIAL
Column stats: NONE
GatherStats: false
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 ds (type: string)
1 ds (type: string)
outputColumnNames: _col3
input vertices:
1 Map 4
Position of Big Table: 0
Statistics: Num rows: 2 Data size: 46 Basic stats: COMPLETE
Column stats: NONE
Select Operator
expressions: _col3 (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 46 Basic stats:
COMPLETE Column stats: NONE
Group By Operator
aggregations: min(_col0), max(_col0),
bloom_filter(_col0, expectedEntries=1000000)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 552 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
null sort order:
sort order:
Statistics: Num rows: 1 Data size: 552 Basic stats:
COMPLETE Column stats: NONE
tag: -1
value expressions: _col0 (type: string), _col1 (type:
string), _col2 (type: binary)
auto parallelism: false
quick start: true
Local Work:
Map Reduce Local Work
Path -> Alias:
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-08/hr=11 [srcpart]
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-08/hr=12 [srcpart]
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-09/hr=11 [srcpart]
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-09/hr=12 [srcpart]
Path -> Partition:
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-08/hr=11
Partition
base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
hr 11
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-08/hr=11
name default.srcpart
numFiles 1
numRows 0
partition_columns ds/hr
partition_columns.types string:string
rawDataSize 0
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
transient_lastDdlTime 1495782438
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location hdfs://bdpe41:8020/user/hive/warehouse/srcpart
name default.srcpart
partition_columns ds/hr
partition_columns.types string:string
serialization.ddl struct srcpart { string key, string
value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
transient_lastDdlTime 1495782437
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-08/hr=12
Partition
base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
hr 12
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-08/hr=12
name default.srcpart
numFiles 1
numRows 0
partition_columns ds/hr
partition_columns.types string:string
rawDataSize 0
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
transient_lastDdlTime 1495782438
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location hdfs://bdpe41:8020/user/hive/warehouse/srcpart
name default.srcpart
partition_columns ds/hr
partition_columns.types string:string
serialization.ddl struct srcpart { string key, string
value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
transient_lastDdlTime 1495782437
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-09/hr=11
Partition
base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-09
hr 11
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-09/hr=11
name default.srcpart
numFiles 1
numRows 0
partition_columns ds/hr
partition_columns.types string:string
rawDataSize 0
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
transient_lastDdlTime 1495782439
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location hdfs://bdpe41:8020/user/hive/warehouse/srcpart
name default.srcpart
partition_columns ds/hr
partition_columns.types string:string
serialization.ddl struct srcpart { string key, string
value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
transient_lastDdlTime 1495782437
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-09/hr=12
Partition
base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-09
hr 12
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-09/hr=12
name default.srcpart
numFiles 1
numRows 0
partition_columns ds/hr
partition_columns.types string:string
rawDataSize 0
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
transient_lastDdlTime 1495782439
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location hdfs://bdpe41:8020/user/hive/warehouse/srcpart
name default.srcpart
partition_columns ds/hr
partition_columns.types string:string
serialization.ddl struct srcpart { string key, string
value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
transient_lastDdlTime 1495782437
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [srcpart]
/srcpart/ds=2008-04-08/hr=12 [srcpart]
/srcpart/ds=2008-04-09/hr=11 [srcpart]
/srcpart/ds=2008-04-09/hr=12 [srcpart]
Map 7
Map Operator Tree:
TableScan
alias: srcpart
Statistics: Num rows: 1 Data size: 23248 Basic stats: PARTIAL
Column stats: NONE
GatherStats: false
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 ds (type: string)
1 ds (type: string)
outputColumnNames: _col3
input vertices:
1 Map 4
Position of Big Table: 0
Statistics: Num rows: 2 Data size: 46 Basic stats: COMPLETE
Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col3 (type: string)
1 hr (type: string)
input vertices:
1 Map 5
Position of Big Table: 0
Statistics: Num rows: 2 Data size: 50 Basic stats:
COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
null sort order:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
tag: -1
value expressions: _col0 (type: bigint)
auto parallelism: false
Local Work:
Map Reduce Local Work
Path -> Alias:
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-08/hr=11 [srcpart]
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-08/hr=12 [srcpart]
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-09/hr=11 [srcpart]
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-09/hr=12 [srcpart]
Path -> Partition:
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-08/hr=11
Partition
base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
hr 11
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-08/hr=11
name default.srcpart
numFiles 1
numRows 0
partition_columns ds/hr
partition_columns.types string:string
rawDataSize 0
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
transient_lastDdlTime 1495782438
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location hdfs://bdpe41:8020/user/hive/warehouse/srcpart
name default.srcpart
partition_columns ds/hr
partition_columns.types string:string
serialization.ddl struct srcpart { string key, string
value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
transient_lastDdlTime 1495782437
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-08/hr=12
Partition
base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
hr 12
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-08/hr=12
name default.srcpart
numFiles 1
numRows 0
partition_columns ds/hr
partition_columns.types string:string
rawDataSize 0
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
transient_lastDdlTime 1495782438
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location hdfs://bdpe41:8020/user/hive/warehouse/srcpart
name default.srcpart
partition_columns ds/hr
partition_columns.types string:string
serialization.ddl struct srcpart { string key, string
value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
transient_lastDdlTime 1495782437
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-09/hr=11
Partition
base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-09
hr 11
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-09/hr=11
name default.srcpart
numFiles 1
numRows 0
partition_columns ds/hr
partition_columns.types string:string
rawDataSize 0
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
transient_lastDdlTime 1495782439
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location hdfs://bdpe41:8020/user/hive/warehouse/srcpart
name default.srcpart
partition_columns ds/hr
partition_columns.types string:string
serialization.ddl struct srcpart { string key, string
value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
transient_lastDdlTime 1495782437
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-09/hr=12
Partition
base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-09
hr 12
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-09/hr=12
name default.srcpart
numFiles 1
numRows 0
partition_columns ds/hr
partition_columns.types string:string
rawDataSize 0
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
transient_lastDdlTime 1495782439
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location hdfs://bdpe41:8020/user/hive/warehouse/srcpart
name default.srcpart
partition_columns ds/hr
partition_columns.types string:string
serialization.ddl struct srcpart { string key, string
value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
transient_lastDdlTime 1495782437
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [srcpart]
/srcpart/ds=2008-04-08/hr=12 [srcpart]
/srcpart/ds=2008-04-09/hr=11 [srcpart]
/srcpart/ds=2008-04-09/hr=12 [srcpart]
Reducer 2
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
aggregations: min(VALUE._col0), max(VALUE._col1),
bloom_filter(VALUE._col2, expectedEntries=1000000)
mode: final
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE
Column stats: NONE
Reduce Output Operator
null sort order:
sort order:
Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE
Column stats: NONE
tag: -1
value expressions: _col0 (type: string), _col1 (type:
string), _col2 (type: binary)
auto parallelism: false
Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
directory:
hdfs://bdpe41:8020/tmp/hive/root/574c5c5b-1a07-4c4f-9056-35482b189871/hive_2017-05-26_15-08-38_396_2133309677947979513-1/-mr-10001/.hive-staging_hive_2017-05-26_15-08-38_396_2133309677947979513-1/-ext-10002
NumFilesPerFileSink: 1
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
Stats Publishing Key Prefix:
hdfs://bdpe41:8020/tmp/hive/root/574c5c5b-1a07-4c4f-9056-35482b189871/hive_2017-05-26_15-08-38_396_2133309677947979513-1/-mr-10001/.hive-staging_hive_2017-05-26_15-08-38_396_2133309677947979513-1/-ext-10002/
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
columns _col0
columns.types bigint
escape.delim \
hive.serialization.extend.additional.nesting.levels true
serialization.escape.crlf true
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
{code}
Map 5 is missing from the explain output, which may be why the exception is thrown.
If map join is disabled, the script succeeds.
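A minimal sketch of the toggle, assuming hive.auto.convert.join is the switch that disables the map-join conversion here:
{code}
-- hypothetical repro step: turn off automatic map-join conversion
set hive.auto.convert.join=false;
{code}
With map join disabled, the explain output is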
{code}
Stage-2 is a root stage [MAPRED]
Stage-3 depends on stages: Stage-2 [MAPRED]
Stage-1 depends on stages: Stage-3 [MAPRED]
Stage-0 depends on stages: Stage-1 [FETCH]
STAGE PLANS:
Stage: Stage-2
Spark
DagName: root_20170527150735_53c5290f-9082-40d5-9e7c-866b1e9bafdd:2
Vertices:
Map 6
Map Operator Tree:
TableScan
alias: srcpart_date
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE
Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: ds is not null (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE
Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 42 Basic stats:
COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 42 Basic stats:
COMPLETE Column stats: NONE
Spark Partition Pruning Sink Operator
partition key expr: ds
tmp Path:
hdfs://bdpe41:8020/tmp/hive/root/063af4a7-c3e0-421d-a12c-592af6e84059/hive_2017-05-27_15-07-35_887_2057692265398433754-1/-mr-10004/1/6
Statistics: Num rows: 2 Data size: 42 Basic stats:
COMPLETE Column stats: NONE
target column name: ds
target work: Map 1
Path -> Alias:
hdfs://bdpe41:8020/user/hive/warehouse/srcpart_date [srcpart_date]
Path -> Partition:
hdfs://bdpe41:8020/user/hive/warehouse/srcpart_date
Partition
base file name: srcpart_date
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucket_count -1
column.name.delimiter ,
columns ds,date
columns.comments
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location hdfs://bdpe41:8020/user/hive/warehouse/srcpart_date
name default.srcpart_date
numFiles 12
numRows 2
rawDataSize 42
serialization.ddl struct srcpart_date { string ds, string
date}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 44
transient_lastDdlTime 1495782474
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucket_count -1
column.name.delimiter ,
columns ds,date
columns.comments
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location
hdfs://bdpe41:8020/user/hive/warehouse/srcpart_date
name default.srcpart_date
numFiles 12
numRows 2
rawDataSize 42
serialization.ddl struct srcpart_date { string ds, string
date}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 44
transient_lastDdlTime 1495782474
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart_date
name: default.srcpart_date
Truncated Path -> Alias:
/srcpart_date [srcpart_date]
Stage: Stage-3
Spark
DagName: root_20170527150735_53c5290f-9082-40d5-9e7c-866b1e9bafdd:3
Vertices:
Map 5
Map Operator Tree:
TableScan
alias: srcpart_hour
Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE
Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: hr is not null (type: boolean)
Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE
Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col3 (type: string)
1 hr (type: string)
Position of Big Table: 0
Select Operator
expressions: hr (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 10 Basic stats:
COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 10 Basic stats:
COMPLETE Column stats: NONE
Spark Partition Pruning Sink Operator
partition key expr: hr
tmp Path:
hdfs://bdpe41:8020/tmp/hive/root/063af4a7-c3e0-421d-a12c-592af6e84059/hive_2017-05-27_15-07-35_887_2057692265398433754-1/-mr-10004/1/5
Statistics: Num rows: 2 Data size: 10 Basic stats:
COMPLETE Column stats: NONE
target column name: hr
target work: Map 1
Local Work:
Map Reduce Local Work
Path -> Alias:
hdfs://bdpe41:8020/user/hive/warehouse/srcpart_hour [srcpart_hour]
Path -> Partition:
hdfs://bdpe41:8020/user/hive/warehouse/srcpart_hour
Partition
base file name: srcpart_hour
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucket_count -1
column.name.delimiter ,
columns hr,hour
columns.comments
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location hdfs://bdpe41:8020/user/hive/warehouse/srcpart_hour
name default.srcpart_hour
numFiles 12
numRows 2
rawDataSize 10
serialization.ddl struct srcpart_hour { string hr, string
hour}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 12
transient_lastDdlTime 1495782477
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucket_count -1
column.name.delimiter ,
columns hr,hour
columns.comments
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location
hdfs://bdpe41:8020/user/hive/warehouse/srcpart_hour
name default.srcpart_hour
numFiles 12
numRows 2
rawDataSize 10
serialization.ddl struct srcpart_hour { string hr, string
hour}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 12
transient_lastDdlTime 1495782477
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart_hour
name: default.srcpart_hour
Truncated Path -> Alias:
/srcpart_hour [srcpart_hour]
Stage: Stage-1
Spark
Edges:
Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 12), Map 4 (PARTITION-LEVEL
SORT, 12)
Reducer 3 <- Reducer 2 (GROUP, 1)
DagName: root_20170527150735_53c5290f-9082-40d5-9e7c-866b1e9bafdd:1
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: srcpart
Statistics: Num rows: 1 Data size: 23248 Basic stats: PARTIAL
Column stats: NONE
GatherStats: false
Reduce Output Operator
key expressions: ds (type: string)
null sort order: a
sort order: +
Map-reduce partition columns: ds (type: string)
Statistics: Num rows: 1 Data size: 23248 Basic stats:
PARTIAL Column stats: NONE
tag: 0
value expressions: hr (type: string)
auto parallelism: false
Path -> Alias:
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-08/hr=11 [srcpart]
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-08/hr=12 [srcpart]
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-09/hr=11 [srcpart]
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-09/hr=12 [srcpart]
Path -> Partition:
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-08/hr=11
Partition
base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
hr 11
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-08/hr=11
name default.srcpart
numFiles 1
numRows 0
partition_columns ds/hr
partition_columns.types string:string
rawDataSize 0
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
transient_lastDdlTime 1495782438
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location hdfs://bdpe41:8020/user/hive/warehouse/srcpart
name default.srcpart
partition_columns ds/hr
partition_columns.types string:string
serialization.ddl struct srcpart { string key, string
value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
transient_lastDdlTime 1495782437
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-08/hr=12
Partition
base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
hr 12
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-08/hr=12
name default.srcpart
numFiles 1
numRows 0
partition_columns ds/hr
partition_columns.types string:string
rawDataSize 0
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
transient_lastDdlTime 1495782438
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location hdfs://bdpe41:8020/user/hive/warehouse/srcpart
name default.srcpart
partition_columns ds/hr
partition_columns.types string:string
serialization.ddl struct srcpart { string key, string
value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
transient_lastDdlTime 1495782437
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-09/hr=11
Partition
base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-09
hr 11
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-09/hr=11
name default.srcpart
numFiles 1
numRows 0
partition_columns ds/hr
partition_columns.types string:string
rawDataSize 0
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
transient_lastDdlTime 1495782439
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location hdfs://bdpe41:8020/user/hive/warehouse/srcpart
name default.srcpart
partition_columns ds/hr
partition_columns.types string:string
serialization.ddl struct srcpart { string key, string
value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
transient_lastDdlTime 1495782437
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-09/hr=12
Partition
base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-09
hr 12
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location
hdfs://bdpe41:8020/user/hive/warehouse/srcpart/ds=2008-04-09/hr=12
name default.srcpart
numFiles 1
numRows 0
partition_columns ds/hr
partition_columns.types string:string
rawDataSize 0
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
transient_lastDdlTime 1495782439
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location hdfs://bdpe41:8020/user/hive/warehouse/srcpart
name default.srcpart
partition_columns ds/hr
partition_columns.types string:string
serialization.ddl struct srcpart { string key, string
value}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
transient_lastDdlTime 1495782437
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [srcpart]
/srcpart/ds=2008-04-08/hr=12 [srcpart]
/srcpart/ds=2008-04-09/hr=11 [srcpart]
/srcpart/ds=2008-04-09/hr=12 [srcpart]
Map 4
Map Operator Tree:
TableScan
alias: srcpart_date
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE
Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: ds is not null (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE
Column stats: NONE
Reduce Output Operator
key expressions: ds (type: string)
null sort order: a
sort order: +
Map-reduce partition columns: ds (type: string)
Statistics: Num rows: 2 Data size: 42 Basic stats:
COMPLETE Column stats: NONE
tag: 1
auto parallelism: false
Path -> Alias:
hdfs://bdpe41:8020/user/hive/warehouse/srcpart_date [srcpart_date]
Path -> Partition:
hdfs://bdpe41:8020/user/hive/warehouse/srcpart_date
Partition
base file name: srcpart_date
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucket_count -1
column.name.delimiter ,
columns ds,date
columns.comments
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location hdfs://bdpe41:8020/user/hive/warehouse/srcpart_date
name default.srcpart_date
numFiles 12
numRows 2
rawDataSize 42
serialization.ddl struct srcpart_date { string ds, string
date}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 44
transient_lastDdlTime 1495782474
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucket_count -1
column.name.delimiter ,
columns ds,date
columns.comments
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
location
hdfs://bdpe41:8020/user/hive/warehouse/srcpart_date
name default.srcpart_date
numFiles 12
numRows 2
rawDataSize 42
serialization.ddl struct srcpart_date { string ds, string
date}
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 44
transient_lastDdlTime 1495782474
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart_date
name: default.srcpart_date
Truncated Path -> Alias:
/srcpart_date [srcpart_date]
Reducer 2
Local Work:
Map Reduce Local Work
Needs Tagging: true
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
0 ds (type: string)
1 ds (type: string)
outputColumnNames: _col3
Statistics: Num rows: 2 Data size: 46 Basic stats: COMPLETE
Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col3 (type: string)
1 hr (type: string)
input vertices:
1 Map 5
Position of Big Table: 0
Statistics: Num rows: 2 Data size: 50 Basic stats: COMPLETE
Column stats: NONE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
Reduce Output Operator
null sort order:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
tag: -1
value expressions: _col0 (type: bigint)
auto parallelism: false
Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
directory:
hdfs://bdpe41:8020/tmp/hive/root/063af4a7-c3e0-421d-a12c-592af6e84059/hive_2017-05-27_15-07-35_887_2057692265398433754-1/-mr-10001/.hive-staging_hive_2017-05-27_15-07-35_887_2057692265398433754-1/-ext-10002
NumFilesPerFileSink: 1
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
Stats Publishing Key Prefix:
hdfs://bdpe41:8020/tmp/hive/root/063af4a7-c3e0-421d-a12c-592af6e84059/hive_2017-05-27_15-07-35_887_2057692265398433754-1/-mr-10001/.hive-staging_hive_2017-05-27_15-07-35_887_2057692265398433754-1/-ext-10002/
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
columns _col0
columns.types bigint
escape.delim \
hive.serialization.extend.additional.nesting.levels true
serialization.escape.crlf true
serialization.format 1
serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
{code}
> Case "multiple sources, single key" in spark_dynamic_pruning.q fails
> ---------------------------------------------------------------------
>
> Key: HIVE-16780
> URL: https://issues.apache.org/jira/browse/HIVE-16780
> Project: Hive
> Issue Type: Bug
> Reporter: liyunzhang_intel
> Assignee: liyunzhang_intel
>
> script.q
> {code}
> set hive.optimize.ppd=true;
> set hive.ppd.remove.duplicatefilters=true;
> set hive.spark.dynamic.partition.pruning=true;
> set hive.optimize.metadataonly=false;
> set hive.optimize.index.filter=true;
> set hive.strict.checks.cartesian.product=false;
> set hive.spark.dynamic.partition.pruning=true;
> -- multiple sources, single key
> select count(*) from srcpart join srcpart_date on (srcpart.ds =
> srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
> {code}
> exception
> {code}
> job failed with java.io.FileNotFoundException: File hdfs://bdpe41:8020/tmp/hive/root/de80d82a-b910-4b87-940c-6be3ea37ba25/hive_2017-05-27_14-55-30_114_8497388836256415979-1/-mr-10004/1/5 does not exist.
> FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.spark.SparkTask. java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: java.io.FileNotFoundException: File hdfs://bdpe41:8020/tmp/hive/root/de80d82a-b910-4b87-940c-6be3ea37ba25/hive_2017-05-27_14-55-30_114_8497388836256415979-1/-mr-10004/1/5 does not exist.
>     at org.apache.hadoop.hive.ql.io.HiveInputFormat.init(HiveInputFormat.java:404)
>     at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getSplits(CombineHiveInputFormat.java:498)
>     at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:200)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:248)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:246)
>     at scala.Option.getOrElse(Option.scala:121)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:246)
>     at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:248)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:246)
>     at scala.Option.getOrElse(Option.scala:121)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:246)
>     at org.apache.spark.ShuffleDependency.<init>(Dependency.scala:91)
>     at org.apache.spark.rdd.ShuffledRDD.getDependencies(ShuffledRDD.scala:91)
>     at org.apache.spark.rdd.RDD$$anonfun$dependencies$2.apply(RDD.scala:235)
>     at org.apache.spark.rdd.RDD$$anonfun$dependencies$2.apply(RDD.scala:233)
>     at scala.Option.getOrElse(Option.scala:121)
>     at org.apache.spark.rdd.RDD.dependencies(RDD.scala:233)
>     at org.apache.hadoop.hive.ql.exec.spark.SparkUtilities.rddToString(SparkUtilities.java:144)
>     at org.apache.hadoop.hive.ql.exec.spark.SparkUtilities.rddToString(SparkUtilities.java:149)
>     at org.apache.hadoop.hive.ql.exec.spark.SparkUtilities.rddToString(SparkUtilities.java:149)
>     at org.apache.hadoop.hive.ql.exec.spark.SparkUtilities.rddToString(SparkUtilities.java:149)
>     at org.apache.hadoop.hive.ql.exec.spark.SparkUtilities.rddGraphToString(SparkUtilities.java:134)
>     at org.apache.hadoop.hive.ql.exec.spark.SparkPlan.generateGraph(SparkPlan.java:93)
>     at org.apache.hadoop.hive.ql.exec.spark.RemoteHiveSparkClient$JobStatusJob.call(RemoteHiveSparkClient.java:349)
>     at org.apache.hive.spark.client.RemoteDriver$JobWrapper.call(RemoteDriver.java:358)
>     at org.apache.hive.spark.client.RemoteDriver$JobWrapper.call(RemoteDriver.java:323)
>     at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>     at java.lang.Thread.run(Thread.java:745)
> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.io.FileNotFoundException: File hdfs://bdpe41:8020/tmp/hive/root/de80d82a-b910-4b87-940c-6be3ea37ba25/hive_2017-05-27_14-55-30_114_8497388836256415979-1/-mr-10004/1/5 does not exist.
>     at org.apache.hadoop.hive.ql.exec.spark.SparkDynamicPartitionPruner.processFiles(SparkDynamicPartitionPruner.java:147)
>     at org.apache.hadoop.hive.ql.exec.spark.SparkDynamicPartitionPruner.prune(SparkDynamicPartitionPruner.java:76)
>     at org.apache.hadoop.hive.ql.io.HiveInputFormat.init(HiveInputFormat.java:402)
>     ... 30 more
> Caused by: java.io.FileNotFoundException: File hdfs://bdpe41:8020/tmp/hive/root/de80d82a-b910-4b87-940c-6be3ea37ba25/hive_2017-05-27_14-55-30_114_8497388836256415979-1/-mr-10004/1/5 does not exist.
>     at org.apache.hadoop.hdfs.DistributedFileSystem.listStatusInternal(DistributedFileSystem.java:795)
>     at org.apache.hadoop.hdfs.DistributedFileSystem.access$700(DistributedFileSystem.java:106)
>     at org.apache.hadoop.hdfs.DistributedFileSystem$18.doCall(DistributedFileSystem.java:853)
>     at org.apache.hadoop.hdfs.DistributedFileSystem$18.doCall(DistributedFileSystem.java:849)
>     at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
>     at org.apache.hadoop.hdfs.DistributedFileSystem.listStatus(DistributedFileSystem.java:860)
>     at org.apache.hadoop.hive.ql.exec.spark.SparkDynamicPartitionPruner.processFiles(SparkDynamicPartitionPruner.java:119)
>     ... 32 more
> {code}