[
https://issues.apache.org/jira/browse/HUDI-5526?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Brandon Scheller updated HUDI-5526:
-----------------------------------
Description:
Hive "count" queries fail on hudi bootstrap tables when they are using Hive3.
This has been tested on all EMR-6.x releases and fails with the same error. The
same query works with Hive2.
For example with the query:
{code:java}
SELECT COUNT(*) FROM HUDI_BOOTSTRAP_TABLE;{code}
Gives the following error:
{code:java}
TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) :
attempt_1672881902089_0008_1_00_000000_1:java.lang.RuntimeException:
java.lang.RuntimeException: java.io.IOException: java.lang.RuntimeException:
java.io.IOException: cannot find dir =
[s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet]
in pathToPartitionInfo: [
[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one]
,
[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]
]
at
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:296)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250)
at
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374)
at
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
at
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
at
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
at
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
Caused by: java.lang.RuntimeException: java.io.IOException:
java.lang.RuntimeException: java.io.IOException: cannot find dir =
[s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet]
in pathToPartitionInfo:
[[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one],
[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]]
at
org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:206)
at
org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.<init>(TezGroupedSplitsInputFormat.java:145)
at
org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.getRecordReader(TezGroupedSplitsInputFormat.java:111)
at
org.apache.tez.mapreduce.lib.MRReaderMapred.setupOldRecordReader(MRReaderMapred.java:157)
at org.apache.tez.mapreduce.lib.MRReaderMapred.setSplit(MRReaderMapred.java:83)
at
org.apache.tez.mapreduce.input.MRInput.initFromEventInternal(MRInput.java:703)
at org.apache.tez.mapreduce.input.MRInput.initFromEvent(MRInput.java:662)
at
org.apache.tez.mapreduce.input.MRInputLegacy.checkAndAwaitRecordReaderInitialization(MRInputLegacy.java:150)
at org.apache.tez.mapreduce.input.MRInputLegacy.init(MRInputLegacy.java:114)
at
org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getMRInput(MapRecordProcessor.java:525)
at
org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:171)
at
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:266)
... 14 more
Caused by: java.io.IOException: java.lang.RuntimeException:
java.io.IOException: cannot find dir =
[s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet]
in pathToPartitionInfo:
[[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one],
[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]]
at
org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97)
at
org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57)
at
org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:421)
at
org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:203)
... 25 more
Caused by: java.lang.RuntimeException: java.io.IOException: cannot find dir =
[s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet]
in pathToPartitionInfo:
[[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one],
[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]]
at
org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.<init>(VectorizedParquetRecordReader.java:156)
at
org.apache.hadoop.hive.ql.io.parquet.VectorizedParquetInputFormat.getRecordReader(VectorizedParquetInputFormat.java:50)
at
org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat.getRecordReader(MapredParquetInputFormat.java:87)
at
org.apache.hudi.hadoop.HoodieParquetInputFormat.getRecordReader(HoodieParquetInputFormat.java:203)
at
org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:418)
... 26 more
Caused by: java.io.IOException: cannot find dir =
[s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet]
in pathToPartitionInfo:
[[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one],
[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]]
at
org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getFromPathRecursively(HiveFileFormatUtils.java:402)
at
org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getFromPathRecursively(HiveFileFormatUtils.java:371)
at
org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getFromPathRecursively(HiveFileFormatUtils.java:366)
at
org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx.getPartitionValues(VectorizedRowBatchCtx.java:272)
at
org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx.getPartitionValues(VectorizedRowBatchCtx.java:263)
at
org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.initPartitionValues(VectorizedParquetRecordReader.java:164)
at
org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.<init>(VectorizedParquetRecordReader.java:153)
... 30 more
{code}
was:
Hive "count" queries fail on hudi bootstrap tables when they are using Hive3.
This has been tested on all EMR-6.x releases and fails with the same error. The
same query works with Hive2.
For example with the query:
{code:java}
SELECT COUNT(*) FROM HUDI_BOOTSTRAP_TABLE;{code}
Gives the following error:
{code:java}
TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) :
attempt_1672881902089_0008_1_00_000000_1:java.lang.RuntimeException:
java.lang.RuntimeException: java.io.IOException: java.lang.RuntimeException:
java.io.IOException: cannot find dir =
[s3://yxchang-emr-dev/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet]
in pathToPartitionInfo: [
[s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one]
,
[s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]
]
at
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:296)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250)
at
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374)
at
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
at
org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
at
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
at
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
Caused by: java.lang.RuntimeException: java.io.IOException:
java.lang.RuntimeException: java.io.IOException: cannot find dir =
[s3://yxchang-emr-dev/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet]
in pathToPartitionInfo:
[[s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one],
[s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]]
at
org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:206)
at
org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.<init>(TezGroupedSplitsInputFormat.java:145)
at
org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.getRecordReader(TezGroupedSplitsInputFormat.java:111)
at
org.apache.tez.mapreduce.lib.MRReaderMapred.setupOldRecordReader(MRReaderMapred.java:157)
at org.apache.tez.mapreduce.lib.MRReaderMapred.setSplit(MRReaderMapred.java:83)
at
org.apache.tez.mapreduce.input.MRInput.initFromEventInternal(MRInput.java:703)
at org.apache.tez.mapreduce.input.MRInput.initFromEvent(MRInput.java:662)
at
org.apache.tez.mapreduce.input.MRInputLegacy.checkAndAwaitRecordReaderInitialization(MRInputLegacy.java:150)
at org.apache.tez.mapreduce.input.MRInputLegacy.init(MRInputLegacy.java:114)
at
org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getMRInput(MapRecordProcessor.java:525)
at
org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:171)
at
org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:266)
... 14 more
Caused by: java.io.IOException: java.lang.RuntimeException:
java.io.IOException: cannot find dir =
[s3://yxchang-emr-dev/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet]
in pathToPartitionInfo:
[[s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one],
[s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]]
at
org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97)
at
org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57)
at
org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:421)
at
org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:203)
... 25 more
Caused by: java.lang.RuntimeException: java.io.IOException: cannot find dir =
[s3://yxchang-emr-dev/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet]
in pathToPartitionInfo:
[[s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one],
[s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]]
at
org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.<init>(VectorizedParquetRecordReader.java:156)
at
org.apache.hadoop.hive.ql.io.parquet.VectorizedParquetInputFormat.getRecordReader(VectorizedParquetInputFormat.java:50)
at
org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat.getRecordReader(MapredParquetInputFormat.java:87)
at
org.apache.hudi.hadoop.HoodieParquetInputFormat.getRecordReader(HoodieParquetInputFormat.java:203)
at
org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:418)
... 26 more
Caused by: java.io.IOException: cannot find dir =
[s3://yxchang-emr-dev/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet]
in pathToPartitionInfo:
[[s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one],
[s3://bschelle-emr-dev2/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]]
at
org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getFromPathRecursively(HiveFileFormatUtils.java:402)
at
org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getFromPathRecursively(HiveFileFormatUtils.java:371)
at
org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getFromPathRecursively(HiveFileFormatUtils.java:366)
at
org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx.getPartitionValues(VectorizedRowBatchCtx.java:272)
at
org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx.getPartitionValues(VectorizedRowBatchCtx.java:263)
at
org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.initPartitionValues(VectorizedParquetRecordReader.java:164)
at
org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.<init>(VectorizedParquetRecordReader.java:153)
... 30 more
{code}
> Hive "Count" queries don't work with bootstrap tables w/Hive3
> -------------------------------------------------------------
>
> Key: HUDI-5526
> URL: https://issues.apache.org/jira/browse/HUDI-5526
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: Brandon Scheller
> Priority: Major
>
> Hive "count" queries fail on hudi bootstrap tables when they are using Hive3.
> This has been tested on all EMR-6.x releases and fails with the same error.
> The same query works with Hive2.
> For example with the query:
> {code:java}
> SELECT COUNT(*) FROM HUDI_BOOTSTRAP_TABLE;{code}
> Gives the following error:
> {code:java}
> TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) :
> attempt_1672881902089_0008_1_00_000000_1:java.lang.RuntimeException:
> java.lang.RuntimeException: java.io.IOException: java.lang.RuntimeException:
> java.io.IOException: cannot find dir =
> [s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet]
> in pathToPartitionInfo: [
> [s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one]
> ,
> [s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]
> ]
> at
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:296)
> at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250)
> at
> org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374)
> at
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
> at
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
> at
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
> at
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
> at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:750)
> Caused by: java.lang.RuntimeException: java.io.IOException:
> java.lang.RuntimeException: java.io.IOException: cannot find dir =
> [s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet]
> in pathToPartitionInfo:
> [[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one],
> [s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]]
> at
> org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:206)
> at
> org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.<init>(TezGroupedSplitsInputFormat.java:145)
> at
> org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.getRecordReader(TezGroupedSplitsInputFormat.java:111)
> at
> org.apache.tez.mapreduce.lib.MRReaderMapred.setupOldRecordReader(MRReaderMapred.java:157)
> at
> org.apache.tez.mapreduce.lib.MRReaderMapred.setSplit(MRReaderMapred.java:83)
> at
> org.apache.tez.mapreduce.input.MRInput.initFromEventInternal(MRInput.java:703)
> at org.apache.tez.mapreduce.input.MRInput.initFromEvent(MRInput.java:662)
> at
> org.apache.tez.mapreduce.input.MRInputLegacy.checkAndAwaitRecordReaderInitialization(MRInputLegacy.java:150)
> at org.apache.tez.mapreduce.input.MRInputLegacy.init(MRInputLegacy.java:114)
> at
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getMRInput(MapRecordProcessor.java:525)
> at
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:171)
> at
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:266)
> ... 14 more
> Caused by: java.io.IOException: java.lang.RuntimeException:
> java.io.IOException: cannot find dir =
> [s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet]
> in pathToPartitionInfo:
> [[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one],
> [s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]]
> at
> org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97)
> at
> org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57)
> at
> org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:421)
> at
> org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:203)
> ... 25 more
> Caused by: java.lang.RuntimeException: java.io.IOException: cannot find dir =
> [s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet]
> in pathToPartitionInfo:
> [[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one],
> [s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]]
> at
> org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.<init>(VectorizedParquetRecordReader.java:156)
> at
> org.apache.hadoop.hive.ql.io.parquet.VectorizedParquetInputFormat.getRecordReader(VectorizedParquetInputFormat.java:50)
> at
> org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat.getRecordReader(MapredParquetInputFormat.java:87)
> at
> org.apache.hudi.hadoop.HoodieParquetInputFormat.getRecordReader(HoodieParquetInputFormat.java:203)
> at
> org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:418)
> ... 26 more
> Caused by: java.io.IOException: cannot find dir =
> [s3://my-bucket/test-data/hudi/parquet-source-tables/hive_style_partitioned_tb/event_type=two/part-00000-98fb0380-374c-40f5-8a57-89d95270a2c3-c000.parquet]
> in pathToPartitionInfo:
> [[s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=one],
> [s3://my-bucket/hudi-table/test_bootstrap_hive_partitionedrt/event_type=two]]
> at
> org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getFromPathRecursively(HiveFileFormatUtils.java:402)
> at
> org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getFromPathRecursively(HiveFileFormatUtils.java:371)
> at
> org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getFromPathRecursively(HiveFileFormatUtils.java:366)
> at
> org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx.getPartitionValues(VectorizedRowBatchCtx.java:272)
> at
> org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx.getPartitionValues(VectorizedRowBatchCtx.java:263)
> at
> org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.initPartitionValues(VectorizedParquetRecordReader.java:164)
> at
> org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.<init>(VectorizedParquetRecordReader.java:153)
> ... 30 more
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)