[
https://issues.apache.org/jira/browse/HUDI-6545?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
HBG updated HUDI-6545:
----------------------
Description:
Presto throws an error when reading a MOR table whose log files are in Parquet format:
```log
2023-07-17 10:33:19 (UTC+8) ERROR- Execute presto query failed with
exception:java.sql.SQLException: Query failed (#20230717_023302_05043_kuewt):
Exception when reading log file at
io.prestosql.jdbc.AbstractPrestoResultSet.resultsException(AbstractPrestoResultSet.java:1731)
at
io.prestosql.jdbc.PrestoResultSet$ResultsPageIterator.computeNext(PrestoResultSet.java:216)
at
io.prestosql.jdbc.PrestoResultSet$ResultsPageIterator.computeNext(PrestoResultSet.java:176)
at
io.prestosql.jdbc.$internal.guava.collect.AbstractIterator.tryToComputeNext(AbstractIterator.java:141)
at
io.prestosql.jdbc.$internal.guava.collect.AbstractIterator.hasNext(AbstractIterator.java:136)
at
java.util.Spliterators$IteratorSpliterator.tryAdvance(Spliterators.java:1811)
at
java.util.stream.StreamSpliterators$WrappingSpliterator.lambda$initPartialTraversalState$0(StreamSpliterators.java:295)
at
java.util.stream.StreamSpliterators$AbstractWrappingSpliterator.fillBuffer(StreamSpliterators.java:207)
at
java.util.stream.StreamSpliterators$AbstractWrappingSpliterator.doAdvance(StreamSpliterators.java:162)
at
java.util.stream.StreamSpliterators$WrappingSpliterator.tryAdvance(StreamSpliterators.java:301)
at java.util.Spliterators$1Adapter.hasNext(Spliterators.java:681)
at
io.prestosql.jdbc.PrestoResultSet$AsyncIterator.lambda$new$0(PrestoResultSet.java:122)
at
java.util.concurrent.CompletableFuture$AsyncRun.run(CompletableFuture.java:1640)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by:
io.prestosql.jdbc.$internal.client.FailureInfo$FailureException: Exception when
reading log file at
org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:360)
at
org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:200)
at
org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:110)
at
org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.<init>(HoodieMergedLogRecordScanner.java:103)
at
org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner$Builder.build(HoodieMergedLogRecordScanner.java:325)
at
org.apache.hudi.hadoop.realtime.RealtimeCompactedRecordReader.getMergedLogRecordScanner(RealtimeCompactedRecordReader.java:97)
at
org.apache.hudi.hadoop.realtime.RealtimeCompactedRecordReader.<init>(RealtimeCompactedRecordReader.java:66)
at
org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader.constructRecordReader(HoodieRealtimeRecordReader.java:70)
at
org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader.<init>(HoodieRealtimeRecordReader.java:47)
at
org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat.getRecordReader(HoodieParquetRealtimeInputFormat.java:81)
at
io.trino.plugin.hudi.HudiRecordCursors.createRecordReader(HudiRecordCursors.java:109)
at
io.trino.plugin.hudi.HudiRecordCursors.lambda$createRealtimeRecordCursor$0(HudiRecordCursors.java:76)
at
io.trino.plugin.hive.authentication.UserGroupInformationUtils.lambda$executeActionInDoAs$0(UserGroupInformationUtils.java:29)
at
java.base/java.security.AccessController.doPrivileged(AccessController.java:399)
at java.base/javax.security.auth.Subject.doAs(Subject.java:376) at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1816)
at
io.trino.plugin.hive.authentication.UserGroupInformationUtils.executeActionInDoAs(UserGroupInformationUtils.java:27)
at
io.trino.plugin.hive.authentication.ImpersonatingHdfsAuthentication.doAs(ImpersonatingHdfsAuthentication.java:42)
at io.trino.plugin.hive.HdfsEnvironment.doAs(HdfsEnvironment.java:114) at
io.trino.plugin.hudi.HudiRecordCursors.createRealtimeRecordCursor(HudiRecordCursors.java:75)
at
io.trino.plugin.hudi.HudiPageSourceProvider.createPageSource(HudiPageSourceProvider.java:195)
at
io.trino.plugin.base.classloader.ClassLoaderSafeConnectorPageSourceProvider.createPageSource(ClassLoaderSafeConnectorPageSourceProvider.java:49)
at
io.trino.split.PageSourceManager.createPageSource(PageSourceManager.java:62) at
io.trino.operator.TableScanOperator.getOutput(TableScanOperator.java:308) at
io.trino.operator.Driver.processInternal(Driver.java:410) at
io.trino.operator.Driver.lambda$process$10(Driver.java:313) at
io.trino.operator.Driver.tryWithLock(Driver.java:698) at
io.trino.operator.Driver.process(Driver.java:305) at
io.trino.operator.Driver.processForDuration(Driver.java:276) at
io.trino.execution.SqlTaskExecution$DriverSplitRunner.processFor(SqlTaskExecution.java:785)
at
io.trino.execution.executor.PrioritizedSplitRunner.process(PrioritizedSplitRunner.java:164)
at
io.trino.execution.executor.TaskExecutor$TaskRunner.run(TaskExecutor.java:492)
at
io.trino.$gen.Trino_trino389_sql_027_dirty____20230716_133816_2.run(Unknown
Source) at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
at java.base/java.lang.Thread.run(Thread.java:833)
Caused by:
io.prestosql.jdbc.$internal.client.FailureInfo$FailureException: Not
implemented by the FileSystemWrapper FileSystem implementation at
org.apache.hadoop.fs.FileSystem.getScheme(FileSystem.java:300) at
org.apache.hudi.common.table.log.block.HoodieParquetDataBlock.readRecordsFromBlockPayload(HoodieParquetDataBlock.java:150)
at
org.apache.hudi.common.table.log.block.HoodieDataBlock.getRecordIterator(HoodieDataBlock.java:125)
at
org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.getRecordsIterator(AbstractHoodieLogRecordReader.java:528)
at
org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processDataBlock(AbstractHoodieLogRecordReader.java:387)
at
org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processQueuedBlocksForInstant(AbstractHoodieLogRecordReader.java:453)
at
org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:351)
... 35 common frames omitted
2023-07-17 10:33:19 (UTC+8) ERROR- Task Execution
failed with CommonException: Query failed (#20230717_023302_05043_kuewt):
Exception when reading log file
```
The FileSystemWrapper class in Presto does not override the 'getScheme' method, so
calling it falls through to the base 'FileSystem#getScheme()', which throws this error.
Therefore, in PR https://github.com/apache/hudi/pull/9219, 'FileSystem#getScheme()' is
replaced by 'URI#getScheme()'.
> fix presto read parquet format log file issue
> ---------------------------------------------
>
> Key: HUDI-6545
> URL: https://issues.apache.org/jira/browse/HUDI-6545
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: HBG
> Priority: Major
> Labels: pull-request-available
>
> Presto throws an error when reading a MOR table whose log files are in Parquet format:
> ```log
> 2023-07-17 10:33:19 (UTC+8) ERROR- Execute presto query failed with
> exception:java.sql.SQLException: Query failed (#20230717_023302_05043_kuewt):
> Exception when reading log file at
> io.prestosql.jdbc.AbstractPrestoResultSet.resultsException(AbstractPrestoResultSet.java:1731)
> at
> io.prestosql.jdbc.PrestoResultSet$ResultsPageIterator.computeNext(PrestoResultSet.java:216)
> at
> io.prestosql.jdbc.PrestoResultSet$ResultsPageIterator.computeNext(PrestoResultSet.java:176)
> at
> io.prestosql.jdbc.$internal.guava.collect.AbstractIterator.tryToComputeNext(AbstractIterator.java:141)
> at
> io.prestosql.jdbc.$internal.guava.collect.AbstractIterator.hasNext(AbstractIterator.java:136)
> at
> java.util.Spliterators$IteratorSpliterator.tryAdvance(Spliterators.java:1811)
> at
> java.util.stream.StreamSpliterators$WrappingSpliterator.lambda$initPartialTraversalState$0(StreamSpliterators.java:295)
> at
> java.util.stream.StreamSpliterators$AbstractWrappingSpliterator.fillBuffer(StreamSpliterators.java:207)
> at
> java.util.stream.StreamSpliterators$AbstractWrappingSpliterator.doAdvance(StreamSpliterators.java:162)
> at
> java.util.stream.StreamSpliterators$WrappingSpliterator.tryAdvance(StreamSpliterators.java:301)
> at java.util.Spliterators$1Adapter.hasNext(Spliterators.java:681)
> at
> io.prestosql.jdbc.PrestoResultSet$AsyncIterator.lambda$new$0(PrestoResultSet.java:122)
> at
> java.util.concurrent.CompletableFuture$AsyncRun.run(CompletableFuture.java:1640)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> Caused by:
> io.prestosql.jdbc.$internal.client.FailureInfo$FailureException: Exception
> when reading log file at
> org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:360)
> at
> org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:200)
> at
> org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:110)
> at
> org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.<init>(HoodieMergedLogRecordScanner.java:103)
> at
> org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner$Builder.build(HoodieMergedLogRecordScanner.java:325)
> at
> org.apache.hudi.hadoop.realtime.RealtimeCompactedRecordReader.getMergedLogRecordScanner(RealtimeCompactedRecordReader.java:97)
> at
> org.apache.hudi.hadoop.realtime.RealtimeCompactedRecordReader.<init>(RealtimeCompactedRecordReader.java:66)
> at
> org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader.constructRecordReader(HoodieRealtimeRecordReader.java:70)
> at
> org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader.<init>(HoodieRealtimeRecordReader.java:47)
> at
> org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat.getRecordReader(HoodieParquetRealtimeInputFormat.java:81)
> at
> io.trino.plugin.hudi.HudiRecordCursors.createRecordReader(HudiRecordCursors.java:109)
> at
> io.trino.plugin.hudi.HudiRecordCursors.lambda$createRealtimeRecordCursor$0(HudiRecordCursors.java:76)
> at
> io.trino.plugin.hive.authentication.UserGroupInformationUtils.lambda$executeActionInDoAs$0(UserGroupInformationUtils.java:29)
> at
> java.base/java.security.AccessController.doPrivileged(AccessController.java:399)
> at java.base/javax.security.auth.Subject.doAs(Subject.java:376) at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1816)
> at
> io.trino.plugin.hive.authentication.UserGroupInformationUtils.executeActionInDoAs(UserGroupInformationUtils.java:27)
> at
> io.trino.plugin.hive.authentication.ImpersonatingHdfsAuthentication.doAs(ImpersonatingHdfsAuthentication.java:42)
> at io.trino.plugin.hive.HdfsEnvironment.doAs(HdfsEnvironment.java:114)
> at
> io.trino.plugin.hudi.HudiRecordCursors.createRealtimeRecordCursor(HudiRecordCursors.java:75)
> at
> io.trino.plugin.hudi.HudiPageSourceProvider.createPageSource(HudiPageSourceProvider.java:195)
> at
> io.trino.plugin.base.classloader.ClassLoaderSafeConnectorPageSourceProvider.createPageSource(ClassLoaderSafeConnectorPageSourceProvider.java:49)
> at
> io.trino.split.PageSourceManager.createPageSource(PageSourceManager.java:62)
> at io.trino.operator.TableScanOperator.getOutput(TableScanOperator.java:308)
> at io.trino.operator.Driver.processInternal(Driver.java:410) at
> io.trino.operator.Driver.lambda$process$10(Driver.java:313) at
> io.trino.operator.Driver.tryWithLock(Driver.java:698) at
> io.trino.operator.Driver.process(Driver.java:305) at
> io.trino.operator.Driver.processForDuration(Driver.java:276) at
> io.trino.execution.SqlTaskExecution$DriverSplitRunner.processFor(SqlTaskExecution.java:785)
> at
> io.trino.execution.executor.PrioritizedSplitRunner.process(PrioritizedSplitRunner.java:164)
> at
> io.trino.execution.executor.TaskExecutor$TaskRunner.run(TaskExecutor.java:492)
> at
> io.trino.$gen.Trino_trino389_sql_027_dirty____20230716_133816_2.run(Unknown
> Source) at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
> at java.base/java.lang.Thread.run(Thread.java:833)
> Caused by:
> io.prestosql.jdbc.$internal.client.FailureInfo$FailureException: Not
> implemented by the FileSystemWrapper FileSystem implementation at
> org.apache.hadoop.fs.FileSystem.getScheme(FileSystem.java:300) at
> org.apache.hudi.common.table.log.block.HoodieParquetDataBlock.readRecordsFromBlockPayload(HoodieParquetDataBlock.java:150)
> at
> org.apache.hudi.common.table.log.block.HoodieDataBlock.getRecordIterator(HoodieDataBlock.java:125)
> at
> org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.getRecordsIterator(AbstractHoodieLogRecordReader.java:528)
> at
> org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processDataBlock(AbstractHoodieLogRecordReader.java:387)
> at
> org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processQueuedBlocksForInstant(AbstractHoodieLogRecordReader.java:453)
> at
> org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:351)
> ... 35 common frames omitted
> 2023-07-17 10:33:19 (UTC+8) ERROR- Task
> Execution failed with CommonException: Query failed
> (#20230717_023302_05043_kuewt): Exception when reading log file
> ```
> The FileSystemWrapper class in Presto does not override the 'getScheme' method, so
> calling it falls through to the base 'FileSystem#getScheme()', which throws this error.
> Therefore, in PR https://github.com/apache/hudi/pull/9219, 'FileSystem#getScheme()'
> is replaced by 'URI#getScheme()'.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)