[ 
https://issues.apache.org/jira/browse/HUDI-6545?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

HBG updated HUDI-6545:
----------------------
    Description: 
Presto throws an error when reading a MOR table whose log files are in parquet format:

```log
2023-07-17 10:33:19 (UTC+8) ERROR- Execute presto query failed with 
exception:java.sql.SQLException: Query failed (#20230717_023302_05043_kuewt): 
Exception when reading log file       at 
io.prestosql.jdbc.AbstractPrestoResultSet.resultsException(AbstractPrestoResultSet.java:1731)
        at 
io.prestosql.jdbc.PrestoResultSet$ResultsPageIterator.computeNext(PrestoResultSet.java:216)
  at 
io.prestosql.jdbc.PrestoResultSet$ResultsPageIterator.computeNext(PrestoResultSet.java:176)
  at 
io.prestosql.jdbc.$internal.guava.collect.AbstractIterator.tryToComputeNext(AbstractIterator.java:141)
       at 
io.prestosql.jdbc.$internal.guava.collect.AbstractIterator.hasNext(AbstractIterator.java:136)
        at 
java.util.Spliterators$IteratorSpliterator.tryAdvance(Spliterators.java:1811)   
     at 
java.util.stream.StreamSpliterators$WrappingSpliterator.lambda$initPartialTraversalState$0(StreamSpliterators.java:295)
      at 
java.util.stream.StreamSpliterators$AbstractWrappingSpliterator.fillBuffer(StreamSpliterators.java:207)
      at 
java.util.stream.StreamSpliterators$AbstractWrappingSpliterator.doAdvance(StreamSpliterators.java:162)
       at 
java.util.stream.StreamSpliterators$WrappingSpliterator.tryAdvance(StreamSpliterators.java:301)
      at java.util.Spliterators$1Adapter.hasNext(Spliterators.java:681)       
at 
io.prestosql.jdbc.PrestoResultSet$AsyncIterator.lambda$new$0(PrestoResultSet.java:122)
       at 
java.util.concurrent.CompletableFuture$AsyncRun.run(CompletableFuture.java:1640)
     at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 
     at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 
     at java.lang.Thread.run(Thread.java:748)Caused by: 
io.prestosql.jdbc.$internal.client.FailureInfo$FailureException: Exception when 
reading log file     at 
org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:360)
  at 
org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:200)
  at 
org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:110)
     at 
org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.<init>(HoodieMergedLogRecordScanner.java:103)
  at 
org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner$Builder.build(HoodieMergedLogRecordScanner.java:325)
   at 
org.apache.hudi.hadoop.realtime.RealtimeCompactedRecordReader.getMergedLogRecordScanner(RealtimeCompactedRecordReader.java:97)
       at 
org.apache.hudi.hadoop.realtime.RealtimeCompactedRecordReader.<init>(RealtimeCompactedRecordReader.java:66)
  at 
org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader.constructRecordReader(HoodieRealtimeRecordReader.java:70)
 at 
org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader.<init>(HoodieRealtimeRecordReader.java:47)
        at 
org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat.getRecordReader(HoodieParquetRealtimeInputFormat.java:81)
   at 
io.trino.plugin.hudi.HudiRecordCursors.createRecordReader(HudiRecordCursors.java:109)
        at 
io.trino.plugin.hudi.HudiRecordCursors.lambda$createRealtimeRecordCursor$0(HudiRecordCursors.java:76)
        at 
io.trino.plugin.hive.authentication.UserGroupInformationUtils.lambda$executeActionInDoAs$0(UserGroupInformationUtils.java:29)
        at 
java.base/java.security.AccessController.doPrivileged(AccessController.java:399)
     at java.base/javax.security.auth.Subject.doAs(Subject.java:376) at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1816)
 at 
io.trino.plugin.hive.authentication.UserGroupInformationUtils.executeActionInDoAs(UserGroupInformationUtils.java:27)
 at 
io.trino.plugin.hive.authentication.ImpersonatingHdfsAuthentication.doAs(ImpersonatingHdfsAuthentication.java:42)
    at io.trino.plugin.hive.HdfsEnvironment.doAs(HdfsEnvironment.java:114)  at 
io.trino.plugin.hudi.HudiRecordCursors.createRealtimeRecordCursor(HudiRecordCursors.java:75)
 at 
io.trino.plugin.hudi.HudiPageSourceProvider.createPageSource(HudiPageSourceProvider.java:195)
        at 
io.trino.plugin.base.classloader.ClassLoaderSafeConnectorPageSourceProvider.createPageSource(ClassLoaderSafeConnectorPageSourceProvider.java:49)
     at 
io.trino.split.PageSourceManager.createPageSource(PageSourceManager.java:62) at 
io.trino.operator.TableScanOperator.getOutput(TableScanOperator.java:308)    at 
io.trino.operator.Driver.processInternal(Driver.java:410)    at 
io.trino.operator.Driver.lambda$process$10(Driver.java:313)  at 
io.trino.operator.Driver.tryWithLock(Driver.java:698)        at 
io.trino.operator.Driver.process(Driver.java:305)    at 
io.trino.operator.Driver.processForDuration(Driver.java:276) at 
io.trino.execution.SqlTaskExecution$DriverSplitRunner.processFor(SqlTaskExecution.java:785)
  at 
io.trino.execution.executor.PrioritizedSplitRunner.process(PrioritizedSplitRunner.java:164)
  at 
io.trino.execution.executor.TaskExecutor$TaskRunner.run(TaskExecutor.java:492)  
     at 
io.trino.$gen.Trino_trino389_sql_027_dirty____20230716_133816_2.run(Unknown 
Source)  at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
    at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
    at java.base/java.lang.Thread.run(Thread.java:833)Caused by: 
io.prestosql.jdbc.$internal.client.FailureInfo$FailureException: Not 
implemented by the FileSystemWrapper FileSystem implementation        at 
org.apache.hadoop.fs.FileSystem.getScheme(FileSystem.java:300)       at 
org.apache.hudi.common.table.log.block.HoodieParquetDataBlock.readRecordsFromBlockPayload(HoodieParquetDataBlock.java:150)
   at 
org.apache.hudi.common.table.log.block.HoodieDataBlock.getRecordIterator(HoodieDataBlock.java:125)
   at 
org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.getRecordsIterator(AbstractHoodieLogRecordReader.java:528)
    at 
org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processDataBlock(AbstractHoodieLogRecordReader.java:387)
      at 
org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processQueuedBlocksForInstant(AbstractHoodieLogRecordReader.java:453)
 at 
org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:351)
  ... 35 common frames omitted2023-07-17 10:33:19 (UTC+8) ERROR- Task Execution 
failed with CommonException: Query failed (#20230717_023302_05043_kuewt): 
Exception when reading log file 
```

The FileSystemWrapper class in Presto does not override the method 'getScheme', 
so calling this method throws this error.

So in PR https://github.com/apache/hudi/pull/9219, 'FileSystem#getScheme()' is 
replaced by 'URI#getScheme()'.

> fix presto read parquet format log file issue
> ---------------------------------------------
>
>                 Key: HUDI-6545
>                 URL: https://issues.apache.org/jira/browse/HUDI-6545
>             Project: Apache Hudi
>          Issue Type: Bug
>            Reporter: HBG
>            Priority: Major
>              Labels: pull-request-available
>
> Presto throws an error when reading a MOR table whose log files are in parquet format:
> ```log
> 2023-07-17 10:33:19 (UTC+8) ERROR- Execute presto query failed with 
> exception:java.sql.SQLException: Query failed (#20230717_023302_05043_kuewt): 
> Exception when reading log file     at 
> io.prestosql.jdbc.AbstractPrestoResultSet.resultsException(AbstractPrestoResultSet.java:1731)
>         at 
> io.prestosql.jdbc.PrestoResultSet$ResultsPageIterator.computeNext(PrestoResultSet.java:216)
>   at 
> io.prestosql.jdbc.PrestoResultSet$ResultsPageIterator.computeNext(PrestoResultSet.java:176)
>   at 
> io.prestosql.jdbc.$internal.guava.collect.AbstractIterator.tryToComputeNext(AbstractIterator.java:141)
>        at 
> io.prestosql.jdbc.$internal.guava.collect.AbstractIterator.hasNext(AbstractIterator.java:136)
>         at 
> java.util.Spliterators$IteratorSpliterator.tryAdvance(Spliterators.java:1811) 
>        at 
> java.util.stream.StreamSpliterators$WrappingSpliterator.lambda$initPartialTraversalState$0(StreamSpliterators.java:295)
>       at 
> java.util.stream.StreamSpliterators$AbstractWrappingSpliterator.fillBuffer(StreamSpliterators.java:207)
>       at 
> java.util.stream.StreamSpliterators$AbstractWrappingSpliterator.doAdvance(StreamSpliterators.java:162)
>        at 
> java.util.stream.StreamSpliterators$WrappingSpliterator.tryAdvance(StreamSpliterators.java:301)
>       at java.util.Spliterators$1Adapter.hasNext(Spliterators.java:681)       
> at 
> io.prestosql.jdbc.PrestoResultSet$AsyncIterator.lambda$new$0(PrestoResultSet.java:122)
>        at 
> java.util.concurrent.CompletableFuture$AsyncRun.run(CompletableFuture.java:1640)
>      at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>       at java.lang.Thread.run(Thread.java:748)Caused by: 
> io.prestosql.jdbc.$internal.client.FailureInfo$FailureException: Exception 
> when reading log file     at 
> org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:360)
>   at 
> org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:200)
>   at 
> org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:110)
>      at 
> org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.<init>(HoodieMergedLogRecordScanner.java:103)
>   at 
> org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner$Builder.build(HoodieMergedLogRecordScanner.java:325)
>    at 
> org.apache.hudi.hadoop.realtime.RealtimeCompactedRecordReader.getMergedLogRecordScanner(RealtimeCompactedRecordReader.java:97)
>        at 
> org.apache.hudi.hadoop.realtime.RealtimeCompactedRecordReader.<init>(RealtimeCompactedRecordReader.java:66)
>   at 
> org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader.constructRecordReader(HoodieRealtimeRecordReader.java:70)
>  at 
> org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader.<init>(HoodieRealtimeRecordReader.java:47)
>         at 
> org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat.getRecordReader(HoodieParquetRealtimeInputFormat.java:81)
>    at 
> io.trino.plugin.hudi.HudiRecordCursors.createRecordReader(HudiRecordCursors.java:109)
>         at 
> io.trino.plugin.hudi.HudiRecordCursors.lambda$createRealtimeRecordCursor$0(HudiRecordCursors.java:76)
>         at 
> io.trino.plugin.hive.authentication.UserGroupInformationUtils.lambda$executeActionInDoAs$0(UserGroupInformationUtils.java:29)
>         at 
> java.base/java.security.AccessController.doPrivileged(AccessController.java:399)
>      at java.base/javax.security.auth.Subject.doAs(Subject.java:376) at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1816)
>  at 
> io.trino.plugin.hive.authentication.UserGroupInformationUtils.executeActionInDoAs(UserGroupInformationUtils.java:27)
>  at 
> io.trino.plugin.hive.authentication.ImpersonatingHdfsAuthentication.doAs(ImpersonatingHdfsAuthentication.java:42)
>     at io.trino.plugin.hive.HdfsEnvironment.doAs(HdfsEnvironment.java:114)  
> at 
> io.trino.plugin.hudi.HudiRecordCursors.createRealtimeRecordCursor(HudiRecordCursors.java:75)
>  at 
> io.trino.plugin.hudi.HudiPageSourceProvider.createPageSource(HudiPageSourceProvider.java:195)
>         at 
> io.trino.plugin.base.classloader.ClassLoaderSafeConnectorPageSourceProvider.createPageSource(ClassLoaderSafeConnectorPageSourceProvider.java:49)
>      at 
> io.trino.split.PageSourceManager.createPageSource(PageSourceManager.java:62) 
> at io.trino.operator.TableScanOperator.getOutput(TableScanOperator.java:308)  
>   at io.trino.operator.Driver.processInternal(Driver.java:410)    at 
> io.trino.operator.Driver.lambda$process$10(Driver.java:313)  at 
> io.trino.operator.Driver.tryWithLock(Driver.java:698)        at 
> io.trino.operator.Driver.process(Driver.java:305)    at 
> io.trino.operator.Driver.processForDuration(Driver.java:276) at 
> io.trino.execution.SqlTaskExecution$DriverSplitRunner.processFor(SqlTaskExecution.java:785)
>   at 
> io.trino.execution.executor.PrioritizedSplitRunner.process(PrioritizedSplitRunner.java:164)
>   at 
> io.trino.execution.executor.TaskExecutor$TaskRunner.run(TaskExecutor.java:492)
>        at 
> io.trino.$gen.Trino_trino389_sql_027_dirty____20230716_133816_2.run(Unknown 
> Source)  at 
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
>     at 
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
>     at java.base/java.lang.Thread.run(Thread.java:833)Caused by: 
> io.prestosql.jdbc.$internal.client.FailureInfo$FailureException: Not 
> implemented by the FileSystemWrapper FileSystem implementation        at 
> org.apache.hadoop.fs.FileSystem.getScheme(FileSystem.java:300)       at 
> org.apache.hudi.common.table.log.block.HoodieParquetDataBlock.readRecordsFromBlockPayload(HoodieParquetDataBlock.java:150)
>    at 
> org.apache.hudi.common.table.log.block.HoodieDataBlock.getRecordIterator(HoodieDataBlock.java:125)
>    at 
> org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.getRecordsIterator(AbstractHoodieLogRecordReader.java:528)
>     at 
> org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processDataBlock(AbstractHoodieLogRecordReader.java:387)
>       at 
> org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processQueuedBlocksForInstant(AbstractHoodieLogRecordReader.java:453)
>  at 
> org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:351)
>   ... 35 common frames omitted2023-07-17 10:33:19 (UTC+8) ERROR- Task 
> Execution failed with CommonException: Query failed 
> (#20230717_023302_05043_kuewt): Exception when reading log file 
> ```
> The FileSystemWrapper class in Presto does not override the method 'getScheme', 
> so calling this method throws this error.
> So in PR https://github.com/apache/hudi/pull/9219, 'FileSystem#getScheme()' 
> is replaced by 'URI#getScheme()'.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to