dubeme opened a new issue #2631: URL: https://github.com/apache/iceberg/issues/2631
Hi, please can someone help me with this issue? I successfully wrote an Iceberg table called `cat.table`, but when I try querying it I get the error `UnsupportedOperationException: Byte-buffer read unsupported by input stream`. Here are more details:

### Setup

* Databricks - Runtime 7.6 ML
* Spark - Version 3.0.1
* Iceberg - iceberg-spark3-runtime 0.11.1
* HDFS - Azure Data Lake

### Schema

Column name | Type
---|---
col_00 | bigint
col_01 | bigint
col_02 | string
col_03 | double
col_04 | double
col_05 | double
col_06 | decimal(28,8)
col_07 | decimal(28,8)
col_08 | map<bigint,decimal(28,8)>
col_09 | timestamp
col_10 | timestamp

### Query that runs

```sql
SELECT * FROM cat.table
```

### Queries that throw the exception

```sql
-- Query 1
SELECT *
FROM cat.table
WHERE col_09 > date_sub(current_timestamp(), 30)

-- Query 2
SELECT col_00, COUNT(*) AS count
FROM cat.table
WHERE col_09 > date_sub(current_timestamp(), 30)
GROUP BY col_00
ORDER BY col_00
```
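In case it helps with triage, here is the schema above written out with Iceberg's Java type API. This is only an illustrative sketch I put together, not the table's actual metadata: the field IDs and the optional/required flags are assumptions, and I am assuming Spark's `timestamp` maps to Iceberg's timestamp-with-zone type.

```java
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Types;

// Illustrative sketch of the reported schema using Iceberg's Java type API.
// Field IDs and optionality are assumptions, not taken from the real table.
public class ReportedSchemaSketch {
  static final Schema SCHEMA = new Schema(
      Types.NestedField.optional(1, "col_00", Types.LongType.get()),
      Types.NestedField.optional(2, "col_01", Types.LongType.get()),
      Types.NestedField.optional(3, "col_02", Types.StringType.get()),
      Types.NestedField.optional(4, "col_03", Types.DoubleType.get()),
      Types.NestedField.optional(5, "col_04", Types.DoubleType.get()),
      Types.NestedField.optional(6, "col_05", Types.DoubleType.get()),
      Types.NestedField.optional(7, "col_06", Types.DecimalType.of(28, 8)),
      Types.NestedField.optional(8, "col_07", Types.DecimalType.of(28, 8)),
      // map<bigint, decimal(28,8)>; the key/value field IDs (10, 11) are made up.
      Types.NestedField.optional(9, "col_08",
          Types.MapType.ofOptional(10, 11, Types.LongType.get(), Types.DecimalType.of(28, 8))),
      // Spark `timestamp` assumed to map to Iceberg's timestamp with time zone.
      Types.NestedField.optional(12, "col_09", Types.TimestampType.withZone()),
      Types.NestedField.optional(13, "col_10", Types.TimestampType.withZone()));
}
```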
### Stack dump

```
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 32.0 failed 4 times, most recent failure: Lost task 0.3 in stage 32.0 (TID 1454, <redacted ip address>, executor 6): java.lang.UnsupportedOperationException: Byte-buffer read unsupported by input stream
    at org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:146)
    at com.databricks.spark.metrics.FSInputStreamWithMetrics.$anonfun$read$1(FileSystemWithMetrics.scala:198)
    at scala.runtime.java8.JFunction0$mcI$sp.apply(JFunction0$mcI$sp.java:23)
    at com.databricks.spark.metrics.ExtendedTaskIOMetrics.withTimeMetric(FileSystemWithMetrics.scala:151)
    at com.databricks.spark.metrics.ExtendedTaskIOMetrics.withTimeAndBytesMetric(FileSystemWithMetrics.scala:171)
    at com.databricks.spark.metrics.ExtendedTaskIOMetrics.$anonfun$withTimeAndBytesReadMetric$1(FileSystemWithMetrics.scala:185)
    at scala.runtime.java8.JFunction0$mcI$sp.apply(JFunction0$mcI$sp.java:23)
    at com.databricks.spark.metrics.SamplerWithPeriod.sample(FileSystemWithMetrics.scala:78)
    at com.databricks.spark.metrics.ExtendedTaskIOMetrics.withTimeAndBytesReadMetric(FileSystemWithMetrics.scala:185)
    at com.databricks.spark.metrics.ExtendedTaskIOMetrics.withTimeAndBytesReadMetric$(FileSystemWithMetrics.scala:184)
    at com.databricks.spark.metrics.FSInputStreamWithMetrics.withTimeAndBytesReadMetric(FileSystemWithMetrics.scala:192)
    at com.databricks.spark.metrics.FSInputStreamWithMetrics.read(FileSystemWithMetrics.scala:198)
    at org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:143)
    at org.apache.iceberg.shaded.org.apache.parquet.hadoop.util.H2SeekableInputStream$H2Reader.read(H2SeekableInputStream.java:81)
    at org.apache.iceberg.shaded.org.apache.parquet.hadoop.util.H2SeekableInputStream.readFully(H2SeekableInputStream.java:90)
    at org.apache.iceberg.shaded.org.apache.parquet.hadoop.util.H2SeekableInputStream.readFully(H2SeekableInputStream.java:75)
    at org.apache.iceberg.shaded.org.apache.parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:542)
    at org.apache.iceberg.shaded.org.apache.parquet.hadoop.ParquetFileReader.<init>(ParquetFileReader.java:712)
    at org.apache.iceberg.shaded.org.apache.parquet.hadoop.ParquetFileReader.open(ParquetFileReader.java:609)
    at org.apache.iceberg.parquet.ReadConf.newReader(ReadConf.java:218)
    at org.apache.iceberg.parquet.ReadConf.<init>(ReadConf.java:74)
    at org.apache.iceberg.parquet.ParquetReader.init(ParquetReader.java:66)
    at org.apache.iceberg.parquet.ParquetReader.iterator(ParquetReader.java:77)
    at org.apache.iceberg.spark.source.RowDataReader.open(RowDataReader.java:95)
    at org.apache.iceberg.spark.source.BaseDataReader.next(BaseDataReader.java:93)
    at org.apache.spark.sql.execution.datasources.v2.PartitionIterator.hasNext(DataSourceRDD.scala:79)
    at org.apache.spark.sql.execution.datasources.v2.MetricsIterator.hasNext(DataSourceRDD.scala:112)
    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
    at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:732)
    at org.apache.spark.sql.execution.collect.UnsafeRowBatchUtils$.encodeUnsafeRows(UnsafeRowBatchUtils.scala:80)
    at org.apache.spark.sql.execution.collect.Collector.$anonfun$processFunc$1(Collector.scala:187)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.doRunTask(Task.scala:144)
    at org.apache.spark.scheduler.Task.run(Task.scala:117)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$8(Executor.scala:677)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1581)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:680)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2519)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2466)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2460)
    at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
    at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2460)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1152)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1152)
    at scala.Option.foreach(Option.scala:407)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1152)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2721)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2668)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2656)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:938)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2339)
    at org.apache.spark.sql.execution.collect.Collector.runSparkJobs(Collector.scala:298)
    at org.apache.spark.sql.execution.collect.Collector.collect(Collector.scala:308)
    at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:82)
    at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:88)
    at org.apache.spark.sql.execution.collect.InternalRowFormat$.collect(cachedSparkResults.scala:61)
    at org.apache.spark.sql.execution.collect.InternalRowFormat$.collect(cachedSparkResults.scala:57)
    at org.apache.spark.sql.execution.ResultCacheManager.$anonfun$getOrComputeResultInternal$1(ResultCacheManager.scala:483)
    at scala.Option.getOrElse(Option.scala:189)
    at org.apache.spark.sql.execution.ResultCacheManager.getOrComputeResultInternal(ResultCacheManager.scala:483)
    at org.apache.spark.sql.execution.ResultCacheManager.getOrComputeResult(ResultCacheManager.scala:427)
    at org.apache.spark.sql.execution.CollectLimitExec.executeCollectResult(limit.scala:58)
    at org.apache.spark.sql.Dataset.collectResult(Dataset.scala:3013)
    at org.apache.spark.sql.Dataset.$anonfun$collectResult$1(Dataset.scala:3004)
    at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3728)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$5(SQLExecution.scala:116)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:248)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$1(SQLExecution.scala:101)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:841)
    at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:77)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:198)
    at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3726)
    at org.apache.spark.sql.Dataset.collectResult(Dataset.scala:3003)
    at com.databricks.backend.daemon.driver.OutputAggregator$.withOutputAggregation0(OutputAggregator.scala:194)
    at com.databricks.backend.daemon.driver.OutputAggregator$.withOutputAggregation(OutputAggregator.scala:57)
    at com.databricks.backend.daemon.driver.SQLDriverLocal.executeSql(SQLDriverLocal.scala:115)
    at com.databricks.backend.daemon.driver.SQLDriverLocal.repl(SQLDriverLocal.scala:144)
    at com.databricks.backend.daemon.driver.DriverLocal.$anonfun$execute$11(DriverLocal.scala:488)
    at com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:240)
    at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
    at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:235)
    at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:232)
    at com.databricks.backend.daemon.driver.DriverLocal.withAttributionContext(DriverLocal.scala:50)
    at com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:277)
    at com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:270)
    at com.databricks.backend.daemon.driver.DriverLocal.withAttributionTags(DriverLocal.scala:50)
    at com.databricks.backend.daemon.driver.DriverLocal.execute(DriverLocal.scala:465)
    at com.databricks.backend.daemon.driver.DriverWrapper.$anonfun$tryExecutingCommand$1(DriverWrapper.scala:690)
    at scala.util.Try$.apply(Try.scala:213)
    at com.databricks.backend.daemon.driver.DriverWrapper.tryExecutingCommand(DriverWrapper.scala:682)
    at com.databricks.backend.daemon.driver.DriverWrapper.getCommandOutputAndError(DriverWrapper.scala:523)
    at com.databricks.backend.daemon.driver.DriverWrapper.executeCommand(DriverWrapper.scala:635)
    at com.databricks.backend.daemon.driver.DriverWrapper.runInnerLoop(DriverWrapper.scala:428)
    at com.databricks.backend.daemon.driver.DriverWrapper.runInner(DriverWrapper.scala:371)
    at com.databricks.backend.daemon.driver.DriverWrapper.run(DriverWrapper.scala:223)
    at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.UnsupportedOperationException: Byte-buffer read unsupported by input stream
    at org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:146)
    at com.databricks.spark.metrics.FSInputStreamWithMetrics.$anonfun$read$1(FileSystemWithMetrics.scala:198)
    at scala.runtime.java8.JFunction0$mcI$sp.apply(JFunction0$mcI$sp.java:23)
    at com.databricks.spark.metrics.ExtendedTaskIOMetrics.withTimeMetric(FileSystemWithMetrics.scala:151)
    at com.databricks.spark.metrics.ExtendedTaskIOMetrics.withTimeAndBytesMetric(FileSystemWithMetrics.scala:171)
    at com.databricks.spark.metrics.ExtendedTaskIOMetrics.$anonfun$withTimeAndBytesReadMetric$1(FileSystemWithMetrics.scala:185)
    at scala.runtime.java8.JFunction0$mcI$sp.apply(JFunction0$mcI$sp.java:23)
    at com.databricks.spark.metrics.SamplerWithPeriod.sample(FileSystemWithMetrics.scala:78)
    at com.databricks.spark.metrics.ExtendedTaskIOMetrics.withTimeAndBytesReadMetric(FileSystemWithMetrics.scala:185)
    at com.databricks.spark.metrics.ExtendedTaskIOMetrics.withTimeAndBytesReadMetric$(FileSystemWithMetrics.scala:184)
    at com.databricks.spark.metrics.FSInputStreamWithMetrics.withTimeAndBytesReadMetric(FileSystemWithMetrics.scala:192)
    at com.databricks.spark.metrics.FSInputStreamWithMetrics.read(FileSystemWithMetrics.scala:198)
    at org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:143)
    at org.apache.iceberg.shaded.org.apache.parquet.hadoop.util.H2SeekableInputStream$H2Reader.read(H2SeekableInputStream.java:81)
    at org.apache.iceberg.shaded.org.apache.parquet.hadoop.util.H2SeekableInputStream.readFully(H2SeekableInputStream.java:90)
    at org.apache.iceberg.shaded.org.apache.parquet.hadoop.util.H2SeekableInputStream.readFully(H2SeekableInputStream.java:75)
    at org.apache.iceberg.shaded.org.apache.parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:542)
    at org.apache.iceberg.shaded.org.apache.parquet.hadoop.ParquetFileReader.<init>(ParquetFileReader.java:712)
    at org.apache.iceberg.shaded.org.apache.parquet.hadoop.ParquetFileReader.open(ParquetFileReader.java:609)
    at org.apache.iceberg.parquet.ReadConf.newReader(ReadConf.java:218)
    at org.apache.iceberg.parquet.ReadConf.<init>(ReadConf.java:74)
    at org.apache.iceberg.parquet.ParquetReader.init(ParquetReader.java:66)
    at org.apache.iceberg.parquet.ParquetReader.iterator(ParquetReader.java:77)
    at org.apache.iceberg.spark.source.RowDataReader.open(RowDataReader.java:95)
    at org.apache.iceberg.spark.source.BaseDataReader.next(BaseDataReader.java:93)
    at org.apache.spark.sql.execution.datasources.v2.PartitionIterator.hasNext(DataSourceRDD.scala:79)
    at org.apache.spark.sql.execution.datasources.v2.MetricsIterator.hasNext(DataSourceRDD.scala:112)
    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
    at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:732)
    at org.apache.spark.sql.execution.collect.UnsafeRowBatchUtils$.encodeUnsafeRows(UnsafeRowBatchUtils.scala:80)
    at org.apache.spark.sql.execution.collect.Collector.$anonfun$processFunc$1(Collector.scala:187)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.doRunTask(Task.scala:144)
    at org.apache.spark.scheduler.Task.run(Task.scala:117)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$8(Executor.scala:677)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1581)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:680)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    ... 1 more
```
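Reading the dump top-down: Parquet's `H2SeekableInputStream$H2Reader` asks `FSDataInputStream` for a ByteBuffer read, the call passes through the Databricks `FSInputStreamWithMetrics` wrapper into a second `FSDataInputStream`, and that innermost one throws. My understanding is that Hadoop's `FSDataInputStream` only supports `read(ByteBuffer)` when the stream it wraps implements `ByteBufferReadable`. The sketch below paraphrases that check from memory (it is not the verbatim Hadoop source), which would suggest the stream underneath the metrics wrapper, ultimately the Azure Data Lake stream, does not implement `ByteBufferReadable`:

```java
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;

// Illustrative paraphrase of org.apache.hadoop.fs.FSDataInputStream.read(ByteBuffer),
// written from memory; not the verbatim Hadoop source. The nested ByteBufferReadable
// interface stands in for org.apache.hadoop.fs.ByteBufferReadable.
class FSDataInputStreamSketch {

  interface ByteBufferReadable {
    int read(ByteBuffer buf) throws IOException;
  }

  private final InputStream in; // the wrapped (inner) stream

  FSDataInputStreamSketch(InputStream in) {
    this.in = in;
  }

  public int read(ByteBuffer buf) throws IOException {
    if (in instanceof ByteBufferReadable) {
      // The wrapped stream supports ByteBuffer reads; delegate to it.
      return ((ByteBufferReadable) in).read(buf);
    }
    // The line at the top of the stack dump above: reached when the wrapped
    // stream (here, apparently the Azure Data Lake stream underneath the
    // Databricks metrics wrapper) does not implement ByteBufferReadable.
    throw new UnsupportedOperationException("Byte-buffer read unsupported by input stream");
  }
}
```

If that reading is right, the fix or workaround probably lives on the filesystem/wrapper side rather than in Iceberg's Parquet reader, but I would appreciate confirmation from someone who knows this code path.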