[
https://issues.apache.org/jira/browse/KYLIN-5988?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17927962#comment-17927962
]
Guoliang Sun commented on KYLIN-5988:
-------------------------------------
h3. Root Cause
This issue is caused by the V3 model not having a corresponding snapshot built; the snapshot-handling logic for this case needs to be improved.
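
A minimal sketch of the kind of guard this fix implies is shown below. All names here ({{ModelSnapshots}}, {{checkSnapshotsBuilt}}) are hypothetical illustrations, not Kylin's actual classes: the idea is to verify, before planning a query against a V3 model, that every lookup table the model reads through a snapshot actually has one built, and to fail fast with an actionable message instead of a low-level Spark error.
{code:java}
// Hypothetical sketch only -- not Kylin's actual API.
// Validate snapshots up front so a V3 query fails fast with a clear
// message instead of surfacing a runtime Spark/Parquet error.
import java.util.Set;

public final class SnapshotGuard {

    /** Minimal stand-in for a model's snapshot metadata (hypothetical). */
    public interface ModelSnapshots {
        Set<String> lookupTables();        // tables the model reads via snapshot
        boolean hasSnapshot(String table); // true once a snapshot has been built
    }

    private SnapshotGuard() {}

    public static void checkSnapshotsBuilt(ModelSnapshots model) {
        for (String table : model.lookupTables()) {
            if (!model.hasSnapshot(table)) {
                throw new IllegalStateException(
                        "No snapshot built for lookup table '" + table
                        + "'; build it before querying this model.");
            }
        }
    }
}
{code}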
> [Storage V3] Query hitting the model throws an error in the V3 project, and
> the built data is cleared
> -----------------------------------------------------------------------------------------------------
>
> Key: KYLIN-5988
> URL: https://issues.apache.org/jira/browse/KYLIN-5988
> Project: Kylin
> Issue Type: Bug
> Affects Versions: 5.0.0
> Reporter: Guoliang Sun
> Priority: Major
>
> 1. Create and build an incremental model.
> 2. A query that hits the model throws an error.
> 3. The error message is as follows:
> {code:java}
> Error while executing SQL "SELECT "LINEORDER"."LO_ORDERKEY" as "LINEORDER_LO_ORDERKEY",
> "LINEORDER"."LO_PARTKEY" as "LINEORDER_LO_PARTKEY", "LINEORDER"."LO_DISCOUNT" as "LINEORDER_LO_DISCOUNT",
> "LINEORDER"."LO_SUPPLYCOST" as "LINEORDER_LO_SUPPLYCOST", "LINEORDER"."LO_COMMITDATE" as "LINEORDER_LO_COMMITDATE",
> "LINEORDER"."LO_EXTENDEDPRICE" as "LINEORDER_LO_EXTENDEDPRICE", "LINEORDER"."LO_TAX" as "LINEORDER_LO_TAX",
> "LINEORDER"."LO_SUPPKEY" as "LINEORDER_LO_SUPPKEY", "LINEORDER"."LO_ORDTOTALPRICE" as "LINEORDER_LO_ORDTOTALPRICE",
> "LINEORDER"."LO_REVENUE" as "LINEORDER_LO_REVENUE", "LINEORDER"."LO_ORDERDATE" as "LINEORDER_LO_ORDERDATE",
> "LINEORDER"."LO_ORDERPRIOTITY" as "LINEORDER_LO_ORDERPRIOTITY", "LINEORDER"."LO_SHIPPRIOTITY" as "LINEORDER_LO_SHIPPRIOTITY",
> "LINEORDER"."LO_QUANTITY" as "LINEORDER_LO_QUANTITY", "LINEORDER"."LO_SHIPMODE" as "LINEORDER_LO_SHIPMODE",
> "LINEORDER"."LO_LINENUMBER" as "LINEORDER_LO_LINENUMBER", "LINEORDER"."LO_CUSTKEY" as "LINEORDER_LO_CUSTKEY",
> "CUSTOMER"."C_ADDRESS" as "CUSTOMER_C_ADDRESS", "CUSTOMER"."C_NATION" as "CUSTOMER_C_NATION",
> "CUSTOMER"."C_CITY" as "CUSTOMER_C_CITY", "CUSTOMER"."C_PHONE" as "CUSTOMER_C_PHONE",
> "CUSTOMER"."C_REGION" as "CUSTOMER_C_REGION", "CUSTOMER"."C_NAME" as "CUSTOMER_C_NAME",
> "CUSTOMER"."C_MKTSEGMENT" as "CUSTOMER_C_MKTSEGMENT", "CUSTOMER"."C_CUSTKEY" as "CUSTOMER_C_CUSTKEY"
> FROM "SSB"."LINEORDER" as "LINEORDER" INNER JOIN "SSB"."CUSTOMER" as "CUSTOMER"
> ON "LINEORDER"."LO_CUSTKEY" = "CUSTOMER"."C_CUSTKEY" LIMIT 500":
> Job aborted due to stage failure: Task 0 in stage 10106.0 failed 1 times, most recent
> failure: Lost task 0.0 in stage 10106.0 (TID 63159) (oliver-slave03.kylin.com executor 1):
> java.lang.NoClassDefFoundError: Could not initialize class org.xerial.snappy.Snappy
>     at org.apache.parquet.hadoop.codec.SnappyDecompressor.decompress(SnappyDecompressor.java:62)
>     at org.apache.parquet.hadoop.codec.NonBlockedDecompressorStream.read(NonBlockedDecompressorStream.java:51)
>     at java.io.DataInputStream.readFully(DataInputStream.java:195)
>     at java.io.DataInputStream.readFully(DataInputStream.java:169)
>     at org.apache.parquet.bytes.BytesInput$StreamBytesInput.toByteArray(BytesInput.java:286)
>     at org.apache.parquet.bytes.BytesInput.toByteBuffer(BytesInput.java:237)
>     at org.apache.parquet.bytes.BytesInput.toInputStream(BytesInput.java:246)
>     at org.apache.parquet.column.values.dictionary.PlainValuesDictionary$PlainLongDictionary.<init>(PlainValuesDictionary.java:154)
>     at org.apache.parquet.column.Encoding$1.initDictionary(Encoding.java:96)
>     at org.apache.parquet.column.Encoding$5.initDictionary(Encoding.java:163)
>     at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.<init>(VectorizedColumnReader.java:123)
>     at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.initColumnReader(VectorizedParquetRecordReader.java:426)
>     at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.checkEndOfRowGroup(VectorizedParquetRecordReader.java:408)
>     at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextBatch(VectorizedParquetRecordReader.java:323)
>     at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:226)
>     at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:42)
>     at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:118)
>     at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:293)
>     at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:118)
>     at org.apache.spark.sql.execution.KylinStorageScanExec$$anon$1.hasNext(KylinStorageScanExec.scala:405)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
>     at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
>     at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:771)
>     at org.apache.spark.sql.execution.RowToCHNativeColumnarExec.$anonfun$doExecuteColumnarInternal$3(RowToCHNativeColumnarExec.scala:54)
>     at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2(RDD.scala:856)
>     at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2$adapted(RDD.scala:856)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:366)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:330)
>     at org.apache.gluten.execution.ColumnarInputRDDsWrapper.$anonfun$getIterators$1(WholeStageTransformer.scala:445)
>     at scala.collection.immutable.List.flatMap(List.scala:366)
>     at org.apache.gluten.execution.ColumnarInputRDDsWrapper.getIterators(WholeStageTransformer.scala:436)
>     at org.apache.gluten.execution.WholeStageZippedPartitionsRDD.$anonfun$compute$1(WholeStageZippedPartitionsRDD.scala:48)
>     at org.apache.gluten.utils.Arm$.withResource(Arm.scala:25)
>     at org.apache.gluten.metrics.GlutenTimeMetric$.millis(GlutenTimeMetric.scala:37)
>     at org.apache.gluten.execution.WholeStageZippedPartitionsRDD.compute(WholeStageZippedPartitionsRDD.scala:46)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:366)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:330)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:366)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:330)
>     at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>     at org.apache.spark.scheduler.Task.run(Task.scala:136)
>     at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:549)
>     at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1511)
>     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:552)
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>     at java.lang.Thread.run(Thread.java:748)
> Driver stacktrace:{code}
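>
> The surface error above is a failed static initialization of {{org.xerial.snappy.Snappy}}, whose class initializer loads a native library on the executor. A standalone way to check that initialization in a given JVM (independent of Kylin; requires only snappy-java on the classpath) is a quick sketch like:
> {code:java}
> // Standalone sanity check for snappy-java native initialization.
> // If Snappy's static initializer also fails here, the problem lies in
> // the executor environment (native library), not in the query itself.
> import java.nio.charset.StandardCharsets;
> import org.xerial.snappy.Snappy;
>
> public class SnappyCheck {
>     public static void main(String[] args) throws Exception {
>         byte[] input = "hello snappy".getBytes(StandardCharsets.UTF_8);
>         byte[] compressed = Snappy.compress(input);
>         byte[] restored = Snappy.uncompress(compressed);
>         System.out.println(new String(restored, StandardCharsets.UTF_8)); // prints: hello snappy
>     }
> }
> {code}
>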
> SQL
> {code:java}
> SELECT
> "LINEORDER"."LO_ORDERKEY" as "LINEORDER_LO_ORDERKEY"
> , "LINEORDER"."LO_PARTKEY" as "LINEORDER_LO_PARTKEY"
> , "LINEORDER"."LO_DISCOUNT" as "LINEORDER_LO_DISCOUNT"
> , "LINEORDER"."LO_SUPPLYCOST" as "LINEORDER_LO_SUPPLYCOST"
> , "LINEORDER"."LO_COMMITDATE" as "LINEORDER_LO_COMMITDATE"
> , "LINEORDER"."LO_EXTENDEDPRICE" as "LINEORDER_LO_EXTENDEDPRICE"
> , "LINEORDER"."LO_TAX" as "LINEORDER_LO_TAX"
> , "LINEORDER"."LO_SUPPKEY" as "LINEORDER_LO_SUPPKEY"
> , "LINEORDER"."LO_ORDTOTALPRICE" as "LINEORDER_LO_ORDTOTALPRICE"
> , "LINEORDER"."LO_REVENUE" as "LINEORDER_LO_REVENUE"
> , "LINEORDER"."LO_ORDERDATE" as "LINEORDER_LO_ORDERDATE"
> , "LINEORDER"."LO_ORDERPRIOTITY" as "LINEORDER_LO_ORDERPRIOTITY"
> , "LINEORDER"."LO_SHIPPRIOTITY" as "LINEORDER_LO_SHIPPRIOTITY"
> , "LINEORDER"."LO_QUANTITY" as "LINEORDER_LO_QUANTITY"
> , "LINEORDER"."LO_SHIPMODE" as "LINEORDER_LO_SHIPMODE"
> , "LINEORDER"."LO_LINENUMBER" as "LINEORDER_LO_LINENUMBER"
> , "LINEORDER"."LO_CUSTKEY" as "LINEORDER_LO_CUSTKEY"
> , "CUSTOMER"."C_ADDRESS" as "CUSTOMER_C_ADDRESS"
> , "CUSTOMER"."C_NATION" as "CUSTOMER_C_NATION"
> , "CUSTOMER"."C_CITY" as "CUSTOMER_C_CITY"
> , "CUSTOMER"."C_PHONE" as "CUSTOMER_C_PHONE"
> , "CUSTOMER"."C_REGION" as "CUSTOMER_C_REGION"
> , "CUSTOMER"."C_NAME" as "CUSTOMER_C_NAME"
> , "CUSTOMER"."C_MKTSEGMENT" as "CUSTOMER_C_MKTSEGMENT"
> , "CUSTOMER"."C_CUSTKEY" as "CUSTOMER_C_CUSTKEY"
> FROM "SSB"."LINEORDER" as "LINEORDER"
> INNER JOIN "SSB"."CUSTOMER" as "CUSTOMER"
> ON "LINEORDER"."LO_CUSTKEY" = "CUSTOMER"."C_CUSTKEY" {code}
> 4. Meanwhile, the built data is also cleared.
>