This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 414c25032f [log] Add detailed failure log and test for query metadata columns (#5479)
414c25032f is described below
commit 414c25032f3a280aec18f1e9337fc76ecef9343c
Author: askwang <[email protected]>
AuthorDate: Mon Apr 21 09:13:12 2025 +0800
[log] Add detailed failure log and test for query metadata columns (#5479)
---
docs/content/spark/sql-query.md | 4 ++++
.../paimon/spark/PaimonRecordReaderIterator.scala | 3 ++-
.../apache/paimon/spark/sql/PaimonQueryTest.scala | 20 ++++++++++++++++++++
3 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/docs/content/spark/sql-query.md b/docs/content/spark/sql-query.md
index f1597b9347..9f737d67e9 100644
--- a/docs/content/spark/sql-query.md
+++ b/docs/content/spark/sql-query.md
@@ -49,6 +49,10 @@ Paimon also supports reading some hidden metadata columns, currently supporting
SELECT *, __paimon_file_path, __paimon_partition, __paimon_bucket, __paimon_row_index FROM t;
```
+{{< hint info >}}
+Note: only append tables or deletion vector tables support querying metadata columns.
+{{< /hint >}}
+
### Batch Time Travel
Paimon batch reads with time travel can specify a snapshot or a tag and read the corresponding data.
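For illustration, a minimal sketch of the documented behavior, assuming a `spark` session with the Paimon catalog configured (the table name `t` matches the docs example above):

```scala
// Append table (no primary key): hidden metadata columns can be queried.
spark.sql("CREATE TABLE t (id INT, name STRING)")
spark.sql("INSERT INTO t VALUES (1, 'a')")
spark.sql(
  "SELECT *, __paimon_file_path, __paimon_partition, __paimon_bucket, __paimon_row_index FROM t"
).show()
```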
diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonRecordReaderIterator.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonRecordReaderIterator.scala
index 6d6c1ca47c..4a71cdff88 100644
--- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonRecordReaderIterator.scala
+++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonRecordReaderIterator.scala
@@ -50,7 +50,8 @@ case class PaimonRecordReaderIterator(
     if (needMetadata) {
       if (!isFileRecordIterator || !split.isInstanceOf[DataSplit]) {
         throw new RuntimeException(
-          "There need be FileRecordIterator when metadata columns are required.")
+          "There must be a FileRecordIterator when metadata columns are required. " +
+            "Only append tables or deletion vector tables support querying metadata columns.")
       }
     }
   }
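Once a bucket of a primary-key table contains more than one file, reads go through a merging iterator instead of a per-file FileRecordIterator, so this guard fires; deletion vector tables keep per-file reads and are therefore exempt. A minimal sketch of the failing path, assuming a `spark` session with the Paimon catalog configured (the table name `pk_t` is illustrative):

```scala
// Primary-key table without deletion vectors.
spark.sql(
  "CREATE TABLE pk_t (id INT, name STRING) " +
    "TBLPROPERTIES ('primary-key' = 'id', 'bucket' = '1')")
spark.sql("INSERT INTO pk_t VALUES (1, 'a')") // one file in the bucket: still readable
spark.sql("INSERT INTO pk_t VALUES (2, 'b')") // a second file triggers merge-on-read
// Now throws at runtime with the message added above.
spark.sql("SELECT *, __paimon_file_path FROM pk_t").collect()
```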
diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala
index ae90b5b1f3..d7a2ecfd93 100644
--- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala
+++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala
@@ -21,7 +21,9 @@ package org.apache.paimon.spark.sql
import org.apache.paimon.spark.PaimonSparkTestBase
import org.apache.paimon.table.source.DataSplit
+import org.apache.spark.SparkException
import org.apache.spark.sql.{Row, SparkSession}
+import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Assertions
import java.util
@@ -387,6 +389,24 @@ class PaimonQueryTest extends PaimonSparkTestBase {
)
}
+ test("Paimon Query: not support querying metadata columns for pk table") {
+ spark.sql("""
+ |CREATE TABLE T (id INT, name STRING)
+ |TBLPROPERTIES ('primary-key' = 'id', 'bucket' = '1')
+ |""".stripMargin)
+
+ spark.sql("INSERT INTO T VALUES(1,'a')")
+ assertThat(spark.sql("SELECT *,__paimon_file_path FROM
T").collect()).hasSize(1)
+
+ // query failed if more than one file in a bucket
+ spark.sql("INSERT INTO T VALUES(2,'b')")
+ assert(
+ intercept[SparkException] {
+ spark.sql("SELECT *,__paimon_file_path FROM T").collect()
+ }.getMessage
+ .contains("Only append table or deletion vector table support querying
metadata columns."))
+ }
+
private def getAllFiles(
tableName: String,
partitions: Seq[String],