This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new fa0d97476c [core] Fix convert null row to column row vector (#6990)
fa0d97476c is described below
commit fa0d97476c7fff6d415291bbc7317e1a7dbbcb9a
Author: Zouxxyy <[email protected]>
AuthorDate: Fri Jan 9 13:15:18 2026 +0800
[core] Fix convert null row to column row vector (#6990)
---
.../paimon/data/columnar/heap/HeapRowVector.java | 10 ++++
.../data/columnar/RowToColumnConverterTest.java | 49 ++++++++++++++++
.../apache/paimon/spark/sql/VariantTestBase.scala | 67 ++++++++++++++++++++++
3 files changed, 126 insertions(+)
diff --git a/paimon-common/src/main/java/org/apache/paimon/data/columnar/heap/HeapRowVector.java b/paimon-common/src/main/java/org/apache/paimon/data/columnar/heap/HeapRowVector.java
index a031a5d260..003189c02b 100644
--- a/paimon-common/src/main/java/org/apache/paimon/data/columnar/heap/HeapRowVector.java
+++ b/paimon-common/src/main/java/org/apache/paimon/data/columnar/heap/HeapRowVector.java
@@ -61,4 +61,14 @@ public class HeapRowVector extends AbstractStructVector
System.arraycopy(fields, 0, this.children, 0, fields.length);
this.vectorizedColumnBatch = new VectorizedColumnBatch(children);
}
+
+ @Override
+ public void appendNull() {
+ super.appendNull();
+ for (ColumnVector child : children) {
+ if (child instanceof WritableColumnVector) {
+ ((WritableColumnVector) child).appendNull();
+ }
+ }
+ }
}
diff --git a/paimon-common/src/test/java/org/apache/paimon/data/columnar/RowToColumnConverterTest.java b/paimon-common/src/test/java/org/apache/paimon/data/columnar/RowToColumnConverterTest.java
index 3d28183385..9f2ef2fc3c 100644
--- a/paimon-common/src/test/java/org/apache/paimon/data/columnar/RowToColumnConverterTest.java
+++ b/paimon-common/src/test/java/org/apache/paimon/data/columnar/RowToColumnConverterTest.java
@@ -458,4 +458,53 @@ public class RowToColumnConverterTest {
assertThat(new String(nameVector.getBytes(2).getBytes())).isEqualTo("Charlie");
assertThat(rowVector.getRow(2).getInt(0)).isEqualTo(3);
}
+
+ @Test
+ public void testConvertNullableRowType() {
+ RowType rowType =
+ RowType.of(
+ new DataField(
+ 0,
+ "f",
+ DataTypes.ROW(
+ DataTypes.FIELD(0, "id", DataTypes.INT()),
+ DataTypes.FIELD(1, "name", DataTypes.STRING()))));
+ RowToColumnConverter converter = new RowToColumnConverter(rowType);
+
+ // Test null row value
+ GenericRow row1 = GenericRow.of((Object) null);
+
+ // Test row with null fields
+ GenericRow rowValue2 = GenericRow.of(null, BinaryString.fromString("Bob"));
+ GenericRow row2 = GenericRow.of(rowValue2);
+
+ // Test row with all null fields
+ GenericRow rowValue3 = GenericRow.of(null, null);
+ GenericRow row3 = GenericRow.of(rowValue3);
+
+ HeapIntVector idVector = new HeapIntVector(3);
+ HeapBytesVector nameVector = new HeapBytesVector(3);
+ HeapRowVector rowVector = new HeapRowVector(3, idVector, nameVector);
+
+ WritableColumnVector[] vectors = new WritableColumnVector[] {rowVector};
+
+ // Convert null row
+ converter.convert(row1, vectors);
+ assertThat(rowVector.isNullAt(0)).isTrue();
+ assertThat(idVector.isNullAt(0)).isTrue();
+ assertThat(nameVector.isNullAt(0)).isTrue();
+
+ // Convert row with null id field
+ converter.convert(row2, vectors);
+ assertThat(rowVector.isNullAt(1)).isFalse();
+ assertThat(idVector.isNullAt(1)).isTrue();
+ assertThat(nameVector.isNullAt(1)).isFalse();
+ assertThat(new String(nameVector.getBytes(1).getBytes())).isEqualTo("Bob");
+
+ // Convert row with all null fields
+ converter.convert(row3, vectors);
+ assertThat(rowVector.isNullAt(2)).isFalse();
+ assertThat(idVector.isNullAt(2)).isTrue();
+ assertThat(nameVector.isNullAt(2)).isTrue();
+ }
}
diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/VariantTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/VariantTestBase.scala
index da4fc175fe..c1cf9e226e 100644
--- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/VariantTestBase.scala
+++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/VariantTestBase.scala
@@ -386,4 +386,71 @@ abstract class VariantTestBase extends PaimonSparkTestBase {
}
}
}
+
+ test("Paimon Variant: read and write variant with null value") {
+ withTable("source_tbl", "target_tbl") {
+ sql("CREATE TABLE source_tbl (id INT, js STRING) USING paimon")
+ val n = 100
+ val nullCount = 98
+ val values = (1 to n)
+ .map {
+ i =>
+ if (i <= nullCount) {
+ s"($i, null)"
+ } else {
+ val jsonStr =
+ s"""
+ |'{
+ | "id":$i,"name":"user$i","age":${20 + (i % 50)},
+ | "tags":[{"type":"vip","level":$i},{"type":"premium","level":$i}],
+ | "address":{"city":"city$i","street":"street$i"}
+ |}'
+ |""".stripMargin
+ s"($i, $jsonStr)"
+ }
+ }
+ .mkString(", ")
+ sql(s"INSERT INTO source_tbl VALUES $values")
+
+ sql("CREATE TABLE target_tbl (id INT, v VARIANT) USING paimon")
+ sql("INSERT INTO target_tbl SELECT id, parse_json(js) FROM source_tbl")
+
+ checkAnswer(
+ sql("""
+ |SELECT
+ |variant_get(v, '$.name', 'string'),
+ |variant_get(v, '$.tags', 'string'),
+ |variant_get(v, '$.tags', 'array<string>'),
+ |variant_get(v, '$.tags', 'array<struct<type string, level int>>'),
+ |variant_get(v, '$.tags[0]', 'string'),
+ |variant_get(v, '$.tags[0]', 'struct<type string, level int>'),
+ |variant_get(v, '$.tags[1].type', 'string'),
+ |variant_get(v, '$.address', 'string')
+ |FROM target_tbl where v IS NOT NULL
+ |""".stripMargin),
+ Seq(
+ Row(
+ "user99",
+ "[{\"level\":99,\"type\":\"vip\"},{\"level\":99,\"type\":\"premium\"}]",
+ Array("{\"level\":99,\"type\":\"vip\"}", "{\"level\":99,\"type\":\"premium\"}"),
+ Array(Row("vip", 99), Row("premium", 99)),
+ "{\"level\":99,\"type\":\"vip\"}",
+ Row("vip", 99),
+ "premium",
+ "{\"city\":\"city99\",\"street\":\"street99\"}"
+ ),
+ Row(
+ "user100",
+ "[{\"level\":100,\"type\":\"vip\"},{\"level\":100,\"type\":\"premium\"}]",
+ Array("{\"level\":100,\"type\":\"vip\"}", "{\"level\":100,\"type\":\"premium\"}"),
+ Array(Row("vip", 100), Row("premium", 100)),
+ "{\"level\":100,\"type\":\"vip\"}",
+ Row("vip", 100),
+ "premium",
+ "{\"city\":\"city100\",\"street\":\"street100\"}"
+ )
+ )
+ )
+ }
+ }
}