This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 2d0cf51eca [core] RowId field by default is not null
2d0cf51eca is described below
commit 2d0cf51eca77d6b302791cd1ad92fc5ab3bb9fc6
Author: JingsongLi <[email protected]>
AuthorDate: Mon Dec 29 20:50:36 2025 +0800
[core] RowId field by default is not null
---
.../java/org/apache/paimon/table/SpecialFields.java | 19 ++++++-------------
.../paimon/operation/DataEvolutionSplitRead.java | 5 ++++-
.../org/apache/paimon/operation/RawFileSplitRead.java | 4 +++-
.../paimon/spark/commands/PaimonSparkWriter.scala | 5 +++++
4 files changed, 18 insertions(+), 15 deletions(-)
diff --git
a/paimon-api/src/main/java/org/apache/paimon/table/SpecialFields.java
b/paimon-api/src/main/java/org/apache/paimon/table/SpecialFields.java
index b994706a3b..687001ec39 100644
--- a/paimon-api/src/main/java/org/apache/paimon/table/SpecialFields.java
+++ b/paimon-api/src/main/java/org/apache/paimon/table/SpecialFields.java
@@ -91,7 +91,7 @@ public class SpecialFields {
Integer.MAX_VALUE - 4, "rowkind", new
VarCharType(VarCharType.MAX_LENGTH));
public static final DataField ROW_ID =
- new DataField(Integer.MAX_VALUE - 5, "_ROW_ID",
DataTypes.BIGINT());
+ new DataField(Integer.MAX_VALUE - 5, "_ROW_ID",
DataTypes.BIGINT().notNull());
public static final Set<String> SYSTEM_FIELD_NAMES =
Stream.of(
@@ -140,18 +140,14 @@ public class SpecialFields {
}
public static RowType rowTypeWithRowTracking(RowType rowType) {
- return rowTypeWithRowTracking(rowType, false);
- }
-
- public static RowType rowTypeWithRowTracking(RowType rowType, boolean
sequenceNumberNullable) {
- return rowTypeWithRowTracking(rowType, true, sequenceNumberNullable);
+ return rowTypeWithRowTracking(rowType, false, false);
}
/**
* Add row tracking fields to rowType.
*
- * @param sequenceNumberNullable sequence number is not null for user, but
is nullable when read
- * and write
+     * @param rowIdNullable the row ID is not null for the user, but is
nullable when reading (not null when writing)
+     * @param sequenceNumberNullable the sequence number is not null for the
user, but is nullable when reading and writing
*/
public static RowType rowTypeWithRowTracking(
RowType rowType, boolean rowIdNullable, boolean
sequenceNumberNullable) {
@@ -166,12 +162,9 @@ public class SpecialFields {
+ "' conflicts with existing field
names.");
}
});
+ fieldsWithRowTracking.add(rowIdNullable ? ROW_ID.copy(true) : ROW_ID);
fieldsWithRowTracking.add(
- rowIdNullable ? SpecialFields.ROW_ID :
SpecialFields.ROW_ID.copy(false));
- fieldsWithRowTracking.add(
- sequenceNumberNullable
- ? SpecialFields.SEQUENCE_NUMBER.copy(true)
- : SpecialFields.SEQUENCE_NUMBER);
+ sequenceNumberNullable ? SEQUENCE_NUMBER.copy(true) :
SEQUENCE_NUMBER);
return new RowType(fieldsWithRowTracking);
}
diff --git
a/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionSplitRead.java
b/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionSplitRead.java
index fa11671dc6..fa04dbad59 100644
---
a/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionSplitRead.java
+++
b/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionSplitRead.java
@@ -165,7 +165,10 @@ public class DataEvolutionSplitRead implements
SplitRead<InternalRow> {
new Builder(
formatDiscover,
readRowType.getFields(),
- schema ->
rowTypeWithRowTracking(schema.logicalRowType(), true).getFields(),
+                        // the file has no row ID or sequence number; they are
in the manifest entry
+ schema ->
+
rowTypeWithRowTracking(schema.logicalRowType(), true, true)
+ .getFields(),
null,
null,
null,
diff --git
a/paimon-core/src/main/java/org/apache/paimon/operation/RawFileSplitRead.java
b/paimon-core/src/main/java/org/apache/paimon/operation/RawFileSplitRead.java
index 05ee3ba499..43ac7c152e 100644
---
a/paimon-core/src/main/java/org/apache/paimon/operation/RawFileSplitRead.java
+++
b/paimon-core/src/main/java/org/apache/paimon/operation/RawFileSplitRead.java
@@ -183,7 +183,9 @@ public class RawFileSplitRead implements
SplitRead<InternalRow> {
readRowType.getFields(),
schema -> {
if (rowTrackingEnabled) {
- return
rowTypeWithRowTracking(schema.logicalRowType(), true)
+                            // the file may have no row ID or sequence
+                            // number, but they are in the manifest entry
+ return
rowTypeWithRowTracking(schema.logicalRowType(), true, true)
.getFields();
}
return schema.fields();
diff --git
a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonSparkWriter.scala
b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonSparkWriter.scala
index 907fe62348..721c09e2d5 100644
---
a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonSparkWriter.scala
+++
b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonSparkWriter.scala
@@ -70,6 +70,11 @@ case class PaimonSparkWriter(
private val writeType = {
if (writeRowTracking) {
+      // The historical data and new data are processed separately.
+      // 1. The historical data contains a non-null RowId, but its
sequenceNumber may not have been generated yet;
+      //    it will be generated from the snapshot ID when committing.
(However, previously updated data already
+      //    includes the sequenceNumber value.)
+      // 2. The new data will be written to the branch without
writeRowTracking.
SpecialFields.rowTypeWithRowTracking(table.rowType(), false, true)
} else {
table.rowType()