This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 2d0cf51eca [core] RowId field by default is not null
2d0cf51eca is described below
commit 2d0cf51eca77d6b302791cd1ad92fc5ab3bb9fc6
Author: JingsongLi <[email protected]>
AuthorDate: Mon Dec 29 20:50:36 2025 +0800
[core] RowId field by default is not null
---
.../java/org/apache/paimon/table/SpecialFields.java | 19 ++++++-------------
.../paimon/operation/DataEvolutionSplitRead.java | 5 ++++-
.../org/apache/paimon/operation/RawFileSplitRead.java | 4 +++-
.../paimon/spark/commands/PaimonSparkWriter.scala | 5 +++++
4 files changed, 18 insertions(+), 15 deletions(-)
diff --git
a/paimon-api/src/main/java/org/apache/paimon/table/SpecialFields.java
b/paimon-api/src/main/java/org/apache/paimon/table/SpecialFields.java
index b994706a3b..687001ec39 100644
--- a/paimon-api/src/main/java/org/apache/paimon/table/SpecialFields.java
+++ b/paimon-api/src/main/java/org/apache/paimon/table/SpecialFields.java
@@ -91,7 +91,7 @@ public class SpecialFields {
Integer.MAX_VALUE - 4, "rowkind", new
VarCharType(VarCharType.MAX_LENGTH));
public static final DataField ROW_ID =
- new DataField(Integer.MAX_VALUE - 5, "_ROW_ID",
DataTypes.BIGINT());
+ new DataField(Integer.MAX_VALUE - 5, "_ROW_ID",
DataTypes.BIGINT().notNull());
public static final Set<String> SYSTEM_FIELD_NAMES =
Stream.of(
@@ -140,18 +140,14 @@ public class SpecialFields {
}
public static RowType rowTypeWithRowTracking(RowType rowType) {
- return rowTypeWithRowTracking(rowType, false);
- }
-
- public static RowType rowTypeWithRowTracking(RowType rowType, boolean
sequenceNumberNullable) {
- return rowTypeWithRowTracking(rowType, true, sequenceNumberNullable);
+ return rowTypeWithRowTracking(rowType, false, false);
}
/**
* Add row tracking fields to rowType.
*
- * @param sequenceNumberNullable sequence number is not null for user, but
is nullable when read
- * and write
+     * @param rowIdNullable the row ID is not null for the user, but is
nullable when reading (not null when writing)
+     * @param sequenceNumberNullable the sequence number is not null for the
user, but is nullable when reading and writing
*/
public static RowType rowTypeWithRowTracking(
RowType rowType, boolean rowIdNullable, boolean
sequenceNumberNullable) {
@@ -166,12 +162,9 @@ public class SpecialFields {
+ "' conflicts with existing field
names.");
}
});
+ fieldsWithRowTracking.add(rowIdNullable ? ROW_ID.copy(true) : ROW_ID);
fieldsWithRowTracking.add(
- rowIdNullable ? SpecialFields.ROW_ID :
SpecialFields.ROW_ID.copy(false));
- fieldsWithRowTracking.add(
- sequenceNumberNullable
- ? SpecialFields.SEQUENCE_NUMBER.copy(true)
- : SpecialFields.SEQUENCE_NUMBER);
+ sequenceNumberNullable ? SEQUENCE_NUMBER.copy(true) :
SEQUENCE_NUMBER);
return new RowType(fieldsWithRowTracking);
}
diff --git
a/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionSplitRead.java
b/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionSplitRead.java
index fa11671dc6..fa04dbad59 100644
---
a/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionSplitRead.java
+++
b/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionSplitRead.java
@@ -165,7 +165,10 @@ public class DataEvolutionSplitRead implements
SplitRead<InternalRow> {
new Builder(
formatDiscover,
readRowType.getFields(),
- schema ->
rowTypeWithRowTracking(schema.logicalRowType(), true).getFields(),
+                        // the file has no row ID or sequence number; they are
in the manifest entry
+ schema ->
+
rowTypeWithRowTracking(schema.logicalRowType(), true, true)
+ .getFields(),
null,
null,
null,
diff --git
a/paimon-core/src/main/java/org/apache/paimon/operation/RawFileSplitRead.java
b/paimon-core/src/main/java/org/apache/paimon/operation/RawFileSplitRead.java
index 05ee3ba499..43ac7c152e 100644
---
a/paimon-core/src/main/java/org/apache/paimon/operation/RawFileSplitRead.java
+++
b/paimon-core/src/main/java/org/apache/paimon/operation/RawFileSplitRead.java
@@ -183,7 +183,9 @@ public class RawFileSplitRead implements
SplitRead<InternalRow> {
readRowType.getFields(),
schema -> {
if (rowTrackingEnabled) {
- return
rowTypeWithRowTracking(schema.logicalRowType(), true)
+                            // the file may have no row ID or sequence
+                            // number, but they are in the manifest entry
+ return
rowTypeWithRowTracking(schema.logicalRowType(), true, true)
.getFields();
}
return schema.fields();
diff --git
a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonSparkWriter.scala
b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonSparkWriter.scala
index 907fe62348..721c09e2d5 100644
---
a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonSparkWriter.scala
+++
b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonSparkWriter.scala
@@ -70,6 +70,11 @@ case class PaimonSparkWriter(
private val writeType = {
if (writeRowTracking) {
+      // The historical data and new data are processed separately.
+      // 1. The historical data contains a non-null RowId, but its
sequenceNumber may not have been generated yet;
+      //    it will be generated from the snapshot ID when committing.
(However, previously updated data already
+      //    includes the sequenceNumber value.)
+      // 2. The new data will be written to the branch without
writeRowTracking.
SpecialFields.rowTypeWithRowTracking(table.rowType(), false, true)
} else {
table.rowType()