This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new fc7a8fdddd [orc] Default value of 'orc.timestamp-ltz.legacy.type' should be true (#5547)
fc7a8fdddd is described below
commit fc7a8fddddad065cd78bbc112d46e3ef0cbc31e9
Author: Jingsong Lee <[email protected]>
AuthorDate: Mon Apr 28 19:01:51 2025 +0800
[orc] Default value of 'orc.timestamp-ltz.legacy.type' should be true (#5547)
---
docs/content/migration/upgrade-compatibility.md | 35 ----------------------
.../shortcodes/generated/orc_configuration.html | 2 +-
.../java/org/apache/paimon/format/OrcOptions.java | 2 +-
.../format/orc/reader/AbstractOrcColumnVector.java | 18 ++++++-----
.../format/orc/reader/OrcArrayColumnVector.java | 9 ++++--
.../format/orc/reader/OrcMapColumnVector.java | 12 ++++++--
.../format/orc/reader/OrcRowColumnVector.java | 11 +++++--
.../format/orc/writer/RowDataVectorizer.java | 4 ---
.../paimon/format/orc/OrcReaderFactoryTest.java | 9 +-----
.../paimon/format/orc/OrcWriterFactoryTest.java | 3 +-
10 files changed, 40 insertions(+), 65 deletions(-)
diff --git a/docs/content/migration/upgrade-compatibility.md b/docs/content/migration/upgrade-compatibility.md
deleted file mode 100644
index 4900415e39..0000000000
--- a/docs/content/migration/upgrade-compatibility.md
+++ /dev/null
@@ -1,35 +0,0 @@
----
-title: "Upgrade Compatibility"
-weight: 1
-type: docs
-aliases:
-- /migration/upgrade-compatibility.html
----
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied. See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-
-# Upgrade Compatibility
-
-This page will introduce the compatibility issues when upgrading Paimon to a newer version.
-
-
-| Compatibility Issue | Issue Link | Introduced Version | Affected Version | Affected Scope | Need Manual Fix | Fix Procedure |
-|-----------------------------------------------------------------------|----------------------------------------------|:------------------:|:----------------:|--------------------------------------------------------------|-----------------|------------------------------------------------------------------------------------------------------------|
-| Incompatible CommitMessage Serializer/Deserializer | https://github.com/apache/paimon/issues/3367 | < 0.8 | \>= 0.8.1 | Flink Engine, streaming mode | No | Paimon will automatically fallback to legacy serializer to resolve this issue. |
-| Fix the timezone conversion for timestamp_ltz data_type in Orc Format | https://github.com/apache/paimon/issues/5066 | < 1.1 | \>= 1.1 | Orc Format table including fields of timestamp_ltz data_type | Yes | When reading legacy orc format table, user should manually enable `orc.timestamp-ltz.legacy.type` as true. |
diff --git a/docs/layouts/shortcodes/generated/orc_configuration.html b/docs/layouts/shortcodes/generated/orc_configuration.html
index fab33e5963..e48edb4dd0 100644
--- a/docs/layouts/shortcodes/generated/orc_configuration.html
+++ b/docs/layouts/shortcodes/generated/orc_configuration.html
@@ -40,7 +40,7 @@ under the License.
</tr>
<tr>
<td><h5>orc.timestamp-ltz.legacy.type</h5></td>
- <td style="word-wrap: break-word;">false</td>
+ <td style="word-wrap: break-word;">true</td>
<td>Boolean</td>
<td>This option is used to be compatible with the paimon-orc‘s old behavior for the `timestamp_ltz` data type.</td>
</tr>
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/OrcOptions.java b/paimon-format/src/main/java/org/apache/paimon/format/OrcOptions.java
index e102543d6b..3cd95aea01 100644
--- a/paimon-format/src/main/java/org/apache/paimon/format/OrcOptions.java
+++ b/paimon-format/src/main/java/org/apache/paimon/format/OrcOptions.java
@@ -45,7 +45,7 @@ public class OrcOptions {
public static final ConfigOption<Boolean> ORC_TIMESTAMP_LTZ_LEGACY_TYPE =
key("orc.timestamp-ltz.legacy.type")
.booleanType()
- .defaultValue(false)
+ .defaultValue(true)
.withDescription(
"This option is used to be compatible with the
paimon-orc‘s old behavior for the `timestamp_ltz` data type.");
}
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java
index 93ae8a2aea..a77f4ebe3d 100644
--- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java
+++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java
@@ -60,11 +60,6 @@ public abstract class AbstractOrcColumnVector
return !vector.noNulls && vector.isNull[rowMapper(i)];
}
- public static org.apache.paimon.data.columnar.ColumnVector
createPaimonVector(
- ColumnVector vector, VectorizedRowBatch orcBatch, DataType
dataType) {
- return createPaimonVector(vector, orcBatch, dataType, false);
- }
-
public static org.apache.paimon.data.columnar.ColumnVector
createPaimonVector(
ColumnVector vector,
VectorizedRowBatch orcBatch,
@@ -86,12 +81,19 @@ public abstract class AbstractOrcColumnVector
return new OrcTimestampColumnVector(vector, orcBatch, dataType,
legacyTimestampLtzType);
} else if (vector instanceof ListColumnVector) {
return new OrcArrayColumnVector(
- (ListColumnVector) vector, orcBatch, (ArrayType) dataType);
+ (ListColumnVector) vector,
+ orcBatch,
+ (ArrayType) dataType,
+ legacyTimestampLtzType);
} else if (vector instanceof StructColumnVector) {
return new OrcRowColumnVector(
- (StructColumnVector) vector, orcBatch, (RowType) dataType);
+ (StructColumnVector) vector,
+ orcBatch,
+ (RowType) dataType,
+ legacyTimestampLtzType);
} else if (vector instanceof MapColumnVector) {
- return new OrcMapColumnVector((MapColumnVector) vector, orcBatch,
(MapType) dataType);
+ return new OrcMapColumnVector(
+ (MapColumnVector) vector, orcBatch, (MapType) dataType,
legacyTimestampLtzType);
} else {
throw new UnsupportedOperationException(
"Unsupported vector: " + vector.getClass().getName());
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java
index 25a1935f3e..6d9244fc75 100644
--- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java
+++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java
@@ -34,10 +34,15 @@ public class OrcArrayColumnVector extends AbstractOrcColumnVector
private final ColumnVector paimonVector;
public OrcArrayColumnVector(
- ListColumnVector hiveVector, VectorizedRowBatch orcBatch,
ArrayType type) {
+ ListColumnVector hiveVector,
+ VectorizedRowBatch orcBatch,
+ ArrayType type,
+ boolean legacyTimestampLtzType) {
super(hiveVector, orcBatch);
this.hiveVector = hiveVector;
- this.paimonVector = createPaimonVector(hiveVector.child, orcBatch,
type.getElementType());
+ this.paimonVector =
+ createPaimonVector(
+ hiveVector.child, orcBatch, type.getElementType(),
legacyTimestampLtzType);
}
@Override
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcMapColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcMapColumnVector.java
index 79d5ef8888..d6a3a5a142 100644
--- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcMapColumnVector.java
+++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcMapColumnVector.java
@@ -35,12 +35,18 @@ public class OrcMapColumnVector extends AbstractOrcColumnVector
private final ColumnVector valuePaimonVector;
public OrcMapColumnVector(
- MapColumnVector hiveVector, VectorizedRowBatch orcBatch, MapType
type) {
+ MapColumnVector hiveVector,
+ VectorizedRowBatch orcBatch,
+ MapType type,
+ boolean legacyTimestampLtzType) {
super(hiveVector, orcBatch);
this.hiveVector = hiveVector;
- this.keyPaimonVector = createPaimonVector(hiveVector.keys, orcBatch,
type.getKeyType());
+ this.keyPaimonVector =
+ createPaimonVector(
+ hiveVector.keys, orcBatch, type.getKeyType(),
legacyTimestampLtzType);
this.valuePaimonVector =
- createPaimonVector(hiveVector.values, orcBatch,
type.getValueType());
+ createPaimonVector(
+ hiveVector.values, orcBatch, type.getValueType(),
legacyTimestampLtzType);
}
@Override
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java
index 6c73c9fdbe..2468f4794c 100644
--- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java
+++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java
@@ -33,13 +33,20 @@ public class OrcRowColumnVector extends AbstractOrcColumnVector
private final VectorizedColumnBatch batch;
public OrcRowColumnVector(
- StructColumnVector hiveVector, VectorizedRowBatch orcBatch,
RowType type) {
+ StructColumnVector hiveVector,
+ VectorizedRowBatch orcBatch,
+ RowType type,
+ boolean legacyTimestampLtzType) {
super(hiveVector, orcBatch);
int len = hiveVector.fields.length;
ColumnVector[] paimonVectors = new ColumnVector[len];
for (int i = 0; i < len; i++) {
paimonVectors[i] =
- createPaimonVector(hiveVector.fields[i], orcBatch,
type.getTypeAt(i));
+ createPaimonVector(
+ hiveVector.fields[i],
+ orcBatch,
+ type.getTypeAt(i),
+ legacyTimestampLtzType);
}
this.batch = new VectorizedColumnBatch(paimonVectors);
}
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/writer/RowDataVectorizer.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/writer/RowDataVectorizer.java
index 47c448c17f..51c7acf170 100644
--- a/paimon-format/src/main/java/org/apache/paimon/format/orc/writer/RowDataVectorizer.java
+++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/writer/RowDataVectorizer.java
@@ -34,10 +34,6 @@ public class RowDataVectorizer extends Vectorizer<InternalRow> {
private final List<FieldWriter> fieldWriters;
- public RowDataVectorizer(TypeDescription schema, DataType[] fieldTypes) {
- this(schema, fieldTypes, false);
- }
-
public RowDataVectorizer(
TypeDescription schema, DataType[] fieldTypes, boolean
legacyTimestampLtzType) {
super(schema);
diff --git
a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcReaderFactoryTest.java
b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcReaderFactoryTest.java
index 87f7c8839a..237066943b 100644
---
a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcReaderFactoryTest.java
+++
b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcReaderFactoryTest.java
@@ -279,14 +279,7 @@ class OrcReaderFactoryTest {
conjunctPredicates,
BATCH_SIZE,
false,
- false);
- }
-
- private RecordReader<InternalRow> createReader(OrcReaderFactory format,
Path split)
- throws IOException {
- LocalFileIO fileIO = new LocalFileIO();
- return format.createReader(
- new FormatReaderContext(fileIO, split,
fileIO.getFileSize(split)));
+ true);
}
private void forEach(OrcReaderFactory format, Path file,
Consumer<InternalRow> action)
diff --git a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcWriterFactoryTest.java b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcWriterFactoryTest.java
index 52df5afb4e..6487436431 100644
--- a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcWriterFactoryTest.java
+++ b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcWriterFactoryTest.java
@@ -49,7 +49,8 @@ class OrcWriterFactoryTest {
new TestOrcWriterFactory(
new RowDataVectorizer(
TypeDescription.fromString("struct<_col0:string,_col1:int>"),
- new DataType[] {DataTypes.STRING(),
DataTypes.INT()}),
+ new DataType[] {DataTypes.STRING(),
DataTypes.INT()},
+ true),
memoryManager);
factory.create(new
LocalPositionOutputStream(tmpDir.resolve("file1").toFile()), "LZ4");
factory.create(new
LocalPositionOutputStream(tmpDir.resolve("file2").toFile()), "LZ4");