This is an automated email from the ASF dual-hosted git repository. lzljs3620320 pushed a commit to branch release-1.1 in repository https://gitbox.apache.org/repos/asf/paimon.git
commit 4e42c061cb53a6c75d6e6d0b2bdf9ed7d6082b93 Author: Jingsong Lee <[email protected]> AuthorDate: Mon Apr 28 19:01:51 2025 +0800 [orc] Default value of 'orc.timestamp-ltz.legacy.type' should be true (#5547) --- docs/content/migration/upgrade-compatibility.md | 35 ---------------------- .../shortcodes/generated/orc_configuration.html | 2 +- .../java/org/apache/paimon/format/OrcOptions.java | 2 +- .../format/orc/reader/AbstractOrcColumnVector.java | 18 ++++++----- .../format/orc/reader/OrcArrayColumnVector.java | 9 ++++-- .../format/orc/reader/OrcMapColumnVector.java | 12 ++++++-- .../format/orc/reader/OrcRowColumnVector.java | 11 +++++-- .../format/orc/writer/RowDataVectorizer.java | 4 --- .../paimon/format/orc/OrcReaderFactoryTest.java | 9 +----- .../paimon/format/orc/OrcWriterFactoryTest.java | 3 +- 10 files changed, 40 insertions(+), 65 deletions(-) diff --git a/docs/content/migration/upgrade-compatibility.md b/docs/content/migration/upgrade-compatibility.md deleted file mode 100644 index 4900415e39..0000000000 --- a/docs/content/migration/upgrade-compatibility.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: "Upgrade Compatibility" -weight: 1 -type: docs -aliases: -- /migration/upgrade-compatibility.html ---- -<!-- -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. See the License for the -specific language governing permissions and limitations -under the License. ---> - -# Upgrade Compatibility - -This page will introduce the compatibility issues when upgrading Paimon to a newer version. - - -| Compatibility Issue | Issue Link | Introduced Version | Affected Version | Affected Scope | Need Manual Fix | Fix Procedure | -|-----------------------------------------------------------------------|----------------------------------------------|:------------------:|:----------------:|--------------------------------------------------------------|-----------------|------------------------------------------------------------------------------------------------------------| -| Incompatible CommitMessage Serializer/Deserializer | https://github.com/apache/paimon/issues/3367 | < 0.8 | \>= 0.8.1 | Flink Engine, streaming mode | No | Paimon will automatically fallback to legacy serializer to resolve this issue. | -| Fix the timezone conversion for timestamp_ltz data_type in Orc Format | https://github.com/apache/paimon/issues/5066 | < 1.1 | \>= 1.1 | Orc Format table including fields of timestamp_ltz data_type | Yes | When reading legacy orc format table, user should manually enable `orc.timestamp-ltz.legacy.type` as true. | diff --git a/docs/layouts/shortcodes/generated/orc_configuration.html b/docs/layouts/shortcodes/generated/orc_configuration.html index fab33e5963..e48edb4dd0 100644 --- a/docs/layouts/shortcodes/generated/orc_configuration.html +++ b/docs/layouts/shortcodes/generated/orc_configuration.html @@ -40,7 +40,7 @@ under the License. </tr> <tr> <td><h5>orc.timestamp-ltz.legacy.type</h5></td> - <td style="word-wrap: break-word;">false</td> + <td style="word-wrap: break-word;">true</td> <td>Boolean</td> <td>This option is used to be compatible with the paimon-orc‘s old behavior for the `timestamp_ltz` data type.</td> </tr> diff --git a/paimon-format/src/main/java/org/apache/paimon/format/OrcOptions.java b/paimon-format/src/main/java/org/apache/paimon/format/OrcOptions.java index e102543d6b..3cd95aea01 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/OrcOptions.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/OrcOptions.java @@ -45,7 +45,7 @@ public class OrcOptions { public static final ConfigOption<Boolean> ORC_TIMESTAMP_LTZ_LEGACY_TYPE = key("orc.timestamp-ltz.legacy.type") .booleanType() - .defaultValue(false) + .defaultValue(true) .withDescription( "This option is used to be compatible with the paimon-orc‘s old behavior for the `timestamp_ltz` data type."); } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java index 93ae8a2aea..a77f4ebe3d 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java @@ -60,11 +60,6 @@ public abstract class AbstractOrcColumnVector return !vector.noNulls && vector.isNull[rowMapper(i)]; } - public static org.apache.paimon.data.columnar.ColumnVector createPaimonVector( - ColumnVector vector, VectorizedRowBatch orcBatch, DataType dataType) { - return createPaimonVector(vector, orcBatch, dataType, false); - } - public static org.apache.paimon.data.columnar.ColumnVector createPaimonVector( ColumnVector vector, VectorizedRowBatch orcBatch, @@ -86,12 +81,19 @@ public abstract class AbstractOrcColumnVector return new OrcTimestampColumnVector(vector, orcBatch, dataType, legacyTimestampLtzType); } else if (vector instanceof ListColumnVector) { return new OrcArrayColumnVector( - (ListColumnVector) vector, orcBatch, (ArrayType) dataType); + (ListColumnVector) vector, + orcBatch, + (ArrayType) dataType, + legacyTimestampLtzType); } else if (vector instanceof StructColumnVector) { return new OrcRowColumnVector( - (StructColumnVector) vector, orcBatch, (RowType) dataType); + (StructColumnVector) vector, + orcBatch, + (RowType) dataType, + legacyTimestampLtzType); } else if (vector instanceof MapColumnVector) { - return new OrcMapColumnVector((MapColumnVector) vector, orcBatch, (MapType) dataType); + return new OrcMapColumnVector( + (MapColumnVector) vector, orcBatch, (MapType) dataType, legacyTimestampLtzType); } else { throw new UnsupportedOperationException( "Unsupported vector: " + vector.getClass().getName()); diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java index 25a1935f3e..6d9244fc75 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java @@ -34,10 +34,15 @@ public class OrcArrayColumnVector extends AbstractOrcColumnVector private final ColumnVector paimonVector; public OrcArrayColumnVector( - ListColumnVector hiveVector, VectorizedRowBatch orcBatch, ArrayType type) { + ListColumnVector hiveVector, + VectorizedRowBatch orcBatch, + ArrayType type, + boolean legacyTimestampLtzType) { super(hiveVector, orcBatch); this.hiveVector = hiveVector; - this.paimonVector = createPaimonVector(hiveVector.child, orcBatch, type.getElementType()); + this.paimonVector = + createPaimonVector( + hiveVector.child, orcBatch, type.getElementType(), legacyTimestampLtzType); } @Override diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcMapColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcMapColumnVector.java index 79d5ef8888..d6a3a5a142 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcMapColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcMapColumnVector.java @@ -35,12 +35,18 @@ public class OrcMapColumnVector extends AbstractOrcColumnVector private final ColumnVector valuePaimonVector; public OrcMapColumnVector( - MapColumnVector hiveVector, VectorizedRowBatch orcBatch, MapType type) { + MapColumnVector hiveVector, + VectorizedRowBatch orcBatch, + MapType type, + boolean legacyTimestampLtzType) { super(hiveVector, orcBatch); this.hiveVector = hiveVector; - this.keyPaimonVector = createPaimonVector(hiveVector.keys, orcBatch, type.getKeyType()); + this.keyPaimonVector = + createPaimonVector( + hiveVector.keys, orcBatch, type.getKeyType(), legacyTimestampLtzType); this.valuePaimonVector = - createPaimonVector(hiveVector.values, orcBatch, type.getValueType()); + createPaimonVector( + hiveVector.values, orcBatch, type.getValueType(), legacyTimestampLtzType); } @Override diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java index 6c73c9fdbe..2468f4794c 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java @@ -33,13 +33,20 @@ public class OrcRowColumnVector extends AbstractOrcColumnVector private final VectorizedColumnBatch batch; public OrcRowColumnVector( - StructColumnVector hiveVector, VectorizedRowBatch orcBatch, RowType type) { + StructColumnVector hiveVector, + VectorizedRowBatch orcBatch, + RowType type, + boolean legacyTimestampLtzType) { super(hiveVector, orcBatch); int len = hiveVector.fields.length; ColumnVector[] paimonVectors = new ColumnVector[len]; for (int i = 0; i < len; i++) { paimonVectors[i] = - createPaimonVector(hiveVector.fields[i], orcBatch, type.getTypeAt(i)); + createPaimonVector( + hiveVector.fields[i], + orcBatch, + type.getTypeAt(i), + legacyTimestampLtzType); } this.batch = new VectorizedColumnBatch(paimonVectors); } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/writer/RowDataVectorizer.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/writer/RowDataVectorizer.java index 47c448c17f..51c7acf170 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/writer/RowDataVectorizer.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/writer/RowDataVectorizer.java @@ -34,10 +34,6 @@ public class RowDataVectorizer extends Vectorizer<InternalRow> { private final List<FieldWriter> fieldWriters; - public RowDataVectorizer(TypeDescription schema, DataType[] fieldTypes) { - this(schema, fieldTypes, false); - } - public RowDataVectorizer( TypeDescription schema, DataType[] fieldTypes, boolean legacyTimestampLtzType) { super(schema); diff --git a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcReaderFactoryTest.java b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcReaderFactoryTest.java index 87f7c8839a..237066943b 100644 --- a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcReaderFactoryTest.java +++ b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcReaderFactoryTest.java @@ -279,14 +279,7 @@ class OrcReaderFactoryTest { conjunctPredicates, BATCH_SIZE, false, - false); - } - - private RecordReader<InternalRow> createReader(OrcReaderFactory format, Path split) - throws IOException { - LocalFileIO fileIO = new LocalFileIO(); - return format.createReader( - new FormatReaderContext(fileIO, split, fileIO.getFileSize(split))); + true); } private void forEach(OrcReaderFactory format, Path file, Consumer<InternalRow> action) diff --git a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcWriterFactoryTest.java b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcWriterFactoryTest.java index 52df5afb4e..6487436431 100644 --- a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcWriterFactoryTest.java +++ b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcWriterFactoryTest.java @@ -49,7 +49,8 @@ class OrcWriterFactoryTest { new TestOrcWriterFactory( new RowDataVectorizer( TypeDescription.fromString("struct<_col0:string,_col1:int>"), - new DataType[] {DataTypes.STRING(), DataTypes.INT()}), + new DataType[] {DataTypes.STRING(), DataTypes.INT()}, + true), memoryManager); factory.create(new LocalPositionOutputStream(tmpDir.resolve("file1").toFile()), "LZ4"); factory.create(new LocalPositionOutputStream(tmpDir.resolve("file2").toFile()), "LZ4");
