This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git
The following commit(s) were added to refs/heads/master by this push:
new e7d3da5a0 [hive] hivesql query paimon external table can work after
schema evolution. (#2980)
e7d3da5a0 is described below
commit e7d3da5a08d9c4a6cbfe58c344a0cea30a905597
Author: Kerwin <[email protected]>
AuthorDate: Sat Mar 9 10:40:57 2024 +0800
[hive] hivesql query paimon external table can work after schema evolution.
(#2980)
---
.../hive/SearchArgumentToPredicateConverter.java | 8 +--
.../org/apache/paimon/hive/HiveReadITCase.java | 76 +++++++++++++++++++++-
2 files changed, 77 insertions(+), 7 deletions(-)
diff --git
a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/SearchArgumentToPredicateConverter.java
b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/SearchArgumentToPredicateConverter.java
index 2c95bb0c2..0963a6493 100644
---
a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/SearchArgumentToPredicateConverter.java
+++
b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/SearchArgumentToPredicateConverter.java
@@ -49,7 +49,7 @@ public class SearchArgumentToPredicateConverter {
private final ExpressionTree root;
private final List<PredicateLeaf> leaves;
- private final List<String> columnNames;
+ private final List<String> hiveColumnNames;
private final List<DataType> columnTypes;
@Nullable private final Set<String> readColumnNames;
private final PredicateBuilder builder;
@@ -61,7 +61,7 @@ public class SearchArgumentToPredicateConverter {
@Nullable Set<String> readColumnNames) {
this.root = searchArgument.getExpression();
this.leaves = searchArgument.getLeaves();
- this.columnNames =
+ this.hiveColumnNames =
columnNames.stream().map(String::toLowerCase).collect(Collectors.toList());
this.columnTypes = columnTypes;
if (readColumnNames != null) {
@@ -74,7 +74,7 @@ public class SearchArgumentToPredicateConverter {
new PredicateBuilder(
RowType.of(
this.columnTypes.toArray(new DataType[0]),
- this.columnNames.toArray(new String[0])));
+ columnNames.toArray(new String[0])));
}
public Optional<Predicate> convert() {
@@ -140,7 +140,7 @@ public class SearchArgumentToPredicateConverter {
+ " is a partition column.");
}
- int idx = columnNames.indexOf(columnName);
+ int idx = hiveColumnNames.indexOf(columnName);
Preconditions.checkArgument(idx >= 0, "Column " + columnName + " not found.");
DataType columnType = columnTypes.get(idx);
switch (leaf.getOperator()) {
diff --git
a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveReadITCase.java
b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveReadITCase.java
index d6c41cd5a..87cbdf485 100644
---
a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveReadITCase.java
+++
b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveReadITCase.java
@@ -18,16 +18,28 @@
package org.apache.paimon.hive;
+import org.apache.paimon.CoreOptions;
import org.apache.paimon.catalog.AbstractCatalog;
import org.apache.paimon.catalog.Identifier;
+import org.apache.paimon.data.BinaryString;
+import org.apache.paimon.data.GenericRow;
+import org.apache.paimon.data.InternalRow;
import org.apache.paimon.fs.Path;
import org.apache.paimon.fs.local.LocalFileIO;
import org.apache.paimon.hive.mapred.PaimonInputFormat;
import org.apache.paimon.hive.mapred.PaimonRecordReader;
+import org.apache.paimon.options.CatalogOptions;
+import org.apache.paimon.options.Options;
import org.apache.paimon.schema.Schema;
+import org.apache.paimon.schema.SchemaChange;
import org.apache.paimon.schema.SchemaManager;
+import org.apache.paimon.table.Table;
+import org.apache.paimon.table.sink.StreamTableCommit;
+import org.apache.paimon.table.sink.StreamTableWrite;
+import org.apache.paimon.table.sink.StreamWriteBuilder;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.DataTypes;
+import org.apache.paimon.types.RowType;
import org.apache.paimon.shade.guava30.com.google.common.collect.Lists;
import org.apache.paimon.shade.guava30.com.google.common.collect.Maps;
@@ -45,7 +57,7 @@ import static org.assertj.core.api.Assertions.assertThatCode;
public class HiveReadITCase extends HiveTestBase {
@Test
- public void testReExternalTableWithIgnoreCase() throws Exception {
+ public void testReadExternalTableWithEmptyDataAndIgnoreCase() throws
Exception {
// Create hive external table with paimon table
String tableName = "with_ignore_case";
@@ -72,7 +84,6 @@ public class HiveReadITCase extends HiveTestBase {
"STORED BY '" +
PaimonStorageHandler.class.getName() + "'",
"LOCATION '" + tablePath.toUri().toString() +
"'"));
assertThatCode(() ->
hiveShell.execute(hiveSql)).doesNotThrowAnyException();
-
List<String> result = hiveShell.executeQuery("SHOW CREATE TABLE " +
tableName);
assertThat(result)
.containsAnyOf(
@@ -91,7 +102,6 @@ public class HiveReadITCase extends HiveTestBase {
assertThat(result).containsExactly("Hello", "Paimon");
result = hiveShell.executeQuery("SELECT Col2 FROM " + tableName);
assertThat(result).containsExactly("Hello", "Paimon");
-
result = hiveShell.executeQuery("SELECT * FROM " + tableName + " WHERE col2 = 'Hello'");
assertThat(result).containsExactly("1\tHello");
result =
@@ -99,4 +109,64 @@ public class HiveReadITCase extends HiveTestBase {
"SELECT * FROM " + tableName + " WHERE Col2 in ('Hello', 'Paimon')");
assertThat(result).containsExactly("1\tHello", "2\tPaimon");
}
+
+ @Test
+ public void testReadExternalTableWithDataAndIgnoreCase() throws Exception {
+ // Create hive external table with paimon table
+ String tableName = "with_data_and_ignore_case";
+
+ // Create a paimon table
+ Identifier identifier = Identifier.create(DATABASE_TEST, tableName);
+
+ Options conf = new Options();
+ conf.set(CatalogOptions.WAREHOUSE, path);
+ conf.set(CoreOptions.BUCKET, 2);
+ conf.set(CoreOptions.FILE_FORMAT, CoreOptions.FileFormatType.AVRO);
+ RowType.Builder rowType = RowType.builder();
+ rowType.field("col1", DataTypes.INT());
+ rowType.field("Col2", DataTypes.STRING());
+
+ Table table =
+ FileStoreTestUtils.createFileStoreTable(
+ conf,
+ rowType.build(),
+ Collections.emptyList(),
+ Collections.emptyList(),
+ identifier);
+
+ // insert data into paimon table, make sure has some data file use older schema file.
+ List<InternalRow> data =
+ Arrays.asList(
+ GenericRow.of(1, BinaryString.fromString("Hello")),
+ GenericRow.of(2, BinaryString.fromString("Paimon")));
+
+ StreamWriteBuilder streamWriteBuilder = table.newStreamWriteBuilder();
+ StreamTableWrite write = streamWriteBuilder.newWrite();
+ StreamTableCommit commit = streamWriteBuilder.newCommit();
+ for (InternalRow rowData : data) {
+ write.write(rowData);
+ }
+ commit.commit(0, write.prepareCommit(true, 0));
+ write.close();
+ commit.close();
+
+ // add column, do some ddl which will generate a new version schema-n file.
+ Path tablePath = AbstractCatalog.newTableLocation(path, identifier);
+ SchemaManager schemaManager = new SchemaManager(LocalFileIO.create(),
tablePath);
+ schemaManager.commitChanges(SchemaChange.addColumn("N1",
DataTypes.STRING()));
+
+ // Create hive external table
+ String hiveSql =
+ String.join(
+ "\n",
+ Arrays.asList(
+ "CREATE EXTERNAL TABLE " + tableName + " ",
+ "STORED BY '" +
PaimonStorageHandler.class.getName() + "'",
+ "LOCATION '" + tablePath.toUri().toString() +
"'"));
+ assertThatCode(() ->
hiveShell.execute(hiveSql)).doesNotThrowAnyException();
+
+ List<String> result =
+ hiveShell.executeQuery("SELECT * FROM " + tableName + " WHERE col2 is not null");
+ assertThat(result).containsExactly("1\tHello\tNULL",
"2\tPaimon\tNULL");
+ }
}