This is an automated email from the ASF dual-hosted git repository.
mchades pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 6b3312a01 [#4193] Improvement(catalogs-lakehouse-paimon): Support
paimon CharType and VarBinaryType (#4194)
6b3312a01 is described below
commit 6b3312a017cbe3282578ad454caf1cb65fa6eb02
Author: cai can <[email protected]>
AuthorDate: Tue Jul 23 16:31:57 2024 +0800
[#4193] Improvement(catalogs-lakehouse-paimon): Support paimon CharType and
VarBinaryType (#4194)
### What changes were proposed in this pull request?
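Support Paimon `CharType` and `VarBinaryType`: Paimon `CharType` now maps to Gravitino `FixedCharType`, Paimon `VarBinaryType` maps to Gravitino `BinaryType`, and Paimon `BinaryType` now maps to Gravitino `FixedType` (previously `BinaryType`).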
### Why are the changes needed?
Fix: https://github.com/apache/gravitino/issues/4193
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing unit tests (`TestTypeUtils`), updated to cover the new type mappings.
---------
Co-authored-by: caican <[email protected]>
---
.../catalog/lakehouse/paimon/utils/TypeUtils.java | 91 +++++++++++++---------
.../lakehouse/paimon/utils/TestTypeUtils.java | 64 ++++++++-------
2 files changed, 88 insertions(+), 67 deletions(-)
diff --git
a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/TypeUtils.java
b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/TypeUtils.java
index 41a1f066d..0816cb5fd 100644
---
a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/TypeUtils.java
+++
b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/TypeUtils.java
@@ -25,6 +25,7 @@ import org.apache.paimon.types.ArrayType;
import org.apache.paimon.types.BigIntType;
import org.apache.paimon.types.BinaryType;
import org.apache.paimon.types.BooleanType;
+import org.apache.paimon.types.CharType;
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.DataTypeDefaultVisitor;
import org.apache.paimon.types.DataTypes;
@@ -41,6 +42,7 @@ import org.apache.paimon.types.SmallIntType;
import org.apache.paimon.types.TimeType;
import org.apache.paimon.types.TimestampType;
import org.apache.paimon.types.TinyIntType;
+import org.apache.paimon.types.VarBinaryType;
import org.apache.paimon.types.VarCharType;
// Referred to org/apache/paimon/spark/SparkTypeUtils.java
@@ -73,30 +75,11 @@ public class TypeUtils {
private static final PaimonToGravitinoTypeVisitor INSTANCE = new
PaimonToGravitinoTypeVisitor();
- @Override
- public Type visit(VarCharType varCharType) {
- if (varCharType.getLength() == Integer.MAX_VALUE) {
- return Types.StringType.get();
- } else {
- return Types.VarCharType.of(varCharType.getLength());
- }
- }
-
@Override
public Type visit(BooleanType booleanType) {
return Types.BooleanType.get();
}
- @Override
- public Type visit(BinaryType binaryType) {
- return Types.BinaryType.get();
- }
-
- @Override
- public Type visit(DecimalType decimalType) {
- return Types.DecimalType.of(decimalType.getPrecision(),
decimalType.getScale());
- }
-
@Override
public Type visit(TinyIntType tinyIntType) {
return Types.ByteType.get();
@@ -127,6 +110,11 @@ public class TypeUtils {
return Types.DoubleType.get();
}
+ @Override
+ public Type visit(DecimalType decimalType) {
+ return Types.DecimalType.of(decimalType.getPrecision(),
decimalType.getScale());
+ }
+
@Override
public Type visit(DateType dateType) {
return Types.DateType.get();
@@ -148,21 +136,33 @@ public class TypeUtils {
}
@Override
- public Type visit(ArrayType arrayType) {
- return Types.ListType.of(
- arrayType.getElementType().accept(this),
arrayType.getElementType().isNullable());
+ public Type visit(BinaryType binaryType) {
+ return Types.FixedType.of(binaryType.getLength());
}
@Override
- public Type visit(MultisetType multisetType) {
- // Unlike a Java Set, MultisetType allows for multiple instances for
each of its
- // elements with a common subtype. And a conversion is possible through
a map
- // that assigns each value to an integer to represent the multiplicity
of the values.
- // For example, a `MULTISET<INT>` is converted to a `MAP<Integer,
Integer>`, the key of the
- // map represents the elements of the Multiset and the value represents
the multiplicity of
- // the elements in the Multiset.
- return Types.MapType.of(
- multisetType.getElementType().accept(this), Types.IntegerType.get(),
false);
+ public Type visit(VarBinaryType varBinaryType) {
+ return Types.BinaryType.get();
+ }
+
+ @Override
+ public Type visit(VarCharType varCharType) {
+ if (varCharType.getLength() == Integer.MAX_VALUE) {
+ return Types.StringType.get();
+ } else {
+ return Types.VarCharType.of(varCharType.getLength());
+ }
+ }
+
+ @Override
+ public Type visit(CharType charType) {
+ return Types.FixedCharType.of(charType.getLength());
+ }
+
+ @Override
+ public Type visit(ArrayType arrayType) {
+ return Types.ListType.of(
+ arrayType.getElementType().accept(this),
arrayType.getElementType().isNullable());
}
@Override
@@ -187,6 +187,18 @@ public class TypeUtils {
.toArray(Types.StructType.Field[]::new));
}
+ @Override
+ public Type visit(MultisetType multisetType) {
+ // Unlike a Java Set, MultisetType allows for multiple instances for
each of its
+ // elements with a common subtype. And a conversion is possible through
a map
+ // that assigns each value to an integer to represent the multiplicity
of the values.
+ // For example, a `MULTISET<INT>` is converted to a `MAP<Integer,
Integer>`, the key of the
+ // map represents the elements of the Multiset and the value represents
the multiplicity of
+ // the elements in the Multiset.
+ return Types.MapType.of(
+ multisetType.getElementType().accept(this), Types.IntegerType.get(),
false);
+ }
+
@Override
protected Type defaultMethod(DataType dataType) {
return Types.UnparsedType.of(dataType.asSQLString());
@@ -214,12 +226,6 @@ public class TypeUtils {
case DECIMAL:
Types.DecimalType decimalType = (Types.DecimalType) type;
return DataTypes.DECIMAL(decimalType.precision(),
decimalType.scale());
- case BINARY:
- return DataTypes.BINARY(BinaryType.MAX_LENGTH);
- case STRING:
- return DataTypes.STRING();
- case VARCHAR:
- return DataTypes.VARCHAR(((Types.VarCharType) type).length());
case DATE:
return DataTypes.DATE();
case TIME:
@@ -228,6 +234,17 @@ public class TypeUtils {
return ((Types.TimestampType) type).hasTimeZone()
? DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE()
: DataTypes.TIMESTAMP();
+ case STRING:
+ return DataTypes.STRING();
+ case VARCHAR:
+ return DataTypes.VARCHAR(((Types.VarCharType) type).length());
+ case FIXEDCHAR:
+ return DataTypes.CHAR(((Types.FixedCharType) type).length());
+ case FIXED:
+ Types.FixedType fixedType = (Types.FixedType) type;
+ return DataTypes.BINARY(fixedType.length());
+ case BINARY:
+ return DataTypes.VARBINARY(VarBinaryType.MAX_LENGTH);
case LIST:
Types.ListType listType = (Types.ListType) type;
return DataTypes.ARRAY(visit(listType.elementType()));
diff --git
a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/TestTypeUtils.java
b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/TestTypeUtils.java
index 001c37cd8..d5e959d34 100644
---
a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/TestTypeUtils.java
+++
b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/TestTypeUtils.java
@@ -51,23 +51,25 @@ public class TestTypeUtils {
RowType rowType =
RowType.builder()
.fields(
- DataTypes.VARCHAR(10),
- DataTypes.STRING(),
DataTypes.BOOLEAN(),
- DataTypes.BINARY(BinaryType.MAX_LENGTH),
- DataTypes.DECIMAL(8, 3),
DataTypes.TINYINT(),
DataTypes.SMALLINT(),
DataTypes.INT(),
DataTypes.BIGINT(),
DataTypes.FLOAT(),
DataTypes.DOUBLE(),
+ DataTypes.DECIMAL(8, 3),
DataTypes.DATE(),
DataTypes.TIME(),
DataTypes.TIMESTAMP(),
DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(),
- DataTypes.MAP(DataTypes.INT(), DataTypes.STRING()),
- DataTypes.ARRAY(DataTypes.INT()))
+ DataTypes.STRING(),
+ DataTypes.VARCHAR(10),
+ DataTypes.CHAR(10),
+ DataTypes.BINARY(BinaryType.MAX_LENGTH),
+ DataTypes.VARBINARY(VarBinaryType.MAX_LENGTH),
+ DataTypes.ARRAY(DataTypes.INT()),
+ DataTypes.MAP(DataTypes.INT(), DataTypes.STRING()))
.build();
Type gravitinoDataType = toGravitinoDataType(rowType);
@@ -91,9 +93,7 @@ public class TestTypeUtils {
Arrays.asList(
Types.IntervalYearType.get(),
Types.IntervalDayType.get(),
- Types.FixedCharType.of(10),
Types.UUIDType.get(),
- Types.FixedType.of(20),
Types.UnionType.of(Types.IntegerType.get()),
Types.NullType.get(),
Types.UnparsedType.of("unparsed"))
@@ -102,30 +102,8 @@ public class TestTypeUtils {
private Type toGravitinoDataType(DataType dataType) {
switch (dataType.getTypeRoot()) {
- case VARCHAR:
- if (((VarCharType) dataType).getLength() == Integer.MAX_VALUE) {
- return checkDataType(dataType, Name.STRING);
- } else {
- return checkDataType(
- dataType,
- Name.VARCHAR,
- type ->
- assertEquals(
- ((VarCharType) dataType).getLength(),
((Types.VarCharType) type).length()));
- }
case BOOLEAN:
return checkDataType(dataType, Name.BOOLEAN);
- case BINARY:
- return checkDataType(dataType, Name.BINARY);
- case DECIMAL:
- return checkDataType(
- dataType,
- Name.DECIMAL,
- type -> {
- assertEquals(
- ((DecimalType) dataType).getPrecision(),
((Types.DecimalType) type).precision());
- assertEquals(((DecimalType) dataType).getScale(),
((Types.DecimalType) type).scale());
- });
case TINYINT:
return checkDataType(dataType, Name.BYTE);
case SMALLINT:
@@ -138,6 +116,15 @@ public class TestTypeUtils {
return checkDataType(dataType, Name.FLOAT);
case DOUBLE:
return checkDataType(dataType, Name.DOUBLE);
+ case DECIMAL:
+ return checkDataType(
+ dataType,
+ Name.DECIMAL,
+ type -> {
+ assertEquals(
+ ((DecimalType) dataType).getPrecision(),
((Types.DecimalType) type).precision());
+ assertEquals(((DecimalType) dataType).getScale(),
((Types.DecimalType) type).scale());
+ });
case DATE:
return checkDataType(dataType, Name.DATE);
case TIME_WITHOUT_TIME_ZONE:
@@ -152,6 +139,23 @@ public class TestTypeUtils {
dataType,
Name.TIMESTAMP,
type -> assertTrue(((Types.TimestampType) type).hasTimeZone()));
+ case VARCHAR:
+ if (((VarCharType) dataType).getLength() == Integer.MAX_VALUE) {
+ return checkDataType(dataType, Name.STRING);
+ } else {
+ return checkDataType(
+ dataType,
+ Name.VARCHAR,
+ type ->
+ assertEquals(
+ ((VarCharType) dataType).getLength(),
((Types.VarCharType) type).length()));
+ }
+ case CHAR:
+ return checkDataType(dataType, Name.FIXEDCHAR);
+ case BINARY:
+ return checkDataType(dataType, Name.FIXED);
+ case VARBINARY:
+ return checkDataType(dataType, Name.BINARY);
case ARRAY:
return checkDataType(
dataType,