This is an automated email from the ASF dual-hosted git repository.
yuqi4733 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 286b9521f0 [#8962] improvement(lance): supports more dataTypes for the
Lance rest server (#9195)
286b9521f0 is described below
commit 286b9521f0485babb09121eb2bc975dba575f4c6
Author: Junda Yang <[email protected]>
AuthorDate: Thu Nov 20 17:09:24 2025 -0800
[#8962] improvement(lance): supports more dataTypes for the Lance rest
server (#9195)
### What changes were proposed in this pull request?
This PR significantly enhances the Lance REST server's data type support
by refactoring and expanding the data type converter:
1. Moved and Enhanced Data Type Converter:
- Relocated LanceDataTypeConverter from catalog-generic-lakehouse to
lance-common module for better reusability
- Expanded support from basic types to comprehensive type coverage
including:
- Complex/Nested Types: LIST, STRUCT, MAP, UNION
- Interval Types: INTERVAL_YEAR, INTERVAL_DAY
- Special Types: NULL, EXTERNAL
- Enhanced Types: Better handling of DECIMAL, TIMESTAMP (with
precision), and FIXED types
2. Refactored Related Components:
- Updated LanceCatalogOperations to use the new centralized converter
with simplified convertColumnsToArrowSchema() method
- Streamlined GravitinoLanceNamespaceWrapper (reduced from 184 to 40
lines)
- Cleaned up unused utilities: removed ArrowRecordBatchList and
TestArrowIPC
3. Added Comprehensive Tests:
- TestLanceDataTypeConverter: 473 lines of tests covering all supported
data types and bidirectional conversions (Gravitino ↔ Arrow)
- TestGravitinoLanceNamespaceWrapper: Tests for namespace operations
### Why are the changes needed?
The original LanceDataTypeConverter only supported basic primitive types
(integer, string, etc.), which severely limited the Lance REST server's
ability to handle real-world data schemas. Complex nested types
(structs, maps, lists) and specialized types (intervals, unions) were
unsupported, making the Lance catalog unsuitable for many production use
cases. By centralizing and expanding the converter, Lance can now handle
sophisticated data schemas comparable to Iceberg and other modern
lakehouse formats.
Fix: #8962
### Does this PR introduce _any_ user-facing change?
Yes. Users can now create Lance tables with complex nested data types
(structs, maps, lists, unions) and additional types (intervals, nulls)
that were previously unsupported. Existing basic type support remains
unchanged.
### How was this patch tested?
- TestLanceDataTypeConverter: Comprehensive unit tests for all supported
data types including complex nested structures and edge cases
- TestGravitinoLanceNamespaceWrapper: Tests for namespace-level
operations
- Existing integration tests (CatalogGenericLakehouseLanceIT) continue
to pass with the refactored converter
---------
Co-authored-by: mchades <[email protected]>
---
.../catalog-generic-lakehouse/build.gradle.kts | 1 +
.../lakehouse/lance/LanceCatalogOperations.java | 38 +-
.../lakehouse/lance/LanceDataTypeConverter.java | 192 ---------
lance/lance-common/build.gradle.kts | 1 +
.../lance/common/ops/LanceNamespaceOperations.java | 3 +-
.../common/ops/arrow/ArrowRecordBatchList.java | 40 --
.../gravitino/GravitinoLanceNamespaceWrapper.java | 224 ++--------
.../ops/gravitino/LanceDataTypeConverter.java | 326 ++++++++++++++
.../gravitino/lance/common/TestArrowIPC.java | 83 ----
.../TestGravitinoLanceNamespaceWrapper.java | 76 ++++
.../ops/gravitino/TestLanceDataTypeConverter.java | 473 +++++++++++++++++++++
.../service/rest/LanceNamespaceOperations.java | 2 +-
.../lance/service/rest/LanceTableOperations.java | 13 +-
13 files changed, 933 insertions(+), 539 deletions(-)
diff --git a/catalogs/catalog-generic-lakehouse/build.gradle.kts
b/catalogs/catalog-generic-lakehouse/build.gradle.kts
index df401dcde4..25adc25a77 100644
--- a/catalogs/catalog-generic-lakehouse/build.gradle.kts
+++ b/catalogs/catalog-generic-lakehouse/build.gradle.kts
@@ -35,6 +35,7 @@ dependencies {
implementation(project(":core")) {
exclude("*")
}
+ implementation(project(":lance:lance-common"))
implementation(libs.bundles.log4j)
implementation(libs.cglib)
diff --git
a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java
b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java
index fb2ac2b485..965da97b2a 100644
---
a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java
+++
b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java
@@ -38,7 +38,6 @@ import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.arrow.memory.RootAllocator;
-import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.gravitino.Catalog;
@@ -55,6 +54,7 @@ import org.apache.gravitino.exceptions.NoSuchEntityException;
import org.apache.gravitino.exceptions.NoSuchSchemaException;
import org.apache.gravitino.exceptions.NoSuchTableException;
import org.apache.gravitino.exceptions.TableAlreadyExistsException;
+import org.apache.gravitino.lance.common.ops.gravitino.LanceDataTypeConverter;
import org.apache.gravitino.meta.AuditInfo;
import org.apache.gravitino.meta.GenericTableEntity;
import org.apache.gravitino.rel.Column;
@@ -129,7 +129,7 @@ public class LanceCatalogOperations implements
LakehouseCatalogOperations {
Dataset.create(
new RootAllocator(),
location,
- convertColumnsToSchema(columns),
+ convertColumnsToArrowSchema(columns),
new
WriteParams.Builder().withStorageOptions(storageProps).build())) {
GenericLakehouseTable.Builder builder = GenericLakehouseTable.builder();
return builder
@@ -151,39 +151,13 @@ public class LanceCatalogOperations implements
LakehouseCatalogOperations {
}
}
- private org.apache.arrow.vector.types.pojo.Schema
convertColumnsToSchema(Column[] columns) {
- LanceDataTypeConverter converter = new LanceDataTypeConverter();
+ private org.apache.arrow.vector.types.pojo.Schema
convertColumnsToArrowSchema(Column[] columns) {
List<Field> fields =
Arrays.stream(columns)
.map(
- col -> {
- boolean nullable = col.nullable();
- ArrowType parentType =
converter.fromGravitino(col.dataType());
- List<ArrowType> childTypes =
converter.getChildTypes(col.dataType());
- List<Field> childFields =
- childTypes.stream()
- .map(
- childType ->
- new org.apache.arrow.vector.types.pojo.Field(
- "",
-
org.apache.arrow.vector.types.pojo.FieldType.nullable(
- childType),
- null))
- .collect(Collectors.toList());
-
- if (nullable) {
- return new org.apache.arrow.vector.types.pojo.Field(
- col.name(),
-
org.apache.arrow.vector.types.pojo.FieldType.nullable(parentType),
- childFields);
- }
-
- // not nullable
- return new org.apache.arrow.vector.types.pojo.Field(
- col.name(),
-
org.apache.arrow.vector.types.pojo.FieldType.notNullable(parentType),
- childFields);
- })
+ col ->
+ LanceDataTypeConverter.CONVERTER.toArrowField(
+ col.name(), col.dataType(), col.nullable()))
.collect(Collectors.toList());
return new org.apache.arrow.vector.types.pojo.Schema(fields);
}
diff --git
a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceDataTypeConverter.java
b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceDataTypeConverter.java
deleted file mode 100644
index d7966edd5e..0000000000
---
a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceDataTypeConverter.java
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.gravitino.catalog.lakehouse.lance;
-
-import com.google.common.collect.Lists;
-import java.util.List;
-import org.apache.arrow.vector.types.DateUnit;
-import org.apache.arrow.vector.types.FloatingPointPrecision;
-import org.apache.arrow.vector.types.TimeUnit;
-import org.apache.arrow.vector.types.pojo.ArrowType;
-import org.apache.arrow.vector.types.pojo.ArrowType.Bool;
-import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
-import org.apache.arrow.vector.types.pojo.ArrowType.Int;
-import org.apache.gravitino.connector.DataTypeConverter;
-import org.apache.gravitino.json.JsonUtils;
-import org.apache.gravitino.rel.types.Type;
-import org.apache.gravitino.rel.types.Types;
-import org.apache.gravitino.rel.types.Types.FixedType;
-import org.apache.gravitino.rel.types.Types.UnparsedType;
-
-public class LanceDataTypeConverter implements DataTypeConverter<ArrowType,
ArrowType> {
-
- @Override
- public ArrowType fromGravitino(Type type) {
- switch (type.name()) {
- case BOOLEAN:
- return Bool.INSTANCE;
- case BYTE:
- return new Int(8, true);
- case SHORT:
- return new Int(16, true);
- case INTEGER:
- return new Int(32, true);
- case LONG:
- return new Int(64, true);
- case FLOAT:
- return new FloatingPoint(FloatingPointPrecision.SINGLE);
- case DOUBLE:
- return new FloatingPoint(FloatingPointPrecision.DOUBLE);
- case DECIMAL:
- // Lance uses FIXED_SIZE_BINARY for decimal types
- return new ArrowType.FixedSizeBinary(16); // assuming 16 bytes for
decimal
- case DATE:
- return new ArrowType.Date(DateUnit.DAY);
- case TIME:
- return new ArrowType.Time(TimeUnit.MILLISECOND, 32);
- case TIMESTAMP:
- return new ArrowType.Timestamp(TimeUnit.MILLISECOND, null);
- case VARCHAR:
- case STRING:
- return new ArrowType.Utf8();
- case FIXED:
- FixedType fixedType = (FixedType) type;
- return new ArrowType.FixedSizeBinary(fixedType.length());
- case BINARY:
- return new ArrowType.Binary();
- case UNPARSED:
- String typeStr = ((UnparsedType) type).unparsedType().toString();
- try {
- Type t = JsonUtils.anyFieldMapper().readValue(typeStr, Type.class);
- if (t instanceof Types.ListType) {
- return ArrowType.List.INSTANCE;
- } else if (t instanceof Types.MapType) {
- return new ArrowType.Map(false);
- } else if (t instanceof Types.StructType) {
- return ArrowType.Struct.INSTANCE;
- } else {
- throw new UnsupportedOperationException(
- "Unsupported UnparsedType conversion: " + t.simpleString());
- }
- } catch (Exception e) {
- // FixedSizeListArray(integer, 3)
- if (typeStr.startsWith("FixedSizeListArray")) {
- int size =
- Integer.parseInt(
- typeStr.substring(typeStr.indexOf(',') + 1,
typeStr.indexOf(')')).trim());
- return new ArrowType.FixedSizeList(size);
- }
- throw new UnsupportedOperationException("Failed to parse
UnparsedType: " + typeStr, e);
- }
- default:
- throw new UnsupportedOperationException("Unsupported Gravitino type: "
+ type.name());
- }
- }
-
- @Override
- public Type toGravitino(ArrowType arrowType) {
- if (arrowType instanceof Bool) {
- return Types.BooleanType.get();
- } else if (arrowType instanceof Int intType) {
- switch (intType.getBitWidth()) {
- case 8 -> {
- return Types.ByteType.get();
- }
- case 16 -> {
- return Types.ShortType.get();
- }
- case 32 -> {
- return Types.IntegerType.get();
- }
- case 64 -> {
- return Types.LongType.get();
- }
- default -> throw new UnsupportedOperationException(
- "Unsupported Int bit width: " + intType.getBitWidth());
- }
- } else if (arrowType instanceof FloatingPoint floatingPoint) {
- switch (floatingPoint.getPrecision()) {
- case SINGLE:
- return Types.FloatType.get();
- case DOUBLE:
- return Types.DoubleType.get();
- default:
- throw new UnsupportedOperationException(
- "Unsupported FloatingPoint precision: " +
floatingPoint.getPrecision());
- }
- } else if (arrowType instanceof ArrowType.FixedSizeBinary) {
- ArrowType.FixedSizeBinary fixedSizeBinary = (ArrowType.FixedSizeBinary)
arrowType;
- return Types.FixedType.of(fixedSizeBinary.getByteWidth());
- } else if (arrowType instanceof ArrowType.Date) {
- return Types.DateType.get();
- } else if (arrowType instanceof ArrowType.Time) {
- return Types.TimeType.get();
- } else if (arrowType instanceof ArrowType.Timestamp) {
- return Types.TimestampType.withoutTimeZone();
- } else if (arrowType instanceof ArrowType.Utf8) {
- return Types.StringType.get();
- } else if (arrowType instanceof ArrowType.Binary) {
- return Types.BinaryType.get();
- // TODO handle complex types like List, Map, Struct
- } else {
- throw new UnsupportedOperationException("Unsupported Arrow type: " +
arrowType);
- }
- }
-
- public List<ArrowType> getChildTypes(Type parentType) {
- if (parentType.name() != Type.Name.UNPARSED) {
- return List.of();
- }
-
- List<ArrowType> arrowTypes = Lists.newArrayList();
- String typeStr = ((UnparsedType) parentType).unparsedType().toString();
- try {
- Type t = JsonUtils.anyFieldMapper().readValue(typeStr, Type.class);
- if (t instanceof Types.ListType listType) {
- arrowTypes.add(fromGravitino(listType.elementType()));
- } else if (t instanceof Types.MapType mapType) {
- arrowTypes.add(fromGravitino(mapType.keyType()));
- arrowTypes.add(fromGravitino(mapType.valueType()));
- } else {
- // TODO support struct type.
- throw new UnsupportedOperationException(
- "Unsupported UnparsedType conversion: " + t.simpleString());
- }
-
- return arrowTypes;
- } catch (Exception e) {
- // FixedSizeListArray(integer, 3)
-
- try {
- if (typeStr.startsWith("FixedSizeListArray")) {
- String type = typeStr.substring(typeStr.indexOf('(') + 1,
typeStr.indexOf(',')).trim();
- Type childType = JsonUtils.anyFieldMapper().readValue("\"" + type +
"\"", Type.class);
- arrowTypes.add(fromGravitino(childType));
-
- return arrowTypes;
- }
- } catch (Exception e1) {
- throw new UnsupportedOperationException("Failed to parse UnparsedType:
" + typeStr, e1);
- }
-
- throw new UnsupportedOperationException("Failed to parse UnparsedType: "
+ typeStr, e);
- }
- }
-}
diff --git a/lance/lance-common/build.gradle.kts
b/lance/lance-common/build.gradle.kts
index 4ec9046e3d..8bb80c1934 100644
--- a/lance/lance-common/build.gradle.kts
+++ b/lance/lance-common/build.gradle.kts
@@ -43,5 +43,6 @@ dependencies {
testImplementation(project(":server-common"))
testImplementation(libs.junit.jupiter.api)
+ testImplementation(libs.junit.jupiter.params)
testRuntimeOnly(libs.junit.jupiter.engine)
}
diff --git
a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceOperations.java
b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceOperations.java
index 49141665a5..36d55c03e9 100644
---
a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceOperations.java
+++
b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceOperations.java
@@ -49,5 +49,6 @@ public interface LanceNamespaceOperations {
void namespaceExists(String namespaceId, String delimiter) throws
LanceNamespaceException;
- ListTablesResponse listTables(String id, String delimiter, String pageToken,
Integer limit);
+ ListTablesResponse listTables(
+ String namespaceId, String delimiter, String pageToken, Integer limit);
}
diff --git
a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/arrow/ArrowRecordBatchList.java
b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/arrow/ArrowRecordBatchList.java
deleted file mode 100644
index b0c6a089d5..0000000000
---
a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/arrow/ArrowRecordBatchList.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.gravitino.lance.common.ops.arrow;
-
-import java.util.List;
-import org.apache.arrow.vector.VectorSchemaRoot;
-import org.apache.arrow.vector.types.pojo.Schema;
-
-public class ArrowRecordBatchList {
- private final Schema schema;
-
- @SuppressWarnings("unused")
- private final List<VectorSchemaRoot> recordBatches;
-
- public Schema getSchema() {
- return schema;
- }
-
- public ArrowRecordBatchList(Schema schema, List<VectorSchemaRoot>
recordBatches) {
- this.schema = schema;
- this.recordBatches = recordBatches;
- }
-}
diff --git
a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java
b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java
index fe6404a424..d3ddbb0ede 100644
---
a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java
+++
b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java
@@ -20,8 +20,10 @@ package org.apache.gravitino.lance.common.ops.gravitino;
import static
org.apache.gravitino.lance.common.config.LanceConfig.METALAKE_NAME;
import static
org.apache.gravitino.lance.common.config.LanceConfig.NAMESPACE_BACKEND_URI;
+import static
org.apache.gravitino.lance.common.ops.gravitino.LanceDataTypeConverter.CONVERTER;
import static org.apache.gravitino.rel.Column.DEFAULT_VALUE_NOT_SET;
+import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
@@ -37,12 +39,11 @@ import
com.lancedb.lance.namespace.model.DescribeNamespaceResponse;
import com.lancedb.lance.namespace.model.DescribeTableResponse;
import com.lancedb.lance.namespace.model.DropNamespaceRequest;
import com.lancedb.lance.namespace.model.DropNamespaceResponse;
-import com.lancedb.lance.namespace.model.JsonArrowDataType;
-import com.lancedb.lance.namespace.model.JsonArrowField;
import com.lancedb.lance.namespace.model.JsonArrowSchema;
import com.lancedb.lance.namespace.model.ListNamespacesResponse;
import com.lancedb.lance.namespace.model.ListTablesResponse;
import com.lancedb.lance.namespace.util.CommonUtil;
+import com.lancedb.lance.namespace.util.JsonArrowSchemaConverter;
import com.lancedb.lance.namespace.util.PageUtil;
import java.io.ByteArrayInputStream;
import java.util.ArrayList;
@@ -59,17 +60,8 @@ import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
-import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.ipc.ArrowStreamReader;
-import org.apache.arrow.vector.types.DateUnit;
-import org.apache.arrow.vector.types.FloatingPointPrecision;
-import org.apache.arrow.vector.types.TimeUnit;
-import org.apache.arrow.vector.types.pojo.ArrowType;
-import org.apache.arrow.vector.types.pojo.ArrowType.Bool;
-import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
-import org.apache.arrow.vector.types.pojo.ArrowType.Int;
import org.apache.arrow.vector.types.pojo.Field;
-import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.commons.lang3.StringUtils;
import org.apache.gravitino.Catalog;
import org.apache.gravitino.CatalogChange;
@@ -84,18 +76,12 @@ import
org.apache.gravitino.exceptions.NoSuchSchemaException;
import org.apache.gravitino.exceptions.NonEmptyCatalogException;
import org.apache.gravitino.exceptions.NonEmptySchemaException;
import org.apache.gravitino.exceptions.SchemaAlreadyExistsException;
-import org.apache.gravitino.json.JsonUtils;
import org.apache.gravitino.lance.common.config.LanceConfig;
import org.apache.gravitino.lance.common.ops.LanceNamespaceOperations;
import org.apache.gravitino.lance.common.ops.LanceTableOperations;
import org.apache.gravitino.lance.common.ops.NamespaceWrapper;
-import org.apache.gravitino.lance.common.ops.arrow.ArrowRecordBatchList;
import org.apache.gravitino.rel.Column;
import org.apache.gravitino.rel.Table;
-import org.apache.gravitino.rel.types.Type;
-import org.apache.gravitino.rel.types.Types;
-import org.apache.gravitino.rel.types.Types.FixedType;
-import org.apache.gravitino.rel.types.Types.UnparsedType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -105,6 +91,11 @@ public class GravitinoLanceNamespaceWrapper extends
NamespaceWrapper
private static final Logger LOG =
LoggerFactory.getLogger(GravitinoLanceNamespaceWrapper.class);
private GravitinoClient client;
+ @VisibleForTesting
+ GravitinoLanceNamespaceWrapper() {
+ super(null);
+ }
+
public GravitinoLanceNamespaceWrapper(LanceConfig config) {
super(config);
}
@@ -498,8 +489,8 @@ public class GravitinoLanceNamespaceWrapper extends
NamespaceWrapper
@Override
public ListTablesResponse listTables(
- String id, String delimiter, String pageToken, Integer limit) {
- ObjectIdentifier nsId = ObjectIdentifier.of(id, Pattern.quote(delimiter));
+ String namespaceId, String delimiter, String pageToken, Integer limit) {
+ ObjectIdentifier nsId = ObjectIdentifier.of(namespaceId,
Pattern.quote(delimiter));
Preconditions.checkArgument(
nsId.levels() <= 2, "Expected at most 2-level namespace but got: %s",
nsId.levels());
String catalogName = nsId.levelAtListPos(0);
@@ -508,9 +499,9 @@ public class GravitinoLanceNamespaceWrapper extends
NamespaceWrapper
List<String> tables =
Arrays.stream(catalog.asTableCatalog().listTables(Namespace.of(schemaName)))
.map(ident -> Joiner.on(delimiter).join(catalogName, schemaName,
ident.name()))
+ .sorted()
.collect(Collectors.toList());
- Collections.sort(tables);
PageUtil.Page page = PageUtil.splitPage(tables, pageToken,
PageUtil.normalizePageSize(limit));
ListNamespacesResponse response = new ListNamespacesResponse();
response.setNamespaces(Sets.newHashSet(page.items()));
@@ -540,29 +531,6 @@ public class GravitinoLanceNamespaceWrapper extends
NamespaceWrapper
return response;
}
- private JsonArrowSchema toJsonArrowSchema(Column[] columns) {
- List<JsonArrowField> fields = new ArrayList<>();
- for (Column column : columns) {
- ArrowType arrowType = fromGravitinoType(column.dataType());
- FieldType fieldType = new FieldType(column.nullable(), arrowType, null,
null);
- Field field = new Field(column.name(), fieldType, null);
-
- JsonArrowDataType jsonArrowDataType = new JsonArrowDataType();
- // other filed needs to be set accordingly such as list, map, struct
- jsonArrowDataType.setType(arrowType.toString());
-
- JsonArrowField arrowField = new JsonArrowField();
- arrowField.setName(field.getName());
- arrowField.setType(jsonArrowDataType);
-
- fields.add(arrowField);
- }
-
- JsonArrowSchema jsonArrowSchema = new JsonArrowSchema();
- jsonArrowSchema.setFields(fields);
- return jsonArrowSchema;
- }
-
@Override
public CreateTableResponse createTable(
String tableId,
@@ -585,8 +553,8 @@ public class GravitinoLanceNamespaceWrapper extends
NamespaceWrapper
// Parser column information.
List<Column> columns = Lists.newArrayList();
if (arrowStreamBody != null) {
- ArrowRecordBatchList recordBatchList =
parseArrowIpcStream(arrowStreamBody);
- columns = extractColumns(recordBatchList);
+ org.apache.arrow.vector.types.pojo.Schema schema =
parseArrowIpcStream(arrowStreamBody);
+ columns = extractColumns(schema);
}
String catalogName = nsId.levelAtListPos(0);
@@ -646,157 +614,45 @@ public class GravitinoLanceNamespaceWrapper extends
NamespaceWrapper
return response;
}
- private ArrowRecordBatchList parseArrowIpcStream(byte[] stream) {
+ private JsonArrowSchema toJsonArrowSchema(Column[] columns) {
+ List<Field> fields =
+ Arrays.stream(columns)
+ .map(col -> CONVERTER.toArrowField(col.name(), col.dataType(),
col.nullable()))
+ .collect(Collectors.toList());
+
+ return JsonArrowSchemaConverter.convertToJsonArrowSchema(
+ new org.apache.arrow.vector.types.pojo.Schema(fields));
+ }
+
+ @VisibleForTesting
+ org.apache.arrow.vector.types.pojo.Schema parseArrowIpcStream(byte[] stream)
{
+ org.apache.arrow.vector.types.pojo.Schema schema;
+
try (BufferAllocator allocator = new RootAllocator();
ByteArrayInputStream bais = new ByteArrayInputStream(stream);
ArrowStreamReader reader = new ArrowStreamReader(bais, allocator)) {
-
- org.apache.arrow.vector.types.pojo.Schema schema =
reader.getVectorSchemaRoot().getSchema();
- List<VectorSchemaRoot> batches = new ArrayList<>();
-
- while (reader.loadNextBatch()) {
- VectorSchemaRoot root = reader.getVectorSchemaRoot();
- if (root.getRowCount() > 0) {
- batches.add(root);
- }
- }
- return new ArrowRecordBatchList(schema, batches);
+ schema = reader.getVectorSchemaRoot().getSchema();
} catch (Exception e) {
throw new RuntimeException("Failed to parse Arrow IPC stream", e);
}
+
+ Preconditions.checkArgument(schema != null, "No schema found in Arrow IPC
stream");
+ return schema;
}
- private List<Column> extractColumns(ArrowRecordBatchList recordBatchList) {
+ private List<Column>
extractColumns(org.apache.arrow.vector.types.pojo.Schema arrowSchema) {
List<Column> columns = new ArrayList<>();
- org.apache.arrow.vector.types.pojo.Schema arrowSchema =
recordBatchList.getSchema();
for (org.apache.arrow.vector.types.pojo.Field field :
arrowSchema.getFields()) {
- columns.add(toGravitinoColumn(field));
+ columns.add(
+ Column.of(
+ field.getName(),
+ CONVERTER.toGravitino(field),
+ null,
+ field.isNullable(),
+ false,
+ DEFAULT_VALUE_NOT_SET));
}
return columns;
}
-
- private Column toGravitinoColumn(Field field) {
- return Column.of(
- field.getName(),
- toGravitinoType(field),
- field.getMetadata().get("comment"),
- field.isNullable(),
- false,
- DEFAULT_VALUE_NOT_SET);
- }
-
- private ArrowType fromGravitinoType(Type type) {
- switch (type.name()) {
- case BOOLEAN:
- return Bool.INSTANCE;
- case BYTE:
- return new Int(8, true);
- case SHORT:
- return new Int(16, true);
- case INTEGER:
- return new Int(32, true);
- case LONG:
- return new Int(64, true);
- case FLOAT:
- return new FloatingPoint(FloatingPointPrecision.SINGLE);
- case DOUBLE:
- return new FloatingPoint(FloatingPointPrecision.DOUBLE);
- case DECIMAL:
- // Lance uses FIXED_SIZE_BINARY for decimal types
- return new ArrowType.FixedSizeBinary(16); // assuming 16 bytes for
decimal
- case DATE:
- return new ArrowType.Date(DateUnit.DAY);
- case TIME:
- return new ArrowType.Time(TimeUnit.MILLISECOND, 32);
- case TIMESTAMP:
- return new ArrowType.Timestamp(TimeUnit.MILLISECOND, null);
- case VARCHAR:
- case STRING:
- return new ArrowType.Utf8();
- case FIXED:
- FixedType fixedType = (FixedType) type;
- return new ArrowType.FixedSizeBinary(fixedType.length());
- case BINARY:
- return new ArrowType.Binary();
- case UNPARSED:
- String typeStr = ((UnparsedType) type).unparsedType().toString();
- try {
- Type t = JsonUtils.anyFieldMapper().readValue(typeStr, Type.class);
- if (t instanceof Types.ListType) {
- return ArrowType.List.INSTANCE;
- } else if (t instanceof Types.MapType) {
- return new ArrowType.Map(false);
- } else if (t instanceof Types.StructType) {
- return ArrowType.Struct.INSTANCE;
- } else {
- throw new UnsupportedOperationException(
- "Unsupported UnparsedType conversion: " + t.simpleString());
- }
- } catch (Exception e) {
- // FixedSizeListArray(integer, 3)
- if (typeStr.startsWith("FixedSizeListArray")) {
- int size =
- Integer.parseInt(
- typeStr.substring(typeStr.indexOf(',') + 1,
typeStr.indexOf(')')).trim());
- return new ArrowType.FixedSizeList(size);
- }
- throw new UnsupportedOperationException("Failed to parse
UnparsedType: " + typeStr, e);
- }
- default:
- throw new UnsupportedOperationException("Unsupported Gravitino type: "
+ type.name());
- }
- }
-
- private Type toGravitinoType(Field field) {
- FieldType parentType = field.getFieldType();
- ArrowType arrowType = parentType.getType();
- if (arrowType instanceof Bool) {
- return Types.BooleanType.get();
- } else if (arrowType instanceof Int) {
- Int intType = (Int) arrowType;
- switch (intType.getBitWidth()) {
- case 8 -> {
- return Types.ByteType.get();
- }
- case 16 -> {
- return Types.ShortType.get();
- }
- case 32 -> {
- return Types.IntegerType.get();
- }
- case 64 -> {
- return Types.LongType.get();
- }
- default -> throw new UnsupportedOperationException(
- "Unsupported Int bit width: " + intType.getBitWidth());
- }
- } else if (arrowType instanceof FloatingPoint) {
- FloatingPoint floatingPoint = (FloatingPoint) arrowType;
- switch (floatingPoint.getPrecision()) {
- case SINGLE:
- return Types.FloatType.get();
- case DOUBLE:
- return Types.DoubleType.get();
- default:
- throw new UnsupportedOperationException(
- "Unsupported FloatingPoint precision: " +
floatingPoint.getPrecision());
- }
- } else if (arrowType instanceof ArrowType.FixedSizeBinary) {
- ArrowType.FixedSizeBinary fixedSizeBinary = (ArrowType.FixedSizeBinary)
arrowType;
- return Types.FixedType.of(fixedSizeBinary.getByteWidth());
- } else if (arrowType instanceof ArrowType.Date) {
- return Types.DateType.get();
- } else if (arrowType instanceof ArrowType.Time) {
- return Types.TimeType.get();
- } else if (arrowType instanceof ArrowType.Timestamp) {
- return Types.TimestampType.withoutTimeZone();
- } else if (arrowType instanceof ArrowType.Utf8) {
- return Types.StringType.get();
- } else if (arrowType instanceof ArrowType.Binary) {
- return Types.BinaryType.get();
- } else {
- return Types.UnparsedType.of(arrowType.toString());
- }
- }
}
diff --git
a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/LanceDataTypeConverter.java
b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/LanceDataTypeConverter.java
new file mode 100644
index 0000000000..52d52d38fb
--- /dev/null
+++
b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/LanceDataTypeConverter.java
@@ -0,0 +1,326 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.gravitino.lance.common.ops.gravitino;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.IntervalUnit;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.Bool;
+import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.gravitino.connector.DataTypeConverter;
+import org.apache.gravitino.rel.types.Type;
+import org.apache.gravitino.rel.types.Types;
+import org.apache.gravitino.rel.types.Types.FixedType;
+
+public class LanceDataTypeConverter implements DataTypeConverter<ArrowType,
Field> {
+
+ public static final LanceDataTypeConverter CONVERTER = new
LanceDataTypeConverter();
+ private static final ObjectMapper mapper = new ObjectMapper();
+
+  /**
+   * Builds the Arrow {@link Field} that represents a Gravitino column type.
+   *
+   * <p>LIST, STRUCT, MAP and UNION types are expanded recursively into child fields. EXTERNAL
+   * types are read back from their catalog string, which is parsed as a JSON-serialized Arrow
+   * field. Every other type is mapped through {@link #fromGravitino(Type)} and yields a field
+   * without children.
+   *
+   * @param name the name of the resulting field
+   * @param type the Gravitino type to convert
+   * @param nullable whether the resulting field is nullable
+   * @return the Arrow field describing {@code type}
+   * @throws IllegalArgumentException if the field parsed from an EXTERNAL type does not carry the
+   *     requested name or nullability
+   * @throws RuntimeException if the catalog string of an EXTERNAL type cannot be parsed
+   * @throws UnsupportedOperationException if {@link #fromGravitino(Type)} rejects the type
+   */
+  public Field toArrowField(String name, Type type, boolean nullable) {
+    switch (type.name()) {
+      case LIST:
+        Types.ListType listType = (Types.ListType) type;
+        FieldType listField = new FieldType(nullable, ArrowType.List.INSTANCE, null);
+        return new Field(
+            name,
+            listField,
+            Lists.newArrayList(
+                toArrowField("element", listType.elementType(), listType.elementNullable())));
+
+      case STRUCT:
+        Types.StructType structType = (Types.StructType) type;
+        FieldType structField = new FieldType(nullable, ArrowType.Struct.INSTANCE, null);
+        return new Field(
+            name,
+            structField,
+            Arrays.stream(structType.fields())
+                .map(field -> toArrowField(field.name(), field.type(), field.nullable()))
+                .toList());
+
+      case MAP:
+        Types.MapType mapType = (Types.MapType) type;
+        FieldType mapField = new FieldType(nullable, new ArrowType.Map(false), null);
+        // The map is modelled as one non-nullable struct child holding the key and value fields.
+        return new Field(
+            name,
+            mapField,
+            Lists.newArrayList(
+                toArrowField(
+                    MapVector.DATA_VECTOR_NAME,
+                    Types.StructType.of(
+                        // Note: Arrow MapVector requires key field to be non-nullable
+                        Types.StructType.Field.of(
+                            MapVector.KEY_NAME,
+                            mapType.keyType(),
+                            false /*nullable*/,
+                            null /*comment*/),
+                        Types.StructType.Field.of(
+                            MapVector.VALUE_NAME,
+                            mapType.valueType(),
+                            mapType.valueNullable(),
+                            null)),
+                    false /*nullable*/)));
+
+      case UNION:
+        Types.UnionType unionType = (Types.UnionType) type;
+        List<Field> types =
+            Arrays.stream(unionType.types())
+                // union members are nullable
+                .map(t -> toArrowField(t.simpleString(), t, true /*nullable*/))
+                .toList();
+        // Each member's type id is the ordinal of its Arrow minor type.
+        int[] typeIds =
+            types.stream()
+                .mapToInt(
+                    f ->
+                        org.apache.arrow.vector.types.Types.getMinorTypeForArrowType(f.getType())
+                            .ordinal())
+                .toArray();
+        FieldType unionField =
+            new FieldType(nullable, new ArrowType.Union(UnionMode.Sparse, typeIds), null);
+        return new Field(name, unionField, types);
+
+      case EXTERNAL:
+        Types.ExternalType externalType = (Types.ExternalType) type;
+        Field field;
+        try {
+          field = mapper.readValue(externalType.catalogString(), Field.class);
+        } catch (Exception e) {
+          throw new RuntimeException(
+              "Failed to parse external type catalog string: " + externalType.catalogString(), e);
+        }
+        // The serialized field embeds its own name and nullability; both must match the request.
+        Preconditions.checkArgument(
+            name.equals(field.getName()),
+            "expected field name %s but got %s",
+            name,
+            field.getName());
+        Preconditions.checkArgument(
+            nullable == field.isNullable(),
+            "expected field nullable %s but got %s",
+            nullable,
+            field.isNullable());
+        return field;
+
+      default:
+        // non-complex type
+        FieldType fieldType = new FieldType(nullable, fromGravitino(type), null);
+        return new Field(name, fieldType, null);
+    }
+  }
+
+  /**
+   * Maps a non-complex Gravitino type to its Arrow type.
+   *
+   * <p>Integer types keep their signedness, decimals are emitted with a 128-bit width, and
+   * timestamps default to microseconds unless a precision of 0, 3, 6 or 9 is set. Timestamps
+   * with a time zone are always tagged {@code "UTC"}. TIME maps to a 64-bit nanosecond time and
+   * INTERVAL_DAY to a microsecond duration.
+   *
+   * @param type the Gravitino type to convert
+   * @return the matching Arrow type
+   * @throws UnsupportedOperationException if the type has no mapping here, or a timestamp
+   *     precision other than 0, 3, 6 or 9 is set
+   */
+  @Override
+  public ArrowType fromGravitino(Type type) {
+    return switch (type.name()) {
+      case BOOLEAN -> Bool.INSTANCE;
+      case BYTE -> new Int(8, ((Types.ByteType) type).signed());
+      case SHORT -> new Int(8 * 2, ((Types.ShortType) type).signed());
+      case INTEGER -> new Int(8 * 4, ((Types.IntegerType) type).signed());
+      case LONG -> new Int(8 * 8, ((Types.LongType) type).signed());
+      case FLOAT -> new FloatingPoint(FloatingPointPrecision.SINGLE);
+      case DOUBLE -> new FloatingPoint(FloatingPointPrecision.DOUBLE);
+      case STRING -> ArrowType.Utf8.INSTANCE;
+      case BINARY -> ArrowType.Binary.INSTANCE;
+      case DECIMAL -> {
+        Types.DecimalType decimalType = (Types.DecimalType) type;
+        yield new ArrowType.Decimal(decimalType.precision(), decimalType.scale(), 8 * 16);
+      }
+      case DATE -> new ArrowType.Date(DateUnit.DAY);
+      case TIMESTAMP -> {
+        Types.TimestampType timestampType = (Types.TimestampType) type;
+        TimeUnit timeUnit = TimeUnit.MICROSECOND;
+        if (timestampType.hasPrecisionSet()) {
+          timeUnit =
+              switch (timestampType.precision()) {
+                case 0 -> TimeUnit.SECOND;
+                case 3 -> TimeUnit.MILLISECOND;
+                case 6 -> TimeUnit.MICROSECOND;
+                case 9 -> TimeUnit.NANOSECOND;
+                default -> throw new UnsupportedOperationException(
+                    "Expected precision to be one of 0, 3, 6, 9 but got: "
+                        + timestampType.precision());
+              };
+        }
+        // todo: need timeZoneId for timestamp with time zone
+        yield new ArrowType.Timestamp(timeUnit, timestampType.hasTimeZone() ? "UTC" : null);
+      }
+      case TIME -> new ArrowType.Time(TimeUnit.NANOSECOND, 8 * 8);
+      case NULL -> ArrowType.Null.INSTANCE;
+      case INTERVAL_YEAR -> new ArrowType.Interval(IntervalUnit.YEAR_MONTH);
+      case INTERVAL_DAY -> new ArrowType.Duration(TimeUnit.MICROSECOND);
+      case FIXED -> new ArrowType.FixedSizeBinary(((FixedType) type).length());
+      default -> throw new UnsupportedOperationException(
+          "Unsupported Gravitino type: " + type.name());
+    };
+  }
+
+  /**
+   * Converts an Arrow {@link Field} into the corresponding Gravitino type.
+   *
+   * <p>Map, list, struct and union fields are converted recursively through their children.
+   * Fields whose Arrow type has no direct counterpart here (for example an integer width other
+   * than 8/16/32/64, a non-DAY date, or a time that is not 64-bit nanoseconds) are returned as an
+   * {@code ExternalType} wrapping the JSON serialization of the whole field.
+   *
+   * @param arrowField the Arrow field to convert
+   * @return the Gravitino type for the field
+   * @throws RuntimeException if the fallback JSON serialization fails
+   */
+  @Override
+  public Type toGravitino(Field arrowField) {
+    ArrowType arrowType = arrowField.getFieldType().getType();
+
+    // A null result means "no direct mapping"; the ExternalType fallback below handles it.
+    Type mapped =
+        switch (arrowType.getTypeID()) {
+          case Map -> {
+            Field entries = arrowField.getChildren().get(0);
+            Type keyType = toGravitino(entries.getChildren().get(0));
+            Field valueField = entries.getChildren().get(1);
+            Type valueType = toGravitino(valueField);
+            yield Types.MapType.of(keyType, valueType, valueField.isNullable());
+          }
+          case List -> {
+            Field element = arrowField.getChildren().get(0);
+            Type elementType = toGravitino(element);
+            yield Types.ListType.of(elementType, element.isNullable());
+          }
+          case Struct -> {
+            Types.StructType.Field[] members =
+                arrowField.getChildren().stream()
+                    .map(
+                        child ->
+                            Types.StructType.Field.of(
+                                child.getName(),
+                                toGravitino(child),
+                                child.isNullable(),
+                                null /*comment*/))
+                    .toArray(Types.StructType.Field[]::new);
+            yield Types.StructType.of(members);
+          }
+          case Union ->
+              Types.UnionType.of(
+                  arrowField.getChildren().stream().map(this::toGravitino).toArray(Type[]::new));
+          case Bool -> Types.BooleanType.get();
+          case Int -> {
+            Int intType = (Int) arrowType;
+            boolean signed = intType.getIsSigned();
+            yield switch (intType.getBitWidth()) {
+              case 8 -> signed ? Types.ByteType.get() : Types.ByteType.unsigned();
+              case 16 -> signed ? Types.ShortType.get() : Types.ShortType.unsigned();
+              case 32 -> signed ? Types.IntegerType.get() : Types.IntegerType.unsigned();
+              case 64 -> signed ? Types.LongType.get() : Types.LongType.unsigned();
+              default -> null;
+            };
+          }
+          case FloatingPoint ->
+              switch (((FloatingPoint) arrowType).getPrecision()) {
+                case SINGLE -> Types.FloatType.get();
+                case DOUBLE -> Types.DoubleType.get();
+                default -> null;
+              };
+          case Utf8 -> Types.StringType.get();
+          case Binary -> Types.BinaryType.get();
+          case Decimal -> {
+            ArrowType.Decimal decimal = (ArrowType.Decimal) arrowType;
+            yield Types.DecimalType.of(decimal.getPrecision(), decimal.getScale());
+          }
+          case Date ->
+              ((ArrowType.Date) arrowType).getUnit() == DateUnit.DAY ? Types.DateType.get() : null;
+          case Timestamp -> {
+            ArrowType.Timestamp timestamp = (ArrowType.Timestamp) arrowType;
+            int precision =
+                switch (timestamp.getUnit()) {
+                  case SECOND -> 0;
+                  case MILLISECOND -> 3;
+                  case MICROSECOND -> 6;
+                  case NANOSECOND -> 9;
+                };
+            yield timestamp.getTimezone() != null
+                ? Types.TimestampType.withTimeZone(precision)
+                : Types.TimestampType.withoutTimeZone(precision);
+          }
+          case Time -> {
+            ArrowType.Time time = (ArrowType.Time) arrowType;
+            boolean nanos64 = time.getUnit() == TimeUnit.NANOSECOND && time.getBitWidth() == 8 * 8;
+            yield nanos64 ? Types.TimeType.get() : null;
+          }
+          case Null -> Types.NullType.get();
+          case Interval ->
+              ((ArrowType.Interval) arrowType).getUnit() == IntervalUnit.YEAR_MONTH
+                  ? Types.IntervalYearType.get()
+                  : null;
+          case Duration ->
+              ((ArrowType.Duration) arrowType).getUnit() == TimeUnit.MICROSECOND
+                  ? Types.IntervalDayType.get()
+                  : null;
+          case FixedSizeBinary ->
+              Types.FixedType.of(((ArrowType.FixedSizeBinary) arrowType).getByteWidth());
+          default -> null;
+        };
+    if (mapped != null) {
+      return mapped;
+    }
+
+    // Fallback: keep the full Arrow field as JSON inside an external type.
+    String serialized;
+    try {
+      serialized = mapper.writeValueAsString(arrowField);
+    } catch (Exception e) {
+      throw new RuntimeException("Failed to serialize Arrow field to string.", e);
+    }
+    return Types.ExternalType.of(serialized);
+  }
+}
diff --git
a/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/TestArrowIPC.java
b/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/TestArrowIPC.java
deleted file mode 100644
index 71f1bfc587..0000000000
---
a/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/TestArrowIPC.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.gravitino.lance.common;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.util.Arrays;
-import org.apache.arrow.memory.BufferAllocator;
-import org.apache.arrow.memory.RootAllocator;
-import org.apache.arrow.vector.IntVector;
-import org.apache.arrow.vector.VarCharVector;
-import org.apache.arrow.vector.VectorSchemaRoot;
-import org.apache.arrow.vector.ipc.ArrowStreamWriter;
-import org.apache.arrow.vector.types.pojo.ArrowType;
-import org.apache.arrow.vector.types.pojo.Field;
-import org.apache.arrow.vector.types.pojo.Schema;
-import org.apache.arrow.vector.util.Text;
-import org.junit.jupiter.api.Test;
-
-public class TestArrowIPC {
-
- private static final String FILENAME = "/tmp/initial_data.arrow";
- private static final int RECORD_COUNT = 3;
-
- @Test
- void testIPC() throws Exception {
- try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE)) {
-
- Schema schema =
- new Schema(
- Arrays.asList(
- Field.nullable("id", new ArrowType.Int(32, true)),
- Field.nullable("value", new ArrowType.Utf8())));
-
- try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator))
{
- IntVector idVector = (IntVector) root.getVector("id");
- VarCharVector valueVector = (VarCharVector) root.getVector("value");
-
- idVector.allocateNew();
- valueVector.allocateNew();
-
- for (int i = 0; i < RECORD_COUNT; i++) {
- idVector.setSafe(i, i + 1);
- valueVector.setSafe(i, new Text("Row_" + (i + 1)));
- }
-
- idVector.setValueCount(RECORD_COUNT);
- valueVector.setValueCount(RECORD_COUNT);
- root.setRowCount(RECORD_COUNT);
-
- File outFile = new File(FILENAME);
- try (FileOutputStream fos = new FileOutputStream(outFile);
- ArrowStreamWriter writer = new ArrowStreamWriter(root, null, fos))
{
-
- writer.start();
- writer.writeBatch();
- writer.end();
- }
-
- System.out.println(
- "✅ Successfully generated Arrow IPC Stream file: " +
outFile.getAbsolutePath());
- System.out.println("--- Ready for cURL test ---");
- }
- }
- }
-}
diff --git
a/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/ops/gravitino/TestGravitinoLanceNamespaceWrapper.java
b/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/ops/gravitino/TestGravitinoLanceNamespaceWrapper.java
new file mode 100644
index 0000000000..b0ddb980ab
--- /dev/null
+++
b/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/ops/gravitino/TestGravitinoLanceNamespaceWrapper.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.lance.common.ops.gravitino;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.channels.Channels;
+import java.util.Arrays;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+public class TestGravitinoLanceNamespaceWrapper {
+
+  /** Verifies that a schema written to an Arrow IPC stream is parsed back unchanged. */
+  @Test
+  public void testParseArrowIpcStream() throws Exception {
+    Field idField = Field.nullable("id", new ArrowType.Int(32, true));
+    Field valueField = Field.nullable("value", new ArrowType.Utf8());
+    Schema expected = new Schema(Arrays.asList(idField, valueField));
+
+    GravitinoLanceNamespaceWrapper wrapper = new GravitinoLanceNamespaceWrapper();
+    Schema actual = wrapper.parseArrowIpcStream(generateIpcStream(expected));
+
+    Assertions.assertEquals(expected, actual);
+  }
+
+  /**
+   * Serializes the given schema as an Arrow IPC stream containing a single empty batch.
+   *
+   * @param arrowSchema the schema to write
+   * @return the bytes of the IPC stream
+   * @throws IOException if any step of building or writing the stream fails
+   */
+  private byte[] generateIpcStream(Schema arrowSchema) throws IOException {
+    ByteArrayOutputStream sink = new ByteArrayOutputStream();
+    try (BufferAllocator allocator = new RootAllocator();
+        VectorSchemaRoot root = VectorSchemaRoot.create(arrowSchema, allocator)) {
+      // Zero-row vectors: only the schema matters for this stream.
+      root.allocateNew();
+      root.setRowCount(0);
+
+      try (ArrowStreamWriter writer =
+          new ArrowStreamWriter(root, null, Channels.newChannel(sink))) {
+        writer.start();
+        writer.writeBatch();
+        writer.end();
+      }
+
+      return sink.toByteArray();
+    } catch (Exception e) {
+      throw new IOException("Failed to create empty Arrow IPC stream: " + e.getMessage(), e);
+    }
+  }
+}
diff --git
a/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/ops/gravitino/TestLanceDataTypeConverter.java
b/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/ops/gravitino/TestLanceDataTypeConverter.java
new file mode 100644
index 0000000000..9908f8feff
--- /dev/null
+++
b/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/ops/gravitino/TestLanceDataTypeConverter.java
@@ -0,0 +1,473 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.lance.common.ops.gravitino;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertInstanceOf;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.function.Consumer;
+import java.util.stream.Stream;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.gravitino.rel.types.Type;
+import org.apache.gravitino.rel.types.Types;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.CsvSource;
+import org.junit.jupiter.params.provider.MethodSource;
+
+public class TestLanceDataTypeConverter {
+  // Shared converter instance under test.
+  private static final LanceDataTypeConverter CONVERTER = LanceDataTypeConverter.CONVERTER;
+
+  // Gravitino complex type fixtures.
+
+  // struct<id: long not null, name: string>
+  private static final Types.StructType SIMPLE_STRUCT =
+      Types.StructType.of(
+          Types.StructType.Field.of("id", Types.LongType.get(), false, null),
+          Types.StructType.Field.of("name", Types.StringType.get(), true, null));
+
+  // struct<id: long not null, address: struct<street: string not null, city: string not null>>
+  private static final Types.StructType NESTED_STRUCT =
+      Types.StructType.of(
+          Types.StructType.Field.of("id", Types.LongType.get(), false, null),
+          Types.StructType.Field.of(
+              "address",
+              Types.StructType.of(
+                  Types.StructType.Field.of("street", Types.StringType.get(), false, null),
+                  Types.StructType.Field.of("city", Types.StringType.get(), false, null)),
+              true,
+              null));
+
+  // JSON form of an Arrow field named "person_nested_json" with the same shape as NESTED_STRUCT.
+  private static final String NESTED_STRUCT_JSON =
+      "{\"name\":\"person_nested_json\",\"nullable\":false,\"type\":{\"name\":\"struct\"},\"children\":["
+          + "{\"name\":\"id\",\"nullable\":false,\"type\":{\"name\":\"int\",\"bitWidth\":64,\"isSigned\":true},\"children\":[]},"
+          + "{\"name\":\"address\",\"nullable\":true,\"type\":{\"name\":\"struct\"},\"children\":["
+          + "{\"name\":\"street\",\"nullable\":false,\"type\":{\"name\":\"utf8\"},\"children\":[]},"
+          + "{\"name\":\"city\",\"nullable\":false,\"type\":{\"name\":\"utf8\"},\"children\":[]}"
+          + "]}"
+          + "]}";
+
+  // list<struct<sku: string not null, quantity: int not null>> with nullable elements
+  private static final Types.ListType LIST_OF_STRUCTS =
+      Types.ListType.of(
+          Types.StructType.of(
+              Types.StructType.Field.of("sku", Types.StringType.get(), false, null),
+              Types.StructType.Field.of("quantity", Types.IntegerType.get(), false, null)),
+          true);
+
+  // Field validators for Arrow conversion tests. Each one asserts the Arrow type (and, for
+  // nested types, the child layout) of a field produced by toArrowField. They are constants
+  // shared by the parameterized tests, hence final.
+  private static final Consumer<Field> INT_VALIDATOR =
+      field -> assertInstanceOf(ArrowType.Int.class, field.getFieldType().getType());
+  private static final Consumer<Field> STRING_VALIDATOR =
+      field -> assertInstanceOf(ArrowType.Utf8.class, field.getFieldType().getType());
+  private static final Consumer<Field> LARGE_UTF8_VALIDATOR =
+      field -> assertInstanceOf(ArrowType.LargeUtf8.class, field.getFieldType().getType());
+  private static final Consumer<Field> BOOLEAN_VALIDATOR =
+      field -> assertInstanceOf(ArrowType.Bool.class, field.getFieldType().getType());
+  // Expects decimal(10, 2).
+  private static final Consumer<Field> DECIMAL_VALIDATOR =
+      field -> {
+        assertInstanceOf(ArrowType.Decimal.class, field.getFieldType().getType());
+        ArrowType.Decimal decimal = (ArrowType.Decimal) field.getFieldType().getType();
+
+        assertEquals(10, decimal.getPrecision());
+        assertEquals(2, decimal.getScale());
+      };
+  // Expects a list with one nullable int child named "element".
+  private static final Consumer<Field> LIST_VALIDATOR =
+      field -> {
+        assertInstanceOf(ArrowType.List.class, field.getFieldType().getType());
+        assertEquals(1, field.getChildren().size());
+
+        Field elementField = field.getChildren().get(0);
+        assertEquals("element", elementField.getName());
+        assertTrue(elementField.isNullable());
+        assertInstanceOf(ArrowType.Int.class, elementField.getFieldType().getType());
+      };
+  // Expects a map<string, int> laid out as one struct child with a non-nullable key and a
+  // nullable value.
+  private static final Consumer<Field> MAP_VALIDATOR =
+      field -> {
+        assertInstanceOf(ArrowType.Map.class, field.getFieldType().getType());
+        assertEquals(1, field.getChildren().size());
+
+        Field structField = field.getChildren().get(0);
+        assertEquals(MapVector.DATA_VECTOR_NAME, structField.getName());
+        assertEquals(2, structField.getChildren().size());
+
+        Field keyField = structField.getChildren().get(0);
+        assertEquals(MapVector.KEY_NAME, keyField.getName());
+        assertFalse(keyField.isNullable());
+        assertInstanceOf(ArrowType.Utf8.class, keyField.getFieldType().getType());
+
+        Field valueField = structField.getChildren().get(1);
+        assertEquals(MapVector.VALUE_NAME, valueField.getName());
+        assertTrue(valueField.isNullable());
+        assertInstanceOf(ArrowType.Int.class, valueField.getFieldType().getType());
+      };
+  // Expects struct<id: int not null, name: utf8 nullable>.
+  private static final Consumer<Field> STRUCT_VALIDATOR =
+      field -> {
+        assertInstanceOf(ArrowType.Struct.class, field.getFieldType().getType());
+        assertEquals(2, field.getChildren().size());
+
+        Field idField = field.getChildren().get(0);
+        assertEquals("id", idField.getName());
+        assertFalse(idField.isNullable());
+        assertInstanceOf(ArrowType.Int.class, idField.getFieldType().getType());
+
+        Field nameField = field.getChildren().get(1);
+        assertEquals("name", nameField.getName());
+        assertTrue(nameField.isNullable());
+        assertInstanceOf(ArrowType.Utf8.class, nameField.getFieldType().getType());
+      };
+  // Expects a two-member struct whose second member "address" is a nullable two-member struct.
+  private static final Consumer<Field> NESTED_STRUCT_VALIDATOR =
+      field -> {
+        assertInstanceOf(ArrowType.Struct.class, field.getFieldType().getType());
+        assertEquals(2, field.getChildren().size());
+
+        Field addressField = field.getChildren().get(1);
+        assertEquals("address", addressField.getName());
+        assertTrue(addressField.isNullable());
+
+        assertInstanceOf(ArrowType.Struct.class, addressField.getFieldType().getType());
+        assertEquals(2, addressField.getChildren().size());
+      };
+  // Expects a list whose nullable "element" child is a two-member struct.
+  private static final Consumer<Field> LIST_OF_STRUCTS_VALIDATOR =
+      field -> {
+        assertInstanceOf(ArrowType.List.class, field.getFieldType().getType());
+        assertEquals(1, field.getChildren().size());
+
+        Field elementField = field.getChildren().get(0);
+        assertEquals("element", elementField.getName());
+        assertTrue(elementField.isNullable());
+        assertInstanceOf(ArrowType.Struct.class, elementField.getFieldType().getType());
+        assertEquals(2, elementField.getChildren().size());
+      };
+  // Expects a sparse union of (int, utf8), in that order.
+  private static final Consumer<Field> UNION_VALIDATOR =
+      field -> {
+        assertInstanceOf(ArrowType.Union.class, field.getFieldType().getType());
+        ArrowType.Union unionType = (ArrowType.Union) field.getFieldType().getType();
+        assertEquals(UnionMode.Sparse, unionType.getMode());
+        assertEquals(2, field.getChildren().size());
+        assertInstanceOf(ArrowType.Int.class, field.getChildren().get(0).getFieldType().getType());
+        assertInstanceOf(ArrowType.Utf8.class, field.getChildren().get(1).getFieldType().getType());
+      };
+
+  /**
+   * Checks that each signed Gravitino integer type converts to an Arrow {@code Int} with the
+   * expected bit width and signedness.
+   *
+   * @param typeName which Gravitino integer type to build (BYTE, SHORT, INTEGER or LONG)
+   * @param expectedBitWidth bit width the Arrow type must report
+   * @param expectedSigned signedness the Arrow type must report
+   */
+  @ParameterizedTest
+  @DisplayName("Test conversion of Integer types (Byte, Short, Integer, Long)")
+  @CsvSource({"BYTE, 8, true", "SHORT, 16, true", "INTEGER, 32, true", "LONG, 64, true"})
+  public void testFromGravitinoIntegerTypes(
+      String typeName, int expectedBitWidth, boolean expectedSigned) {
+    Type type =
+        switch (typeName) {
+          case "BYTE" -> Types.ByteType.get();
+          case "SHORT" -> Types.ShortType.get();
+          case "INTEGER" -> Types.IntegerType.get();
+          case "LONG" -> Types.LongType.get();
+          default -> throw new IllegalArgumentException("Unknown type: " + typeName);
+        };
+
+    ArrowType arrowType = CONVERTER.fromGravitino(type);
+    assertInstanceOf(ArrowType.Int.class, arrowType);
+
+    ArrowType.Int intType = (ArrowType.Int) arrowType;
+    assertEquals(expectedBitWidth, intType.getBitWidth());
+    assertEquals(expectedSigned, intType.getIsSigned());
+  }
+
+  /**
+   * A timestamp with time zone and no explicit precision must convert to a microsecond Arrow
+   * timestamp tagged "UTC".
+   */
+  @Test
+  public void testFromGravitinoTimestampWithTz() {
+    ArrowType converted = CONVERTER.fromGravitino(Types.TimestampType.withTimeZone());
+
+    // assertInstanceOf returns the value already cast to the expected type.
+    ArrowType.Timestamp tsArrow = assertInstanceOf(ArrowType.Timestamp.class, converted);
+    assertEquals(TimeUnit.MICROSECOND, tsArrow.getUnit());
+    assertEquals("UTC", tsArrow.getTimezone());
+  }
+
+  /**
+   * Checks that external types carrying a JSON-serialized Arrow field (large utf8, large binary,
+   * large list and fixed-size list) are turned back into the described Arrow field, with the
+   * requested name and nullability.
+   */
+  @Test
+  public void testExternalTypeConversion() {
+    String expectedColumnName = "col_name";
+    boolean expectedNullable = true;
+
+    // largeutf8
+    Types.ExternalType externalType =
+        Types.ExternalType.of(
+            "{\"name\":\"col_name\",\"nullable\":true,"
+                + "\"type\":{\"name\":\"largeutf8\"},\"children\":[]}");
+    Field arrowField = CONVERTER.toArrowField(expectedColumnName, externalType, expectedNullable);
+    assertEquals(expectedColumnName, arrowField.getName());
+    assertEquals(expectedNullable, arrowField.isNullable());
+    assertInstanceOf(ArrowType.LargeUtf8.class, arrowField.getFieldType().getType());
+
+    // largebinary
+    externalType =
+        Types.ExternalType.of(
+            "{\"name\":\"col_name\",\"nullable\":true,"
+                + "\"type\":{\"name\":\"largebinary\"},\"children\":[]}");
+    arrowField = CONVERTER.toArrowField(expectedColumnName, externalType, expectedNullable);
+    assertEquals(expectedColumnName, arrowField.getName());
+    assertEquals(expectedNullable, arrowField.isNullable());
+    assertInstanceOf(ArrowType.LargeBinary.class, arrowField.getFieldType().getType());
+
+    // largelist<int32>
+    externalType =
+        Types.ExternalType.of(
+            "{\"name\":\"col_name\",\"nullable\":true,"
+                + "\"type\":{\"name\":\"largelist\"},"
+                + "\"children\":["
+                + "{\"name\":\"element\",\"nullable\":true,"
+                + "\"type\":{\"name\":\"int\", \"bitWidth\":32, \"isSigned\": true},"
+                + "\"children\":[]}]}");
+    arrowField = CONVERTER.toArrowField(expectedColumnName, externalType, expectedNullable);
+    assertEquals(expectedColumnName, arrowField.getName());
+    assertEquals(expectedNullable, arrowField.isNullable());
+    assertInstanceOf(ArrowType.LargeList.class, arrowField.getFieldType().getType());
+
+    // fixedsizelist<int32>[10]
+    externalType =
+        Types.ExternalType.of(
+            "{\"name\":\"col_name\",\"nullable\":true,"
+                + "\"type\":{\"name\":\"fixedsizelist\", \"listSize\":10},"
+                + "\"children\":["
+                + "{\"name\":\"element\",\"nullable\":true,"
+                + "\"type\":{\"name\":\"int\", \"bitWidth\":32, \"isSigned\": true},"
+                + "\"children\":[]}]}");
+    arrowField = CONVERTER.toArrowField(expectedColumnName, externalType, expectedNullable);
+    assertEquals(expectedColumnName, arrowField.getName());
+    assertEquals(expectedNullable, arrowField.isNullable());
+    assertInstanceOf(ArrowType.FixedSizeList.class, arrowField.getFieldType().getType());
+    assertEquals(10, ((ArrowType.FixedSizeList) arrowField.getFieldType().getType()).getListSize());
+  }
+
+  /**
+   * Converts each supplied Gravitino type to an Arrow field, checks that name and nullability
+   * are carried over, then hands the field to the case-specific validator.
+   */
+  @ParameterizedTest(name = "[{index}] name={0}, type={1}, nullable={2}")
+  @MethodSource("toArrowFieldArguments")
+  @DisplayName("Test toArrowField for various types")
+  public void testToArrowField(
+      String name, Type gravitinoType, boolean nullable, Consumer<Field> validator) {
+    Field converted = CONVERTER.toArrowField(name, gravitinoType, nullable);
+
+    assertEquals(name, converted.getName());
+    assertEquals(nullable, converted.isNullable());
+    validator.accept(converted);
+  }
+
+  /** An unparsed Gravitino type has no Arrow mapping and must be rejected by fromGravitino. */
+  @Test
+  void testUnsupportedTypeThrowsException() {
+    Types.UnparsedType unknown = Types.UnparsedType.of("UNKNOWN_TYPE");
+
+    assertThrows(UnsupportedOperationException.class, () -> CONVERTER.fromGravitino(unknown));
+  }
+
+  /**
+   * Converts each supplied Arrow field and compares the result with the expected Gravitino type.
+   * {@code testName} is only used for the display name of the case.
+   */
+  @ParameterizedTest(name = "[{index}] {0}")
+  @MethodSource("toGravitinoArguments")
+  void testToGravitino(String testName, Field arrowField, Type expectedGravitinoType) {
+    assertEquals(expectedGravitinoType, CONVERTER.toGravitino(arrowField));
+  }
+
+  /**
+   * Supplies the cases for {@code testToArrowField}: field name, Gravitino type, nullability and
+   * the validator that inspects the resulting Arrow field.
+   */
+  private static Stream<Arguments> toArrowFieldArguments() {
+    // External type whose catalog string is a pretty-printed Arrow field of type largeutf8.
+    Type largeUtf8External =
+        Types.ExternalType.of(
+            "{\n"
+                + " \"name\": \"description\",\n"
+                + " \"nullable\": true,\n"
+                + " \"type\": {\n"
+                + " \"name\": \"largeutf8\"\n"
+                + " }\n"
+                + "}");
+    Type intList = Types.ListType.of(Types.IntegerType.get(), true);
+    Type stringToIntMap = Types.MapType.of(Types.StringType.get(), Types.IntegerType.get(), true);
+    Type nestedStructExternal = Types.ExternalType.of(NESTED_STRUCT_JSON);
+    Type intOrString = Types.UnionType.of(Types.IntegerType.get(), Types.StringType.get());
+
+    return Stream.of(
+        // Simple types
+        Arguments.of("age", Types.IntegerType.get(), true, INT_VALIDATOR),
+        Arguments.of("id", Types.LongType.get(), false, INT_VALIDATOR),
+        Arguments.of("name", Types.StringType.get(), true, STRING_VALIDATOR),
+        Arguments.of("description", largeUtf8External, true, LARGE_UTF8_VALIDATOR),
+        Arguments.of("active", Types.BooleanType.get(), false, BOOLEAN_VALIDATOR),
+        // Decimal
+        Arguments.of("price", Types.DecimalType.of(10, 2), false, DECIMAL_VALIDATOR),
+        // List
+        Arguments.of("numbers", intList, false, LIST_VALIDATOR),
+        // Map
+        Arguments.of("properties", stringToIntMap, true, MAP_VALIDATOR),
+        // Struct
+        Arguments.of("person", SIMPLE_STRUCT, true, STRUCT_VALIDATOR),
+        // Nested Struct
+        Arguments.of("person_nested", NESTED_STRUCT, false, NESTED_STRUCT_VALIDATOR),
+        Arguments.of("person_nested_json", nestedStructExternal, false, NESTED_STRUCT_VALIDATOR),
+        // List of Structs
+        Arguments.of("items", LIST_OF_STRUCTS, false, LIST_OF_STRUCTS_VALIDATOR),
+        // Union
+        Arguments.of("union_field", intOrString, true, UNION_VALIDATOR));
+  }
+
+ private static Stream<Arguments> toGravitinoArguments() { // rows: test name, Arrow field, expected Gravitino type
+ return Stream.of(
+ // Simple types: every Arrow field in this group is declared nullable and has no children
+ Arguments.of(
+ "Boolean",
+ new Field("bool_col", new FieldType(true, ArrowType.Bool.INSTANCE,
null), null),
+ Types.BooleanType.get()),
+ Arguments.of(
+ "Byte",
+ new Field("byte_col", new FieldType(true, new ArrowType.Int(8,
true), null), null),
+ Types.ByteType.get()),
+ Arguments.of(
+ "Short",
+ new Field("short_col", new FieldType(true, new ArrowType.Int(16,
true), null), null),
+ Types.ShortType.get()),
+ Arguments.of(
+ "Integer",
+ new Field("int_col", new FieldType(true, new ArrowType.Int(32,
true), null), null),
+ Types.IntegerType.get()),
+ Arguments.of(
+ "Long",
+ new Field("long_col", new FieldType(true, new ArrowType.Int(64,
true), null), null),
+ Types.LongType.get()),
+ Arguments.of(
+ "Float",
+ new Field(
+ "float_col",
+ new FieldType(
+ true, new
ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), null),
+ null),
+ Types.FloatType.get()),
+ Arguments.of(
+ "Double",
+ new Field(
+ "double_col",
+ new FieldType(
+ true, new
ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), null),
+ null),
+ Types.DoubleType.get()),
+ Arguments.of(
+ "String",
+ new Field("string_col", new FieldType(true,
ArrowType.Utf8.INSTANCE, null), null),
+ Types.StringType.get()),
+ Arguments.of(
+ "Binary",
+ new Field("binary_col", new FieldType(true,
ArrowType.Binary.INSTANCE, null), null),
+ Types.BinaryType.get()),
+ Arguments.of(
+ "Decimal",
+ new Field(
+ "decimal_col", new FieldType(true, new ArrowType.Decimal(10,
2, 128), null), null),
+ Types.DecimalType.of(10, 2)), // the 128 in the Arrow decimal has no counterpart in the expected type
+ Arguments.of(
+ "Date",
+ new Field(
+ "date_col", new FieldType(true, new
ArrowType.Date(DateUnit.DAY), null), null),
+ Types.DateType.get()),
+ Arguments.of(
+ "Timestamp without TZ",
+ new Field(
+ "ts_col",
+ new FieldType(true, new
ArrowType.Timestamp(TimeUnit.MICROSECOND, null), null),
+ null),
+ Types.TimestampType.withoutTimeZone(6)), // MICROSECOND with a null zone is expected to map to precision 6
+ Arguments.of(
+ "Timestamp with TZ",
+ new Field(
+ "tstz_col",
+ new FieldType(true, new
ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"), null),
+ null),
+ Types.TimestampType.withTimeZone(3)), // MILLISECOND with a UTC zone is expected to map to precision 3
+ Arguments.of(
+ "Time",
+ new Field(
+ "time_col",
+ new FieldType(true, new ArrowType.Time(TimeUnit.NANOSECOND,
64), null),
+ null),
+ Types.TimeType.get()), // expected type carries no explicit precision although the Arrow unit is NANOSECOND
+ Arguments.of(
+ "Fixed",
+ new Field(
+ "fixed_col", new FieldType(true, new
ArrowType.FixedSizeBinary(16), null), null),
+ Types.FixedType.of(16)),
+ // Complex types: children are supplied as nested Arrow fields
+ Arguments.of(
+ "List",
+ new Field(
+ "list_col",
+ new FieldType(false, ArrowType.List.INSTANCE, null),
+ Collections.singletonList(
+ new Field(
+ "element", new FieldType(true, new ArrowType.Int(32,
true), null), null))),
+ Types.ListType.of(Types.IntegerType.get(), true)), // list field itself is non-nullable; its element child is nullable
+ Arguments.of(
+ "Map",
+ new Field(
+ "map_col",
+ new FieldType(true, new ArrowType.Map(false), null),
+ Collections.singletonList(
+ new Field(
+ MapVector.DATA_VECTOR_NAME,
+ new FieldType(false, ArrowType.Struct.INSTANCE, null),
+ Arrays.asList(
+ new Field(
+ MapVector.KEY_NAME,
+ new FieldType(false, ArrowType.Utf8.INSTANCE,
null),
+ null),
+ new Field(
+ MapVector.VALUE_NAME,
+ new FieldType(true, new ArrowType.Int(32,
true), null),
+ null))))),
+ Types.MapType.of(Types.StringType.get(), Types.IntegerType.get(),
true)), // value child is nullable, key child is not
+ Arguments.of(
+ "Struct",
+ new Field(
+ "struct_col",
+ new FieldType(true, ArrowType.Struct.INSTANCE, null),
+ Arrays.asList(
+ new Field("id", new FieldType(false, new ArrowType.Int(64,
true), null), null),
+ new Field("name", new FieldType(true,
ArrowType.Utf8.INSTANCE, null), null))),
+ SIMPLE_STRUCT),
+ Arguments.of(
+ "Union",
+ new Field(
+ "union_col",
+ new FieldType(true, new ArrowType.Union(UnionMode.Sparse, new
int[] {1, 2}), null),
+ Arrays.asList(
+ new Field(
+ "integer", new FieldType(true, new ArrowType.Int(32,
true), null), null),
+ new Field("string", new FieldType(true,
ArrowType.Utf8.INSTANCE, null), null))),
+ Types.UnionType.of(Types.IntegerType.get(),
Types.StringType.get())),
+ // External type: a LargeUtf8 field is expected to come back as an ExternalType wrapping the field JSON
+ Arguments.of(
+ "External (LargeUtf8)",
+ new Field(
+ "external_col", new FieldType(true,
ArrowType.LargeUtf8.INSTANCE, null), null),
+ Types.ExternalType.of(
+
"{\"name\":\"external_col\",\"nullable\":true,\"type\":{\"name\":\"largeutf8\"},\"children\":[]}")));
+ }
+}
diff --git
a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java
b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java
index 493d0acace..3acf4cb92e 100644
---
a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java
+++
b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java
@@ -165,7 +165,7 @@ public class LanceNamespaceOperations {
@Path("{id}/table/list")
public Response listTables(
@PathParam("id") String namespaceId,
- @DefaultValue("$") @QueryParam("delimiter") String delimiter,
+ @DefaultValue(NAMESPACE_DELIMITER_DEFAULT) @QueryParam("delimiter")
String delimiter,
@QueryParam("page_token") String pageToken,
@QueryParam("limit") Integer limit) {
try {
diff --git
a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java
b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java
index 359fc94c42..17b22710b5 100644
---
a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java
+++
b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java
@@ -18,6 +18,8 @@
*/
package org.apache.gravitino.lance.service.rest;
+import static
org.apache.gravitino.lance.common.ops.NamespaceWrapper.NAMESPACE_DELIMITER_DEFAULT;
+
import com.fasterxml.jackson.core.type.TypeReference;
import com.lancedb.lance.namespace.model.CreateTableResponse;
import com.lancedb.lance.namespace.model.DescribeTableResponse;
@@ -54,7 +56,7 @@ public class LanceTableOperations {
@Path("/describe")
public Response describeTable(
@PathParam("id") String tableId,
- @DefaultValue("$") @QueryParam("delimiter") String delimiter) {
+ @DefaultValue(NAMESPACE_DELIMITER_DEFAULT) @QueryParam("delimiter")
String delimiter) {
try {
DescribeTableResponse response =
lanceNamespace.asTableOps().describeTable(tableId, delimiter);
@@ -71,14 +73,14 @@ public class LanceTableOperations {
public Response createTable(
@PathParam("id") String tableId,
@QueryParam("mode") @DefaultValue("create") String mode, // create,
exist_ok, overwrite
- @QueryParam("delimiter") @DefaultValue("$") String delimiter,
+ @QueryParam("delimiter") @DefaultValue(NAMESPACE_DELIMITER_DEFAULT)
String delimiter,
@HeaderParam("x-lance-table-location") String tableLocation,
@HeaderParam("x-lance-table-properties") String tableProperties,
@HeaderParam("x-lance-root-catalog") String rootCatalog,
byte[] arrowStreamBody) {
try {
Map<String, String> props =
- JsonUtil.mapper().readValue(tableProperties, new
TypeReference<Map<String, String>>() {});
+ JsonUtil.mapper().readValue(tableProperties, new TypeReference<>()
{});
CreateTableResponse response =
lanceNamespace
.asTableOps()
@@ -95,7 +97,7 @@ public class LanceTableOperations {
public Response createEmptyTable(
@PathParam("id") String tableId,
@QueryParam("mode") @DefaultValue("create") String mode, // create,
exist_ok, overwrite
- @QueryParam("delimiter") @DefaultValue("$") String delimiter,
+ @QueryParam("delimiter") @DefaultValue(NAMESPACE_DELIMITER_DEFAULT)
String delimiter,
@HeaderParam("x-lance-table-location") String tableLocation,
@HeaderParam("x-lance-root-catalog") String rootCatalog,
@HeaderParam("x-lance-table-properties") String tableProperties) {
@@ -103,8 +105,7 @@ public class LanceTableOperations {
Map<String, String> props =
StringUtils.isBlank(tableProperties)
? Map.of()
- : JsonUtil.mapper()
- .readValue(tableProperties, new TypeReference<Map<String,
String>>() {});
+ : JsonUtil.mapper().readValue(tableProperties, new
TypeReference<>() {});
CreateTableResponse response =
lanceNamespace
.asTableOps()