This is an automated email from the ASF dual-hosted git repository. yuqi4733 pushed a commit to branch internal-main in repository https://gitbox.apache.org/repos/asf/gravitino.git
commit 8a65fe76d8de7b9c5ed9e635033ee5e261560822 Author: geyanggang <[email protected]> AuthorDate: Mon Jan 19 11:17:56 2026 +0800 [#115]Optimize BigQuery catalog JDBC driver packaging and type handling (#116) * Optimize driver packaging and type handling. * Optimize type handling. --- catalogs/catalog-jdbc-bigquery/build.gradle.kts | 27 ++++++------------- .../BigQueryColumnDefaultValueConverter.java | 14 ++-------- .../bigquery/converter/BigQueryTypeConverter.java | 30 +++++++++------------- .../TestBigQueryColumnDefaultValueConverter.java | 5 ++-- .../converter/TestBigQueryTypeConverter.java | 26 ++++++++++++++----- docs/jdbc-bigquery-catalog.md | 22 +++++++++++++--- 6 files changed, 64 insertions(+), 60 deletions(-) diff --git a/catalogs/catalog-jdbc-bigquery/build.gradle.kts b/catalogs/catalog-jdbc-bigquery/build.gradle.kts index 0bac853e48..099eb458e0 100644 --- a/catalogs/catalog-jdbc-bigquery/build.gradle.kts +++ b/catalogs/catalog-jdbc-bigquery/build.gradle.kts @@ -107,42 +107,33 @@ dependencies { testImplementation(libs.mockito.core) testImplementation(libs.awaitility) + // Simba JDBC driver for compile and test only + // Users must download and install the driver manually in production val simbaJdbcDriver = files( simbaExtractDir.asFileTree.matching { include("*.jar") } ) - implementation(simbaJdbcDriver) + compileOnly(simbaJdbcDriver) + testImplementation(simbaJdbcDriver) testRuntimeOnly(libs.junit.jupiter.engine) } tasks { val runtimeJars by registering(Copy::class) { - dependsOn("jar", extractSimbaDriver) + dependsOn("jar") from(configurations.runtimeClasspath) into("build/libs") duplicatesStrategy = DuplicatesStrategy.EXCLUDE - - doFirst { - logger.lifecycle("Copying runtime dependencies to build/libs") - logger.lifecycle("Including Simba JDBC driver files:") - fileTree(simbaExtractDir).matching { include("*.jar") }.forEach { jar -> - logger.lifecycle(" - ${jar.name}") - } - } } val copyCatalogLibs by registering(Copy::class) { - dependsOn("jar", runtimeJars) + dependsOn("jar", "runtimeJars") from("build/libs") { exclude("guava-*.jar") exclude("log4j-*.jar") exclude("slf4j-*.jar") - exclude("grpc-google-cloud-bigquerystorage-v1beta*.jar") - exclude("proto-google-cloud-bigquerystorage-v1beta*.jar") - exclude("error_prone_annotations-2.33.0.jar") - exclude("failureaccess-1.0.2.jar") } into("$rootDir/distribution/package/catalogs/jdbc-bigquery/libs") @@ -185,8 +176,6 @@ tasks.test { environment("SIMBA_JDBC_DRIVER_PATH", simbaExtractDir.asFile.absolutePath) } -afterEvaluate { - tasks.getByName("generateMetadataFileForMavenJavaPublication") { - dependsOn(tasks.getByName("runtimeJars")) - } +tasks.getByName("generateMetadataFileForMavenJavaPublication") { + dependsOn("runtimeJars") } diff --git a/catalogs/catalog-jdbc-bigquery/src/main/java/org/apache/gravitino/catalog/bigquery/converter/BigQueryColumnDefaultValueConverter.java b/catalogs/catalog-jdbc-bigquery/src/main/java/org/apache/gravitino/catalog/bigquery/converter/BigQueryColumnDefaultValueConverter.java index 2b298e465a..2d44dfb45b 100644 --- a/catalogs/catalog-jdbc-bigquery/src/main/java/org/apache/gravitino/catalog/bigquery/converter/BigQueryColumnDefaultValueConverter.java +++ b/catalogs/catalog-jdbc-bigquery/src/main/java/org/apache/gravitino/catalog/bigquery/converter/BigQueryColumnDefaultValueConverter.java @@ -66,20 +66,9 @@ public class BigQueryColumnDefaultValueConverter extends JdbcColumnDefaultValueC } case BigQueryTypeConverter.NUMERIC: - case BigQueryTypeConverter.BIGNUMERIC: try { Integer precision = type.getColumnSize(); Integer scale = type.getScale(); - // Gravitino Decimal supports precision up to 38, but BigQuery BIGNUMERIC supports up to - // 76.76 - // For BIGNUMERIC with precision > 38, we cap it at 38 for Gravitino compatibility - if (precision != null && precision > 38) { - precision = 38; - // Adjust scale proportionally if needed - if (scale != null && scale > 38) { - scale = 38; - } - } if (precision != null && scale != null) { return Literals.decimalLiteral(Decimal.of(trimmedValue, precision, scale)); } else { @@ -100,7 +89,8 @@ public class BigQueryColumnDefaultValueConverter extends JdbcColumnDefaultValueC return Literals.stringLiteral(trimmedValue); default: - // For other types, return as unparsed expression + // For other types (including BIGNUMERIC, GEOGRAPHY, JSON, STRUCT, RANGE), + // return as unparsed expression return UnparsedExpression.of(columnDefaultValue); } } diff --git a/catalogs/catalog-jdbc-bigquery/src/main/java/org/apache/gravitino/catalog/bigquery/converter/BigQueryTypeConverter.java b/catalogs/catalog-jdbc-bigquery/src/main/java/org/apache/gravitino/catalog/bigquery/converter/BigQueryTypeConverter.java index eb30aafbf7..3b2ec0d4ab 100644 --- a/catalogs/catalog-jdbc-bigquery/src/main/java/org/apache/gravitino/catalog/bigquery/converter/BigQueryTypeConverter.java +++ b/catalogs/catalog-jdbc-bigquery/src/main/java/org/apache/gravitino/catalog/bigquery/converter/BigQueryTypeConverter.java @@ -23,6 +23,8 @@ public class BigQueryTypeConverter extends JdbcTypeConverter { static final String TIMESTAMP = "timestamp"; static final String GEOGRAPHY = "geography"; static final String JSON = "json"; + static final String STRUCT = "struct"; + static final String RANGE = "range"; @Override public Type toGravitino(JdbcTypeBean typeBean) { @@ -45,21 +47,6 @@ public class BigQueryTypeConverter extends JdbcTypeConverter { } else { return Types.DecimalType.of(38, 9); } - case BIGNUMERIC: - // BigQuery BIGNUMERIC has precision up to 76.76, scale up to 38 by default - // But Gravitino DecimalType is limited to precision 38, so we limit it to fit Gravitino's - // constraints - Integer bigNumericPrecision = typeBean.getColumnSize(); - Integer bigNumericScale = typeBean.getScale(); - if (bigNumericPrecision != null && bigNumericScale != null) { - // Limit precision to Gravitino's maximum (38 digits) - int limitedPrecision = Math.min(bigNumericPrecision, 38); - int limitedScale = Math.min(bigNumericScale, limitedPrecision); - return Types.DecimalType.of(limitedPrecision, limitedScale); - } else { - // Use Gravitino's maximum precision - return Types.DecimalType.of(38, 38); - } case STRING: return Types.StringType.get(); case BYTES: @@ -78,12 +65,20 @@ public class BigQueryTypeConverter extends JdbcTypeConverter { return Optional.ofNullable(typeBean.getDatetimePrecision()) .map(Types.TimestampType::withTimeZone) .orElseGet(Types.TimestampType::withTimeZone); + case BIGNUMERIC: case GEOGRAPHY: case JSON: - // Handle GEOGRAPHY and JSON as external types with uppercase type name + case STRUCT: + case RANGE: + // Handle BIGNUMERIC, GEOGRAPHY, JSON, STRUCT, and RANGE as external types + // BIGNUMERIC: Avoid precision loss (BigQuery supports approximately 76.8 digits, + // Gravitino Decimal only up to 38) + // GEOGRAPHY, JSON, STRUCT, RANGE: Complex types not directly supported by Gravitino + // Preserve the full type definition with uppercase type name return Types.ExternalType.of(typeBean.getTypeName().toUpperCase()); default: - // For complex types like ARRAY, STRUCT, RANGE, preserve the full type definition + // For complex types like ARRAY<T>, STRUCT<...>, RANGE<T>, preserve the full type + // definition // The typeName from JDBC should contain the complete type like "ARRAY<STRING>" // We need to preserve this for proper SQL generation return Types.ExternalType.of(typeBean.getTypeName().toUpperCase()); @@ -133,7 +128,6 @@ public class BigQueryTypeConverter extends JdbcTypeConverter { : baseType; } else if (type instanceof Types.DecimalType decimalType) { // BigQuery NUMERIC: precision 1-38, scale 0-9 (or 0-precision) - // BigQuery BIGNUMERIC: precision 1-76, scale 0-38 (or 0-precision) // For Gravitino DecimalType (max precision 38), always use NUMERIC return String.format("NUMERIC(%d, %d)", decimalType.precision(), decimalType.scale()); } else if (type instanceof Types.ExternalType) { diff --git a/catalogs/catalog-jdbc-bigquery/src/test/java/org/apache/gravitino/catalog/bigquery/converter/TestBigQueryColumnDefaultValueConverter.java b/catalogs/catalog-jdbc-bigquery/src/test/java/org/apache/gravitino/catalog/bigquery/converter/TestBigQueryColumnDefaultValueConverter.java index b2d8b79c9e..039bd7684d 100644 --- a/catalogs/catalog-jdbc-bigquery/src/test/java/org/apache/gravitino/catalog/bigquery/converter/TestBigQueryColumnDefaultValueConverter.java +++ b/catalogs/catalog-jdbc-bigquery/src/test/java/org/apache/gravitino/catalog/bigquery/converter/TestBigQueryColumnDefaultValueConverter.java @@ -96,10 +96,11 @@ public class TestBigQueryColumnDefaultValueConverter { Expression result = converter.toGravitino(numericTypeBean, "123.45", false, true); assertTrue(result instanceof Literals.LiteralImpl); - // BIGNUMERIC type + // BIGNUMERIC type - now treated as external type, returns unparsed expression JdbcTypeConverter.JdbcTypeBean bigNumericTypeBean = createTypeBean("bignumeric", 20, 5, null); result = converter.toGravitino(bigNumericTypeBean, "12345.67890", false, true); - assertTrue(result instanceof Literals.LiteralImpl); + assertTrue(result instanceof UnparsedExpression); + assertEquals("12345.67890", ((UnparsedExpression) result).unparsedExpression()); // Invalid decimal should return unparsed expression result = converter.toGravitino(numericTypeBean, "invalid_decimal", false, true); diff --git a/catalogs/catalog-jdbc-bigquery/src/test/java/org/apache/gravitino/catalog/bigquery/converter/TestBigQueryTypeConverter.java b/catalogs/catalog-jdbc-bigquery/src/test/java/org/apache/gravitino/catalog/bigquery/converter/TestBigQueryTypeConverter.java index c42b662d3e..14a394ed3f 100644 --- a/catalogs/catalog-jdbc-bigquery/src/test/java/org/apache/gravitino/catalog/bigquery/converter/TestBigQueryTypeConverter.java +++ b/catalogs/catalog-jdbc-bigquery/src/test/java/org/apache/gravitino/catalog/bigquery/converter/TestBigQueryTypeConverter.java @@ -74,7 +74,7 @@ public class TestBigQueryTypeConverter { assertEquals("NUMERIC(38, 2)", typeConverter.fromGravitino(Types.DecimalType.of(38, 2))); // Note: Gravitino DecimalType is limited to precision 38 - // So we always use NUMERIC, never BIGNUMERIC + // For BIGNUMERIC, use ExternalType or UnparsedType } @Test @@ -192,14 +192,13 @@ public class TestBigQueryTypeConverter { Types.DecimalType.of(38, 9), typeConverter.toGravitino(createTypeBean("numeric", null, null, null))); - // BIGNUMERIC with precision and scale (limited to Gravitino's max precision) + // BIGNUMERIC is now treated as ExternalType to avoid precision loss assertEquals( - Types.DecimalType.of(38, 10), + Types.ExternalType.of("BIGNUMERIC"), typeConverter.toGravitino(createTypeBean("bignumeric", 38, 10, null))); - // BIGNUMERIC with default precision and scale (limited to Gravitino's max precision) assertEquals( - Types.DecimalType.of(38, 38), + Types.ExternalType.of("BIGNUMERIC"), typeConverter.toGravitino(createTypeBean("bignumeric", null, null, null))); } @@ -215,16 +214,31 @@ public class TestBigQueryTypeConverter { Types.ExternalType.of("JSON"), typeConverter.toGravitino(createTypeBean("json", null, null, null))); + // STRUCT + assertEquals( + Types.ExternalType.of("STRUCT"), + typeConverter.toGravitino(createTypeBean("struct", null, null, null))); + + // RANGE + assertEquals( + Types.ExternalType.of("RANGE"), + typeConverter.toGravitino(createTypeBean("range", null, null, null))); + // Array type assertEquals( Types.ExternalType.of("ARRAY<STRING>"), typeConverter.toGravitino(createTypeBean("array<string>", null, null, null))); - // Struct type + // Struct type with full definition assertEquals( Types.ExternalType.of("STRUCT<NAME STRING, AGE INT64>"), typeConverter.toGravitino( createTypeBean("struct<name string, age int64>", null, null, null))); + + // Range type with full definition + assertEquals( + Types.ExternalType.of("RANGE<DATE>"), + typeConverter.toGravitino(createTypeBean("range<date>", null, null, null))); } @Test diff --git a/docs/jdbc-bigquery-catalog.md b/docs/jdbc-bigquery-catalog.md index b50aeeda6c..130fb0d865 100644 --- a/docs/jdbc-bigquery-catalog.md +++ b/docs/jdbc-bigquery-catalog.md @@ -18,6 +18,19 @@ Apache Gravitino provides the ability to manage Google BigQuery metadata through Gravitino saves some system information in schema and table comment, like `(From Gravitino, DO NOT EDIT: gravitino.v1.uid1078334182909406185)`, please don't change or remove this message. ::: +### JDBC Driver Installation + +The BigQuery JDBC driver is not included in the Gravitino distribution. You must download and install it manually: + +1. Download the Simba JDBC Driver for Google BigQuery from the [official website](https://cloud.google.com/bigquery/docs/reference/odbc-jdbc-drivers) +2. Extract the downloaded ZIP file +3. Copy all JAR files (except Jackson JARs to avoid conflicts) to `${GRAVITINO_HOME}/catalogs/jdbc-bigquery/libs/` +4. Restart the Gravitino server + +:::note +The recommended driver version is 1.6.5.1001 or later. Make sure to exclude Jackson-related JARs from the driver package to avoid dependency conflicts with Gravitino. +::: + ## Catalog ### Catalog capabilities @@ -138,14 +151,16 @@ Refer to [Manage Relational Metadata Using Gravitino](./manage-relational-metada | `Geography` | `GEOGRAPHY` | | `Json` | `JSON` | | `Range<T>` | `RANGE<T>` | +| `BigNumeric` | `BIGNUMERIC` | :::info BigQuery doesn't support Gravitino `Fixed` `IntervalDay` `IntervalYear` `Union` `UUID` type. Meanwhile, the data types other than listed above are mapped to Gravitino **[External Type](./manage-relational-metadata-using-gravitino.md#external-type)** that represents an unresolvable data type. +**Note on BIGNUMERIC**: BigQuery's BIGNUMERIC type supports precision of approximately 76.8 digits (the 77th digit is partial), which exceeds Gravitino's DecimalType maximum precision of 38. To avoid precision loss, BIGNUMERIC is mapped to ExternalType and preserved as-is. Use the API with ExternalType or UnparsedType to work with BIGNUMERIC columns. + Unsupported types will be optimized in future versions. The following types are recommended to use `string` type as a workaround: - INTERVAL -- BIGNUMERIC (Note: BIGNUMERIC precision above 38 is truncated to fit Gravitino's DecimalType limits) ::: ### Table properties @@ -302,9 +317,10 @@ Gravitino supports these table alteration operations: - External tables, views, table clones not supported - Dataset properties like `failover_reservation`, `is_primary`, `primary_replica`, `collate` not supported - Web UI does not support table partitioning and clustering (API only) - - Web UI does not support complex data types (ARRAY, STRUCT, GEOGRAPHY, JSON) (API only) - - BIGNUMERIC precision above (38,9) has precision loss; INTERVAL type currently not supported + - Web UI does not support complex data types (ARRAY, STRUCT, GEOGRAPHY, JSON, RANGE, BIGNUMERIC) (API only) + - INTERVAL type currently not supported - Unsupported data types display as "external" in Web UI + - JDBC driver must be manually downloaded and installed (not included in distribution) 2. **Performance Considerations** - Table metadata loading uses JDBC which may be slower than native API calls
