This is an automated email from the ASF dual-hosted git repository. voonhous pushed a commit to tag rfc-105-pre-cleanup in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 77b10f539814997b4ffd15787ec88b1dd9a84a20 Author: voon <[email protected]> AuthorDate: Sat May 23 01:38:03 2026 +0800 fix(trino): port hudi-trino-plugin to Trino 480 and slim bundle deps Two concerns interleave here so they land together: Source porting (hudi-trino-plugin): - NodeVersion moved from io.trino.plugin.hive to io.trino.spi. - TranslateHiveViews annotation removed; bind no longer needed (hive.hive-views.enabled config drives this in Trino 480). - ClosingBinder moved from io.trino.plugin.base to io.airlift.bootstrap. - HiveMetastoreModule now takes (Optional<HiveMetastore>, boolean impersonationEnabled); pass false. - FileSystemModule now takes (catalogName, ConnectorContext, boolean); drop the NodeManager/OpenTelemetry extraction and pass context through. - ConnectorPageSource.getNextPage() renamed to getNextSourcePage() returning SourcePage. Updated HudiBaseFileOnlyPageSource, HudiPageSource, and HudiTrinoReaderContext (unwraps SourcePage to Page via getPage() to preserve the serializer's Block accessors). - Type.getObjectValue(...) dropped its first ConnectorSession arg. - ParquetReaderOptions made the no-arg ctor private and the withIgnoreStatistics chained setter inaccessible; switched to ParquetReaderOptions.builder(options)...build(). - ParquetReader constructor added two args (appendRowNumberColumn at position 3, Optional<FileDecryptionContext> as last). Lambda becomes (fields, appendRowNumberColumn). - HivePageSourceProvider.projectBaseColumns and ReaderColumns gone; pass columns directly to createParquetPageSource which now handles projection internally via TransformConnectorPageSource. - ConnectorMetadata.getInfo signature gained a ConnectorSession arg. - ConnectorSplit.getSplitInfo removed from the SPI; dropped the @Override and kept the method for local diagnostics. - Root pom: split trino.connector.version (480, main) and a new trino.connector.test.version (480-SNAPSHOT, test-jar classifier). 
Trino does not publish test-jars for tagged releases, so the test scaffolding deps have to track a snapshot. Bundle dep hygiene (packaging/hudi-trino-bundle): - hudi-hadoop-mr-bundle swapped for the plain hudi-hadoop-mr (which is what the shade artifactSet was already configured to include). The fat-jar variant dragged lance-core, arrow-dataset, and rocksdbjni transitively (~280MB) into the Trino server tar. - Broad SPI exclusions on hudi-trino-plugin (jackson, airlift, trino, guice, opentelemetry, jol) keep transitive SPI-provided libs out of consumers (trino-maven-plugin:check-spi-dependencies enforces this). - Jackson exclusions extended to hudi-hadoop-mr, hudi-client-common, hudi-java-client, and avro (avro brings jackson-core/databind). - rocksdb, arrow, lance exclusions on hudi-trino-plugin, hudi-hadoop-mr, and hudi-java-client (the connector does not use these and they were inflating the consumer's transitive closure). - hudi-timeline-service excluded from hudi-client-common (it pulls Jetty + javax.servlet-api which Trino's enforcer bans). - README: split-version cleanup notes for publish readiness. 
--- hudi-trino-plugin/pom.xml | 8 +- .../plugin/hudi/HudiBaseFileOnlyPageSource.java | 7 +- .../io/trino/plugin/hudi/HudiConnectorFactory.java | 12 +- .../java/io/trino/plugin/hudi/HudiMetadata.java | 2 +- .../main/java/io/trino/plugin/hudi/HudiModule.java | 6 +- .../java/io/trino/plugin/hudi/HudiPageSource.java | 5 +- .../trino/plugin/hudi/HudiPageSourceProvider.java | 24 ++- .../main/java/io/trino/plugin/hudi/HudiSplit.java | 2 +- .../plugin/hudi/reader/HudiTrinoReaderContext.java | 7 +- .../trino/plugin/hudi/util/HudiAvroSerializer.java | 2 +- packaging/hudi-trino-bundle/README.md | 35 +++- packaging/hudi-trino-bundle/pom.xml | 192 ++++++++++++++++++++- pom.xml | 8 +- 13 files changed, 271 insertions(+), 39 deletions(-) diff --git a/hudi-trino-plugin/pom.xml b/hudi-trino-plugin/pom.xml index 0db6338fbaa3..5f699d03a110 100644 --- a/hudi-trino-plugin/pom.xml +++ b/hudi-trino-plugin/pom.xml @@ -33,6 +33,9 @@ <properties> <!-- Trino SPI / library version this connector compiles against. Bumped via root pom. --> <dep.trino.version>${trino.connector.version}</dep.trino.version> + <!-- Tests reuse Trino's internal test scaffolding, which only ships under the + test-jar classifier in snapshot builds. See root pom for details. --> + <dep.trino.test.version>${trino.connector.test.version}</dep.trino.test.version> <trino.parquet.version>1.15.2</trino.parquet.version> <!-- Connector requires JDK 25 to compile and run (matches Trino runtime requirement). 
--> <hudi.trino.java.version>25</hudi.trino.java.version> @@ -340,6 +343,7 @@ <dependency> <groupId>io.trino</groupId> <artifactId>trino-filesystem</artifactId> + <version>${dep.trino.test.version}</version> <type>test-jar</type> <scope>test</scope> </dependency> @@ -353,6 +357,7 @@ <dependency> <groupId>io.trino</groupId> <artifactId>trino-hive</artifactId> + <version>${dep.trino.test.version}</version> <type>test-jar</type> <scope>test</scope> </dependency> @@ -366,6 +371,7 @@ <dependency> <groupId>io.trino</groupId> <artifactId>trino-main</artifactId> + <version>${dep.trino.test.version}</version> <type>test-jar</type> <scope>test</scope> </dependency> @@ -380,7 +386,7 @@ <!-- BOM does not manage test-jar coordinate; declare the version explicitly. --> <groupId>io.trino</groupId> <artifactId>trino-spi</artifactId> - <version>${dep.trino.version}</version> + <version>${dep.trino.test.version}</version> <type>test-jar</type> <scope>test</scope> </dependency> diff --git a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiBaseFileOnlyPageSource.java b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiBaseFileOnlyPageSource.java index 1180638a7cc4..a8955408ebf4 100644 --- a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiBaseFileOnlyPageSource.java +++ b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiBaseFileOnlyPageSource.java @@ -19,6 +19,7 @@ import io.trino.plugin.hudi.util.SynthesizedColumnHandler; import io.trino.spi.Page; import io.trino.spi.block.Block; import io.trino.spi.connector.ConnectorPageSource; +import io.trino.spi.connector.SourcePage; import java.io.IOException; import java.util.HashMap; @@ -84,9 +85,9 @@ public class HudiBaseFileOnlyPageSource } @Override - public Page getNextPage() + public SourcePage getNextSourcePage() { - Page physicalSourcePage = dataPageSource.getNextPage(); + SourcePage physicalSourcePage = dataPageSource.getNextSourcePage(); if (physicalSourcePage == null) { return null; } @@ -108,7 +109,7 
@@ public class HudiBaseFileOnlyPageSource outputBlocks[i] = synthesizedColumnHandler.createRleSynthesizedBlock(outputColumn, positionCount); } } - return new Page(outputBlocks); + return SourcePage.create(new Page(outputBlocks)); } @Override diff --git a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiConnectorFactory.java b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiConnectorFactory.java index 0db65192baf8..226c7c507b4c 100644 --- a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiConnectorFactory.java +++ b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiConnectorFactory.java @@ -30,9 +30,9 @@ import io.trino.plugin.base.classloader.ClassLoaderSafeConnectorSplitManager; import io.trino.plugin.base.classloader.ClassLoaderSafeNodePartitioningProvider; import io.trino.plugin.base.jmx.MBeanServerModule; import io.trino.plugin.base.session.SessionPropertiesProvider; -import io.trino.plugin.hive.NodeVersion; import io.trino.plugin.hive.metastore.HiveMetastoreModule; import io.trino.spi.NodeManager; +import io.trino.spi.NodeVersion; import io.trino.spi.catalog.CatalogName; import io.trino.spi.classloader.ThreadContextClassLoader; import io.trino.spi.connector.Connector; @@ -80,7 +80,7 @@ public class HudiConnectorFactory new MBeanModule(), new JsonModule(), new HudiModule(), - new HiveMetastoreModule(Optional.empty()), + new HiveMetastoreModule(Optional.empty(), false), new HudiFileSystemModule(catalogName, context), new MBeanServerModule(), module.orElse(EMPTY_MODULE), @@ -123,21 +123,19 @@ public class HudiConnectorFactory extends AbstractConfigurationAwareModule { private final String catalogName; - private final NodeManager nodeManager; - private final OpenTelemetry openTelemetry; + private final ConnectorContext context; public HudiFileSystemModule(String catalogName, ConnectorContext context) { this.catalogName = requireNonNull(catalogName, "catalogName is null"); - this.nodeManager = context.getNodeManager(); - 
this.openTelemetry = context.getOpenTelemetry(); + this.context = requireNonNull(context, "context is null"); } @Override protected void setup(Binder binder) { boolean metadataCacheEnabled = buildConfigObject(HudiConfig.class).isMetadataCacheEnabled(); - install(new FileSystemModule(catalogName, nodeManager, openTelemetry, metadataCacheEnabled)); + install(new FileSystemModule(catalogName, context, metadataCacheEnabled)); } } } diff --git a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiMetadata.java b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiMetadata.java index 90bdd2c6e2fd..9f49718f77ba 100644 --- a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiMetadata.java +++ b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiMetadata.java @@ -278,7 +278,7 @@ public class HudiMetadata } @Override - public Optional<Object> getInfo(ConnectorTableHandle tableHandle) + public Optional<Object> getInfo(ConnectorSession session, ConnectorTableHandle tableHandle) { HudiTableHandle table = (HudiTableHandle) tableHandle; return Optional.of(new HudiTableInfo(table.getSchemaTableName(), table.getTableType().name(), table.getBasePath())); diff --git a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiModule.java b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiModule.java index bd3c1923ebad..a696cd8218d9 100644 --- a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiModule.java +++ b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiModule.java @@ -26,7 +26,6 @@ import io.trino.plugin.base.session.SessionPropertiesProvider; import io.trino.plugin.hive.HideDeltaLakeTables; import io.trino.plugin.hive.HiveNodePartitioningProvider; import io.trino.plugin.hive.HiveTransactionHandle; -import io.trino.plugin.hive.metastore.thrift.TranslateHiveViews; import io.trino.plugin.hive.parquet.ParquetReaderConfig; import io.trino.plugin.hive.parquet.ParquetWriterConfig; import io.trino.plugin.hudi.cache.HudiCacheKeyProvider; @@ 
-44,7 +43,7 @@ import static com.google.inject.multibindings.Multibinder.newSetBinder; import static com.google.inject.multibindings.OptionalBinder.newOptionalBinder; import static io.airlift.concurrent.Threads.daemonThreadsNamed; import static io.airlift.configuration.ConfigBinder.configBinder; -import static io.trino.plugin.base.ClosingBinder.closingBinder; +import static io.airlift.bootstrap.ClosingBinder.closingBinder; import static java.util.concurrent.Executors.newCachedThreadPool; import static java.util.concurrent.Executors.newScheduledThreadPool; import static org.weakref.jmx.guice.ExportBinder.newExporter; @@ -59,7 +58,8 @@ public class HudiModule configBinder(binder).bindConfig(HudiConfig.class); - binder.bind(boolean.class).annotatedWith(TranslateHiveViews.class).toInstance(false); + // Trino 480 removed the TranslateHiveViews annotation; thrift metastore now reads + // hive.hive-views.enabled config directly. No binding needed. binder.bind(boolean.class).annotatedWith(HideDeltaLakeTables.class).toInstance(false); newSetBinder(binder, SessionPropertiesProvider.class).addBinding().to(HudiSessionProperties.class).in(Scopes.SINGLETON); diff --git a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiPageSource.java b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiPageSource.java index 5186341b0a50..a4c4b151c548 100644 --- a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiPageSource.java +++ b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiPageSource.java @@ -20,6 +20,7 @@ import io.trino.plugin.hudi.util.SynthesizedColumnHandler; import io.trino.spi.Page; import io.trino.spi.PageBuilder; import io.trino.spi.connector.ConnectorPageSource; +import io.trino.spi.connector.SourcePage; import io.trino.spi.metrics.Metrics; import org.apache.avro.generic.IndexedRecord; import org.apache.hudi.common.table.read.HoodieFileGroupReader; @@ -103,7 +104,7 @@ public class HudiPageSource } @Override - public Page getNextPage() + public 
SourcePage getNextSourcePage() { checkState(pageBuilder.isEmpty(), "PageBuilder is not empty at the beginning of a new page"); while (recordIterator.hasNext()) { @@ -112,7 +113,7 @@ public class HudiPageSource Page newPage = pageBuilder.build(); pageBuilder.reset(); - return newPage; + return SourcePage.create(newPage); } @Override diff --git a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java index 980739af6dcd..bae76b737a31 100644 --- a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java +++ b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java @@ -34,7 +34,6 @@ import io.trino.parquet.reader.ParquetReader; import io.trino.parquet.reader.RowGroupInfo; import io.trino.plugin.base.metrics.FileFormatDataSourceStats; import io.trino.plugin.hive.HiveColumnHandle; -import io.trino.plugin.hive.ReaderColumns; import io.trino.plugin.hive.parquet.ParquetReaderConfig; import io.trino.plugin.hudi.file.HudiBaseFile; import io.trino.plugin.hudi.reader.HudiTrinoReaderContext; @@ -81,7 +80,6 @@ import static io.trino.parquet.ParquetTypeUtils.getDescriptors; import static io.trino.parquet.predicate.PredicateUtils.buildPredicate; import static io.trino.parquet.predicate.PredicateUtils.getFilteredRowGroups; import static io.trino.plugin.hive.HiveColumnHandle.partitionColumnHandle; -import static io.trino.plugin.hive.HivePageSourceProvider.projectBaseColumns; import static io.trino.plugin.hive.parquet.ParquetPageSourceFactory.ParquetReaderProvider; import static io.trino.plugin.hive.parquet.ParquetPageSourceFactory.createDataSource; import static io.trino.plugin.hive.parquet.ParquetPageSourceFactory.createParquetPageSource; @@ -105,7 +103,6 @@ import static io.trino.plugin.hudi.HudiUtil.getLatestTableSchema; import static io.trino.plugin.hudi.HudiUtil.prependHudiMetaAndOrderingColumns; import static 
java.lang.String.format; import static java.util.Objects.requireNonNull; -import static java.util.stream.Collectors.toUnmodifiableList; public class HudiPageSourceProvider implements ConnectorPageSourceProvider @@ -191,14 +188,15 @@ public class HudiPageSourceProvider hudiSplit, fileSystem.newInputFile(Location.of(hudiBaseFileOpt.get().getPath()), hudiBaseFileOpt.get().getFileSize()), dataSourceStats, - options + ParquetReaderOptions.builder(options) .withIgnoreStatistics(isParquetIgnoreStatistics(session)) .withMaxReadBlockSize(getParquetMaxReadBlockSize(session)) .withMaxReadBlockRowCount(getParquetMaxReadBlockRowCount(session)) .withSmallFileThreshold(getParquetSmallFileThreshold(session)) .withUseColumnIndex(isParquetUseColumnIndex(session)) .withBloomFilter(useParquetBloomFilter(session)) - .withVectorizedDecodingEnabled(isParquetVectorizedDecodingEnabled(session)), + .withVectorizedDecodingEnabled(isParquetVectorizedDecodingEnabled(session)) + .build(), timeZone, dynamicFilter, isBaseFileOnly); SynthesizedColumnHandler synthesizedColumnHandler = SynthesizedColumnHandler.create(hudiSplit); @@ -307,17 +305,14 @@ public class HudiPageSourceProvider DOMAIN_COMPACTION_THRESHOLD, options); - Optional<ReaderColumns> readerProjections = projectBaseColumns(columns); - List<HiveColumnHandle> baseColumns = readerProjections.map(projection -> - projection.get().stream() - .map(HiveColumnHandle.class::cast) - .collect(toUnmodifiableList())) - .orElse(columns); + // Trino 480 dropped projectBaseColumns/ReaderColumns; createParquetPageSource handles + // base-column projection internally via TransformConnectorPageSource. 
ParquetDataSourceId dataSourceId = dataSource.getId(); ParquetDataSource finalDataSource = dataSource; - ParquetReaderProvider parquetReaderProvider = fields -> new ParquetReader( + ParquetReaderProvider parquetReaderProvider = (fields, appendRowNumberColumn) -> new ParquetReader( Optional.ofNullable(fileMetaData.getCreatedBy()), fields, + appendRowNumberColumn, rowGroups, finalDataSource, timeZone, @@ -325,8 +320,9 @@ public class HudiPageSourceProvider options, exception -> handleException(dataSourceId, exception), Optional.of(parquetPredicate), - Optional.empty()); - return createParquetPageSource(baseColumns, fileSchema, messageColumn, useColumnNames, parquetReaderProvider); + Optional.empty(), + parquetMetadata.getDecryptionContext()); + return createParquetPageSource(columns, fileSchema, messageColumn, useColumnNames, parquetReaderProvider); } catch (IOException | RuntimeException e) { try { diff --git a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiSplit.java b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiSplit.java index e2de10ca98a4..0f7b520e4e64 100644 --- a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiSplit.java +++ b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiSplit.java @@ -80,7 +80,7 @@ public class HudiSplit this.cachingHostAddresses = requireNonNull(cachingHostAddresses, "cachingHostAddresses is null"); } - @Override + // Trino 480 removed getSplitInfo from ConnectorSplit. Method retained for local diagnostics. 
public Map<String, String> getSplitInfo() { return ImmutableMap.<String, String>builder() diff --git a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/reader/HudiTrinoReaderContext.java b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/reader/HudiTrinoReaderContext.java index cb1bc2e09a83..ed9ddb261e4a 100644 --- a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/reader/HudiTrinoReaderContext.java +++ b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/reader/HudiTrinoReaderContext.java @@ -18,6 +18,7 @@ import io.trino.plugin.hudi.util.HudiAvroSerializer; import io.trino.plugin.hudi.util.SynthesizedColumnHandler; import io.trino.spi.Page; import io.trino.spi.connector.ConnectorPageSource; +import io.trino.spi.connector.SourcePage; import org.apache.avro.generic.IndexedRecord; import org.apache.hudi.avro.AvroRecordContext; import org.apache.hudi.common.config.RecordMergeMode; @@ -119,8 +120,10 @@ public class HudiTrinoReaderContext return false; } - // Get next page and reset currentPosition - currentPage = pageSource.getNextPage(); + // Get next page and reset currentPosition. Unwrap the SourcePage to the + // underlying Page so the serializer's Block accessors keep working. + SourcePage nextSourcePage = pageSource.getNextSourcePage(); + currentPage = nextSourcePage == null ? 
null : nextSourcePage.getPage(); currentPosition = 0; // If no more pages are available diff --git a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/util/HudiAvroSerializer.java b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/util/HudiAvroSerializer.java index 2fc020020252..24c653511fe0 100644 --- a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/util/HudiAvroSerializer.java +++ b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/util/HudiAvroSerializer.java @@ -135,7 +135,7 @@ public class HudiAvroSerializer public Object getValue(Page sourcePage, int channel, int position) { - return columnTypes.get(channel).getObjectValue(null, sourcePage.getBlock(channel), position); + return columnTypes.get(channel).getObjectValue(sourcePage.getBlock(channel), position); } public void buildRecordInPage(PageBuilder pageBuilder, IndexedRecord record) diff --git a/packaging/hudi-trino-bundle/README.md b/packaging/hudi-trino-bundle/README.md index 8f7d99dad154..e73f2dd0903e 100644 --- a/packaging/hudi-trino-bundle/README.md +++ b/packaging/hudi-trino-bundle/README.md @@ -1,3 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + # hudi-trino-bundle Shaded jar that ships the Trino-Hudi connector. 
Published as `org.apache.hudi:hudi-trino-bundle` (RFC-105). The Trino-side `HudiPlugin` shim depends on this artifact and loads `io.trino.plugin.hudi.HudiConnectorFactory` from it. @@ -67,4 +84,20 @@ JAVA_HOME=$(/usr/libexec/java_home -v 25) \ mvn deploy -Phudi-trino -pl hudi-trino-plugin,packaging/hudi-trino-bundle -DskipTests ``` -For release candidates, include this bundle in the RC validation step alongside the other published Hudi bundles. \ No newline at end of file +For release candidates, include this bundle in the RC validation step alongside the other published Hudi bundles. + +### Before publishing: keep the main Trino version off SNAPSHOT + +The connector tests depend on `trino-*-tests.jar` artifacts, and Trino only publishes the test-jar classifier for snapshot builds (not for tagged releases). Shipping a bundle whose installed POM references a Trino SNAPSHOT version is awkward for downstream consumers, so the root pom splits the version: main jars resolve to a Trino release while test-jars stay on snapshot. + +Before cutting a release, confirm that the split is still intact: + +1. The root pom defines both properties: + ``` + <trino.connector.version>480</trino.connector.version> + <trino.connector.test.version>480-SNAPSHOT</trino.connector.test.version> + ``` +2. In `hudi-trino-plugin/pom.xml`, each `<type>test-jar</type>` dependency uses `${dep.trino.test.version}` instead of inheriting from the main version. +3. Verify with `mvn dependency:tree -Phudi-trino -pl hudi-trino-plugin` that compile deps resolve at `480` and test deps at `480-SNAPSHOT`. + +Alternative if the release pipeline must not touch SNAPSHOT artifacts at all: build with `-Dmaven.test.skip=true` so test-jar deps are never resolved. The trade-off is that no unit tests run during the release build; CI should still exercise tests separately under the snapshot path. 
\ No newline at end of file diff --git a/packaging/hudi-trino-bundle/pom.xml b/packaging/hudi-trino-bundle/pom.xml index c58d7ae3c80b..8c22384b9551 100644 --- a/packaging/hudi-trino-bundle/pom.xml +++ b/packaging/hudi-trino-bundle/pom.xml @@ -31,8 +31,23 @@ <checkstyle.skip>true</checkstyle.skip> <main.basedir>${project.parent.basedir}</main.basedir> <skipTests>true</skipTests> + <dep.trino.version>${trino.connector.version}</dep.trino.version> </properties> + <dependencyManagement> + <dependencies> + <!-- Import Trino's parent POM as a BOM so the SPI artifacts declared as + provided below resolve to versions matching the connector code. --> + <dependency> + <groupId>io.trino</groupId> + <artifactId>trino-root</artifactId> + <version>${dep.trino.version}</version> + <type>pom</type> + <scope>import</scope> + </dependency> + </dependencies> + </dependencyManagement> + <build> <plugins> <plugin> @@ -170,14 +185,95 @@ <dependencies> <!-- Hoodie --> <dependency> + <!-- Plugin classes are shaded into this bundle. Its compile-scope deps on Trino, + Airlift, Jackson, Guice, OpenTelemetry are SPI-provided at Trino runtime and + must NOT leak into the bundle's transitive closure (trino-maven-plugin's + check-spi-dependencies enforces this on the consumer side). 
--> <groupId>org.apache.hudi</groupId> <artifactId>hudi-trino-plugin</artifactId> <version>${project.version}</version> + <exclusions> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.module</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.datatype</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>io.airlift</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>io.trino</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.inject</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>io.opentelemetry</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>org.openjdk.jol</groupId> + <artifactId>*</artifactId> + </exclusion> + <!-- The connector does not use rocksdb (Hudi's local state stores), arrow, or lance. + These leak via hudi-common's compile deps and bloat the Trino server tar. --> + <exclusion> + <groupId>org.rocksdb</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.arrow</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>org.lance</groupId> + <artifactId>*</artifactId> + </exclusion> + </exclusions> </dependency> <dependency> + <!-- Use the plain hudi-hadoop-mr artifact (already listed in the shade artifactSet). + hudi-hadoop-mr-bundle pulled lance-core, arrow-dataset, and rocksdbjni transitively + (~280MB), which the connector does not use and which blew the trino-server tar size cap. 
--> <groupId>org.apache.hudi</groupId> - <artifactId>hudi-hadoop-mr-bundle</artifactId> + <artifactId>hudi-hadoop-mr</artifactId> <version>${project.version}</version> + <exclusions> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.module</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.datatype</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>org.rocksdb</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.arrow</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>org.lance</groupId> + <artifactId>*</artifactId> + </exclusion> + </exclusions> </dependency> <dependency> <groupId>org.apache.hudi</groupId> @@ -188,12 +284,56 @@ <artifactId>guava</artifactId> <groupId>com.google.guava</groupId> </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.module</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.datatype</groupId> + <artifactId>*</artifactId> + </exclusion> + <!-- hudi-timeline-service pulls Jetty + javax.servlet-api, banned by Trino's enforcer. + The Trino connector does not use the timeline service. 
--> + <exclusion> + <groupId>org.apache.hudi</groupId> + <artifactId>hudi-timeline-service</artifactId> + </exclusion> </exclusions> </dependency> <dependency> <groupId>org.apache.hudi</groupId> <artifactId>hudi-java-client</artifactId> <version>${project.version}</version> + <exclusions> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.module</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.datatype</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>org.rocksdb</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.arrow</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>org.lance</groupId> + <artifactId>*</artifactId> + </exclusion> + </exclusions> </dependency> <!-- Kryo --> @@ -218,6 +358,16 @@ <artifactId>avro</artifactId> <version>${avro.version}</version> <scope>compile</scope> + <exclusions> + <!-- Jackson is SPI-provided by Trino at runtime. Avro's transitive jackson-core/databind + must not leak into consumers (check-spi-dependencies). The shaded avro classes within + this bundle reference unrelocated com.fasterxml.jackson.*, which the Trino runtime + supplies. --> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>*</artifactId> + </exclusion> + </exclusions> </dependency> <!--protobuf needs to be shaded because HBase 1.2.3 + native HFile reader needs it at runtime, @@ -229,6 +379,46 @@ <scope>${trino.bundle.bootstrap.scope}</scope> </dependency> + <!-- Trino SPI artifacts. Declared here as provided to override the compile-scope + transitives pulled in via hudi-trino-plugin -> trino-hive/etc. Trino's server + supplies these at runtime; bundling them would cause split-classloader bugs + and break the trino-maven-plugin:check-spi-dependencies enforcer. 
--> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-annotations</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>io.airlift</groupId> + <artifactId>slice</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>io.opentelemetry</groupId> + <artifactId>opentelemetry-api</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>io.opentelemetry</groupId> + <artifactId>opentelemetry-api-incubator</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>io.opentelemetry</groupId> + <artifactId>opentelemetry-context</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>io.trino</groupId> + <artifactId>trino-spi</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.openjdk.jol</groupId> + <artifactId>jol-core</artifactId> + <scope>provided</scope> + </dependency> + </dependencies> <profiles> <profile> diff --git a/pom.xml b/pom.xml index 75866f586f60..66b57428ee69 100644 --- a/pom.xml +++ b/pom.xml @@ -86,7 +86,7 @@ <maven-surefire-plugin.version>3.5.4</maven-surefire-plugin.version> <maven-failsafe-plugin.version>3.5.4</maven-failsafe-plugin.version> <!-- bump to 3.5.3 to fix MSHADE-461 --> - <maven-shade-plugin.version>3.5.3</maven-shade-plugin.version> + <maven-shade-plugin.version>3.6.2</maven-shade-plugin.version> <maven-javadoc-plugin.version>3.12.0</maven-javadoc-plugin.version> <maven-compiler-plugin.version>3.14.1</maven-compiler-plugin.version> <maven-deploy-plugin.version>2.4</maven-deploy-plugin.version> @@ -129,8 +129,12 @@ <hive.avro.version>1.11.4</hive.avro.version> <presto.version>0.273</presto.version> <trino.version>390</trino.version> - <!-- Trino SPI version that hudi-trino-plugin (RFC-105) compiles against. --> + <!-- Trino SPI version that hudi-trino-plugin main code compiles against. 
--> <trino.connector.version>480</trino.connector.version> + <!-- Trino version used only for *-tests.jar classifier deps. Trino does not publish + test-jars for tagged releases, so this tracks a snapshot. Built locally via the + Trino source tree; consumers running tests need the snapshot in their m2. --> + <trino.connector.test.version>480-SNAPSHOT</trino.connector.test.version> <hive.exec.classifier>core</hive.exec.classifier> <metrics.version>4.1.1</metrics.version> <orc.spark.version>1.6.0</orc.spark.version>
