XComp commented on code in PR #21349: URL: https://github.com/apache/flink/pull/21349#discussion_r1182404771
########## tools/ci/verify_bundled_optional.sh: ########## @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +## Checks that all bundled dependencies are marked as optional in the poms +MVN_CLEAN_COMPILE_OUT=$1 +CI_DIR=$2 +FLINK_ROOT=$3 + +source "${CI_DIR}/maven-utils.sh" + +cd "$FLINK_ROOT" || exit + +dependency_plugin_output=${CI_DIR}/optional_dep.txt + +run_mvn dependency:tree -B >> "${dependency_plugin_output}" Review Comment: nit: Why do we append here? Shouldn't we expect the file to not exist, yet? My concern is that we're assuming something about the environment in which this script is called (that it operates on a clean directory) that might lead to different results if this assumption is not met without the script failing. Does it make sense to add a Precondition here? ########## tools/ci/flink-ci-tools/src/main/java/org/apache/flink/tools/ci/optional/ShadeOptionalChecker.java: ########## @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.tools.ci.optional; + +import org.apache.flink.annotation.VisibleForTesting; +import org.apache.flink.tools.ci.utils.dependency.DependencyParser; +import org.apache.flink.tools.ci.utils.shade.ShadeParser; +import org.apache.flink.tools.ci.utils.shared.Dependency; +import org.apache.flink.tools.ci.utils.shared.DependencyTree; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Verifies that all dependencies bundled with the shade-plugin are marked as optional in the pom. + * This ensures compatibility with later maven versions and in general simplifies dependency + * management as transitivity is no longer dependent on the shade-plugin. 
+ */ +public class ShadeOptionalChecker { + private static final Logger LOG = LoggerFactory.getLogger(ShadeOptionalChecker.class); + + public static void main(String[] args) throws IOException { + if (args.length < 2) { + System.out.println( + "Usage: ShadeOptionalChecker <pathShadeBuildOutput> <pathMavenDependencyOutput>"); + System.exit(1); + } + + final Path shadeOutputPath = Paths.get(args[0]); + final Path dependencyOutputPath = Paths.get(args[1]); + + final Map<String, Set<Dependency>> bundledDependenciesByModule = + ShadeParser.parseShadeOutput(shadeOutputPath); + final Map<String, DependencyTree> dependenciesByModule = + DependencyParser.parseDependencyTreeOutput(dependencyOutputPath); + + final Map<String, Set<Dependency>> violations = + checkOptionalFlags(bundledDependenciesByModule, dependenciesByModule); + + if (!violations.isEmpty()) { + LOG.error( + "{} modules bundle in total {} dependencies without them being marked as optional in the pom.", + violations.keySet().size(), + violations.size()); + LOG.error( + "\tIn order for shading to properly work within Flink we require all bundled dependencies to be marked as optional in the pom."); + LOG.error( + "\tFor verification purposes we require the dependency tree from the dependency-plugin to show the dependency as either:"); + LOG.error("\t\ta) an optional dependency,"); + LOG.error("\t\tb) a transitive dependency of another optional dependency."); + LOG.error( + "\tIn most cases adding '<optional>${flink.markBundledAsOptional}</optional>' to the bundled dependency is sufficient."); + LOG.error( + "\tThere are some edge cases where a transitive dependency might be associated with the \"wrong\" dependency in the tree, for example if a test dependency also requires it."); + LOG.error( + "\tIn such cases you need to adjust the poms so that the dependency shows up in the right spot. 
This may require adding an explicit dependency (Management) entry, excluding dependencies, or at times even reordering dependencies in the pom."); + LOG.error( + "\tSee the Dependencies page in the wiki for details: https://cwiki.apache.org/confluence/display/FLINK/Dependencies"); + + for (String moduleWithViolations : violations.keySet()) { + final Collection<Dependency> dependencyViolations = + violations.get(moduleWithViolations); + LOG.error( + "\tModule {} ({} violation{}):", + moduleWithViolations, + dependencyViolations.size(), + dependencyViolations.size() == 1 ? "" : "s"); + for (Dependency dependencyViolation : dependencyViolations) { + LOG.error("\t\t{}", dependencyViolation); + } + } + + System.exit(1); + } + } + + private static Map<String, Set<Dependency>> checkOptionalFlags( + Map<String, Set<Dependency>> bundledDependenciesByModule, + Map<String, DependencyTree> dependenciesByModule) { + + final Map<String, Set<Dependency>> allViolations = new HashMap<>(); + + for (String module : bundledDependenciesByModule.keySet()) { + LOG.debug("Checking module '{}'.", module); + if (!dependenciesByModule.containsKey(module)) { + throw new IllegalStateException( + String.format( + "Module %s listed by shade-plugin, but not dependency-plugin.", + module)); + } + + final Collection<Dependency> bundledDependencies = + bundledDependenciesByModule.get(module); + final DependencyTree dependencyTree = dependenciesByModule.get(module); + + final Set<Dependency> violations = + checkOptionalFlags(module, bundledDependencies, dependencyTree); + + if (!violations.isEmpty()) { + allViolations.put(module, violations); + } + } + + return allViolations; + } + + @VisibleForTesting + static Set<Dependency> checkOptionalFlags( + String module, + Collection<Dependency> bundledDependencies, + DependencyTree dependencyTree) { + + bundledDependencies = + bundledDependencies.stream() + // force-shading isn't relevant for this check but breaks some shortcuts + .filter( + dependency -> + 
!dependency + .getArtifactId() + .equals("flink-shaded-force-shading")) + .collect(Collectors.toSet()); + + final Set<Dependency> violations = new HashSet<>(); + + if (bundledDependencies.isEmpty()) { + LOG.debug("\tModule is not bundling any dependencies."); + return violations; + } + + // If a module has no transitive dependencies we can shortcut the optional flag checks as + // we will not require additional flags in any case. + // This reduces noise on CI. + // It also avoids some edge-cases; since a dependency can only occur once in the dependency + // tree (on the shortest path to said dependency) it can happen that a compile dependency + // is shown as a transitive dependency of a test dependency. + final List<Dependency> directTransitiveDependencies = + dependencyTree.getDirectDependencies().stream() Review Comment: Why is a direct dependency a transitive dependency? Isn't a transitive dependency a dependency of a direct dependency? In general, I struggle to understand this part: A "direct transitive dependency" (based on the local variable name) is a direct dependency that's not optional (i.e. not bundled), not provided and not in test scope with `flink-shaded-force-shading`, `jsr305` and the `slf4j` dependencies being ignored. How does this make the direct dependency a transitive one? :thinking: Wouldn't `directDependenciesWithPotentialTransitiveDependencies` be a better term (ignoring the fact that it's quite a long one :grin: )? ########## tools/ci/flink-ci-tools/src/main/java/org/apache/flink/tools/ci/optional/ShadeOptionalChecker.java: ########## @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.tools.ci.optional; + +import org.apache.flink.annotation.VisibleForTesting; +import org.apache.flink.tools.ci.utils.dependency.DependencyParser; +import org.apache.flink.tools.ci.utils.shade.ShadeParser; +import org.apache.flink.tools.ci.utils.shared.Dependency; +import org.apache.flink.tools.ci.utils.shared.DependencyTree; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Verifies that all dependencies bundled with the shade-plugin are marked as optional in the pom. + * This ensures compatibility with later maven versions and in general simplifies dependency + * management as transitivity is no longer dependent on the shade-plugin. 
+ */ +public class ShadeOptionalChecker { + private static final Logger LOG = LoggerFactory.getLogger(ShadeOptionalChecker.class); + + public static void main(String[] args) throws IOException { + if (args.length < 2) { + System.out.println( + "Usage: ShadeOptionalChecker <pathShadeBuildOutput> <pathMavenDependencyOutput>"); + System.exit(1); + } + + final Path shadeOutputPath = Paths.get(args[0]); + final Path dependencyOutputPath = Paths.get(args[1]); + + final Map<String, Set<Dependency>> bundledDependenciesByModule = + ShadeParser.parseShadeOutput(shadeOutputPath); + final Map<String, DependencyTree> dependenciesByModule = + DependencyParser.parseDependencyTreeOutput(dependencyOutputPath); + + final Map<String, Set<Dependency>> violations = + checkOptionalFlags(bundledDependenciesByModule, dependenciesByModule); + + if (!violations.isEmpty()) { + LOG.error( + "{} modules bundle in total {} dependencies without them being marked as optional in the pom.", + violations.keySet().size(), + violations.size()); + LOG.error( + "\tIn order for shading to properly work within Flink we require all bundled dependencies to be marked as optional in the pom."); + LOG.error( + "\tFor verification purposes we require the dependency tree from the dependency-plugin to show the dependency as either:"); + LOG.error("\t\ta) an optional dependency,"); + LOG.error("\t\tb) a transitive dependency of another optional dependency."); + LOG.error( + "\tIn most cases adding '<optional>${flink.markBundledAsOptional}</optional>' to the bundled dependency is sufficient."); + LOG.error( + "\tThere are some edge cases where a transitive dependency might be associated with the \"wrong\" dependency in the tree, for example if a test dependency also requires it."); + LOG.error( + "\tIn such cases you need to adjust the poms so that the dependency shows up in the right spot. 
This may require adding an explicit dependency (Management) entry, excluding dependencies, or at times even reordering dependencies in the pom."); + LOG.error( + "\tSee the Dependencies page in the wiki for details: https://cwiki.apache.org/confluence/display/FLINK/Dependencies"); + + for (String moduleWithViolations : violations.keySet()) { + final Collection<Dependency> dependencyViolations = + violations.get(moduleWithViolations); + LOG.error( + "\tModule {} ({} violation{}):", + moduleWithViolations, + dependencyViolations.size(), + dependencyViolations.size() == 1 ? "" : "s"); + for (Dependency dependencyViolation : dependencyViolations) { + LOG.error("\t\t{}", dependencyViolation); + } + } + + System.exit(1); + } + } + + private static Map<String, Set<Dependency>> checkOptionalFlags( + Map<String, Set<Dependency>> bundledDependenciesByModule, + Map<String, DependencyTree> dependenciesByModule) { + + final Map<String, Set<Dependency>> allViolations = new HashMap<>(); + + for (String module : bundledDependenciesByModule.keySet()) { + LOG.debug("Checking module '{}'.", module); + if (!dependenciesByModule.containsKey(module)) { + throw new IllegalStateException( + String.format( + "Module %s listed by shade-plugin, but not dependency-plugin.", + module)); + } + + final Collection<Dependency> bundledDependencies = + bundledDependenciesByModule.get(module); + final DependencyTree dependencyTree = dependenciesByModule.get(module); + + final Set<Dependency> violations = + checkOptionalFlags(module, bundledDependencies, dependencyTree); + + if (!violations.isEmpty()) { + allViolations.put(module, violations); + } + } + + return allViolations; + } + + @VisibleForTesting + static Set<Dependency> checkOptionalFlags( + String module, + Collection<Dependency> bundledDependencies, + DependencyTree dependencyTree) { + + bundledDependencies = + bundledDependencies.stream() + // force-shading isn't relevant for this check but breaks some shortcuts + .filter( + dependency -> + 
!dependency + .getArtifactId() + .equals("flink-shaded-force-shading")) + .collect(Collectors.toSet()); + + final Set<Dependency> violations = new HashSet<>(); + + if (bundledDependencies.isEmpty()) { + LOG.debug("\tModule is not bundling any dependencies."); + return violations; + } + + // If a module has no transitive dependencies we can shortcut the optional flag checks as + // we will not require additional flags in any case. + // This reduces noise on CI. + // It also avoids some edge-cases; since a dependency can only occur once in the dependency + // tree (on the shortest path to said dependency) it can happen that a compile dependency + // is shown as a transitive dependency of a test dependency. Review Comment: Isn't that an argument to not filter out the test dependencies to avoid missing transitive dependencies which we might want to process? I have the feeling that I'm missing the point of this comment or the comment is misleading? :thinking: ########## tools/ci/flink-ci-tools/src/main/java/org/apache/flink/tools/ci/optional/ShadeOptionalChecker.java: ########## @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.tools.ci.optional; + +import org.apache.flink.annotation.VisibleForTesting; +import org.apache.flink.tools.ci.utils.dependency.DependencyParser; +import org.apache.flink.tools.ci.utils.shade.ShadeParser; +import org.apache.flink.tools.ci.utils.shared.Dependency; +import org.apache.flink.tools.ci.utils.shared.DependencyTree; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Verifies that all dependencies bundled with the shade-plugin are marked as optional in the pom. + * This ensures compatibility with later maven versions and in general simplifies dependency + * management as transitivity is no longer dependent on the shade-plugin. + */ +public class ShadeOptionalChecker { + private static final Logger LOG = LoggerFactory.getLogger(ShadeOptionalChecker.class); + + public static void main(String[] args) throws IOException { + if (args.length < 2) { + System.out.println( + "Usage: ShadeOptionalChecker <pathShadeBuildOutput> <pathMavenDependencyOutput>"); + System.exit(1); + } + + final Path shadeOutputPath = Paths.get(args[0]); + final Path dependencyOutputPath = Paths.get(args[1]); + + final Map<String, Set<Dependency>> bundledDependenciesByModule = + ShadeParser.parseShadeOutput(shadeOutputPath); + final Map<String, DependencyTree> dependenciesByModule = + DependencyParser.parseDependencyTreeOutput(dependencyOutputPath); + + final Map<String, Set<Dependency>> violations = + checkOptionalFlags(bundledDependenciesByModule, dependenciesByModule); + + if (!violations.isEmpty()) { + LOG.error( + "{} modules bundle in total {} dependencies without them being marked as optional in the pom.", + violations.keySet().size(), + violations.size()); 
+ LOG.error( + "\tIn order for shading to properly work within Flink we require all bundled dependencies to be marked as optional in the pom."); + LOG.error( + "\tFor verification purposes we require the dependency tree from the dependency-plugin to show the dependency as either:"); + LOG.error("\t\ta) an optional dependency,"); + LOG.error("\t\tb) a transitive dependency of another optional dependency."); + LOG.error( + "\tIn most cases adding '<optional>${flink.markBundledAsOptional}</optional>' to the bundled dependency is sufficient."); + LOG.error( + "\tThere are some edge cases where a transitive dependency might be associated with the \"wrong\" dependency in the tree, for example if a test dependency also requires it."); + LOG.error( + "\tIn such cases you need to adjust the poms so that the dependency shows up in the right spot. This may require adding an explicit dependency (Management) entry, excluding dependencies, or at times even reordering dependencies in the pom."); + LOG.error( + "\tSee the Dependencies page in the wiki for details: https://cwiki.apache.org/confluence/display/FLINK/Dependencies"); + + for (String moduleWithViolations : violations.keySet()) { + final Collection<Dependency> dependencyViolations = + violations.get(moduleWithViolations); + LOG.error( + "\tModule {} ({} violation{}):", + moduleWithViolations, + dependencyViolations.size(), + dependencyViolations.size() == 1 ? 
"" : "s"); + for (Dependency dependencyViolation : dependencyViolations) { + LOG.error("\t\t{}", dependencyViolation); + } + } + + System.exit(1); + } + } + + private static Map<String, Set<Dependency>> checkOptionalFlags( + Map<String, Set<Dependency>> bundledDependenciesByModule, + Map<String, DependencyTree> dependenciesByModule) { + + final Map<String, Set<Dependency>> allViolations = new HashMap<>(); + + for (String module : bundledDependenciesByModule.keySet()) { + LOG.debug("Checking module '{}'.", module); + if (!dependenciesByModule.containsKey(module)) { + throw new IllegalStateException( + String.format( + "Module %s listed by shade-plugin, but not dependency-plugin.", + module)); + } + + final Collection<Dependency> bundledDependencies = + bundledDependenciesByModule.get(module); + final DependencyTree dependencyTree = dependenciesByModule.get(module); + + final Set<Dependency> violations = + checkOptionalFlags(module, bundledDependencies, dependencyTree); + + if (!violations.isEmpty()) { + allViolations.put(module, violations); + } + } + + return allViolations; + } + + @VisibleForTesting + static Set<Dependency> checkOptionalFlags( + String module, + Collection<Dependency> bundledDependencies, + DependencyTree dependencyTree) { + + bundledDependencies = + bundledDependencies.stream() + // force-shading isn't relevant for this check but breaks some shortcuts + .filter( + dependency -> + !dependency + .getArtifactId() + .equals("flink-shaded-force-shading")) + .collect(Collectors.toSet()); + + final Set<Dependency> violations = new HashSet<>(); + + if (bundledDependencies.isEmpty()) { + LOG.debug("\tModule is not bundling any dependencies."); + return violations; + } + + // If a module has no transitive dependencies we can shortcut the optional flag checks as + // we will not require additional flags in any case. + // This reduces noise on CI. 
+ // It also avoids some edge-cases; since a dependency can only occur once in the dependency + // tree (on the shortest path to said dependency) it can happen that a compile dependency + // is shown as a transitive dependency of a test dependency. + final List<Dependency> directTransitiveDependencies = + dependencyTree.getDirectDependencies().stream() + .filter( + dependency -> + !(dependency.isOptional().orElse(false) + || "provided" + .equals(dependency.getScope().orElse(null)) + || "test".equals(dependency.getScope().orElse(null)) + || "flink-shaded-force-shading" + .equals(dependency.getArtifactId()) + || "jsr305".equals(dependency.getArtifactId()) + || "slf4j-api".equals(dependency.getArtifactId()))) + .collect(Collectors.toList()); + + if (directTransitiveDependencies.isEmpty()) { + LOG.debug( + "Skipping deep-check of module {} because all direct dependencies are not transitive.", + module); + return violations; + } else { Review Comment: nit: the else is not really necessary. ########## tools/ci/flink-ci-tools/src/main/java/org/apache/flink/tools/ci/optional/ShadeOptionalChecker.java: ########## @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.tools.ci.optional; + +import org.apache.flink.annotation.VisibleForTesting; +import org.apache.flink.tools.ci.utils.dependency.DependencyParser; +import org.apache.flink.tools.ci.utils.shade.ShadeParser; +import org.apache.flink.tools.ci.utils.shared.Dependency; +import org.apache.flink.tools.ci.utils.shared.DependencyTree; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Verifies that all dependencies bundled with the shade-plugin are marked as optional in the pom. + * This ensures compatibility with later maven versions and in general simplifies dependency + * management as transitivity is no longer dependent on the shade-plugin. Review Comment: It feels like [this PR's description](https://github.com/apache/flink/pull/21349#issue-1455547923) deserves to be included in this class' JavaDoc. ########## flink-formats/flink-sql-parquet/pom.xml: ########## @@ -42,12 +42,14 @@ under the License. <groupId>org.apache.flink</groupId> <artifactId>flink-parquet</artifactId> <version>${project.version}</version> + <optional>${flink.markBundledAsOptional}</optional> </dependency> <dependency> <groupId>org.apache.parquet</groupId> <artifactId>parquet-avro</artifactId> <version>${flink.format.parquet.version}</version> + <optional>${flink.markBundledAsOptional}</optional> Review Comment: do we really need this? this dependency is not shaded... :thinking: ########## flink-connectors/flink-sql-connector-hive-3.1.3/pom.xml: ########## @@ -82,13 +84,15 @@ under the License. 
<groupId>org.antlr</groupId> <artifactId>antlr-runtime</artifactId> <version>3.5.2</version> + <optional>${flink.markBundledAsOptional}</optional> </dependency> <!-- hadoop dependency to make the copied code compile --> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-mapreduce-client-core</artifactId> <version>3.1.0</version> + <optional>${flink.markBundledAsOptional}</optional> Review Comment: This one doesn't show up in the shade plugin's configuration (see [code](https://github.com/apache/flink/blob/378e218f7ec2575a4e3b120d54f9fd5b21895545/flink-connectors/flink-sql-connector-hive-3.1.3/pom.xml#L190-L195)) but is still marked as bundled/optional? Was my assumption wrong that the shade plugin's configuration and the optional dependencies should roughly match? ########## flink-dist/pom.xml: ########## Review Comment: With this one I struggle as well: The `flink-dist` shade-plugin execution has `<createDependencyReducedPom/>` disabled, which (AFAIU) means that all the dependencies are kept as dependencies in the pom. Why do we label all the dependencies as optional then? :thinking: ########## tools/ci/flink-ci-tools/src/test/java/org/apache/flink/tools/ci/optional/ShadeOptionalCheckerTest.java: ########## @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.tools.ci.optional; + +import org.apache.flink.tools.ci.utils.shared.Dependency; +import org.apache.flink.tools.ci.utils.shared.DependencyTree; + +import org.junit.jupiter.api.Test; + +import java.util.Collections; +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +class ShadeOptionalCheckerTest { + private static final String MODULE = "module"; + + @Test + void testNonBundledDependencyIsIgnored() { + final Dependency dependency = createMandatoryDependency("a"); + final Set<Dependency> bundled = Collections.emptySet(); + final DependencyTree dependencyTree = new DependencyTree().addDirectDependency(dependency); + + final Set<Dependency> violations = + ShadeOptionalChecker.checkOptionalFlags(MODULE, bundled, dependencyTree); + + assertThat(violations).isEmpty(); + } + + @Test + void testNonBundledDependencyIsIgnoredEvenIfOthersAreBundled() { + final Dependency dependencyA = createMandatoryDependency("a"); + final Dependency dependencyB = createMandatoryDependency("B"); + final Set<Dependency> bundled = Collections.singleton(dependencyB); + final DependencyTree dependencyTree = + new DependencyTree() + .addDirectDependency(dependencyA) + .addDirectDependency(dependencyB); + + final Set<Dependency> violations = + ShadeOptionalChecker.checkOptionalFlags(MODULE, bundled, dependencyTree); + + assertThat(violations).containsExactly(dependencyB); + } + + @Test + void testDirectBundledOptionalDependencyIsAccepted() { + final Dependency dependency = createOptionalDependency("a"); + final Set<Dependency> bundled = Collections.singleton(dependency); + final DependencyTree dependencyTree = new DependencyTree().addDirectDependency(dependency); + + final Set<Dependency> violations = + ShadeOptionalChecker.checkOptionalFlags(MODULE, bundled, dependencyTree); + + assertThat(violations).isEmpty(); + } + + @Test 
+ void testDirectBundledDependencyMustBeOptional() { + final Dependency dependency = createMandatoryDependency("a"); + final Set<Dependency> bundled = Collections.singleton(dependency); + final DependencyTree dependencyTree = new DependencyTree().addDirectDependency(dependency); + + final Set<Dependency> violations = + ShadeOptionalChecker.checkOptionalFlags(MODULE, bundled, dependencyTree); + + assertThat(violations).containsExactly(dependency); + } + + @Test + void testTransitiveBundledOptionalDependencyIsAccepted() { + final Dependency dependencyA = createMandatoryDependency("a"); + final Dependency dependencyB = createOptionalDependency("b"); + final Set<Dependency> bundled = Collections.singleton(dependencyB); + final DependencyTree dependencyTree = + new DependencyTree() + .addDirectDependency(dependencyA) + .addTransitiveDependencyTo(dependencyB, dependencyA); + + final Set<Dependency> violations = + ShadeOptionalChecker.checkOptionalFlags(MODULE, bundled, dependencyTree); + + assertThat(violations).isEmpty(); + } + + @Test + void testTransitiveBundledDependencyMustBeOptional() { + final Dependency dependencyA = createMandatoryDependency("a"); + final Dependency dependencyB = createMandatoryDependency("b"); + final Set<Dependency> bundled = Collections.singleton(dependencyB); + final DependencyTree dependencyTree = + new DependencyTree() + .addDirectDependency(dependencyA) + .addTransitiveDependencyTo(dependencyB, dependencyA); + + final Set<Dependency> violations = + ShadeOptionalChecker.checkOptionalFlags(MODULE, bundled, dependencyTree); + + assertThat(violations).containsExactly(dependencyB); + } + + @Test + void testTransitiveBundledDependencyMayNotBeOptionalIfParentIsOptional() { + final Dependency dependencyA = createOptionalDependency("a"); + final Dependency dependencyB = createMandatoryDependency("b"); + final Set<Dependency> bundled = Collections.singleton(dependencyB); + final DependencyTree dependencyTree = + new DependencyTree() + 
.addDirectDependency(dependencyA) + .addTransitiveDependencyTo(dependencyB, dependencyA); + + final Set<Dependency> violations = + ShadeOptionalChecker.checkOptionalFlags(MODULE, bundled, dependencyTree); + + assertThat(violations).isEmpty(); + } + + @Test + void testTestDependenciesAreIgnored() { + final Dependency dependencyA = createOptionalDependency("a"); Review Comment: That's not a test dependency. `testTransitiveBundledDependencyMayNotBeOptionalIfParentIsOptional` and `testTestDependenciesAreIgnored` have identical test code ;-) ########## tools/ci/flink-ci-tools/src/main/java/org/apache/flink/tools/ci/optional/ShadeOptionalChecker.java: ########## @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.tools.ci.optional; + +import org.apache.flink.annotation.VisibleForTesting; +import org.apache.flink.tools.ci.utils.dependency.DependencyParser; +import org.apache.flink.tools.ci.utils.shade.ShadeParser; +import org.apache.flink.tools.ci.utils.shared.Dependency; +import org.apache.flink.tools.ci.utils.shared.DependencyTree; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Verifies that all dependencies bundled with the shade-plugin are marked as optional in the pom. + * This ensures compatibility with later maven versions and in general simplifies dependency + * management as transitivity is no longer dependent on the shade-plugin. + */ +public class ShadeOptionalChecker { + private static final Logger LOG = LoggerFactory.getLogger(ShadeOptionalChecker.class); + + public static void main(String[] args) throws IOException { + if (args.length < 2) { + System.out.println( + "Usage: ShadeOptionalChecker <pathShadeBuildOutput> <pathMavenDependencyOutput>"); + System.exit(1); + } + + final Path shadeOutputPath = Paths.get(args[0]); + final Path dependencyOutputPath = Paths.get(args[1]); + + final Map<String, Set<Dependency>> bundledDependenciesByModule = + ShadeParser.parseShadeOutput(shadeOutputPath); + final Map<String, DependencyTree> dependenciesByModule = + DependencyParser.parseDependencyTreeOutput(dependencyOutputPath); + + final Map<String, Set<Dependency>> violations = + checkOptionalFlags(bundledDependenciesByModule, dependenciesByModule); + + if (!violations.isEmpty()) { + LOG.error( + "{} modules bundle in total {} dependencies without them being marked as optional in the pom.", + violations.keySet().size(), + violations.size()); 
+ LOG.error( + "\tIn order for shading to properly work within Flink we require all bundled dependencies to be marked as optional in the pom."); + LOG.error( + "\tFor verification purposes we require the dependency tree from the dependency-plugin to show the dependency as either:"); + LOG.error("\t\ta) an optional dependency,"); + LOG.error("\t\tb) a transitive dependency of another optional dependency."); + LOG.error( + "\tIn most cases adding '<optional>${flink.markBundledAsOptional}</optional>' to the bundled dependency is sufficient."); + LOG.error( + "\tThere are some edge cases where a transitive dependency might be associated with the \"wrong\" dependency in the tree, for example if a test dependency also requires it."); + LOG.error( + "\tIn such cases you need to adjust the poms so that the dependency shows up in the right spot. This may require adding an explicit dependency (Management) entry, excluding dependencies, or at times even reordering dependencies in the pom."); + LOG.error( + "\tSee the Dependencies page in the wiki for details: https://cwiki.apache.org/confluence/display/FLINK/Dependencies"); + + for (String moduleWithViolations : violations.keySet()) { + final Collection<Dependency> dependencyViolations = + violations.get(moduleWithViolations); + LOG.error( + "\tModule {} ({} violation{}):", + moduleWithViolations, + dependencyViolations.size(), + dependencyViolations.size() == 1 ? 
"" : "s"); + for (Dependency dependencyViolation : dependencyViolations) { + LOG.error("\t\t{}", dependencyViolation); + } + } + + System.exit(1); + } + } + + private static Map<String, Set<Dependency>> checkOptionalFlags( + Map<String, Set<Dependency>> bundledDependenciesByModule, + Map<String, DependencyTree> dependenciesByModule) { + + final Map<String, Set<Dependency>> allViolations = new HashMap<>(); + + for (String module : bundledDependenciesByModule.keySet()) { + LOG.debug("Checking module '{}'.", module); + if (!dependenciesByModule.containsKey(module)) { + throw new IllegalStateException( + String.format( + "Module %s listed by shade-plugin, but not dependency-plugin.", + module)); + } + + final Collection<Dependency> bundledDependencies = + bundledDependenciesByModule.get(module); + final DependencyTree dependencyTree = dependenciesByModule.get(module); + + final Set<Dependency> violations = + checkOptionalFlags(module, bundledDependencies, dependencyTree); + + if (!violations.isEmpty()) { + allViolations.put(module, violations); + } + } + + return allViolations; + } + + @VisibleForTesting + static Set<Dependency> checkOptionalFlags( + String module, + Collection<Dependency> bundledDependencies, + DependencyTree dependencyTree) { + + bundledDependencies = + bundledDependencies.stream() + // force-shading isn't relevant for this check but breaks some shortcuts + .filter( + dependency -> + !dependency + .getArtifactId() + .equals("flink-shaded-force-shading")) + .collect(Collectors.toSet()); + + final Set<Dependency> violations = new HashSet<>(); + + if (bundledDependencies.isEmpty()) { + LOG.debug("\tModule is not bundling any dependencies."); + return violations; + } + + // If a module has no transitive dependencies we can shortcut the optional flag checks as + // we will not require additional flags in any case. + // This reduces noise on CI. 
+ // It also avoids some edge-cases; since a dependency can only occur once in the dependency + // tree (on the shortest path to said dependency) it can happen that a compile dependency + // is shown as a transitive dependency of a test dependency. + final List<Dependency> directTransitiveDependencies = + dependencyTree.getDirectDependencies().stream() + .filter( + dependency -> + !(dependency.isOptional().orElse(false) + || "provided" + .equals(dependency.getScope().orElse(null)) + || "test".equals(dependency.getScope().orElse(null)) + || "flink-shaded-force-shading" + .equals(dependency.getArtifactId()) + || "jsr305".equals(dependency.getArtifactId()) + || "slf4j-api".equals(dependency.getArtifactId()))) Review Comment: Could be that it's just my personal view but I find this condition hard to read. What about moving each of the different conditions into local boolean variables with a meaningful name (e.g. `final boolean isProvided = "provided".equals(dependency.getScope().orElse(null)`) ########## tools/ci/flink-ci-tools/src/main/java/org/apache/flink/tools/ci/optional/ShadeOptionalChecker.java: ########## @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.tools.ci.optional; + +import org.apache.flink.annotation.VisibleForTesting; +import org.apache.flink.tools.ci.utils.dependency.DependencyParser; +import org.apache.flink.tools.ci.utils.shade.ShadeParser; +import org.apache.flink.tools.ci.utils.shared.Dependency; +import org.apache.flink.tools.ci.utils.shared.DependencyTree; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Verifies that all dependencies bundled with the shade-plugin are marked as optional in the pom. + * This ensures compatibility with later maven versions and in general simplifies dependency + * management as transitivity is no longer dependent on the shade-plugin. + */ +public class ShadeOptionalChecker { + private static final Logger LOG = LoggerFactory.getLogger(ShadeOptionalChecker.class); + + public static void main(String[] args) throws IOException { + if (args.length < 2) { + System.out.println( + "Usage: ShadeOptionalChecker <pathShadeBuildOutput> <pathMavenDependencyOutput>"); + System.exit(1); + } + + final Path shadeOutputPath = Paths.get(args[0]); + final Path dependencyOutputPath = Paths.get(args[1]); + + final Map<String, Set<Dependency>> bundledDependenciesByModule = + ShadeParser.parseShadeOutput(shadeOutputPath); + final Map<String, DependencyTree> dependenciesByModule = + DependencyParser.parseDependencyTreeOutput(dependencyOutputPath); + + final Map<String, Set<Dependency>> violations = + checkOptionalFlags(bundledDependenciesByModule, dependenciesByModule); + + if (!violations.isEmpty()) { + LOG.error( + "{} modules bundle in total {} dependencies without them being marked as optional in the pom.", + violations.keySet().size(), + violations.size()); 
+ LOG.error( + "\tIn order for shading to properly work within Flink we require all bundled dependencies to be marked as optional in the pom."); + LOG.error( + "\tFor verification purposes we require the dependency tree from the dependency-plugin to show the dependency as either:"); + LOG.error("\t\ta) an optional dependency,"); + LOG.error("\t\tb) a transitive dependency of another optional dependency."); + LOG.error( + "\tIn most cases adding '<optional>${flink.markBundledAsOptional}</optional>' to the bundled dependency is sufficient."); + LOG.error( + "\tThere are some edge cases where a transitive dependency might be associated with the \"wrong\" dependency in the tree, for example if a test dependency also requires it."); + LOG.error( + "\tIn such cases you need to adjust the poms so that the dependency shows up in the right spot. This may require adding an explicit dependency (Management) entry, excluding dependencies, or at times even reordering dependencies in the pom."); + LOG.error( + "\tSee the Dependencies page in the wiki for details: https://cwiki.apache.org/confluence/display/FLINK/Dependencies"); + + for (String moduleWithViolations : violations.keySet()) { + final Collection<Dependency> dependencyViolations = + violations.get(moduleWithViolations); + LOG.error( + "\tModule {} ({} violation{}):", + moduleWithViolations, + dependencyViolations.size(), + dependencyViolations.size() == 1 ? 
"" : "s"); + for (Dependency dependencyViolation : dependencyViolations) { + LOG.error("\t\t{}", dependencyViolation); + } + } + + System.exit(1); + } + } + + private static Map<String, Set<Dependency>> checkOptionalFlags( + Map<String, Set<Dependency>> bundledDependenciesByModule, + Map<String, DependencyTree> dependenciesByModule) { + + final Map<String, Set<Dependency>> allViolations = new HashMap<>(); + + for (String module : bundledDependenciesByModule.keySet()) { + LOG.debug("Checking module '{}'.", module); + if (!dependenciesByModule.containsKey(module)) { + throw new IllegalStateException( + String.format( + "Module %s listed by shade-plugin, but not dependency-plugin.", + module)); + } + + final Collection<Dependency> bundledDependencies = + bundledDependenciesByModule.get(module); + final DependencyTree dependencyTree = dependenciesByModule.get(module); + + final Set<Dependency> violations = + checkOptionalFlags(module, bundledDependencies, dependencyTree); + + if (!violations.isEmpty()) { + allViolations.put(module, violations); + } + } + + return allViolations; + } + + @VisibleForTesting + static Set<Dependency> checkOptionalFlags( + String module, + Collection<Dependency> bundledDependencies, + DependencyTree dependencyTree) { + + bundledDependencies = + bundledDependencies.stream() + // force-shading isn't relevant for this check but breaks some shortcuts + .filter( + dependency -> + !dependency + .getArtifactId() + .equals("flink-shaded-force-shading")) + .collect(Collectors.toSet()); + + final Set<Dependency> violations = new HashSet<>(); + + if (bundledDependencies.isEmpty()) { + LOG.debug("\tModule is not bundling any dependencies."); + return violations; + } + + // If a module has no transitive dependencies we can shortcut the optional flag checks as + // we will not require additional flags in any case. + // This reduces noise on CI. 
+ // It also avoids some edge-cases; since a dependency can only occur once in the dependency + // tree (on the shortest path to said dependency) it can happen that a compile dependency + // is shown as a transitive dependency of a test dependency. + final List<Dependency> directTransitiveDependencies = + dependencyTree.getDirectDependencies().stream() + .filter( + dependency -> + !(dependency.isOptional().orElse(false) + || "provided" + .equals(dependency.getScope().orElse(null)) + || "test".equals(dependency.getScope().orElse(null)) + || "flink-shaded-force-shading" + .equals(dependency.getArtifactId()) + || "jsr305".equals(dependency.getArtifactId()) + || "slf4j-api".equals(dependency.getArtifactId()))) + .collect(Collectors.toList()); + + if (directTransitiveDependencies.isEmpty()) { + LOG.debug( + "Skipping deep-check of module {} because all direct dependencies are not transitive.", + module); + return violations; + } else { + LOG.debug( + "Running deep-check of module {} because there are direct dependencies that are transitive: {}", + module, + directTransitiveDependencies); + } + + for (Dependency bundledDependency : bundledDependencies) { + LOG.debug("\tChecking dependency '{}'.", bundledDependency); + + final List<Dependency> dependencyPath = dependencyTree.getPathTo(bundledDependency); + + final boolean isOptional = + dependencyPath.stream().anyMatch(parent -> parent.isOptional().orElse(false)); + + if (!isOptional) { + violations.add(bundledDependency); + } + } + if (violations.isEmpty()) { + LOG.info("OK: {}", module); Review Comment: nit: this log message feels misplaced and should rather live in the calling method where we check for emptiness as well. ########## README.md: ########## Review Comment: Should we add a paragraph about the newly introduced profile "intellij" to the intellij section of the README? Or is it good enough to mentioned that we recommend using Intellij over Eclipse Scala IDE? 
:thinking: To me it feels like it would be worth a sentence. ########## flink-filesystems/flink-s3-fs-presto/pom.xml: ########## @@ -45,6 +45,14 @@ under the License. <scope>provided</scope> </dependency> + <!-- S3 base (bundled) --> Review Comment: This was moved up to make its transitive dependencies appear in the right place in the dependency tree, wasn't it? ########## flink-python/pom.xml: ########## Review Comment: I assumed that we would have to add the optional flag to the dependencies under `<dependencyManagement/>` as well? ...at least, we did this in the [flink-s3-fs-presto](https://github.com/apache/flink/pull/21349/files?show-viewed-files=true&file-filters%5B%5D=#diff-fb356423d357acefd8a5681ae41320dbe8fe7c88d6aaf138e1c502b66aa638abR358) module as well. (we don't do it in `flink-rpc-akka` either) ########## flink-end-to-end-tests/flink-sql-client-test/pom.xml: ########## @@ -91,6 +91,7 @@ under the License. <artifactId>maven-shade-plugin</artifactId> <executions> <execution> + <id>e2e-dependencies</id> Review Comment: Does it make sense to add this as a comment? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
