This is an automated email from the ASF dual-hosted git repository. abstractdog pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/tez.git
The following commit(s) were added to refs/heads/master by this push: new 51ad67183 TEZ-4598: JDK-17: Migrate Tez to jdk17 (compile & runtime) (#407) (Mahesh Raju Somalaraju reviewed by Ayush Saxena, Laszlo Bodor) 51ad67183 is described below commit 51ad671833472482bb53bf2e72568f3007b50ce2 Author: Mahesh Raju Somalaraju <maheshra...@cloudera.com> AuthorDate: Tue May 13 17:37:33 2025 +0530 TEZ-4598: JDK-17: Migrate Tez to jdk17 (compile & runtime) (#407) (Mahesh Raju Somalaraju reviewed by Ayush Saxena, Laszlo Bodor) --- .github/workflows/build.yml | 2 +- BUILDING.txt | 2 +- Jenkinsfile | 6 +- build-tools/docker/Dockerfile | 35 ++++------- pom.xml | 15 +++-- .../java/org/apache/tez/client/TezClientUtils.java | 1 + .../org/apache/tez/dag/api/TezConfiguration.java | 2 + .../org/apache/tez/client/TestTezClientUtils.java | 11 ++-- tez-dag/pom.xml | 1 + tez-ext-service-tests/pom.xml | 1 + .../app/rm/TezTestServiceTaskSchedulerService.java | 6 +- .../shuffle/impl/SimpleFetchedInputAllocator.java | 4 +- .../impl/TestSimpleFetchedInputAllocator.java | 68 ++++++++++++++++------ tez-tests/pom.xml | 1 + 14 files changed, 97 insertions(+), 58 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f81c1ad8b..1148df52c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -27,7 +27,7 @@ jobs: build: strategy: matrix: - java-version: [8, 11, 17] + java-version: [17] os: [ubuntu-latest, macos-latest] runs-on: ${{ matrix.os }} steps: diff --git a/BUILDING.txt b/BUILDING.txt index 16632dd84..733dcb09c 100644 --- a/BUILDING.txt +++ b/BUILDING.txt @@ -8,7 +8,7 @@ Requirements: * JDK 1.8+ * Maven 3.6.3 or later -* spotbugs 4.2.2 or later (if running spotbugs) +* spotbugs 4.9.3 or later (if running spotbugs) * ProtocolBuffer 3.21.1 * Internet connection for first build (to fetch all dependencies) * Hadoop version should be 2.7.0 or higher. 
diff --git a/Jenkinsfile b/Jenkinsfile index d83e58918..aa6263fa2 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -147,10 +147,8 @@ pipeline { # help keep the ASF boxes clean YETUS_ARGS+=("--sentinel") - # test with Java 8 and 11 - YETUS_ARGS+=("--java-home=/usr/lib/jvm/java-8-openjdk-amd64") - YETUS_ARGS+=("--multijdkdirs=/usr/lib/jvm/java-11-openjdk-amd64") - YETUS_ARGS+=("--multijdktests=compile") + # test with Java 17 + YETUS_ARGS+=("--java-home=/usr/lib/jvm/java-17-openjdk-amd64") YETUS_ARGS+=("--debug") # write Yetus report as GitHub comment (YETUS-1102) diff --git a/build-tools/docker/Dockerfile b/build-tools/docker/Dockerfile index 20c6a26a2..d4cc574ed 100644 --- a/build-tools/docker/Dockerfile +++ b/build-tools/docker/Dockerfile @@ -165,35 +165,24 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"] #### #### -# OpenJDK 8 -#### +# OpenJDK 17 # hadolint ignore=DL3008 -RUN apt-get -q update && apt-get -q install --no-install-recommends -y openjdk-8-jdk-headless \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* +RUN apt-get -q update \ + && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y openjdk-17-jdk \ + && apt-get clean && rm -rf /var/lib/apt/lists/* -#### -# OpenJDK 11 (but keeps default to JDK8) -# NOTE: This default only works when Apache Yetus is launched -# _in_ the container and not outside of it! -#### -# hadolint ignore=DL3008 -RUN apt-get -q update && apt-get -q install --no-install-recommends -y default-jre-headless openjdk-11-jdk-headless \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* \ - && update-java-alternatives -s java-1.8.0-openjdk-amd64 || : -# since update alternatives might fail on executables that we don't really need (e.g. 
appletviewer) -# and return with exit code <0 (actually: 2), we can simply do a sanity check if the version is -# as expected for "java" executable after the update and go on -RUN java -version 2>&1 | grep "1.8.0" && rm -f /usr/lib/jvm/default-java \ - && ln -s java-8-openjdk-amd64 /usr/lib/jvm/default-java -ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64 +# Set JAVA_HOME and PATH environment variables +ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64 +ENV PATH="${JAVA_HOME}/bin:${PATH}" + +# Set the default Java version using update-alternatives +RUN update-alternatives --install /usr/bin/java java /usr/lib/jvm/java-17-openjdk-amd64/bin/java 1 ####### -# Install SpotBugs 4.2.2 +# Install SpotBugs 4.9.3 ####### RUN mkdir -p /opt/spotbugs \ - && curl -L -s -S https://github.com/spotbugs/spotbugs/releases/download/4.2.2/spotbugs-4.2.2.tgz \ + && curl -L -s -S https://github.com/spotbugs/spotbugs/releases/download/4.9.3/spotbugs-4.9.3.tgz \ -o /opt/spotbugs.tgz \ && tar xzf /opt/spotbugs.tgz --strip-components 1 -C /opt/spotbugs \ && chmod +x /opt/spotbugs/bin/* diff --git a/pom.xml b/pom.xml index 737e63561..93f761ec5 100644 --- a/pom.xml +++ b/pom.xml @@ -41,8 +41,8 @@ <properties> <!-- Build Properties --> - <maven.compiler.source>1.8</maven.compiler.source> - <maven.compiler.target>1.8</maven.compiler.target> + <maven.compiler.source>17</maven.compiler.source> + <maven.compiler.target>17</maven.compiler.target> <maven.test.redirectTestOutputToFile>true</maven.test.redirectTestOutputToFile> <surefire.version>3.0.0-M4</surefire.version> <failIfNoTests>false</failIfNoTests> @@ -70,8 +70,8 @@ <commons-lang.version>2.6</commons-lang.version> <clover.license>${user.home}/clover.license</clover.license> <dependency-check-maven.version>3.2.0</dependency-check-maven.version> - <spotbugs.version>4.2.2</spotbugs.version> - <spotbugs-maven-plugin.version>4.2.0</spotbugs-maven-plugin.version> + <spotbugs.version>4.9.3</spotbugs.version> + 
<spotbugs-maven-plugin.version>4.9.3.0</spotbugs-maven-plugin.version> <frontend-maven-plugin.version>1.8.0</frontend-maven-plugin.version> <guava.version>32.0.1-jre</guava.version> <hadoop.version>3.4.1</hadoop.version> @@ -102,6 +102,11 @@ <snappy-java.version>1.1.10.4</snappy-java.version> <test.build.data>${project.build.directory}/tmp</test.build.data> <wro4j-maven-plugin.version>1.7.9</wro4j-maven-plugin.version> + <test.jvm.args> + --add-opens=java.base/java.lang=ALL-UNNAMED + --add-opens=java.base/java.util=ALL-UNNAMED + --add-opens java.base/java.io=ALL-UNNAMED + </test.jvm.args> <maven.javadoc.skip>true</maven.javadoc.skip> <!-- enabled only in relevant modules separately --> </properties> @@ -922,7 +927,9 @@ <forkCount>1</forkCount> <reuseForks>false</reuseForks> <forkedProcessTimeoutInSeconds>900</forkedProcessTimeoutInSeconds> + <testFailureIgnore>true</testFailureIgnore> <argLine>-Xmx1024m -XX:+HeapDumpOnOutOfMemoryError</argLine> + <argLine>${test.jvm.args}</argLine> <environmentVariables> <JAVA_HOME>${java.home}</JAVA_HOME> <MALLOC_ARENA_MAX>4</MALLOC_ARENA_MAX> diff --git a/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java b/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java index eff4d4bef..1969554a5 100644 --- a/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java +++ b/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java @@ -1063,6 +1063,7 @@ public final class TezClientUtils { amOpts = amOpts + tezConf.get(TezConfiguration.TEZ_AM_LAUNCH_CMD_OPTS, TezConfiguration.TEZ_AM_LAUNCH_CMD_OPTS_DEFAULT); + amOpts = amOpts + TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_JDK17_CMD_OPTS_DEFAULT; amOpts = maybeAddDefaultMemoryJavaOpts(amOpts, capability, tezConf.getDouble(TezConfiguration.TEZ_CONTAINER_MAX_JAVA_HEAP_FRACTION, TezConfiguration.TEZ_CONTAINER_MAX_JAVA_HEAP_FRACTION_DEFAULT)); diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java 
b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java index 8862f4b7d..95ae107a8 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java @@ -399,6 +399,8 @@ public class TezConfiguration extends Configuration { public static final String TEZ_AM_LAUNCH_CLUSTER_DEFAULT_CMD_OPTS_DEFAULT = "-server -Djava.net.preferIPv4Stack=true -Dhadoop.metrics.log.level=WARN"; + public static final String TEZ_AM_LAUNCH_CLUSTER_JDK17_CMD_OPTS_DEFAULT = + " --add-opens java.base/java.lang=ALL-UNNAMED"; /** * String value. Command line options provided during the launch of the Tez * AppMaster process. Its recommended to not set any Xmx or Xms in these launch opts so that diff --git a/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java b/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java index a52cb3e49..84f0a4bc2 100644 --- a/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java +++ b/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java @@ -534,7 +534,8 @@ public class TestTezClientUtils { TezClientUtils.constructAMLaunchOpts(tezConf, Resource.newInstance(1024, 1)); assertEquals(tmpOpts + " " + TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_DEFAULT_CMD_OPTS_DEFAULT + " " - + amCommandOpts, + + amCommandOpts + + TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_JDK17_CMD_OPTS_DEFAULT, amOptsConstructed); // Test2: Setup cluster-default command opts explicitly @@ -543,7 +544,8 @@ public class TestTezClientUtils { tezConf.set(TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_DEFAULT_CMD_OPTS, clusterDefaultCommandOpts); amOptsConstructed = TezClientUtils.constructAMLaunchOpts(tezConf, Resource.newInstance(1024, 1)); - assertEquals(tmpOpts + " " + clusterDefaultCommandOpts + " " + amCommandOpts, amOptsConstructed); + assertEquals(tmpOpts + " " + clusterDefaultCommandOpts + " " + amCommandOpts + + 
TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_JDK17_CMD_OPTS_DEFAULT, amOptsConstructed); // Test3: Don't setup Xmx explicitly @@ -555,7 +557,7 @@ public class TestTezClientUtils { // It's OK for the Xmx value to show up before cluster default options, since Xmx will not be replaced if it already exists. assertEquals( " -Xmx" + ((int) (1024 * factor)) + "m" + " " + tmpOpts + " " + clusterDefaultCommandOpts + " " + - amCommandOpts, + amCommandOpts + TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_JDK17_CMD_OPTS_DEFAULT, amOptsConstructed); // Test4: Ensure admin options with Xmx does not cause them to be overridden. This should almost never be done though. @@ -564,7 +566,8 @@ public class TestTezClientUtils { tezConf.set(TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_DEFAULT_CMD_OPTS, clusterDefaultCommandOpts); amOptsConstructed = TezClientUtils.constructAMLaunchOpts(tezConf, Resource.newInstance(1024, 1)); - assertEquals(tmpOpts + " " + clusterDefaultCommandOpts + " " + amCommandOpts, amOptsConstructed); + assertEquals(tmpOpts + " " + clusterDefaultCommandOpts + " " + amCommandOpts + + TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_JDK17_CMD_OPTS_DEFAULT, amOptsConstructed); } @Test(timeout = 5000) diff --git a/tez-dag/pom.xml b/tez-dag/pom.xml index c1c246529..a765fb708 100644 --- a/tez-dag/pom.xml +++ b/tez-dag/pom.xml @@ -200,6 +200,7 @@ <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-surefire-plugin</artifactId> <configuration> + <argLine>${test.jvm.args}</argLine> <environmentVariables> <LOG_DIRS>${test.log.dir}</LOG_DIRS> </environmentVariables> diff --git a/tez-ext-service-tests/pom.xml b/tez-ext-service-tests/pom.xml index b56945526..c85f54228 100644 --- a/tez-ext-service-tests/pom.xml +++ b/tez-ext-service-tests/pom.xml @@ -161,6 +161,7 @@ <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-surefire-plugin</artifactId> <configuration> + <argLine>${test.jvm.args}</argLine> <environmentVariables> <LOG_DIRS>${test.log.dir}</LOG_DIRS> </environmentVariables> diff 
--git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java index eafedef11..31165c19a 100644 --- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java +++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java @@ -60,7 +60,7 @@ public class TezTestServiceTaskSchedulerService extends TaskScheduler { // AppIdIdentifier to avoid conflicts with other containers in the system. // Per instance - private final int memoryPerInstance; + private final long memoryPerInstance; private final int coresPerInstance; private final int executorsPerInstance; @@ -84,7 +84,7 @@ public class TezTestServiceTaskSchedulerService extends TaskScheduler { throw new TezUncheckedException(e); } this.memoryPerInstance = conf - .getInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_MEMORY_PER_INSTANCE_MB, -1); + .getLong(TezTestServiceConfConstants.TEZ_TEST_SERVICE_MEMORY_PER_INSTANCE_MB, -1); Preconditions.checkArgument(memoryPerInstance > 0, TezTestServiceConfConstants.TEZ_TEST_SERVICE_MEMORY_PER_INSTANCE_MB + " must be configured"); @@ -145,7 +145,7 @@ public class TezTestServiceTaskSchedulerService extends TaskScheduler { @Override public Resource getTotalResources() { return Resource - .newInstance(Ints.checkedCast(serviceHosts.size() * memoryPerInstance), + .newInstance(serviceHosts.size() * memoryPerInstance, serviceHosts.size() * coresPerInstance); } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/SimpleFetchedInputAllocator.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/SimpleFetchedInputAllocator.java index 6072c039c..d9af3c403 100644 --- 
a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/SimpleFetchedInputAllocator.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/SimpleFetchedInputAllocator.java @@ -20,6 +20,7 @@ package org.apache.tez.runtime.library.common.shuffle.impl; import java.io.IOException; +import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -56,7 +57,8 @@ public class SimpleFetchedInputAllocator implements FetchedInputAllocator, // Configuration parameters private final long memoryLimit; - private final long maxSingleShuffleLimit; + @VisibleForTesting + final long maxSingleShuffleLimit; private final long maxAvailableTaskMemory; private final long initialMemoryAvailable; diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestSimpleFetchedInputAllocator.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestSimpleFetchedInputAllocator.java index 01faa5df7..77bb0e4e0 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestSimpleFetchedInputAllocator.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestSimpleFetchedInputAllocator.java @@ -19,6 +19,7 @@ package org.apache.tez.runtime.library.common.shuffle.impl; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import java.io.File; import java.io.IOException; @@ -36,55 +37,88 @@ import org.junit.Test; public class TestSimpleFetchedInputAllocator { private static final Logger LOG = LoggerFactory.getLogger(TestSimpleFetchedInputAllocator.class); - + @Test(timeout = 5000) public void testInMemAllocation() throws IOException { File localDirs = new File(System.getProperty("test.build.data", "/tmp"), 
this.getClass().getName()); Configuration conf = new Configuration(); - - long jvmMax = Runtime.getRuntime().maxMemory(); + + long jvmMax = 954728448L; LOG.info("jvmMax: " + jvmMax); - + float bufferPercent = 0.1f; conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, bufferPercent); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 1.0f); conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDirs.getAbsolutePath()); - + long inMemThreshold = (long) (bufferPercent * jvmMax); LOG.info("InMemThreshold: " + inMemThreshold); SimpleFetchedInputAllocator inputManager = new SimpleFetchedInputAllocator( - "srcName", UUID.randomUUID().toString(), 123, conf, - Runtime.getRuntime().maxMemory(), inMemThreshold); + "srcName", UUID.randomUUID().toString(), 123, conf, + jvmMax, inMemThreshold); long requestSize = (long) (0.4f * inMemThreshold); - long compressedSize = 1l; + long compressedSize = 1L; LOG.info("RequestSize: " + requestSize); - + FetchedInput fi1 = inputManager.allocate(requestSize, compressedSize, new InputAttemptIdentifier(1, 1)); assertEquals(FetchedInput.Type.MEMORY, fi1.getType()); - - + FetchedInput fi2 = inputManager.allocate(requestSize, compressedSize, new InputAttemptIdentifier(2, 1)); assertEquals(FetchedInput.Type.MEMORY, fi2.getType()); - - + // Over limit by this point. Next reserve should give back a DISK allocation FetchedInput fi3 = inputManager.allocate(requestSize, compressedSize, new InputAttemptIdentifier(3, 1)); assertEquals(FetchedInput.Type.DISK, fi3.getType()); - - + // Freed one memory allocation. Next should be mem again. fi1.abort(); fi1.free(); FetchedInput fi4 = inputManager.allocate(requestSize, compressedSize, new InputAttemptIdentifier(4, 1)); assertEquals(FetchedInput.Type.MEMORY, fi4.getType()); - - // Freed one disk allocation. Next sould be disk again (no mem freed) + + // Freed one disk allocation. 
Next should be disk again (no mem freed) fi3.abort(); fi3.free(); FetchedInput fi5 = inputManager.allocate(requestSize, compressedSize, new InputAttemptIdentifier(4, 1)); assertEquals(FetchedInput.Type.DISK, fi5.getType()); } + /** + * This method tests the allocation behavior of SimpleFetchedInputAllocator when + * a high `maxMemory` is reported by the Runtime. The allocation results in a + * DISK input because the `requestSize` exceeds the `maxSingleShuffleLimit`. + */ + @Test(timeout = 5000) + public void testInMemAllocationWithJvmMaxMemory() throws IOException { + File localDirs = new File(System.getProperty("test.build.data", "/tmp"), this.getClass().getName()); + Configuration conf = new Configuration(); + + long jvmMax = Runtime.getRuntime().maxMemory(); + LOG.info("jvmMax: " + jvmMax); + + float bufferPercent = 0.1f; + conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, bufferPercent); + conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 1.0f); + conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDirs.getAbsolutePath()); + + long inMemThreshold = (long) (bufferPercent * jvmMax); + LOG.info("InMemThreshold: " + inMemThreshold); + + SimpleFetchedInputAllocator inputManager = new SimpleFetchedInputAllocator( + "srcName", UUID.randomUUID().toString(), 123, conf, + jvmMax, inMemThreshold); + + long requestSize = (long) (0.4f * inMemThreshold) + 100L; + long compressedSize = 1L; + LOG.info("RequestSize: " + requestSize); + + // check if requestSize is greater than maxSingleShuffleLimit + assertTrue(requestSize > inputManager.maxSingleShuffleLimit); + + // requestSize is greater than the maxSingleShuffleLimit, so allocation is from DISK + FetchedInput fi1 = inputManager.allocate(requestSize, compressedSize, new InputAttemptIdentifier(1, 1)); + assertEquals(FetchedInput.Type.DISK, fi1.getType()); + } } diff --git a/tez-tests/pom.xml b/tez-tests/pom.xml index 6c8e9b4a5..25e82c66b 100644 --- 
a/tez-tests/pom.xml +++ b/tez-tests/pom.xml @@ -160,6 +160,7 @@ <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-surefire-plugin</artifactId> <configuration> + <argLine>${test.jvm.args}</argLine> <environmentVariables> <LOG_DIRS>${test.log.dir}</LOG_DIRS> </environmentVariables>