damccorm commented on code in PR #31796: URL: https://github.com/apache/beam/pull/31796#discussion_r1671798896
########## runners/prism/java/src/test/java/org/apache/beam/runners/prism/PrismLocatorTest.java: ########## @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.prism; + +import static com.google.common.truth.Truth.assertThat; +import static org.apache.beam.runners.prism.PrismRunnerTest.getLocalPrismBuildOrIgnoreTest; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for {@link PrismLocator}. */ +@RunWith(JUnit4.class) +public class PrismLocatorTest { + + @Test + public void givenVersionOverride_thenResolves() throws IOException { + PrismPipelineOptions options = options(); + options.setPrismVersionOverride("2.57.0"); + PrismLocator underTest = new PrismLocator(options); + String got = underTest.resolve(); + assertThat(got).contains(".apache_beam/cache/prism/bin/"); + assertThat(got).contains("2.57.0"); + Path gotPath = Paths.get(got); + assertThat(Files.exists(gotPath)).isTrue(); + Files.delete(gotPath); + } + + @Test + public void givenHttpPrismLocationOption_thenResolves() throws IOException { + PrismPipelineOptions options = options(); + options.setPrismLocation( + "https://github.com/apache/beam/releases/download/v2.57.0/apache_beam-v2.57.0-prism-darwin-arm64.zip"); + PrismLocator underTest = new PrismLocator(options); + String got = underTest.resolve(); + assertThat(got).contains(".apache_beam/cache/prism/bin/"); + Path gotPath = Paths.get(got); + assertThat(Files.exists(gotPath)).isTrue(); + Files.delete(gotPath); + } + + @Test + public void givenFilePrismLocationOption_thenResolves() throws IOException { Review Comment: I don't think this actually effectively tests that we're pulling the local prism runner. It just tests that we're getting something, but it could be downloaded, right? Or even cached? Could we instead: a) clear the cache (probably this should be a setup step for each test) b) write a local temp file and give that as the location c) compare that the files are identical ########## runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismLocator.java: ########## @@ -0,0 +1,253 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.prism; + +import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.PosixFilePermission; +import java.nio.file.attribute.PosixFilePermissions; +import java.util.Iterator; +import java.util.Set; +import java.util.function.BiConsumer; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; +import org.apache.beam.sdk.util.ReleaseInfo; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Splitter; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.hash.HashCode; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.hash.Hashing; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.io.ByteStreams; + +/** + * Locates a Prism executable based on a user's default operating system and architecture + * environment or a {@link PrismPipelineOptions#getPrismLocation()} override. Handles the download, + * unzip, {@link PosixFilePermissions}, as needed. For {@link #GITHUB_DOWNLOAD_PREFIX} sources, + * additionally performs a SHA512 verification. + */ +class PrismLocator { + static final String OS_NAME_PROPERTY = "os.name"; + static final String ARCH_PROPERTY = "os.arch"; + static final String USER_HOME_PROPERTY = "user.home"; + + private static final String ZIP_EXT = "zip"; + private static final String SHA512_EXT = "sha512"; + private static final ReleaseInfo RELEASE_INFO = ReleaseInfo.getReleaseInfo(); + private static final String PRISM_BIN_PATH = ".apache_beam/cache/prism/bin"; + private static final Set<PosixFilePermission> PERMS = + PosixFilePermissions.fromString("rwxr-xr-x"); + private static final String GITHUB_DOWNLOAD_PREFIX = + "https://github.com/apache/beam/releases/download"; + private static final String GITHUB_TAG_PREFIX = "https://github.com/apache/beam/releases/tag"; + + private final PrismPipelineOptions options; + + PrismLocator(PrismPipelineOptions options) { + this.options = options; + } + + /** + * Downloads and prepares a Prism executable for use with the {@link PrismRunner}, executed by the + * {@link PrismExecutor}. The returned {@link String} is the absolute path to the Prism + * executable. + */ + String resolve() throws IOException { + + String from = + String.format("%s/v%s/%s.zip", GITHUB_DOWNLOAD_PREFIX, getSDKVersion(), buildFileName()); + + if (!Strings.isNullOrEmpty(options.getPrismLocation())) { + checkArgument( + !options.getPrismLocation().startsWith(GITHUB_TAG_PREFIX), + "Provided --prismLocation URL is not an Apache Beam Github " + + "Release page URL or download URL: ", + from); + + from = options.getPrismLocation(); + } + + String fromFileName = getNameWithoutExtension(from); + Path to = Paths.get(userHome(), PRISM_BIN_PATH, fromFileName); + + if (Files.exists(to)) { + return to.toString(); + } + + createDirectoryIfNeeded(to); + + if (from.startsWith("http")) { Review Comment: If `options.getPrismLocation()` is a filepath, won't we fail on the following assert: ``` checkArgument( !options.getPrismLocation().startsWith(GITHUB_TAG_PREFIX), "Provided --prismLocation URL is not an Apache Beam Github " + "Release page URL or download URL: ", from); ``` Note, I added a comment below because I don't think we're correctly testing this behavior ########## runners/prism/java/src/test/java/org/apache/beam/runners/prism/PrismLocatorTest.java: ########## @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.prism; + +import static com.google.common.truth.Truth.assertThat; +import static org.apache.beam.runners.prism.PrismRunnerTest.getLocalPrismBuildOrIgnoreTest; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for {@link PrismLocator}. */ +@RunWith(JUnit4.class) +public class PrismLocatorTest { + + @Test + public void givenVersionOverride_thenResolves() throws IOException { + PrismPipelineOptions options = options(); + options.setPrismVersionOverride("2.57.0"); + PrismLocator underTest = new PrismLocator(options); + String got = underTest.resolve(); + assertThat(got).contains(".apache_beam/cache/prism/bin/"); + assertThat(got).contains("2.57.0"); + Path gotPath = Paths.get(got); + assertThat(Files.exists(gotPath)).isTrue(); + Files.delete(gotPath); + } + + @Test + public void givenHttpPrismLocationOption_thenResolves() throws IOException { + PrismPipelineOptions options = options(); + options.setPrismLocation( + "https://github.com/apache/beam/releases/download/v2.57.0/apache_beam-v2.57.0-prism-darwin-arm64.zip"); + PrismLocator underTest = new PrismLocator(options); + String got = underTest.resolve(); + assertThat(got).contains(".apache_beam/cache/prism/bin/"); + Path gotPath = Paths.get(got); + assertThat(Files.exists(gotPath)).isTrue(); + Files.delete(gotPath); + } + + @Test + public void givenFilePrismLocationOption_thenResolves() throws IOException { Review Comment: Note that deleting the file at the end of the test doesn't necessarily resolve the caching issue since a different version of the file could be there if: 1) This is the first test to run and another codeblock populated the cache 2) A previous test failed in the middle and didn't delete the file So it is better to delete it at the start if it exists (and still do cleanup at the end) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
