lostluck commented on a change in pull request #16153: URL: https://github.com/apache/beam/pull/16153#discussion_r766031870
########## File path: sdks/go/pkg/beam/core/runtime/xlangx/expansionx/download.go ########## @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package expansionx contains utilities for starting expansion services for +// cross-language transforms. +package expansionx + +import ( + "errors" + "fmt" + "io" + "net/http" + "os" + "os/user" + "path/filepath" + "strings" +) + +// URL is a type used to differentiate between strings and URLs +type URL string Review comment: Is it necessary for this type to be exported? Can or should it be replaced with the [net/url.URL](https://pkg.go.dev/net/[email protected]#URL) type instead? (Probably not, frankly) ########## File path: sdks/go/pkg/beam/core/runtime/xlangx/expansionx/download.go ########## @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package expansionx contains utilities for starting expansion services for +// cross-language transforms. +package expansionx + +import ( + "errors" + "fmt" + "io" + "net/http" + "os" + "os/user" + "path/filepath" + "strings" +) + +// URL is a type used to differentiate between strings and URLs +type URL string + +const ( + apacheRepository = URL("https://repo.maven.apache.org/maven2") + beamGroupID = "org/apache/beam" + jarCache = "~/.apache_beam/cache/jars" +) Review comment: Nit: In Java or Python, it's dictated that consts should be always at the top of the file. In Go, it's preferable to define these closer to where they're used, since they are not hard to find anyway. Consider moving the const block to just above the`getURLForBeamJar` function. ########## File path: sdks/go/pkg/beam/core/runtime/xlangx/expansionx/download.go ########## @@ -0,0 +1,130 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package expansionx contains utilities for starting expansion services for +// cross-language transforms. +package expansionx + +import ( + "errors" + "fmt" + "io" + "net/http" + "os" + "os/user" + "path/filepath" + "strings" +) + +// URL is a type used to differentiate between strings and URLs +type URL string + +const ( + apacheRepository = URL("https://repo.maven.apache.org/maven2") + beamGroupID = "org/apache/beam" + jarCache = "~/.apache_beam/cache/jars" +) + +// GetBeamJar checks a temporary directory for the desired Beam JAR, downloads the +// appropriate JAR from Maven if not present, then returns the file path to the +// JAR. +func GetBeamJar(gradleTarget, version string) (string, error) { + strippedTarget := dropEndOfGradleTarget(gradleTarget) + baseURL, jarName := getURLForBeamJar(strippedTarget, version) + + fullURL := baseURL + URL(jarName) + + resp, err := http.Get(string(fullURL)) + if err != nil { + return "", err + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return "", fmt.Errorf("Received non 200 response code, got %v", resp.StatusCode) + } + + cacheDir := getCacheDir() + err = checkDir(cacheDir) + if err != nil { + return "", err + } + + jarPath := filepath.Join(cacheDir, jarName) + + if jarExists(jarPath) { + return jarPath, nil + } + + file, err := os.Create(jarPath) + if err != nil { + return "", err + } + + _, err = io.Copy(file, resp.Body) + if err != nil { + return "", err + } + + return jarPath, nil +} + +// getURLForBeamJar builds the Maven URL for the JAR and the JAR name, returning both +// separately so the JAR name can be used for saving the file later. +func getURLForBeamJar(gradleTarget, version string) (URL, string) { + baseURL := URL(apacheRepository + "/" + beamGroupID + "/") + fullTarget := "beam" + gradleTarget + targetPath := strings.ReplaceAll(fullTarget, ":", "-") + jarName := strings.ReplaceAll(fullTarget, ":", "-") + "-" + version + ".jar" + return baseURL + URL(targetPath) + "/" + URL(version) + "/", jarName +} + +// dropEndOfGradleTarget drops the last substring off of the gradle target. This +// is used to build the Maven target and JAR name (the last substring on the gradle) +// command is usually a directive, not a reference to the desired JAR.) +func dropEndOfGradleTarget(gradleTarget string) string { + elms := strings.Split(gradleTarget, ":") + droppedSuffix := elms[:len(elms)-1] + return strings.Join(droppedSuffix, ":") +} + +// getCacheDir returns the absolute file path of the JAR cache from the user's HOME +// directory. +func getCacheDir() string { + usr, _ := user.Current() + return filepath.Join(usr.HomeDir, jarCache[2:]) +} + +// checkDir checks that a given directory exists, then creates the directory and +// parent directories if it does not. Returns another error if the returned +// error from os.Stat is not an ErrNotExist error. +func checkDir(dirPath string) error { Review comment: I agree with Danny. This isn't doing anything different from MkdirAll. If you look at it's code, it's very similar: https://cs.opensource.google/go/go/+/refs/tags/go1.17.5:src/os/path.go;l=18 (Thanks for the drive by comments Danny! I don't think I've seen you around before, so welcome to Beam!) ########## File path: sdks/go/pkg/beam/core/runtime/xlangx/expansionx/download.go ########## @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package expansionx contains utilities for starting expansion services for +// cross-language transforms. +package expansionx + +import ( + "errors" + "fmt" + "io" + "net/http" + "os" + "os/user" + "path/filepath" + "strings" +) + +// URL is a type used to differentiate between strings and URLs +type URL string + +const ( + apacheRepository = URL("https://repo.maven.apache.org/maven2") + beamGroupID = "org/apache/beam" + jarCache = "~/.apache_beam/cache/jars" +) + +// GetBeamJar checks a temporary directory for the desired Beam JAR, downloads the +// appropriate JAR from Maven if not present, then returns the file path to the +// JAR. +func GetBeamJar(gradleTarget, version string) (string, error) { Review comment: While it's unlikely to matter too much, for making code like this (which refers to constants and such) more testable, consider making a struct to hold the constants, and the rest to be methods. This way, one can substitute the constants out in tests (without needing to go for approaches like re-assigning variables, which can be brittle and non-hermetic). Eg with local addresses. In particular, it would allow for something I suggested to you offline: Being able to fully test this with a fake in process HTTP server. That would be going the extra mile on testing, and I don't expect that in this instance. The exported function `GetBeamJar` can still be a package level function, but then it's doing something like this. ``` func GetBeamJar(gradleTarget, version string) (string, error) { return cacher{ repo: apacheRepository, groupID: beamGroupID, jarCache: jarCache, }.getJar(gradleTarget, version) } ``` With all the implementation handled in the getJar method. ########## File path: sdks/go/pkg/beam/core/runtime/xlangx/expansionx/download.go ########## @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package expansionx contains utilities for starting expansion services for +// cross-language transforms. +package expansionx + +import ( + "errors" + "fmt" + "io" + "net/http" + "os" + "os/user" + "path/filepath" + "strings" +) + +// URL is a type used to differentiate between strings and URLs +type URL string + +const ( + apacheRepository = URL("https://repo.maven.apache.org/maven2") + beamGroupID = "org/apache/beam" + jarCache = "~/.apache_beam/cache/jars" +) + +// GetBeamJar checks a temporary directory for the desired Beam JAR, downloads the +// appropriate JAR from Maven if not present, then returns the file path to the +// JAR. +func GetBeamJar(gradleTarget, version string) (string, error) { + strippedTarget := dropEndOfGradleTarget(gradleTarget) + fullURL, jarName := getURLForBeamJar(strippedTarget, version) + + resp, err := http.Get(string(fullURL)) + if err != nil { + return "", err + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return "", fmt.Errorf("Received non 200 response code, got %v", resp.StatusCode) + } + + cacheDir := getCacheDir() + err = checkDir(cacheDir) + if err != nil { + return "", err + } + + jarPath := filepath.Join(cacheDir, jarName) + + if jarExists(jarPath) { + return jarPath, nil + } Review comment: This should be the first part of the function, before the HTTP request. Part of the benefit of the cache is that one should be able to avoid making any HTTP requests if the cached jar is present. This also lets one unit test the cache functionality in the "cache hit" case, and not worry about the HTTP call. (Hurray coverage!) Now ideally, we'd be doing proper validation, checksum/hash validation, but this is a simple, trusting cache. (eg. If the jar is present, we should in principle be able to look up the checksum/hash with a different request to maven, and then if that also fails, fall back to re-downloading it, but that's not a v0 task.) ########## File path: sdks/go/pkg/beam/core/runtime/xlangx/expansionx/download_test.go ########## @@ -0,0 +1,131 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package expansionx + +import ( + "io/ioutil" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestGetURLForBeamJar(t *testing.T) { + tests := []struct { + name string + target string + version string + expectedURL URL + }{ + { + "base", + ":sdks:java:fake", + "VERSION", + "https://repo.maven.apache.org/maven2/org/apache/beam/beam-sdks-java-fake/VERSION/beam-sdks-java-fake-VERSION.jar", + }, + { + "versioned", + ":sdks:java:fake", + "1.2.3", + "https://repo.maven.apache.org/maven2/org/apache/beam/beam-sdks-java-fake/1.2.3/beam-sdks-java-fake-1.2.3.jar", + }, + } + for _, test := range tests { + madeURL, _ := getURLForBeamJar(test.target, test.version) + if madeURL != test.expectedURL { + t.Errorf("test %v failed: wanted URL %v, got %v", test.name, test.expectedURL, madeURL) + } + } +} + +func TestDropEndOfGradleTarget(t *testing.T) { + target := ":sdks:java:fake:runFake" + expected := ":sdks:java:fake" + returned := dropEndOfGradleTarget(target) + if returned != expected { + t.Errorf("wanted %v, got %v", expected, returned) + } +} + +func TestGetCacheDir(t *testing.T) { + cacheDir := getCacheDir() + if !strings.Contains(cacheDir, jarCache[2:]) { + t.Errorf("failed to get cache directory: wanted %v, got %v", jarCache[:2], cacheDir) + } +} + +func TestCheckDir(t *testing.T) { + d, err := ioutil.TempDir(os.Getenv("TEST_TMPDIR"), "expansionx-*") + if err != nil { + t.Fatalf("failed to make temp directory, got %v", err) + } + defer os.RemoveAll(d) Review comment: While in this instance, it should be identical, in tests, prefer using `t.Cleanup()`, since that allows the test framework to ensure the cleanup happens, rather than function exit. Here and the other test defers. eg. `t.Cleanup(func() { os.RemoveAll(d) })` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
