tvalentyn commented on code in PR #16658:
URL: https://github.com/apache/beam/pull/16658#discussion_r973484944


##########
sdks/python/container/boot.go:
##########
@@ -137,46 +145,49 @@ func main() {
 
        options, err := provision.ProtoToJSON(info.GetPipelineOptions())
        if err != nil {
-               log.Fatalf("Failed to convert pipeline options: %v", err)
+               return fmt.Errorf("Failed to convert pipeline options: %v", err)
        }
 
        // (2) Retrieve and install the staged packages.
        //
-       // Guard from concurrent artifact retrieval and installation,
-       // when called by child processes in a worker pool.
+       // No log.Fatalf() from here on, otherwise deferred cleanups will not 
be called!
 
-       materializeArtifactsFunc := func() {
-               dir := filepath.Join(*semiPersistDir, "staged")
+       // Trap signals, so we can clean up properly.
+       signalChannel := make(chan os.Signal, 1)
+       signal.Notify(signalChannel, syscall.SIGHUP, syscall.SIGINT, 
syscall.SIGTERM)
 
-               files, err := artifact.Materialize(ctx, *artifactEndpoint, 
info.GetDependencies(), info.GetRetrievalToken(), dir)
-               if err != nil {
-                       log.Fatalf("Failed to retrieve staged files: %v", err)
-               }
+       venvDir, err := setupVenv(filepath.Join(*semiPersistDir, "beam-venv"), 
*id)
+       if err != nil {
+               return fmt.Errorf("Failed to initialize Python venv.")
+       }
+       cleanupFunc := func() {
+               os.RemoveAll(venvDir)
+               log.Printf("Cleaned up temporary venv for worker %v.", *id)
+       }
+       defer cleanupFunc()
 
-               // TODO(herohde): the packages to install should be specified 
explicitly. It
-               // would also be possible to install the SDK in the Dockerfile.
-               fileNames := make([]string, len(files))
-               requirementsFiles := []string{requirementsFile}
-               for i, v := range files {
-                       name, _ := artifact.MustExtractFilePayload(v)
-                       log.Printf("Found artifact: %s", name)
-                       fileNames[i] = name
-
-                       if v.RoleUrn == artifact.URNPipRequirementsFile {
-                               requirementsFiles = append(requirementsFiles, 
name)
-                       }
-               }
+       dir := filepath.Join(*semiPersistDir, "staged")
+       files, err := artifact.Materialize(ctx, *artifactEndpoint, 
info.GetDependencies(), info.GetRetrievalToken(), dir)
+       if err != nil {
+               return fmt.Errorf("Failed to retrieve staged files: %v", err)
+       }
+
+       // TODO(herohde): the packages to install should be specified 
explicitly. It
+       // would also be possible to install the SDK in the Dockerfile.
+       fileNames := make([]string, len(files))
+       requirementsFiles := []string{requirementsFile}
+       for i, v := range files {
+               name, _ := artifact.MustExtractFilePayload(v)
+               log.Printf("Found artifact: %s", name)
+               fileNames[i] = name
 
-               if setupErr := installSetupPackages(fileNames, dir, 
requirementsFiles); setupErr != nil {
-                       log.Fatalf("Failed to install required packages: %v", 
setupErr)
+               if v.RoleUrn == artifact.URNPipRequirementsFile {
+                       requirementsFiles = append(requirementsFiles, name)
                }
        }
 
-       workerPoolId := os.Getenv(workerPoolIdEnv)
-       if workerPoolId != "" {
-               multiProcessExactlyOnce(materializeArtifactsFunc, 
"beam.install.complete."+workerPoolId)

Review Comment:
   Is my understanding correct that these changes are only needed in the flow
for the scenario where `workerPool == true`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to