tvalentyn commented on code in PR #16658:
URL: https://github.com/apache/beam/pull/16658#discussion_r973484944


##########
sdks/python/container/boot.go:
##########
@@ -137,46 +145,49 @@ func main() {
 
        options, err := provision.ProtoToJSON(info.GetPipelineOptions())
        if err != nil {
-               log.Fatalf("Failed to convert pipeline options: %v", err)
+               return fmt.Errorf("Failed to convert pipeline options: %v", err)
        }
 
        // (2) Retrieve and install the staged packages.
        //
-       // Guard from concurrent artifact retrieval and installation,
-       // when called by child processes in a worker pool.
+       // No log.Fatalf() from here on, otherwise deferred cleanups will not 
be called!
 
-       materializeArtifactsFunc := func() {
-               dir := filepath.Join(*semiPersistDir, "staged")
+       // Trap signals, so we can clean up properly.
+       signalChannel := make(chan os.Signal, 1)
+       signal.Notify(signalChannel, syscall.SIGHUP, syscall.SIGINT, 
syscall.SIGTERM)
 
-               files, err := artifact.Materialize(ctx, *artifactEndpoint, 
info.GetDependencies(), info.GetRetrievalToken(), dir)
-               if err != nil {
-                       log.Fatalf("Failed to retrieve staged files: %v", err)
-               }
+       venvDir, err := setupVenv(filepath.Join(*semiPersistDir, "beam-venv"), 
*id)
+       if err != nil {
+               return fmt.Errorf("Failed to initialize Python venv.")
+       }
+       cleanupFunc := func() {
+               os.RemoveAll(venvDir)
+               log.Printf("Cleaned up temporary venv for worker %v.", *id)
+       }
+       defer cleanupFunc()
 
-               // TODO(herohde): the packages to install should be specified 
explicitly. It
-               // would also be possible to install the SDK in the Dockerfile.
-               fileNames := make([]string, len(files))
-               requirementsFiles := []string{requirementsFile}
-               for i, v := range files {
-                       name, _ := artifact.MustExtractFilePayload(v)
-                       log.Printf("Found artifact: %s", name)
-                       fileNames[i] = name
-
-                       if v.RoleUrn == artifact.URNPipRequirementsFile {
-                               requirementsFiles = append(requirementsFiles, 
name)
-                       }
-               }
+       dir := filepath.Join(*semiPersistDir, "staged")
+       files, err := artifact.Materialize(ctx, *artifactEndpoint, 
info.GetDependencies(), info.GetRetrievalToken(), dir)
+       if err != nil {
+               return fmt.Errorf("Failed to retrieve staged files: %v", err)
+       }
+
+       // TODO(herohde): the packages to install should be specified 
explicitly. It
+       // would also be possible to install the SDK in the Dockerfile.
+       fileNames := make([]string, len(files))
+       requirementsFiles := []string{requirementsFile}
+       for i, v := range files {
+               name, _ := artifact.MustExtractFilePayload(v)
+               log.Printf("Found artifact: %s", name)
+               fileNames[i] = name
 
-               if setupErr := installSetupPackages(fileNames, dir, 
requirementsFiles); setupErr != nil {
-                       log.Fatalf("Failed to install required packages: %v", 
setupErr)
+               if v.RoleUrn == artifact.URNPipRequirementsFile {
+                       requirementsFiles = append(requirementsFiles, name)
                }
        }
 
-       workerPoolId := os.Getenv(workerPoolIdEnv)
-       if workerPoolId != "" {
-               multiProcessExactlyOnce(materializeArtifactsFunc, 
"beam.install.complete."+workerPoolId)

Review Comment:
   Is my understanding correct that the change in this PR is motivated by the 
scenario where `workerPool == true`? That is, the `workerPool == true` case is 
the execution mode described in BEAM-12792, and it is the execution mode that 
benefits from having the environment created separately for each SDK worker.
      



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to