Nataneljpwd commented on code in PR #58827:
URL: https://github.com/apache/airflow/pull/58827#discussion_r2574928382
##########
go-sdk/pkg/bundles/shared/discovery.go:
##########
@@ -147,60 +150,66 @@ func (d *Discovery) DiscoverBundles(ctx context.Context)
error {
if err != nil {
self = ""
}
+ maxProcesses := runtime.GOMAXPROCS(0)
+ g, ctx := errgroup.WithContext(ctx)
Review Comment:
Why not just use plain goroutines with a channel sending the results instead?
##########
go-sdk/pkg/bundles/shared/discovery.go:
##########
@@ -147,60 +150,66 @@ func (d *Discovery) DiscoverBundles(ctx context.Context)
error {
if err != nil {
self = ""
}
+ maxProcesses := runtime.GOMAXPROCS(0)
+ g, ctx := errgroup.WithContext(ctx)
+ g.SetLimit(maxProcesses)
Review Comment:
Isn't it already set or dynamically determined at runtime, scaling
processes up or down to handle the workload and I/O wait time?
##########
go-sdk/pkg/bundles/shared/discovery.go:
##########
@@ -147,60 +150,66 @@ func (d *Discovery) DiscoverBundles(ctx context.Context)
error {
if err != nil {
self = ""
}
+ maxProcesses := runtime.GOMAXPROCS(0)
+ g, ctx := errgroup.WithContext(ctx)
+ g.SetLimit(maxProcesses)
+ var mu sync.Mutex
for _, file := range files {
- if ctx.Err() != nil {
- // Check if we are done.
- return ctx.Err()
- }
-
- // Check if file is executable
- if !isExecutable(file) {
- continue
- }
+ g.Go(func() error {
+ if ctx.Err() != nil {
+ // Check if we are done.
+ return ctx.Err()
+ }
- abs, err := filepath.Abs(file)
- if err != nil {
- d.logger.Warn("Unable to load resolve file path",
"file", file, "err", err)
- continue
- }
- if self != "" && self == abs {
- d.logger.Warn("Not trying to load ourselves as a
plugin", "file", file)
- continue
- }
+ // Check if file is executable
+ if !isExecutable(file) {
+ return nil
+ }
- d.logger.Debug("Found potential bundle", slog.String("path",
file))
+ abs, err := filepath.Abs(file)
+ if err != nil {
+ d.logger.Warn("Unable to load resolve file
path", "file", file, "err", err)
+ return nil
+ }
+ if self != "" && self == abs {
+ d.logger.Warn("Not trying to load ourselves as
a plugin", "file", file)
+ return nil
+ }
- // TODO: Use a sync.WaitGroup to parallelize running multiple
procs without blowing concurrency up and fork-bombing
- // the host
- bundle, err := d.getBundleVersionInfo(file)
- if err != nil {
- d.logger.Warn("Unable to load BundleMetadata", "file",
file, "err", err)
- continue
- }
+ d.logger.Debug("Found potential bundle",
slog.String("path", file))
- versions, exists := d.bundles[bundle.Name]
- if !exists {
- versions = make(map[string]string)
- d.bundles[bundle.Name] = versions
- }
+ bundle, err := d.getBundleVersionInfo(file)
+ if err != nil {
+ d.logger.Warn("Unable to load BundleMetadata",
"file", file, "err", err)
+ return nil
+ }
+ mu.Lock()
+ versions, exists := d.bundles[bundle.Name]
+ if !exists {
+ versions = make(map[string]string)
+ d.bundles[bundle.Name] = versions
+ }
- var key string
- logAs := bundle.Name
- if bundle.Version != nil {
- key = *bundle.Version
- logAs = fmt.Sprintf("%s@%s", bundle.Name, key)
- }
- d.logger.Info(
- "Discovered bundle",
- "key",
- logAs,
- "file",
- file,
- )
- versions[key] = file
+ var key string
+ logAs := bundle.Name
+ if bundle.Version != nil {
+ key = *bundle.Version
+ logAs = fmt.Sprintf("%s@%s", bundle.Name, key)
+ }
+ d.logger.Info(
+ "Discovered bundle",
+ "key",
+ logAs,
+ "file",
+ file,
+ )
+ versions[key] = file
+ mu.Unlock()
+ return nil
+ })
}
- return nil
+ return g.Wait()
Review Comment:
Why not a simple `sync.WaitGroup` for all processes?
Isn't that more idiomatic and simpler?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]