[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=130588&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130588 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 02/Aug/18 18:57 Start Date: 02/Aug/18 18:57 Worklog Time Spent: 10m Work Description: herohde closed pull request #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/sdks/go/pkg/beam/core/metrics/metrics.go b/sdks/go/pkg/beam/core/metrics/metrics.go index 1899cc89971..d00f1f04afd 100644 --- a/sdks/go/pkg/beam/core/metrics/metrics.go +++ b/sdks/go/pkg/beam/core/metrics/metrics.go @@ -69,14 +69,53 @@ type ctxKey string const bundleKey ctxKey = "beam:bundle" const ptransformKey ctxKey = "beam:ptransform" +// beamCtx is a caching context for IDs necessary to place metric updates. +// Allocating contexts and searching for PTransformIDs for every element +// is expensive, so we avoid it if possible. +type beamCtx struct { + context.Context + bundleID, ptransformID string +} + +// Value lifts the beam contLift the keys value for faster lookups when not available. +func (ctx *beamCtx) Value(key interface{}) interface{} { + switch key { + case bundleKey: + if ctx.bundleID == "" { + if id := ctx.Value(key); id != nil { + ctx.bundleID = id.(string) + } + } + return ctx.bundleID + case ptransformKey: + if ctx.ptransformID == "" { + if id := ctx.Value(key); id != nil { + ctx.ptransformID = id.(string) + } + } + return ctx.ptransformID + } + return ctx.Context.Value(key) +} + // SetBundleID sets the id of the current Bundle. func SetBundleID(ctx context.Context, id string) context.Context { - return context.WithValue(ctx, bundleKey, id) + // Checking for *beamCtx is an optimization, so we don't dig deeply + // for ids if not necessary. + if bctx, ok := ctx.(*beamCtx); ok { + return &beamCtx{Context: bctx.Context, bundleID: id, ptransformID: bctx.ptransformID} + } + return &beamCtx{Context: ctx, bundleID: id} } // SetPTransformID sets the id of the current PTransform. func SetPTransformID(ctx context.Context, id string) context.Context { - return context.WithValue(ctx, ptransformKey, id) + // Checking for *beamCtx is an optimization, so we don't dig deeply + // for ids if not necessary. + if bctx, ok := ctx.(*beamCtx); ok { + return &beamCtx{Context: bctx.Context, bundleID: bctx.bundleID, ptransformID: id} + } + return &beamCtx{Context: ctx, ptransformID: id} } func getContextKey(ctx context.Context, n name) key { diff --git a/sdks/go/pkg/beam/core/runtime/exec/pardo.go b/sdks/go/pkg/beam/core/runtime/exec/pardo.go index 41da9f241fa..f0e62cc3b81 100644 --- a/sdks/go/pkg/beam/core/runtime/exec/pardo.go +++ b/sdks/go/pkg/beam/core/runtime/exec/pardo.go @@ -45,8 +45,8 @@ type ParDo struct { status Status errerrorx.GuardedError - - inv *invoker + ctxcontext.Context + inv*invoker } func (n *ParDo) ID() UnitID { @@ -71,8 +71,12 @@ func (n *ParDo) StartBundle(ctx context.Context, id string, data DataManager) er return fmt.Errorf("invalid status for pardo %v: %v, want Up", n.UID, n.status) } n.status = Active + // Allocating contexts all the time is expensive, but we seldom re-write them, + // and never accept modified contexts from users, so we will cache them per-bundle + // per-unit, to avoid the constant allocation overhead. + n.ctx = metrics.SetPTransformID(ctx, n.PID) - if err := MultiStartBundle(ctx, id, data, n.Out...); err != nil { + if err := MultiStartBundle(n.ctx, id, data, n.Out...); err != nil { return n.fail(err) } @@ -87,7 +91,7 @@ func (n *ParDo) StartBundle(ctx context.Context, id string, data DataManager) er // TODO(BEAM-3303): what to set for StartBundle/FinishBundle emitter timestamp? - if _, err := n.invokeDataFn(ctx, window.SingleGlobalWindow, mtime.ZeroTimestamp, n.Fn.StartBundleFn(), nil); err != nil { + if _, err := n.invokeDataFn(n.ctx, window.SingleGlobalWindow, mtime.ZeroTimestamp, n.Fn.StartBundleFn(), nil); err != nil { return n.fail(err) }
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=130587&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130587 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 02/Aug/18 18:56 Start Date: 02/Aug/18 18:56 Worklog Time Spent: 10m Work Description: herohde commented on a change in pull request #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884#discussion_r203970121 ## File path: sdks/go/pkg/beam/core/metrics/metrics.go ## @@ -69,18 +69,58 @@ type ctxKey string const bundleKey ctxKey = "beam:bundle" const ptransformKey ctxKey = "beam:ptransform" +// beamCtx is a caching context for necessary IDs. Allocating contexts and searching +// for PTransformIDs for every element is expensive, so we avoid it where possible. +type beamCtx struct { + context.Context + bundleID, ptransformID string +} + +// Value lifts the beam contLift the keys value for faster lookups when not available. +func (ctx *beamCtx) Value(key interface{}) interface{} { + switch key { + case bundleKey: + if ctx.bundleID == "" { + if id := ctx.Value(key); id != nil { + ctx.bundleID = id.(string) + } + } + return ctx.bundleID + case ptransformKey: + if ctx.ptransformID == "" { + if id := ctx.Value(key); id != nil { + ctx.ptransformID = id.(string) + } + } + return ctx.ptransformID + } + return ctx.Context.Value(key) +} + // SetBundleID sets the id of the current Bundle. func SetBundleID(ctx context.Context, id string) context.Context { - return context.WithValue(ctx, bundleKey, id) + if bctx, ok := ctx.(*beamCtx); ok { + return &beamCtx{Context: bctx, bundleID: id, ptransformID: bctx.ptransformID} Review comment: We can use "bctx.context" here is shorted the chain. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 130587) Time Spent: 3h 50m (was: 3h 40m) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 3h 50m > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=130580&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130580 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 02/Aug/18 18:46 Start Date: 02/Aug/18 18:46 Worklog Time Spent: 10m Work Description: lostluck commented on issue #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884#issuecomment-410029391 Ping @herohde; I think I've covered all your comments in this iteration. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 130580) Time Spent: 3h 40m (was: 3.5h) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 3h 40m > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=129430&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-129430 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 31/Jul/18 18:34 Start Date: 31/Jul/18 18:34 Worklog Time Spent: 10m Work Description: lostluck commented on a change in pull request #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884#discussion_r206610503 ## File path: sdks/go/pkg/beam/core/metrics/metrics.go ## @@ -69,18 +69,58 @@ type ctxKey string const bundleKey ctxKey = "beam:bundle" const ptransformKey ctxKey = "beam:ptransform" +// context is a caching context since reads & writes are expensive. +type beamCtx struct { + context.Context + bundleID, ptransformID string +} + +// Lift the keys value for faster lookups when not available. +func (ctx *beamCtx) Value(key interface{}) interface{} { + switch key { + case bundleKey: + if ctx.bundleID == "" { + if id := ctx.Value(key); id != nil { + ctx.bundleID = id.(string) + } + } + return ctx.bundleID + case ptransformKey: + if ctx.ptransformID == "" { + if id := ctx.Value(key); id != nil { + ctx.ptransformID = id.(string) + } + } + return ctx.ptransformID + } + return ctx.Context.Value(key) +} + // SetBundleID sets the id of the current Bundle. func SetBundleID(ctx context.Context, id string) context.Context { - return context.WithValue(ctx, bundleKey, id) + var ptransformID string + if bctx, ok := ctx.(*beamCtx); ok { + ptransformID = bctx.ptransformID + } + return &beamCtx{Context: ctx, bundleID: id, ptransformID: ptransformID} Review comment: Done. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 129430) Time Spent: 3h 20m (was: 3h 10m) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 3h 20m > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=129431&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-129431 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 31/Jul/18 18:34 Start Date: 31/Jul/18 18:34 Worklog Time Spent: 10m Work Description: lostluck commented on issue #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884#issuecomment-409323843 PTAL @herohde This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 129431) Time Spent: 3.5h (was: 3h 20m) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 3.5h > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=129427&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-129427 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 31/Jul/18 18:34 Start Date: 31/Jul/18 18:34 Worklog Time Spent: 10m Work Description: lostluck commented on a change in pull request #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884#discussion_r206609509 ## File path: sdks/go/pkg/beam/core/metrics/metrics.go ## @@ -69,18 +69,58 @@ type ctxKey string const bundleKey ctxKey = "beam:bundle" const ptransformKey ctxKey = "beam:ptransform" +// context is a caching context since reads & writes are expensive. Review comment: Done. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 129427) Time Spent: 2h 50m (was: 2h 40m) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 2h 50m > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=129428&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-129428 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 31/Jul/18 18:34 Start Date: 31/Jul/18 18:34 Worklog Time Spent: 10m Work Description: lostluck commented on a change in pull request #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884#discussion_r206610122 ## File path: sdks/go/pkg/beam/core/metrics/metrics.go ## @@ -69,18 +69,58 @@ type ctxKey string const bundleKey ctxKey = "beam:bundle" const ptransformKey ctxKey = "beam:ptransform" +// context is a caching context since reads & writes are expensive. +type beamCtx struct { + context.Context + bundleID, ptransformID string +} + +// Lift the keys value for faster lookups when not available. +func (ctx *beamCtx) Value(key interface{}) interface{} { + switch key { + case bundleKey: + if ctx.bundleID == "" { + if id := ctx.Value(key); id != nil { + ctx.bundleID = id.(string) + } + } + return ctx.bundleID + case ptransformKey: + if ctx.ptransformID == "" { + if id := ctx.Value(key); id != nil { + ctx.ptransformID = id.(string) + } + } + return ctx.ptransformID + } + return ctx.Context.Value(key) +} + // SetBundleID sets the id of the current Bundle. func SetBundleID(ctx context.Context, id string) context.Context { - return context.WithValue(ctx, bundleKey, id) + var ptransformID string + if bctx, ok := ctx.(*beamCtx); ok { Review comment: Done. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 129428) Time Spent: 3h (was: 2h 50m) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 3h > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=129429&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-129429 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 31/Jul/18 18:34 Start Date: 31/Jul/18 18:34 Worklog Time Spent: 10m Work Description: lostluck commented on a change in pull request #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884#discussion_r206611230 ## File path: sdks/go/pkg/beam/core/metrics/metrics.go ## @@ -69,18 +69,58 @@ type ctxKey string const bundleKey ctxKey = "beam:bundle" const ptransformKey ctxKey = "beam:ptransform" +// context is a caching context since reads & writes are expensive. +type beamCtx struct { + context.Context + bundleID, ptransformID string +} + +// Lift the keys value for faster lookups when not available. +func (ctx *beamCtx) Value(key interface{}) interface{} { + switch key { + case bundleKey: + if ctx.bundleID == "" { + if id := ctx.Value(key); id != nil { + ctx.bundleID = id.(string) + } + } + return ctx.bundleID + case ptransformKey: + if ctx.ptransformID == "" { + if id := ctx.Value(key); id != nil { + ctx.ptransformID = id.(string) + } + } + return ctx.ptransformID + } + return ctx.Context.Value(key) +} + // SetBundleID sets the id of the current Bundle. func SetBundleID(ctx context.Context, id string) context.Context { - return context.WithValue(ctx, bundleKey, id) + var ptransformID string + if bctx, ok := ctx.(*beamCtx); ok { + ptransformID = bctx.ptransformID + } + return &beamCtx{Context: ctx, bundleID: id, ptransformID: ptransformID} } // SetPTransformID sets the id of the current PTransform. func SetPTransformID(ctx context.Context, id string) context.Context { - return context.WithValue(ctx, ptransformKey, id) + var bundleID string + if bctx, ok := ctx.(*beamCtx); ok { + bundleID = bctx.bundleID + } + return &beamCtx{Context: ctx, bundleID: bundleID, ptransformID: id} } func getContextKey(ctx context.Context, n name) key { key := key{name: n, bundle: "(bundle id unset)", ptransform: "(ptransform id unset)"} + if bctx, ok := ctx.(*beamCtx); ok { Review comment: I think you're right. Removing this block doesn't hurt performance (measured via the pardo benchmark I added) and is too subtle. Removed! This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 129429) Time Spent: 3h 10m (was: 3h) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 3h 10m > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=122039&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-122039 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 11/Jul/18 20:43 Start Date: 11/Jul/18 20:43 Worklog Time Spent: 10m Work Description: herohde closed pull request #5885: [BEAM-4727] Improve metric lookup overhead for predeclared metrics URL: https://github.com/apache/beam/pull/5885 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/sdks/go/pkg/beam/core/metrics/metrics.go b/sdks/go/pkg/beam/core/metrics/metrics.go index 73ef73a36b6..1899cc89971 100644 --- a/sdks/go/pkg/beam/core/metrics/metrics.go +++ b/sdks/go/pkg/beam/core/metrics/metrics.go @@ -130,12 +130,12 @@ var ( // simplifies code understanding, since each only contains a single type of // cell. - // counters is a map[key]*counter - counters = sync.Map{} - // distributions is a map[key]*distribution - distributions = sync.Map{} - // gauges is a map[key]*gauge - gauges = sync.Map{} + countersMu sync.RWMutex + counters= make(map[key]*counter) + distributionsMu sync.RWMutex + distributions = make(map[key]*distribution) + gaugesMusync.RWMutex + gauges = make(map[key]*gauge) ) // TODO(lostluck): 2018/03/05 Use a common internal beam now() instead, once that exists. @@ -144,37 +144,54 @@ var now = time.Now // Counter is a simple counter for incrementing and decrementing a value. type Counter struct { name name + // The following are a fast cache of the key and storage + mu sync.Mutex + k key + c *counter } -func (m Counter) String() string { +func (m *Counter) String() string { return fmt.Sprintf("Counter metric %s", m.name) } // NewCounter returns the Counter with the given namespace and name. -func NewCounter(ns, n string) Counter { +func NewCounter(ns, n string) *Counter { mn := newName(ns, n) - return Counter{ + return &Counter{ name: mn, } } // Inc increments the counter within the given PTransform context by v. -func (m Counter) Inc(ctx context.Context, v int64) { +func (m *Counter) Inc(ctx context.Context, v int64) { key := getContextKey(ctx, m.name) cs := &counter{ value: v, } - if m, loaded := counters.LoadOrStore(key, cs); loaded { - c := m.(*counter) + m.mu.Lock() + if m.k == key { + m.c.inc(v) + m.mu.Unlock() + return + } + m.k = key + countersMu.Lock() + if c, loaded := counters[key]; loaded { + m.c = c + countersMu.Unlock() + m.mu.Unlock() c.inc(v) - } else { - c := m.(*counter) - storeMetric(key, c) + return } + m.c = cs + counters[key] = cs + countersMu.Unlock() + m.mu.Unlock() + storeMetric(key, cs) } // Dec decrements the counter within the given PTransform context by v. -func (m Counter) Dec(ctx context.Context, v int64) { +func (m *Counter) Dec(ctx context.Context, v int64) { m.Inc(ctx, -v) } @@ -211,22 +228,27 @@ func (m *counter) toProto() *fnexecution_v1.Metrics_User { // Distribution is a simple distribution of values. type Distribution struct { name name + + // The following are a fast cache of the key and storage + mu sync.Mutex + k key + d *distribution } -func (m Distribution) String() string { +func (m *Distribution) String() string { return fmt.Sprintf("Distribution metric %s", m.name) } // NewDistribution returns the Distribution with the given namespace and name. -func NewDistribution(ns, n string) Distribution { +func NewDistribution(ns, n string) *Distribution { mn := newName(ns, n) - return Distribution{ + return &Distribution{ name: mn, } } // Update updates the distribution within the given PTransform context with v. -func (m Distribution) Update(ctx context.Context, v int64) { +func (m *Distribution) Update(ctx context.Context, v int64) { key := getContextKey(ctx, m.name) ds := &distribution{ count: 1, @@ -234,13 +256,26 @@ func (m Distribution) Update(ctx context.Context, v int64) { min: v, max: v, } - if m, loaded := distributions.LoadOrStore(key, ds); loaded { - d := m.(*distribution)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=122032&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-122032 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 11/Jul/18 20:18 Start Date: 11/Jul/18 20:18 Worklog Time Spent: 10m Work Description: herohde commented on a change in pull request #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884#discussion_r201826123 ## File path: sdks/go/pkg/beam/core/metrics/metrics.go ## @@ -69,18 +69,58 @@ type ctxKey string const bundleKey ctxKey = "beam:bundle" const ptransformKey ctxKey = "beam:ptransform" +// context is a caching context since reads & writes are expensive. +type beamCtx struct { + context.Context + bundleID, ptransformID string +} + +// Lift the keys value for faster lookups when not available. +func (ctx *beamCtx) Value(key interface{}) interface{} { + switch key { + case bundleKey: + if ctx.bundleID == "" { + if id := ctx.Value(key); id != nil { + ctx.bundleID = id.(string) + } + } + return ctx.bundleID + case ptransformKey: + if ctx.ptransformID == "" { + if id := ctx.Value(key); id != nil { + ctx.ptransformID = id.(string) + } + } + return ctx.ptransformID + } + return ctx.Context.Value(key) +} + // SetBundleID sets the id of the current Bundle. func SetBundleID(ctx context.Context, id string) context.Context { - return context.WithValue(ctx, bundleKey, id) + var ptransformID string + if bctx, ok := ctx.(*beamCtx); ok { + ptransformID = bctx.ptransformID + } + return &beamCtx{Context: ctx, bundleID: id, ptransformID: ptransformID} Review comment: I was unclear. I was suggesting a simple improvement to these helpers avoid nesting beamCtxs by adding a sibling to the input chain of a beamCtx: func SetBundleID(ctx context.Context, id string) context.Context { if bctx, ok := ctx.(*beamCtx); ok { return &beamCtx{Context: bctx.ctx, bundleID: id, ptransformID: bctx.ptransformID} } return &beamCtx{Context: ctx, bundleID: id} } This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 122032) Time Spent: 2.5h (was: 2h 20m) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 2.5h > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=122010&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-122010 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 11/Jul/18 18:32 Start Date: 11/Jul/18 18:32 Worklog Time Spent: 10m Work Description: lostluck commented on issue #5885: [BEAM-4727] Improve metric lookup overhead for predeclared metrics URL: https://github.com/apache/beam/pull/5885#issuecomment-404268081 PTAL @herohde Removed the other commits. Just the relevant changes now. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 122010) Time Spent: 2h 20m (was: 2h 10m) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 2h 20m > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=121964&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121964 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 11/Jul/18 17:17 Start Date: 11/Jul/18 17:17 Worklog Time Spent: 10m Work Description: lostluck commented on a change in pull request #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884#discussion_r201773998 ## File path: sdks/go/pkg/beam/core/metrics/metrics.go ## @@ -69,18 +69,58 @@ type ctxKey string const bundleKey ctxKey = "beam:bundle" const ptransformKey ctxKey = "beam:ptransform" +// context is a caching context since reads & writes are expensive. +type beamCtx struct { + context.Context + bundleID, ptransformID string +} + +// Lift the keys value for faster lookups when not available. +func (ctx *beamCtx) Value(key interface{}) interface{} { + switch key { + case bundleKey: + if ctx.bundleID == "" { + if id := ctx.Value(key); id != nil { + ctx.bundleID = id.(string) + } + } + return ctx.bundleID + case ptransformKey: + if ctx.ptransformID == "" { + if id := ctx.Value(key); id != nil { + ctx.ptransformID = id.(string) + } + } + return ctx.ptransformID + } + return ctx.Context.Value(key) +} + // SetBundleID sets the id of the current Bundle. func SetBundleID(ctx context.Context, id string) context.Context { - return context.WithValue(ctx, bundleKey, id) + var ptransformID string + if bctx, ok := ctx.(*beamCtx); ok { + ptransformID = bctx.ptransformID + } + return &beamCtx{Context: ctx, bundleID: id, ptransformID: ptransformID} Review comment: I considered that but then metrics following an emit would be incorrect, short of retaining the parent PTransform ID and re-adding that, which would work, but would be cumbersome. That approach is a viable alternative to caching the Context in pardo.go though as it accomplishes the same goal. Do you prefer that approach? eg. re: correctness issue on ordering of event if the context isn't re-allocated: SDK sets PtransformID SDK calls User ParDo User ParDo1 emits - > SDK runs child ParDos which set their own PTransformIDs User ParDo1 logs a metric -> Metric is associated with the wrong PTransform. That is, we'd need to extract and re-assert the parent's PTransform ID before returning from the ParDo process elements. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 121964) Time Spent: 2h 10m (was: 2h) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 2h 10m > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=121647&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121647 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 10/Jul/18 21:59 Start Date: 10/Jul/18 21:59 Worklog Time Spent: 10m Work Description: herohde commented on a change in pull request #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884#discussion_r201507007 ## File path: sdks/go/pkg/beam/core/metrics/metrics.go ## @@ -69,18 +69,58 @@ type ctxKey string const bundleKey ctxKey = "beam:bundle" const ptransformKey ctxKey = "beam:ptransform" +// context is a caching context since reads & writes are expensive. +type beamCtx struct { + context.Context + bundleID, ptransformID string +} + +// Lift the keys value for faster lookups when not available. +func (ctx *beamCtx) Value(key interface{}) interface{} { + switch key { + case bundleKey: + if ctx.bundleID == "" { + if id := ctx.Value(key); id != nil { + ctx.bundleID = id.(string) + } + } + return ctx.bundleID + case ptransformKey: + if ctx.ptransformID == "" { + if id := ctx.Value(key); id != nil { + ctx.ptransformID = id.(string) + } + } + return ctx.ptransformID + } + return ctx.Context.Value(key) +} + // SetBundleID sets the id of the current Bundle. func SetBundleID(ctx context.Context, id string) context.Context { - return context.WithValue(ctx, bundleKey, id) + var ptransformID string + if bctx, ok := ctx.(*beamCtx); ok { + ptransformID = bctx.ptransformID + } + return &beamCtx{Context: ctx, bundleID: id, ptransformID: ptransformID} Review comment: We can also use the beamCtx's context as Context here to avoid nesting beamCtxs. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 121647) Time Spent: 1h 50m (was: 1h 40m) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 1h 50m > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=121650&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121650 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 10/Jul/18 21:59 Start Date: 10/Jul/18 21:59 Worklog Time Spent: 10m Work Description: herohde commented on a change in pull request #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884#discussion_r201506572 ## File path: sdks/go/pkg/beam/core/metrics/metrics.go ## @@ -69,18 +69,58 @@ type ctxKey string const bundleKey ctxKey = "beam:bundle" const ptransformKey ctxKey = "beam:ptransform" +// context is a caching context since reads & writes are expensive. +type beamCtx struct { + context.Context + bundleID, ptransformID string +} + +// Lift the keys value for faster lookups when not available. +func (ctx *beamCtx) Value(key interface{}) interface{} { + switch key { + case bundleKey: + if ctx.bundleID == "" { + if id := ctx.Value(key); id != nil { + ctx.bundleID = id.(string) + } + } + return ctx.bundleID + case ptransformKey: + if ctx.ptransformID == "" { + if id := ctx.Value(key); id != nil { + ctx.ptransformID = id.(string) + } + } + return ctx.ptransformID + } + return ctx.Context.Value(key) +} + // SetBundleID sets the id of the current Bundle. func SetBundleID(ctx context.Context, id string) context.Context { - return context.WithValue(ctx, bundleKey, id) + var ptransformID string + if bctx, ok := ctx.(*beamCtx); ok { Review comment: The semantics here is subtle and deserves a comment. I would suggest making a note that checking for beamCtx here is just an optimization. Always using "" as ptransformID would work as well. Ditto for the other set helper. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 121650) Time Spent: 2h (was: 1h 50m) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 2h > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=121649&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121649 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 10/Jul/18 21:59 Start Date: 10/Jul/18 21:59 Worklog Time Spent: 10m Work Description: herohde commented on a change in pull request #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884#discussion_r201505368 ## File path: sdks/go/pkg/beam/core/metrics/metrics.go ## @@ -69,18 +69,58 @@ type ctxKey string const bundleKey ctxKey = "beam:bundle" const ptransformKey ctxKey = "beam:ptransform" +// context is a caching context since reads & writes are expensive. +type beamCtx struct { + context.Context + bundleID, ptransformID string +} + +// Lift the keys value for faster lookups when not available. +func (ctx *beamCtx) Value(key interface{}) interface{} { + switch key { + case bundleKey: + if ctx.bundleID == "" { + if id := ctx.Value(key); id != nil { + ctx.bundleID = id.(string) + } + } + return ctx.bundleID + case ptransformKey: + if ctx.ptransformID == "" { + if id := ctx.Value(key); id != nil { + ctx.ptransformID = id.(string) + } + } + return ctx.ptransformID + } + return ctx.Context.Value(key) +} + // SetBundleID sets the id of the current Bundle. func SetBundleID(ctx context.Context, id string) context.Context { - return context.WithValue(ctx, bundleKey, id) + var ptransformID string + if bctx, ok := ctx.(*beamCtx); ok { + ptransformID = bctx.ptransformID + } + return &beamCtx{Context: ctx, bundleID: id, ptransformID: ptransformID} } // SetPTransformID sets the id of the current PTransform. func SetPTransformID(ctx context.Context, id string) context.Context { - return context.WithValue(ctx, ptransformKey, id) + var bundleID string + if bctx, ok := ctx.(*beamCtx); ok { + bundleID = bctx.bundleID + } + return &beamCtx{Context: ctx, bundleID: bundleID, ptransformID: id} } func getContextKey(ctx context.Context, n name) key { key := key{name: n, bundle: "(bundle id unset)", ptransform: "(ptransform id unset)"} + if bctx, ok := ctx.(*beamCtx); ok { Review comment: This inserted block would be wrong, if we used context.WithValue on a beamCtx and went around the on-write caching. Is this block even needed given the behavior of beamCtx.Value as a genuine cache? At any rate, please document the behavior and assumptions of beamCtx. It feels subtle. Perhaps also add some test where we mix normal contexts and operations with beamCtx and its helpers. IMO it should be a semantic no-op. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 121649) Time Spent: 2h (was: 1h 50m) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 2h > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=121648&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121648 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 10/Jul/18 21:59 Start Date: 10/Jul/18 21:59 Worklog Time Spent: 10m Work Description: herohde commented on a change in pull request #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884#discussion_r201483579 ## File path: sdks/go/pkg/beam/core/metrics/metrics.go ## @@ -69,18 +69,58 @@ type ctxKey string const bundleKey ctxKey = "beam:bundle" const ptransformKey ctxKey = "beam:ptransform" +// context is a caching context since reads & writes are expensive. Review comment: nit: beamCtx is ... Maybe also elaborate what it caches. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 121648) Time Spent: 1h 50m (was: 1h 40m) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 1h 50m > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=119989&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-119989 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 06/Jul/18 19:05 Start Date: 06/Jul/18 19:05 Worklog Time Spent: 10m Work Description: lostluck commented on issue #5885: [BEAM-4727] Improve metric lookup overhead for predeclared metrics URL: https://github.com/apache/beam/pull/5885#issuecomment-403121250 PTAL @wcn3 This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 119989) Time Spent: 1h 40m (was: 1.5h) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 1h 40m > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=119988&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-119988 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 06/Jul/18 19:05 Start Date: 06/Jul/18 19:05 Worklog Time Spent: 10m Work Description: lostluck commented on issue #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884#issuecomment-403121189 PTAL @wcn3 This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 119988) Time Spent: 1.5h (was: 1h 20m) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 1.5h > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=119981&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-119981 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 06/Jul/18 18:51 Start Date: 06/Jul/18 18:51 Worklog Time Spent: 10m Work Description: lostluck commented on a change in pull request #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884#discussion_r200740947 ## File path: sdks/go/pkg/beam/core/metrics/metrics.go ## @@ -69,18 +69,58 @@ type ctxKey string const bundleKey ctxKey = "beam:bundle" const ptransformKey ctxKey = "beam:ptransform" +// context is a caching context since reads & writes are expensive. +type beamCtx struct { + context.Context + bundleID, ptransformID string +} + +// Lift the keys value for faster lookups when not available. +func (ctx *beamCtx) Value(key interface{}) interface{} { + switch key { + case bundleKey: + if ctx.bundleID == "" { + if id := ctx.Value(key); id != nil { + ctx.bundleID = id.(string) + } + } + return ctx.bundleID + case ptransformKey: + if ctx.ptransformID == "" { + if id := ctx.Value(key); id != nil { + ctx.ptransformID = id.(string) + } + } + return ctx.ptransformID + } + return ctx.Context.Value(key) +} + // SetBundleID sets the id of the current Bundle. func SetBundleID(ctx context.Context, id string) context.Context { - return context.WithValue(ctx, bundleKey, id) + var ptransformID string + if bctx, ok := ctx.(*beamCtx); ok { Review comment: This is not an error. BundleID will in practice always be set before any ptransformID, but I don't like making that assumption. This approach is the "silently not have a ptransformId" variant. If this isn't a beamCtx that's OK, calling this method returns a beamCtx. If it's not a beamCtx, the Value method is invoked. If a beamCtx is anywhere on the stack, eventually the beamCtx.Value method will be called, which then delegates down the stack if the ctx doesn't have the value cached already, and then it caches it for later to prevent it from going deeper. The necessity of the cache is partly mitigated by retaining an earlier configured context in the execution node, but the cache is valuable in avoiding unnecessary type assertion overhead. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 119981) Time Spent: 1h 20m (was: 1h 10m) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 1h 20m > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=119974&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-119974 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 06/Jul/18 18:41 Start Date: 06/Jul/18 18:41 Worklog Time Spent: 10m Work Description: lostluck commented on a change in pull request #5885: [BEAM-4727] Improve metric lookup overhead for predeclared metrics URL: https://github.com/apache/beam/pull/5885#discussion_r200738445 ## File path: sdks/go/pkg/beam/core/metrics/metrics.go ## @@ -211,36 +268,57 @@ func (m *counter) toProto() *fnexecution_v1.Metrics_User { // Distribution is a simple distribution of values. type Distribution struct { name name + + // The following are a fast cache of the key and storage + mu sync.Mutex + k key + d *distribution } -func (m Distribution) String() string { +func (m *Distribution) String() string { return fmt.Sprintf("Distribution metric %s", m.name) } // NewDistribution returns the Distribution with the given namespace and name. -func NewDistribution(ns, n string) Distribution { +func NewDistribution(ns, n string) *Distribution { mn := newName(ns, n) - return Distribution{ + return &Distribution{ name: mn, } } // Update updates the distribution within the given PTransform context with v. -func (m Distribution) Update(ctx context.Context, v int64) { +func (m *Distribution) Update(ctx context.Context, v int64) { key := getContextKey(ctx, m.name) ds := &distribution{ count: 1, sum: v, min: v, max: v, } - if m, loaded := distributions.LoadOrStore(key, ds); loaded { - d := m.(*distribution) + // if "collisions.distribution" == m.name.String() { Review comment: Done. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 119974) Time Spent: 1h 10m (was: 1h) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 1h 10m > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=119973&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-119973 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 06/Jul/18 18:32 Start Date: 06/Jul/18 18:32 Worklog Time Spent: 10m Work Description: wcn3 commented on a change in pull request #5885: [BEAM-4727] Improve metric lookup overhead for predeclared metrics URL: https://github.com/apache/beam/pull/5885#discussion_r200736367 ## File path: sdks/go/pkg/beam/core/metrics/metrics.go ## @@ -211,36 +268,57 @@ func (m *counter) toProto() *fnexecution_v1.Metrics_User { // Distribution is a simple distribution of values. type Distribution struct { name name + + // The following are a fast cache of the key and storage + mu sync.Mutex + k key + d *distribution } -func (m Distribution) String() string { +func (m *Distribution) String() string { return fmt.Sprintf("Distribution metric %s", m.name) } // NewDistribution returns the Distribution with the given namespace and name. -func NewDistribution(ns, n string) Distribution { +func NewDistribution(ns, n string) *Distribution { mn := newName(ns, n) - return Distribution{ + return &Distribution{ name: mn, } } // Update updates the distribution within the given PTransform context with v. -func (m Distribution) Update(ctx context.Context, v int64) { +func (m *Distribution) Update(ctx context.Context, v int64) { key := getContextKey(ctx, m.name) ds := &distribution{ count: 1, sum: v, min: v, max: v, } - if m, loaded := distributions.LoadOrStore(key, ds); loaded { - d := m.(*distribution) + // if "collisions.distribution" == m.name.String() { Review comment: Please remove this. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 119973) Time Spent: 1h (was: 50m) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 1h > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=119968&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-119968 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 06/Jul/18 18:30 Start Date: 06/Jul/18 18:30 Worklog Time Spent: 10m Work Description: wcn3 commented on a change in pull request #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884#discussion_r200735970 ## File path: sdks/go/pkg/beam/core/metrics/metrics.go ## @@ -69,18 +69,58 @@ type ctxKey string const bundleKey ctxKey = "beam:bundle" const ptransformKey ctxKey = "beam:ptransform" +// context is a caching context since reads & writes are expensive. +type beamCtx struct { + context.Context + bundleID, ptransformID string +} + +// Lift the keys value for faster lookups when not available. +func (ctx *beamCtx) Value(key interface{}) interface{} { + switch key { + case bundleKey: + if ctx.bundleID == "" { + if id := ctx.Value(key); id != nil { + ctx.bundleID = id.(string) + } + } + return ctx.bundleID + case ptransformKey: + if ctx.ptransformID == "" { + if id := ctx.Value(key); id != nil { + ctx.ptransformID = id.(string) + } + } + return ctx.ptransformID + } + return ctx.Context.Value(key) +} + // SetBundleID sets the id of the current Bundle. func SetBundleID(ctx context.Context, id string) context.Context { - return context.WithValue(ctx, bundleKey, id) + var ptransformID string + if bctx, ok := ctx.(*beamCtx); ok { Review comment: Or is this the way it is because we don't want strict ordering between SetBundleID and SetPTransformID? If that's the case, that's worth documenting. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 119968) Time Spent: 50m (was: 40m) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 50m > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=119969&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-119969 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 06/Jul/18 18:30 Start Date: 06/Jul/18 18:30 Worklog Time Spent: 10m Work Description: wcn3 commented on a change in pull request #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884#discussion_r200735666 ## File path: sdks/go/pkg/beam/core/metrics/metrics.go ## @@ -69,18 +69,58 @@ type ctxKey string const bundleKey ctxKey = "beam:bundle" const ptransformKey ctxKey = "beam:ptransform" +// context is a caching context since reads & writes are expensive. +type beamCtx struct { + context.Context + bundleID, ptransformID string +} + +// Lift the keys value for faster lookups when not available. +func (ctx *beamCtx) Value(key interface{}) interface{} { + switch key { + case bundleKey: + if ctx.bundleID == "" { + if id := ctx.Value(key); id != nil { + ctx.bundleID = id.(string) + } + } + return ctx.bundleID + case ptransformKey: + if ctx.ptransformID == "" { + if id := ctx.Value(key); id != nil { + ctx.ptransformID = id.(string) + } + } + return ctx.ptransformID + } + return ctx.Context.Value(key) +} + // SetBundleID sets the id of the current Bundle. func SetBundleID(ctx context.Context, id string) context.Context { - return context.WithValue(ctx, bundleKey, id) + var ptransformID string + if bctx, ok := ctx.(*beamCtx); ok { Review comment: This doesn't handle the case where the provided context isn't the beamCtx, and accordingly ptransformID wouldn't be set. Is this an error? Should we just silently not have a ptransformID? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 119969) Time Spent: 50m (was: 40m) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 50m > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=118910&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-118910 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 03/Jul/18 23:45 Start Date: 03/Jul/18 23:45 Worklog Time Spent: 10m Work Description: lostluck commented on issue #5885: [BEAM-4727] Improve metric lookup overhead for predeclared metrics URL: https://github.com/apache/beam/pull/5885#issuecomment-402322315 R: @herohde This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 118910) Time Spent: 40m (was: 0.5h) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 40m > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=118908&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-118908 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 03/Jul/18 23:45 Start Date: 03/Jul/18 23:45 Worklog Time Spent: 10m Work Description: lostluck commented on issue #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884#issuecomment-402322294 R: @herohde This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 118908) Time Spent: 0.5h (was: 20m) > Reduce metrics overhead > --- > > Key: BEAM-4727 > URL: https://issues.apache.org/jira/browse/BEAM-4727 > Project: Beam > Issue Type: Sub-task > Components: sdk-go >Reporter: Robert Burke >Assignee: Robert Burke >Priority: Major > Time Spent: 0.5h > Remaining Estimate: 0h > > There are a few opportunities to avoid metrics overhead. > First when setting state in the context, we allocate a new one for the stored > value, per element. This generates a fair amount of objects for the garbage > collector too. If we retain and re-use contexts within a bundle, we would > have the opportunity to save on these costs. > Also, it's possible that we have overhead on the metric updating paths. We > can possibly do better than the general sync.Map, and avoid the type > assertion cost for extracting values of known types from the maps. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=118887&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-118887 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 03/Jul/18 22:48 Start Date: 03/Jul/18 22:48 Worklog Time Spent: 10m Work Description: lostluck opened a new pull request #5885: [BEAM-4727] Improve metric lookup overhead for predeclared metrics URL: https://github.com/apache/beam/pull/5885 Introduces a performance for predeclared metrics when they're used within a single ptransform by caching the storage unit associated with them. Switched away from using sync.Map in favour of maps with locks. sync.Map doesn't handle high contention very well. Experimented with changing from a struct key to a string key, but string construction time was worse than directly using the struct. It remains unchanged. Builds on #5884 Run on @lostluck's desktop: Before: BenchmarkMetrics/counter_inplace-12 1000 226 ns/op 80 B/op 2 allocs/op BenchmarkMetrics/distribution_inplace-12 1000 231 ns/op 112 B/op 2 allocs/op BenchmarkMetrics/gauge_inplace-12 500 252 ns/op 112 B/op 2 allocs/op BenchmarkMetrics/counter_predeclared-12 1000 222 ns/op 80 B/op 2 allocs/op BenchmarkMetrics/distribution_predeclared-12 1000 228 ns/op 112 B/op 2 allocs/op BenchmarkMetrics/gauge_predeclared-12 500 247 ns/op 112 B/op 2 allocs/op After: BenchmarkMetrics/counter_inplace-12 500 243 ns/op 128 B/op 2 allocs/op BenchmarkMetrics/distribution_inplace-12 500 252 ns/op 160 B/op 2 allocs/op BenchmarkMetrics/gauge_inplace-12500 266 ns/op 160 B/op 2 allocs/op BenchmarkMetrics/counter_predeclared-12 2000 74.3 ns/op16 B/op 1 allocs/op BenchmarkMetrics/distribution_predeclared-122000 79.6 ns/op48 B/op 1 allocs/op BenchmarkMetrics/gauge_predeclared-12 2000 92.9 ns/op48 B/op 1 allocs/op Follow this checklist to help us incorporate your contribution quickly and easily: - [ ] Format the pull request title like `[BEAM-XXX] Fixes bug in ApproximateQuantiles`, where you replace `BEAM-XXX` with the appropriate JIRA issue, if applicable. This will automatically link the pull request to the issue. - [ ] If this contribution is large, please file an Apache [Individual Contributor License Agreement](https://www.apache.org/licenses/icla.pdf). It will help us expedite review of your Pull Request if you tag someone (e.g. `@username`) to look at it. Post-Commit Tests Status (on master branch) Lang | SDK | Apex | Dataflow | Flink | Gearpump | Samza | Spark --- | --- | --- | --- | --- | --- | --- | --- Go | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go_GradleBuild/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_GradleBuild/lastCompletedBuild/) | --- | --- | --- | --- | --- | --- Java | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_GradleBuild/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_GradleBuild/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex_Gradle/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Gradle/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Gradle/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump_Gradle/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/b
[jira] [Work logged] (BEAM-4727) Reduce metrics overhead
[ https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=118843&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-118843 ] ASF GitHub Bot logged work on BEAM-4727: Author: ASF GitHub Bot Created on: 03/Jul/18 21:29 Start Date: 03/Jul/18 21:29 Worklog Time Spent: 10m Work Description: lostluck opened a new pull request #5884: [BEAM-4727] Re-use metric context throughout bundle URL: https://github.com/apache/beam/pull/5884 There's overhead in re-allocating a context for every element, and since the Go SDK doesn't modify contexts at present, except to assign identifiers for metrics collection, avoiding the re-allocation costs this way seems reasonable. Builds off of #5882 Before: BenchmarkParDo_EmitSumFn-12 100 1202 ns/op 529 B/op After: BenchmarkParDo_EmitSumFn-12 100 1030 ns/op 481 B/op 3 allocs/op Follow this checklist to help us incorporate your contribution quickly and easily: - [ ] Format the pull request title like `[BEAM-XXX] Fixes bug in ApproximateQuantiles`, where you replace `BEAM-XXX` with the appropriate JIRA issue, if applicable. This will automatically link the pull request to the issue. - [ ] If this contribution is large, please file an Apache [Individual Contributor License Agreement](https://www.apache.org/licenses/icla.pdf). It will help us expedite review of your Pull Request if you tag someone (e.g. `@username`) to look at it. Post-Commit Tests Status (on master branch) Lang | SDK | Apex | Dataflow | Flink | Gearpump | Samza | Spark --- | --- | --- | --- | --- | --- | --- | --- Go | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go_GradleBuild/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_GradleBuild/lastCompletedBuild/) | --- | --- | --- | --- | --- | --- Java | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_GradleBuild/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_GradleBuild/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex_Gradle/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Gradle/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Gradle/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump_Gradle/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza_Gradle/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark_Gradle/lastCompletedBuild/) Python | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Python_Verify/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Python_Verify/lastCompletedBuild/) | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Py_VR_Dataflow/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Py_VR_Dataflow/lastCompletedBuild/) [![Build Status](https://builds.apache.org/job/beam_PostCommit_Py_ValCont/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Py_ValCont/lastCompletedBuild/) | --- | --- | --- | --- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 118843) Time Spent: 10m Remaining Estimate: 0h > Reduce metrics overhead > --- > > Key: BEAM-