[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-08-02 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=130588&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130588
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 02/Aug/18 18:57
Start Date: 02/Aug/18 18:57
Worklog Time Spent: 10m 
  Work Description: herohde closed pull request #5884: [BEAM-4727] Re-use 
metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/sdks/go/pkg/beam/core/metrics/metrics.go 
b/sdks/go/pkg/beam/core/metrics/metrics.go
index 1899cc89971..d00f1f04afd 100644
--- a/sdks/go/pkg/beam/core/metrics/metrics.go
+++ b/sdks/go/pkg/beam/core/metrics/metrics.go
@@ -69,14 +69,53 @@ type ctxKey string
 const bundleKey ctxKey = "beam:bundle"
 const ptransformKey ctxKey = "beam:ptransform"
 
+// beamCtx is a caching context for IDs necessary to place metric updates.
+//  Allocating contexts and searching for PTransformIDs for every element
+// is expensive, so we avoid it if possible.
+type beamCtx struct {
+   context.Context
+   bundleID, ptransformID string
+}
+
+// Value lifts the beam contLift the keys value for faster lookups when not 
available.
+func (ctx *beamCtx) Value(key interface{}) interface{} {
+   switch key {
+   case bundleKey:
+   if ctx.bundleID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.bundleID = id.(string)
+   }
+   }
+   return ctx.bundleID
+   case ptransformKey:
+   if ctx.ptransformID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.ptransformID = id.(string)
+   }
+   }
+   return ctx.ptransformID
+   }
+   return ctx.Context.Value(key)
+}
+
 // SetBundleID sets the id of the current Bundle.
 func SetBundleID(ctx context.Context, id string) context.Context {
-   return context.WithValue(ctx, bundleKey, id)
+   // Checking for *beamCtx is an optimization, so we don't dig deeply
+   // for ids if not necessary.
+   if bctx, ok := ctx.(*beamCtx); ok {
+   return &beamCtx{Context: bctx.Context, bundleID: id, 
ptransformID: bctx.ptransformID}
+   }
+   return &beamCtx{Context: ctx, bundleID: id}
 }
 
 // SetPTransformID sets the id of the current PTransform.
 func SetPTransformID(ctx context.Context, id string) context.Context {
-   return context.WithValue(ctx, ptransformKey, id)
+   // Checking for *beamCtx is an optimization, so we don't dig deeply
+   // for ids if not necessary.
+   if bctx, ok := ctx.(*beamCtx); ok {
+   return &beamCtx{Context: bctx.Context, bundleID: bctx.bundleID, 
ptransformID: id}
+   }
+   return &beamCtx{Context: ctx, ptransformID: id}
 }
 
 func getContextKey(ctx context.Context, n name) key {
diff --git a/sdks/go/pkg/beam/core/runtime/exec/pardo.go 
b/sdks/go/pkg/beam/core/runtime/exec/pardo.go
index 41da9f241fa..f0e62cc3b81 100644
--- a/sdks/go/pkg/beam/core/runtime/exec/pardo.go
+++ b/sdks/go/pkg/beam/core/runtime/exec/pardo.go
@@ -45,8 +45,8 @@ type ParDo struct {
 
status Status
errerrorx.GuardedError
-
-   inv *invoker
+   ctxcontext.Context
+   inv*invoker
 }
 
 func (n *ParDo) ID() UnitID {
@@ -71,8 +71,12 @@ func (n *ParDo) StartBundle(ctx context.Context, id string, 
data DataManager) er
return fmt.Errorf("invalid status for pardo %v: %v, want Up", 
n.UID, n.status)
}
n.status = Active
+   // Allocating contexts all the time is expensive, but we seldom 
re-write them,
+   // and never accept modified contexts from users, so we will cache them 
per-bundle
+   // per-unit, to avoid the constant allocation overhead.
+   n.ctx = metrics.SetPTransformID(ctx, n.PID)
 
-   if err := MultiStartBundle(ctx, id, data, n.Out...); err != nil {
+   if err := MultiStartBundle(n.ctx, id, data, n.Out...); err != nil {
return n.fail(err)
}
 
@@ -87,7 +91,7 @@ func (n *ParDo) StartBundle(ctx context.Context, id string, 
data DataManager) er
 
// TODO(BEAM-3303): what to set for StartBundle/FinishBundle emitter 
timestamp?
 
-   if _, err := n.invokeDataFn(ctx, window.SingleGlobalWindow, 
mtime.ZeroTimestamp, n.Fn.StartBundleFn(), nil); err != nil {
+   if _, err := n.invokeDataFn(n.ctx, window.SingleGlobalWindow, 
mtime.ZeroTimestamp, n.Fn.StartBundleFn(), nil); err != nil {
return n.fail(err)
}

[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-08-02 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=130587&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130587
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 02/Aug/18 18:56
Start Date: 02/Aug/18 18:56
Worklog Time Spent: 10m 
  Work Description: herohde commented on a change in pull request #5884: 
[BEAM-4727] Re-use metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884#discussion_r203970121
 
 

 ##
 File path: sdks/go/pkg/beam/core/metrics/metrics.go
 ##
 @@ -69,18 +69,58 @@ type ctxKey string
 const bundleKey ctxKey = "beam:bundle"
 const ptransformKey ctxKey = "beam:ptransform"
 
+// beamCtx is a caching context for necessary IDs. Allocating contexts and 
searching
+// for PTransformIDs for every element is expensive, so we avoid it where 
possible.
+type beamCtx struct {
+   context.Context
+   bundleID, ptransformID string
+}
+
+// Value lifts the beam contLift the keys value for faster lookups when not 
available.
+func (ctx *beamCtx) Value(key interface{}) interface{} {
+   switch key {
+   case bundleKey:
+   if ctx.bundleID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.bundleID = id.(string)
+   }
+   }
+   return ctx.bundleID
+   case ptransformKey:
+   if ctx.ptransformID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.ptransformID = id.(string)
+   }
+   }
+   return ctx.ptransformID
+   }
+   return ctx.Context.Value(key)
+}
+
 // SetBundleID sets the id of the current Bundle.
 func SetBundleID(ctx context.Context, id string) context.Context {
-   return context.WithValue(ctx, bundleKey, id)
+   if bctx, ok := ctx.(*beamCtx); ok {
+   return &beamCtx{Context: bctx, bundleID: id, ptransformID:  
bctx.ptransformID}
 
 Review comment:
   We can use "bctx.context" here is shorted the chain.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 130587)
Time Spent: 3h 50m  (was: 3h 40m)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 3h 50m
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-08-02 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=130580&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-130580
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 02/Aug/18 18:46
Start Date: 02/Aug/18 18:46
Worklog Time Spent: 10m 
  Work Description: lostluck commented on issue #5884: [BEAM-4727] Re-use 
metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884#issuecomment-410029391
 
 
   Ping @herohde; I think I've covered all your comments in this iteration.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 130580)
Time Spent: 3h 40m  (was: 3.5h)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 3h 40m
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-31 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=129430&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-129430
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 31/Jul/18 18:34
Start Date: 31/Jul/18 18:34
Worklog Time Spent: 10m 
  Work Description: lostluck commented on a change in pull request #5884: 
[BEAM-4727] Re-use metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884#discussion_r206610503
 
 

 ##
 File path: sdks/go/pkg/beam/core/metrics/metrics.go
 ##
 @@ -69,18 +69,58 @@ type ctxKey string
 const bundleKey ctxKey = "beam:bundle"
 const ptransformKey ctxKey = "beam:ptransform"
 
+// context is a caching context since reads & writes are expensive.
+type beamCtx struct {
+   context.Context
+   bundleID, ptransformID string
+}
+
+// Lift the keys value for faster lookups when not available.
+func (ctx *beamCtx) Value(key interface{}) interface{} {
+   switch key {
+   case bundleKey:
+   if ctx.bundleID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.bundleID = id.(string)
+   }
+   }
+   return ctx.bundleID
+   case ptransformKey:
+   if ctx.ptransformID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.ptransformID = id.(string)
+   }
+   }
+   return ctx.ptransformID
+   }
+   return ctx.Context.Value(key)
+}
+
 // SetBundleID sets the id of the current Bundle.
 func SetBundleID(ctx context.Context, id string) context.Context {
-   return context.WithValue(ctx, bundleKey, id)
+   var ptransformID string
+   if bctx, ok := ctx.(*beamCtx); ok {
+   ptransformID = bctx.ptransformID
+   }
+   return &beamCtx{Context: ctx, bundleID: id, ptransformID: ptransformID}
 
 Review comment:
   Done.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 129430)
Time Spent: 3h 20m  (was: 3h 10m)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 3h 20m
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-31 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=129431&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-129431
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 31/Jul/18 18:34
Start Date: 31/Jul/18 18:34
Worklog Time Spent: 10m 
  Work Description: lostluck commented on issue #5884: [BEAM-4727] Re-use 
metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884#issuecomment-409323843
 
 
   PTAL @herohde 


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 129431)
Time Spent: 3.5h  (was: 3h 20m)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 3.5h
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-31 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=129427&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-129427
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 31/Jul/18 18:34
Start Date: 31/Jul/18 18:34
Worklog Time Spent: 10m 
  Work Description: lostluck commented on a change in pull request #5884: 
[BEAM-4727] Re-use metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884#discussion_r206609509
 
 

 ##
 File path: sdks/go/pkg/beam/core/metrics/metrics.go
 ##
 @@ -69,18 +69,58 @@ type ctxKey string
 const bundleKey ctxKey = "beam:bundle"
 const ptransformKey ctxKey = "beam:ptransform"
 
+// context is a caching context since reads & writes are expensive.
 
 Review comment:
   Done.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 129427)
Time Spent: 2h 50m  (was: 2h 40m)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 2h 50m
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-31 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=129428&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-129428
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 31/Jul/18 18:34
Start Date: 31/Jul/18 18:34
Worklog Time Spent: 10m 
  Work Description: lostluck commented on a change in pull request #5884: 
[BEAM-4727] Re-use metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884#discussion_r206610122
 
 

 ##
 File path: sdks/go/pkg/beam/core/metrics/metrics.go
 ##
 @@ -69,18 +69,58 @@ type ctxKey string
 const bundleKey ctxKey = "beam:bundle"
 const ptransformKey ctxKey = "beam:ptransform"
 
+// context is a caching context since reads & writes are expensive.
+type beamCtx struct {
+   context.Context
+   bundleID, ptransformID string
+}
+
+// Lift the keys value for faster lookups when not available.
+func (ctx *beamCtx) Value(key interface{}) interface{} {
+   switch key {
+   case bundleKey:
+   if ctx.bundleID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.bundleID = id.(string)
+   }
+   }
+   return ctx.bundleID
+   case ptransformKey:
+   if ctx.ptransformID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.ptransformID = id.(string)
+   }
+   }
+   return ctx.ptransformID
+   }
+   return ctx.Context.Value(key)
+}
+
 // SetBundleID sets the id of the current Bundle.
 func SetBundleID(ctx context.Context, id string) context.Context {
-   return context.WithValue(ctx, bundleKey, id)
+   var ptransformID string
+   if bctx, ok := ctx.(*beamCtx); ok {
 
 Review comment:
   Done.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 129428)
Time Spent: 3h  (was: 2h 50m)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 3h
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-31 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=129429&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-129429
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 31/Jul/18 18:34
Start Date: 31/Jul/18 18:34
Worklog Time Spent: 10m 
  Work Description: lostluck commented on a change in pull request #5884: 
[BEAM-4727] Re-use metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884#discussion_r206611230
 
 

 ##
 File path: sdks/go/pkg/beam/core/metrics/metrics.go
 ##
 @@ -69,18 +69,58 @@ type ctxKey string
 const bundleKey ctxKey = "beam:bundle"
 const ptransformKey ctxKey = "beam:ptransform"
 
+// context is a caching context since reads & writes are expensive.
+type beamCtx struct {
+   context.Context
+   bundleID, ptransformID string
+}
+
+// Lift the keys value for faster lookups when not available.
+func (ctx *beamCtx) Value(key interface{}) interface{} {
+   switch key {
+   case bundleKey:
+   if ctx.bundleID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.bundleID = id.(string)
+   }
+   }
+   return ctx.bundleID
+   case ptransformKey:
+   if ctx.ptransformID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.ptransformID = id.(string)
+   }
+   }
+   return ctx.ptransformID
+   }
+   return ctx.Context.Value(key)
+}
+
 // SetBundleID sets the id of the current Bundle.
 func SetBundleID(ctx context.Context, id string) context.Context {
-   return context.WithValue(ctx, bundleKey, id)
+   var ptransformID string
+   if bctx, ok := ctx.(*beamCtx); ok {
+   ptransformID = bctx.ptransformID
+   }
+   return &beamCtx{Context: ctx, bundleID: id, ptransformID: ptransformID}
 }
 
 // SetPTransformID sets the id of the current PTransform.
 func SetPTransformID(ctx context.Context, id string) context.Context {
-   return context.WithValue(ctx, ptransformKey, id)
+   var bundleID string
+   if bctx, ok := ctx.(*beamCtx); ok {
+   bundleID = bctx.bundleID
+   }
+   return &beamCtx{Context: ctx, bundleID: bundleID, ptransformID: id}
 }
 
 func getContextKey(ctx context.Context, n name) key {
key := key{name: n, bundle: "(bundle id unset)", ptransform: 
"(ptransform id unset)"}
+   if bctx, ok := ctx.(*beamCtx); ok {
 
 Review comment:
   I think you're right. Removing this block doesn't hurt performance (measured 
via the pardo benchmark I added) and is too subtle. Removed!


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 129429)
Time Spent: 3h 10m  (was: 3h)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 3h 10m
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-11 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=122039&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-122039
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 11/Jul/18 20:43
Start Date: 11/Jul/18 20:43
Worklog Time Spent: 10m 
  Work Description: herohde closed pull request #5885: [BEAM-4727] Improve 
metric lookup overhead for predeclared metrics
URL: https://github.com/apache/beam/pull/5885
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/sdks/go/pkg/beam/core/metrics/metrics.go 
b/sdks/go/pkg/beam/core/metrics/metrics.go
index 73ef73a36b6..1899cc89971 100644
--- a/sdks/go/pkg/beam/core/metrics/metrics.go
+++ b/sdks/go/pkg/beam/core/metrics/metrics.go
@@ -130,12 +130,12 @@ var (
// simplifies code understanding, since each only contains a single 
type of
// cell.
 
-   // counters is a map[key]*counter
-   counters = sync.Map{}
-   // distributions is a map[key]*distribution
-   distributions = sync.Map{}
-   // gauges is a map[key]*gauge
-   gauges = sync.Map{}
+   countersMu  sync.RWMutex
+   counters= make(map[key]*counter)
+   distributionsMu sync.RWMutex
+   distributions   = make(map[key]*distribution)
+   gaugesMusync.RWMutex
+   gauges  = make(map[key]*gauge)
 )
 
 // TODO(lostluck): 2018/03/05 Use a common internal beam now() instead, once 
that exists.
@@ -144,37 +144,54 @@ var now = time.Now
 // Counter is a simple counter for incrementing and decrementing a value.
 type Counter struct {
name name
+   // The following are a fast cache of the key and storage
+   mu sync.Mutex
+   k  key
+   c  *counter
 }
 
-func (m Counter) String() string {
+func (m *Counter) String() string {
return fmt.Sprintf("Counter metric %s", m.name)
 }
 
 // NewCounter returns the Counter with the given namespace and name.
-func NewCounter(ns, n string) Counter {
+func NewCounter(ns, n string) *Counter {
mn := newName(ns, n)
-   return Counter{
+   return &Counter{
name: mn,
}
 }
 
 // Inc increments the counter within the given PTransform context by v.
-func (m Counter) Inc(ctx context.Context, v int64) {
+func (m *Counter) Inc(ctx context.Context, v int64) {
key := getContextKey(ctx, m.name)
cs := &counter{
value: v,
}
-   if m, loaded := counters.LoadOrStore(key, cs); loaded {
-   c := m.(*counter)
+   m.mu.Lock()
+   if m.k == key {
+   m.c.inc(v)
+   m.mu.Unlock()
+   return
+   }
+   m.k = key
+   countersMu.Lock()
+   if c, loaded := counters[key]; loaded {
+   m.c = c
+   countersMu.Unlock()
+   m.mu.Unlock()
c.inc(v)
-   } else {
-   c := m.(*counter)
-   storeMetric(key, c)
+   return
}
+   m.c = cs
+   counters[key] = cs
+   countersMu.Unlock()
+   m.mu.Unlock()
+   storeMetric(key, cs)
 }
 
 // Dec decrements the counter within the given PTransform context by v.
-func (m Counter) Dec(ctx context.Context, v int64) {
+func (m *Counter) Dec(ctx context.Context, v int64) {
m.Inc(ctx, -v)
 }
 
@@ -211,22 +228,27 @@ func (m *counter) toProto() *fnexecution_v1.Metrics_User {
 // Distribution is a simple distribution of values.
 type Distribution struct {
name name
+
+   // The following are a fast cache of the key and storage
+   mu sync.Mutex
+   k  key
+   d  *distribution
 }
 
-func (m Distribution) String() string {
+func (m *Distribution) String() string {
return fmt.Sprintf("Distribution metric %s", m.name)
 }
 
 // NewDistribution returns the Distribution with the given namespace and name.
-func NewDistribution(ns, n string) Distribution {
+func NewDistribution(ns, n string) *Distribution {
mn := newName(ns, n)
-   return Distribution{
+   return &Distribution{
name: mn,
}
 }
 
 // Update updates the distribution within the given PTransform context with v.
-func (m Distribution) Update(ctx context.Context, v int64) {
+func (m *Distribution) Update(ctx context.Context, v int64) {
key := getContextKey(ctx, m.name)
ds := &distribution{
count: 1,
@@ -234,13 +256,26 @@ func (m Distribution) Update(ctx context.Context, v 
int64) {
min:   v,
max:   v,
}
-   if m, loaded := distributions.LoadOrStore(key, ds); loaded {
-   d := m.(*distribution)

[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-11 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=122032&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-122032
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 11/Jul/18 20:18
Start Date: 11/Jul/18 20:18
Worklog Time Spent: 10m 
  Work Description: herohde commented on a change in pull request #5884: 
[BEAM-4727] Re-use metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884#discussion_r201826123
 
 

 ##
 File path: sdks/go/pkg/beam/core/metrics/metrics.go
 ##
 @@ -69,18 +69,58 @@ type ctxKey string
 const bundleKey ctxKey = "beam:bundle"
 const ptransformKey ctxKey = "beam:ptransform"
 
+// context is a caching context since reads & writes are expensive.
+type beamCtx struct {
+   context.Context
+   bundleID, ptransformID string
+}
+
+// Lift the keys value for faster lookups when not available.
+func (ctx *beamCtx) Value(key interface{}) interface{} {
+   switch key {
+   case bundleKey:
+   if ctx.bundleID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.bundleID = id.(string)
+   }
+   }
+   return ctx.bundleID
+   case ptransformKey:
+   if ctx.ptransformID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.ptransformID = id.(string)
+   }
+   }
+   return ctx.ptransformID
+   }
+   return ctx.Context.Value(key)
+}
+
 // SetBundleID sets the id of the current Bundle.
 func SetBundleID(ctx context.Context, id string) context.Context {
-   return context.WithValue(ctx, bundleKey, id)
+   var ptransformID string
+   if bctx, ok := ctx.(*beamCtx); ok {
+   ptransformID = bctx.ptransformID
+   }
+   return &beamCtx{Context: ctx, bundleID: id, ptransformID: ptransformID}
 
 Review comment:
   I was unclear. I was suggesting a simple improvement to these helpers avoid 
nesting beamCtxs by adding a sibling to the input chain of a beamCtx:
   
   func SetBundleID(ctx context.Context, id string) context.Context {
if bctx, ok := ctx.(*beamCtx); ok {
return &beamCtx{Context: bctx.ctx, bundleID: id, ptransformID: 
bctx.ptransformID}
}
return &beamCtx{Context: ctx, bundleID: id}
   }


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 122032)
Time Spent: 2.5h  (was: 2h 20m)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 2.5h
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-11 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=122010&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-122010
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 11/Jul/18 18:32
Start Date: 11/Jul/18 18:32
Worklog Time Spent: 10m 
  Work Description: lostluck commented on issue #5885: [BEAM-4727] Improve 
metric lookup overhead for predeclared metrics
URL: https://github.com/apache/beam/pull/5885#issuecomment-404268081
 
 
   PTAL @herohde Removed the other commits. Just the relevant changes now.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 122010)
Time Spent: 2h 20m  (was: 2h 10m)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 2h 20m
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-11 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=121964&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121964
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 11/Jul/18 17:17
Start Date: 11/Jul/18 17:17
Worklog Time Spent: 10m 
  Work Description: lostluck commented on a change in pull request #5884: 
[BEAM-4727] Re-use metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884#discussion_r201773998
 
 

 ##
 File path: sdks/go/pkg/beam/core/metrics/metrics.go
 ##
 @@ -69,18 +69,58 @@ type ctxKey string
 const bundleKey ctxKey = "beam:bundle"
 const ptransformKey ctxKey = "beam:ptransform"
 
+// context is a caching context since reads & writes are expensive.
+type beamCtx struct {
+   context.Context
+   bundleID, ptransformID string
+}
+
+// Lift the keys value for faster lookups when not available.
+func (ctx *beamCtx) Value(key interface{}) interface{} {
+   switch key {
+   case bundleKey:
+   if ctx.bundleID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.bundleID = id.(string)
+   }
+   }
+   return ctx.bundleID
+   case ptransformKey:
+   if ctx.ptransformID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.ptransformID = id.(string)
+   }
+   }
+   return ctx.ptransformID
+   }
+   return ctx.Context.Value(key)
+}
+
 // SetBundleID sets the id of the current Bundle.
 func SetBundleID(ctx context.Context, id string) context.Context {
-   return context.WithValue(ctx, bundleKey, id)
+   var ptransformID string
+   if bctx, ok := ctx.(*beamCtx); ok {
+   ptransformID = bctx.ptransformID
+   }
+   return &beamCtx{Context: ctx, bundleID: id, ptransformID: ptransformID}
 
 Review comment:
   I considered that but then metrics following an emit would be incorrect, 
short of retaining the parent PTransform ID and re-adding that, which would 
work, but would be cumbersome. That approach is a viable alternative to caching 
the Context in pardo.go though as it accomplishes the same goal. 
   Do you prefer that approach?
   
   eg. re: correctness issue on ordering of event if the context isn't 
re-allocated:
   SDK sets PtransformID
   SDK calls User ParDo
   User ParDo1 emits - > SDK runs child ParDos which set their own PTransformIDs
   User ParDo1 logs a metric -> Metric is associated with the wrong PTransform.
   
   That is, we'd need to extract and re-assert the parent's PTransform ID 
before returning from the ParDo process elements.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 121964)
Time Spent: 2h 10m  (was: 2h)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 2h 10m
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-10 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=121647&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121647
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 10/Jul/18 21:59
Start Date: 10/Jul/18 21:59
Worklog Time Spent: 10m 
  Work Description: herohde commented on a change in pull request #5884: 
[BEAM-4727] Re-use metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884#discussion_r201507007
 
 

 ##
 File path: sdks/go/pkg/beam/core/metrics/metrics.go
 ##
 @@ -69,18 +69,58 @@ type ctxKey string
 const bundleKey ctxKey = "beam:bundle"
 const ptransformKey ctxKey = "beam:ptransform"
 
+// context is a caching context since reads & writes are expensive.
+type beamCtx struct {
+   context.Context
+   bundleID, ptransformID string
+}
+
+// Lift the keys value for faster lookups when not available.
+func (ctx *beamCtx) Value(key interface{}) interface{} {
+   switch key {
+   case bundleKey:
+   if ctx.bundleID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.bundleID = id.(string)
+   }
+   }
+   return ctx.bundleID
+   case ptransformKey:
+   if ctx.ptransformID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.ptransformID = id.(string)
+   }
+   }
+   return ctx.ptransformID
+   }
+   return ctx.Context.Value(key)
+}
+
 // SetBundleID sets the id of the current Bundle.
 func SetBundleID(ctx context.Context, id string) context.Context {
-   return context.WithValue(ctx, bundleKey, id)
+   var ptransformID string
+   if bctx, ok := ctx.(*beamCtx); ok {
+   ptransformID = bctx.ptransformID
+   }
+   return &beamCtx{Context: ctx, bundleID: id, ptransformID: ptransformID}
 
 Review comment:
   We can also use the beamCtx's context as Context here to avoid nesting 
beamCtxs.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 121647)
Time Spent: 1h 50m  (was: 1h 40m)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 1h 50m
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-10 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=121650&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121650
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 10/Jul/18 21:59
Start Date: 10/Jul/18 21:59
Worklog Time Spent: 10m 
  Work Description: herohde commented on a change in pull request #5884: 
[BEAM-4727] Re-use metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884#discussion_r201506572
 
 

 ##
 File path: sdks/go/pkg/beam/core/metrics/metrics.go
 ##
 @@ -69,18 +69,58 @@ type ctxKey string
 const bundleKey ctxKey = "beam:bundle"
 const ptransformKey ctxKey = "beam:ptransform"
 
+// context is a caching context since reads & writes are expensive.
+type beamCtx struct {
+   context.Context
+   bundleID, ptransformID string
+}
+
+// Lift the keys value for faster lookups when not available.
+func (ctx *beamCtx) Value(key interface{}) interface{} {
+   switch key {
+   case bundleKey:
+   if ctx.bundleID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.bundleID = id.(string)
+   }
+   }
+   return ctx.bundleID
+   case ptransformKey:
+   if ctx.ptransformID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.ptransformID = id.(string)
+   }
+   }
+   return ctx.ptransformID
+   }
+   return ctx.Context.Value(key)
+}
+
 // SetBundleID sets the id of the current Bundle.
 func SetBundleID(ctx context.Context, id string) context.Context {
-   return context.WithValue(ctx, bundleKey, id)
+   var ptransformID string
+   if bctx, ok := ctx.(*beamCtx); ok {
 
 Review comment:
   The semantics here is subtle and deserves a comment. I would suggest making 
a note that checking for beamCtx here is just an optimization. Always using "" 
as ptransformID would work as well.
   
   Ditto for the other set helper.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 121650)
Time Spent: 2h  (was: 1h 50m)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 2h
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-10 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=121649&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121649
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 10/Jul/18 21:59
Start Date: 10/Jul/18 21:59
Worklog Time Spent: 10m 
  Work Description: herohde commented on a change in pull request #5884: 
[BEAM-4727] Re-use metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884#discussion_r201505368
 
 

 ##
 File path: sdks/go/pkg/beam/core/metrics/metrics.go
 ##
 @@ -69,18 +69,58 @@ type ctxKey string
 const bundleKey ctxKey = "beam:bundle"
 const ptransformKey ctxKey = "beam:ptransform"
 
+// context is a caching context since reads & writes are expensive.
+type beamCtx struct {
+   context.Context
+   bundleID, ptransformID string
+}
+
+// Lift the keys value for faster lookups when not available.
+func (ctx *beamCtx) Value(key interface{}) interface{} {
+   switch key {
+   case bundleKey:
+   if ctx.bundleID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.bundleID = id.(string)
+   }
+   }
+   return ctx.bundleID
+   case ptransformKey:
+   if ctx.ptransformID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.ptransformID = id.(string)
+   }
+   }
+   return ctx.ptransformID
+   }
+   return ctx.Context.Value(key)
+}
+
 // SetBundleID sets the id of the current Bundle.
 func SetBundleID(ctx context.Context, id string) context.Context {
-   return context.WithValue(ctx, bundleKey, id)
+   var ptransformID string
+   if bctx, ok := ctx.(*beamCtx); ok {
+   ptransformID = bctx.ptransformID
+   }
+   return &beamCtx{Context: ctx, bundleID: id, ptransformID: ptransformID}
 }
 
 // SetPTransformID sets the id of the current PTransform.
 func SetPTransformID(ctx context.Context, id string) context.Context {
-   return context.WithValue(ctx, ptransformKey, id)
+   var bundleID string
+   if bctx, ok := ctx.(*beamCtx); ok {
+   bundleID = bctx.bundleID
+   }
+   return &beamCtx{Context: ctx, bundleID: bundleID, ptransformID: id}
 }
 
 func getContextKey(ctx context.Context, n name) key {
key := key{name: n, bundle: "(bundle id unset)", ptransform: 
"(ptransform id unset)"}
+   if bctx, ok := ctx.(*beamCtx); ok {
 
 Review comment:
   This inserted block would be wrong, if we used context.WithValue on a 
beamCtx and went around the on-write caching. Is this block even needed given 
the behavior of beamCtx.Value as a genuine cache?
   
   At any rate, please document the behavior and assumptions of beamCtx. It 
feels subtle. Perhaps also add some test where we mix normal contexts and 
operations with beamCtx and its helpers. IMO it should be a semantic no-op.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 121649)
Time Spent: 2h  (was: 1h 50m)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 2h
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-10 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=121648&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-121648
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 10/Jul/18 21:59
Start Date: 10/Jul/18 21:59
Worklog Time Spent: 10m 
  Work Description: herohde commented on a change in pull request #5884: 
[BEAM-4727] Re-use metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884#discussion_r201483579
 
 

 ##
 File path: sdks/go/pkg/beam/core/metrics/metrics.go
 ##
 @@ -69,18 +69,58 @@ type ctxKey string
 const bundleKey ctxKey = "beam:bundle"
 const ptransformKey ctxKey = "beam:ptransform"
 
+// context is a caching context since reads & writes are expensive.
 
 Review comment:
   nit: beamCtx is ... Maybe also elaborate what it caches.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 121648)
Time Spent: 1h 50m  (was: 1h 40m)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 1h 50m
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-06 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=119989&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-119989
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 06/Jul/18 19:05
Start Date: 06/Jul/18 19:05
Worklog Time Spent: 10m 
  Work Description: lostluck commented on issue #5885: [BEAM-4727] Improve 
metric lookup overhead for predeclared metrics
URL: https://github.com/apache/beam/pull/5885#issuecomment-403121250
 
 
   PTAL @wcn3 


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 119989)
Time Spent: 1h 40m  (was: 1.5h)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 1h 40m
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-06 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=119988&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-119988
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 06/Jul/18 19:05
Start Date: 06/Jul/18 19:05
Worklog Time Spent: 10m 
  Work Description: lostluck commented on issue #5884: [BEAM-4727] Re-use 
metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884#issuecomment-403121189
 
 
   PTAL @wcn3 


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 119988)
Time Spent: 1.5h  (was: 1h 20m)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 1.5h
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-06 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=119981&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-119981
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 06/Jul/18 18:51
Start Date: 06/Jul/18 18:51
Worklog Time Spent: 10m 
  Work Description: lostluck commented on a change in pull request #5884: 
[BEAM-4727] Re-use metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884#discussion_r200740947
 
 

 ##
 File path: sdks/go/pkg/beam/core/metrics/metrics.go
 ##
 @@ -69,18 +69,58 @@ type ctxKey string
 const bundleKey ctxKey = "beam:bundle"
 const ptransformKey ctxKey = "beam:ptransform"
 
+// context is a caching context since reads & writes are expensive.
+type beamCtx struct {
+   context.Context
+   bundleID, ptransformID string
+}
+
+// Lift the keys value for faster lookups when not available.
+func (ctx *beamCtx) Value(key interface{}) interface{} {
+   switch key {
+   case bundleKey:
+   if ctx.bundleID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.bundleID = id.(string)
+   }
+   }
+   return ctx.bundleID
+   case ptransformKey:
+   if ctx.ptransformID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.ptransformID = id.(string)
+   }
+   }
+   return ctx.ptransformID
+   }
+   return ctx.Context.Value(key)
+}
+
 // SetBundleID sets the id of the current Bundle.
 func SetBundleID(ctx context.Context, id string) context.Context {
-   return context.WithValue(ctx, bundleKey, id)
+   var ptransformID string
+   if bctx, ok := ctx.(*beamCtx); ok {
 
 Review comment:
   This is not an error.
   BundleID will in practice always be set before any ptransformID, but I don't 
like making that assumption.
   This approach is the "silently not have a ptransformId" variant.
   
   If this isn't a beamCtx that's OK, calling this method returns a beamCtx.
   
   If it's not a beamCtx, the Value method is invoked. If a beamCtx is anywhere 
on the stack, eventually the beamCtx.Value method will be called, which then 
delegates down the stack if the ctx doesn't have the value cached already, and 
then it caches it for later to prevent it from going deeper.
   The necessity of the cache is partly mitigated by retaining an earlier 
configured context in the execution node, but the cache is valuable in avoiding 
unnecessary type assertion overhead.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 119981)
Time Spent: 1h 20m  (was: 1h 10m)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 1h 20m
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-06 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=119974&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-119974
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 06/Jul/18 18:41
Start Date: 06/Jul/18 18:41
Worklog Time Spent: 10m 
  Work Description: lostluck commented on a change in pull request #5885: 
[BEAM-4727] Improve metric lookup overhead for predeclared metrics
URL: https://github.com/apache/beam/pull/5885#discussion_r200738445
 
 

 ##
 File path: sdks/go/pkg/beam/core/metrics/metrics.go
 ##
 @@ -211,36 +268,57 @@ func (m *counter) toProto() *fnexecution_v1.Metrics_User 
{
 // Distribution is a simple distribution of values.
 type Distribution struct {
name name
+
+   // The following are a fast cache of the key and storage
+   mu sync.Mutex
+   k  key
+   d  *distribution
 }
 
-func (m Distribution) String() string {
+func (m *Distribution) String() string {
return fmt.Sprintf("Distribution metric %s", m.name)
 }
 
 // NewDistribution returns the Distribution with the given namespace and name.
-func NewDistribution(ns, n string) Distribution {
+func NewDistribution(ns, n string) *Distribution {
mn := newName(ns, n)
-   return Distribution{
+   return &Distribution{
name: mn,
}
 }
 
 // Update updates the distribution within the given PTransform context with v.
-func (m Distribution) Update(ctx context.Context, v int64) {
+func (m *Distribution) Update(ctx context.Context, v int64) {
key := getContextKey(ctx, m.name)
ds := &distribution{
count: 1,
sum:   v,
min:   v,
max:   v,
}
-   if m, loaded := distributions.LoadOrStore(key, ds); loaded {
-   d := m.(*distribution)
+   // if "collisions.distribution" == m.name.String() {
 
 Review comment:
   Done.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 119974)
Time Spent: 1h 10m  (was: 1h)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 1h 10m
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-06 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=119973&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-119973
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 06/Jul/18 18:32
Start Date: 06/Jul/18 18:32
Worklog Time Spent: 10m 
  Work Description: wcn3 commented on a change in pull request #5885: 
[BEAM-4727] Improve metric lookup overhead for predeclared metrics
URL: https://github.com/apache/beam/pull/5885#discussion_r200736367
 
 

 ##
 File path: sdks/go/pkg/beam/core/metrics/metrics.go
 ##
 @@ -211,36 +268,57 @@ func (m *counter) toProto() *fnexecution_v1.Metrics_User 
{
 // Distribution is a simple distribution of values.
 type Distribution struct {
name name
+
+   // The following are a fast cache of the key and storage
+   mu sync.Mutex
+   k  key
+   d  *distribution
 }
 
-func (m Distribution) String() string {
+func (m *Distribution) String() string {
return fmt.Sprintf("Distribution metric %s", m.name)
 }
 
 // NewDistribution returns the Distribution with the given namespace and name.
-func NewDistribution(ns, n string) Distribution {
+func NewDistribution(ns, n string) *Distribution {
mn := newName(ns, n)
-   return Distribution{
+   return &Distribution{
name: mn,
}
 }
 
 // Update updates the distribution within the given PTransform context with v.
-func (m Distribution) Update(ctx context.Context, v int64) {
+func (m *Distribution) Update(ctx context.Context, v int64) {
key := getContextKey(ctx, m.name)
ds := &distribution{
count: 1,
sum:   v,
min:   v,
max:   v,
}
-   if m, loaded := distributions.LoadOrStore(key, ds); loaded {
-   d := m.(*distribution)
+   // if "collisions.distribution" == m.name.String() {
 
 Review comment:
   Please remove this.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 119973)
Time Spent: 1h  (was: 50m)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 1h
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-06 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=119968&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-119968
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 06/Jul/18 18:30
Start Date: 06/Jul/18 18:30
Worklog Time Spent: 10m 
  Work Description: wcn3 commented on a change in pull request #5884: 
[BEAM-4727] Re-use metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884#discussion_r200735970
 
 

 ##
 File path: sdks/go/pkg/beam/core/metrics/metrics.go
 ##
 @@ -69,18 +69,58 @@ type ctxKey string
 const bundleKey ctxKey = "beam:bundle"
 const ptransformKey ctxKey = "beam:ptransform"
 
+// context is a caching context since reads & writes are expensive.
+type beamCtx struct {
+   context.Context
+   bundleID, ptransformID string
+}
+
+// Lift the keys value for faster lookups when not available.
+func (ctx *beamCtx) Value(key interface{}) interface{} {
+   switch key {
+   case bundleKey:
+   if ctx.bundleID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.bundleID = id.(string)
+   }
+   }
+   return ctx.bundleID
+   case ptransformKey:
+   if ctx.ptransformID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.ptransformID = id.(string)
+   }
+   }
+   return ctx.ptransformID
+   }
+   return ctx.Context.Value(key)
+}
+
 // SetBundleID sets the id of the current Bundle.
 func SetBundleID(ctx context.Context, id string) context.Context {
-   return context.WithValue(ctx, bundleKey, id)
+   var ptransformID string
+   if bctx, ok := ctx.(*beamCtx); ok {
 
 Review comment:
   Or is this the way it is because we don't want strict ordering between 
SetBundleID and SetPTransformID? If that's the case, that's worth documenting.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 119968)
Time Spent: 50m  (was: 40m)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 50m
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-06 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=119969&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-119969
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 06/Jul/18 18:30
Start Date: 06/Jul/18 18:30
Worklog Time Spent: 10m 
  Work Description: wcn3 commented on a change in pull request #5884: 
[BEAM-4727] Re-use metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884#discussion_r200735666
 
 

 ##
 File path: sdks/go/pkg/beam/core/metrics/metrics.go
 ##
 @@ -69,18 +69,58 @@ type ctxKey string
 const bundleKey ctxKey = "beam:bundle"
 const ptransformKey ctxKey = "beam:ptransform"
 
+// context is a caching context since reads & writes are expensive.
+type beamCtx struct {
+   context.Context
+   bundleID, ptransformID string
+}
+
+// Lift the keys value for faster lookups when not available.
+func (ctx *beamCtx) Value(key interface{}) interface{} {
+   switch key {
+   case bundleKey:
+   if ctx.bundleID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.bundleID = id.(string)
+   }
+   }
+   return ctx.bundleID
+   case ptransformKey:
+   if ctx.ptransformID == "" {
+   if id := ctx.Value(key); id != nil {
+   ctx.ptransformID = id.(string)
+   }
+   }
+   return ctx.ptransformID
+   }
+   return ctx.Context.Value(key)
+}
+
 // SetBundleID sets the id of the current Bundle.
 func SetBundleID(ctx context.Context, id string) context.Context {
-   return context.WithValue(ctx, bundleKey, id)
+   var ptransformID string
+   if bctx, ok := ctx.(*beamCtx); ok {
 
 Review comment:
   This doesn't handle the case where the provided context isn't the beamCtx, 
and accordingly ptransformID wouldn't be set. Is this an error? Should we just 
silently not have a ptransformID?


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 119969)
Time Spent: 50m  (was: 40m)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 50m
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-03 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=118910&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-118910
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 03/Jul/18 23:45
Start Date: 03/Jul/18 23:45
Worklog Time Spent: 10m 
  Work Description: lostluck commented on issue #5885: [BEAM-4727] Improve 
metric lookup overhead for predeclared metrics
URL: https://github.com/apache/beam/pull/5885#issuecomment-402322315
 
 
   R: @herohde 


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 118910)
Time Spent: 40m  (was: 0.5h)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 40m
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-03 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=118908&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-118908
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 03/Jul/18 23:45
Start Date: 03/Jul/18 23:45
Worklog Time Spent: 10m 
  Work Description: lostluck commented on issue #5884: [BEAM-4727] Re-use 
metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884#issuecomment-402322294
 
 
   R: @herohde 


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 118908)
Time Spent: 0.5h  (was: 20m)

> Reduce metrics overhead
> ---
>
> Key: BEAM-4727
> URL: https://issues.apache.org/jira/browse/BEAM-4727
> Project: Beam
>  Issue Type: Sub-task
>  Components: sdk-go
>Reporter: Robert Burke
>Assignee: Robert Burke
>Priority: Major
>  Time Spent: 0.5h
>  Remaining Estimate: 0h
>
> There are a few opportunities to avoid metrics overhead.
> First when setting state in the context, we allocate a new one for the stored 
> value, per element. This generates a fair amount of objects for the garbage 
> collector too. If we retain and re-use contexts within a bundle, we would 
> have the opportunity to save on these costs.
> Also, it's possible that we have overhead on the metric updating paths. We 
> can possibly do better than the general sync.Map, and avoid the type 
> assertion cost for extracting values of known types from the maps.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-03 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=118887&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-118887
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 03/Jul/18 22:48
Start Date: 03/Jul/18 22:48
Worklog Time Spent: 10m 
  Work Description: lostluck opened a new pull request #5885: [BEAM-4727] 
Improve metric lookup overhead for predeclared metrics
URL: https://github.com/apache/beam/pull/5885
 
 
   Introduces a performance for predeclared metrics when they're used within a 
single ptransform by caching the storage unit associated with them.
   Switched away from using sync.Map in favour of maps with locks. sync.Map 
doesn't handle high contention very well.
   
   Experimented with changing from a struct key to a string key, but string 
construction time was worse than directly using the struct. It remains 
unchanged.
   
   Builds on #5884
   
   Run on @lostluck's desktop:
   
   Before:
   BenchmarkMetrics/counter_inplace-12  1000   226 
ns/op  80 B/op  2 allocs/op
   BenchmarkMetrics/distribution_inplace-12 1000   231 
ns/op 112 B/op  2 allocs/op
   BenchmarkMetrics/gauge_inplace-12 500   252 
ns/op 112 B/op  2 allocs/op
   BenchmarkMetrics/counter_predeclared-12  1000   222 
ns/op  80 B/op  2 allocs/op
   BenchmarkMetrics/distribution_predeclared-12 1000
   228 ns/op 112 B/op  2 allocs/op
   BenchmarkMetrics/gauge_predeclared-12 500
   247 ns/op 112 B/op  2 allocs/op
   
   After:
BenchmarkMetrics/counter_inplace-12  500   
243 ns/op 128 B/op  2 allocs/op
BenchmarkMetrics/distribution_inplace-12 500   
252 ns/op 160 B/op  2 allocs/op
BenchmarkMetrics/gauge_inplace-12500   
266 ns/op 160 B/op  2 allocs/op
BenchmarkMetrics/counter_predeclared-12 2000
74.3 ns/op16 B/op  1 allocs/op
BenchmarkMetrics/distribution_predeclared-122000
79.6 ns/op48 B/op  1 allocs/op
BenchmarkMetrics/gauge_predeclared-12   2000
92.9 ns/op48 B/op  1 allocs/op
   
   
   Follow this checklist to help us incorporate your contribution quickly and 
easily:
   
- [ ] Format the pull request title like `[BEAM-XXX] Fixes bug in 
ApproximateQuantiles`, where you replace `BEAM-XXX` with the appropriate JIRA 
issue, if applicable. This will automatically link the pull request to the 
issue.
- [ ] If this contribution is large, please file an Apache [Individual 
Contributor License Agreement](https://www.apache.org/licenses/icla.pdf).
   
   It will help us expedite review of your Pull Request if you tag someone 
(e.g. `@username`) to look at it.
   
   Post-Commit Tests Status (on master branch)
   

   
   Lang | SDK | Apex | Dataflow | Flink | Gearpump | Samza | Spark
   --- | --- | --- | --- | --- | --- | --- | ---
   Go | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Go_GradleBuild/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_GradleBuild/lastCompletedBuild/)
 | --- | --- | --- | --- | --- | ---
   Java | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_GradleBuild/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_GradleBuild/lastCompletedBuild/)
 | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex_Gradle/lastCompletedBuild/)
 | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Gradle/lastCompletedBuild/)
 | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Gradle/lastCompletedBuild/)
 | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump_Gradle/lastCompletedBuild/)
 | [![Build 
Status](https://builds.apache.org/job/b

[jira] [Work logged] (BEAM-4727) Reduce metrics overhead

2018-07-03 Thread ASF GitHub Bot (JIRA)


 [ 
https://issues.apache.org/jira/browse/BEAM-4727?focusedWorklogId=118843&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-118843
 ]

ASF GitHub Bot logged work on BEAM-4727:


Author: ASF GitHub Bot
Created on: 03/Jul/18 21:29
Start Date: 03/Jul/18 21:29
Worklog Time Spent: 10m 
  Work Description: lostluck opened a new pull request #5884: [BEAM-4727] 
Re-use metric context throughout bundle
URL: https://github.com/apache/beam/pull/5884
 
 
   There's overhead in re-allocating a context for every element, and since the 
Go SDK doesn't modify contexts at present, except to assign identifiers for 
metrics collection, avoiding the re-allocation costs this way seems reasonable.
   
   Builds off of #5882
   
   Before:
   BenchmarkParDo_EmitSumFn-12   100  1202 ns/op
 529 B/op
   After:
   BenchmarkParDo_EmitSumFn-12   100  1030 ns/op
 481 B/op  3 allocs/op
   
   
   Follow this checklist to help us incorporate your contribution quickly and 
easily:
   
- [ ] Format the pull request title like `[BEAM-XXX] Fixes bug in 
ApproximateQuantiles`, where you replace `BEAM-XXX` with the appropriate JIRA 
issue, if applicable. This will automatically link the pull request to the 
issue.
- [ ] If this contribution is large, please file an Apache [Individual 
Contributor License Agreement](https://www.apache.org/licenses/icla.pdf).
   
   It will help us expedite review of your Pull Request if you tag someone 
(e.g. `@username`) to look at it.
   
   Post-Commit Tests Status (on master branch)
   

   
   Lang | SDK | Apex | Dataflow | Flink | Gearpump | Samza | Spark
   --- | --- | --- | --- | --- | --- | --- | ---
   Go | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Go_GradleBuild/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_GradleBuild/lastCompletedBuild/)
 | --- | --- | --- | --- | --- | ---
   Java | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_GradleBuild/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_GradleBuild/lastCompletedBuild/)
 | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex_Gradle/lastCompletedBuild/)
 | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Gradle/lastCompletedBuild/)
 | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Gradle/lastCompletedBuild/)
 | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump_Gradle/lastCompletedBuild/)
 | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza_Gradle/lastCompletedBuild/)
 | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark_Gradle/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark_Gradle/lastCompletedBuild/)
   Python | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Python_Verify/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Python_Verify/lastCompletedBuild/)
 | --- | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Py_VR_Dataflow/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Py_VR_Dataflow/lastCompletedBuild/)
  [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Py_ValCont/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Py_ValCont/lastCompletedBuild/)
 | --- | --- | --- | ---
   
   
   
   
   


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
---

Worklog Id: (was: 118843)
Time Spent: 10m
Remaining Estimate: 0h

> Reduce metrics overhead
> ---
>
> Key: BEAM-