[ 
https://issues.apache.org/jira/browse/BEAM-11088?focusedWorklogId=631829&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-631829
 ]

ASF GitHub Bot logged work on BEAM-11088:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 30/Jul/21 18:40
            Start Date: 30/Jul/21 18:40
    Worklog Time Spent: 10m 
      Work Description: jrmccluskey commented on a change in pull request 
#15253:
URL: https://github.com/apache/beam/pull/15253#discussion_r680149037



##########
File path: sdks/go/pkg/beam/testing/teststream/teststream.go
##########
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package teststream contains code configuring the TestStream primitive for
+// use in testing code that is meant to be run on streaming data sources.
+// TestStream is not supported on the Go direct runner.
+package teststream
+
+import (
+       "bytes"
+       "fmt"
+
+       "github.com/apache/beam/sdks/go/pkg/beam"
+       "github.com/apache/beam/sdks/go/pkg/beam/core/graph/coder"
+       "github.com/apache/beam/sdks/go/pkg/beam/core/graph/mtime"
+       "github.com/apache/beam/sdks/go/pkg/beam/core/util/protox"
+
+       pipepb "github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1"
+)
+
+const urn = "beam:transform:teststream:v1"
+
+// Config holds information used to create a TestStreamPayload object.
+type Config struct {
+       ElmCoder  *coder.Coder
+       Events    []*pipepb.TestStreamPayload_Event
+       Endpoint  *pipepb.ApiServiceDescriptor
+       Watermark int64
+}
+
+// MakeConfig initializes a Config struct to begin inserting TestStream 
events/endpoints into.
+// Requires that users provide the coder for the elements they are trying to 
emit.
+func MakeConfig(c *coder.Coder) Config {
+       return Config{ElmCoder: c,
+               Events:    []*pipepb.TestStreamPayload_Event{},
+               Endpoint:  &pipepb.ApiServiceDescriptor{},
+               Watermark: 0,
+       }
+}
+
+// SetEndpoint sets a URL for a TestStreamService that will emit events 
instead of having them
+// defined manually. Currently does not support authentication, so the 
TestStreamService should
+// be accessed in a trusted context.
+func (c *Config) SetEndpoint(url string) {
+       c.Endpoint.Url = url
+}
+
+// CreatePayload converts the Config object into a TestStreamPayload to be 
sent to the runner.
+func (c *Config) CreatePayload() *pipepb.TestStreamPayload {
+       return &pipepb.TestStreamPayload{CoderId: "c0", Events: c.Events, 
Endpoint: c.Endpoint}
+}
+
+// AdvanceWatermark adds an event to the Config Events struct advancing the 
watermark for a PCollection
+// to the given timestamp. if the tag is empty, this is applied to the default 
PCollection. Timestamp is

Review comment:
       Done.

##########
File path: sdks/go/pkg/beam/testing/teststream/teststream.go
##########
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package teststream contains code configuring the TestStream primitive for
+// use in testing code that is meant to be run on streaming data sources.
+// TestStream is not supported on the Go direct runner.
+package teststream
+
+import (
+       "bytes"
+       "fmt"
+
+       "github.com/apache/beam/sdks/go/pkg/beam"
+       "github.com/apache/beam/sdks/go/pkg/beam/core/graph/coder"
+       "github.com/apache/beam/sdks/go/pkg/beam/core/graph/mtime"
+       "github.com/apache/beam/sdks/go/pkg/beam/core/util/protox"
+
+       pipepb "github.com/apache/beam/sdks/go/pkg/beam/model/pipeline_v1"
+)
+
+const urn = "beam:transform:teststream:v1"
+
+// Config holds information used to create a TestStreamPayload object.
+type Config struct {
+       ElmCoder  *coder.Coder
+       Events    []*pipepb.TestStreamPayload_Event
+       Endpoint  *pipepb.ApiServiceDescriptor
+       Watermark int64
+}
+
+// MakeConfig initializes a Config struct to begin inserting TestStream 
events/endpoints into.
+// Requires that users provide the coder for the elements they are trying to 
emit.
+func MakeConfig(c *coder.Coder) Config {
+       return Config{ElmCoder: c,
+               Events:    []*pipepb.TestStreamPayload_Event{},
+               Endpoint:  &pipepb.ApiServiceDescriptor{},
+               Watermark: 0,
+       }
+}
+
+// SetEndpoint sets a URL for a TestStreamService that will emit events 
instead of having them
+// defined manually. Currently does not support authentication, so the 
TestStreamService should
+// be accessed in a trusted context.
+func (c *Config) SetEndpoint(url string) {
+       c.Endpoint.Url = url
+}
+
+// CreatePayload converts the Config object into a TestStreamPayload to be 
sent to the runner.
+func (c *Config) CreatePayload() *pipepb.TestStreamPayload {
+       return &pipepb.TestStreamPayload{CoderId: "c0", Events: c.Events, 
Endpoint: c.Endpoint}
+}
+
+// AdvanceWatermark adds an event to the Config Events struct advancing the 
watermark for a PCollection
+// to the given timestamp. if the tag is empty, this is applied to the default 
PCollection. Timestamp is
+// in milliseconds
+func (c *Config) AdvanceWatermark(timestamp int64) error {
+       if c.Watermark >= timestamp {
+               return fmt.Errorf("watermark must be monotonally increasing, is 
at %v, got %v", c.Watermark, timestamp)
+       }
+       watermarkAdvance := 
&pipepb.TestStreamPayload_Event_AdvanceWatermark{NewWatermark: timestamp}
+       watermarkEvent := 
&pipepb.TestStreamPayload_Event_WatermarkEvent{WatermarkEvent: watermarkAdvance}
+       c.Events = append(c.Events, &pipepb.TestStreamPayload_Event{Event: 
watermarkEvent})
+       c.Watermark = timestamp
+       return nil
+}
+
+// AdvanceWatermarkToInfinity advances the watermark for the PCollection 
corresponding to the tag
+// to the maximum timestamp.
+func (c *Config) AdvanceWatermarkToInfinity() error {
+       return c.AdvanceWatermark(mtime.MaxTimestamp.Milliseconds())
+}
+
+// AdvanceProcessingTime adds an event into the Config Events struct advancing 
the processing time by a given
+// duration. This advancement is applied to all of the PCollections output by 
the TestStream.
+func (c *Config) AdvanceProcessingTime(duration int64) {
+       processingAdvance := 
&pipepb.TestStreamPayload_Event_AdvanceProcessingTime{AdvanceDuration: duration}
+       processingEvent := 
&pipepb.TestStreamPayload_Event_ProcessingTimeEvent{ProcessingTimeEvent: 
processingAdvance}
+       c.Events = append(c.Events, &pipepb.TestStreamPayload_Event{Event: 
processingEvent})
+}
+
+// AdvanceProcessingTimeToInfinity moves the TestStream processing time to the 
largest possible
+// timestamp.
+func (c *Config) AdvanceProcessingTimeToInfinity() {
+       c.AdvanceProcessingTime(mtime.MaxTimestamp.Milliseconds())
+}
+
+// AddElements adds a number of elements to the Config object at the specified 
timestamp.
+// The encoder will panic if there is a type mismatch between the provided 
coder and the
+// elements.
+func (c *Config) AddElements(timestamp int64, elements ...interface{}) error {
+       newElements := []*pipepb.TestStreamPayload_TimestampedElement{}
+       enc := beam.NewElementEncoder(c.ElmCoder.T.Type())
+       for _, e := range elements {
+               var buf bytes.Buffer
+               if err := enc.Encode(e, &buf); err != nil {
+                       return fmt.Errorf("encoding value %v failed, got %v", 
e, err)
+               }
+               newElements = append(newElements, 
&pipepb.TestStreamPayload_TimestampedElement{EncodedElement: buf.Bytes(), 
Timestamp: timestamp})
+       }
+       addElementsEvent := 
&pipepb.TestStreamPayload_Event_AddElements{Elements: newElements}
+       elementEvent := 
&pipepb.TestStreamPayload_Event_ElementEvent{ElementEvent: addElementsEvent}
+       c.Events = append(c.Events, &pipepb.TestStreamPayload_Event{Event: 
elementEvent})
+       return nil
+}
+
+// TestStream inserts a TestStream primitive into a pipeline, taking a scope 
and a Config object and
+// producing an array of output PCollections.
+func TestStream(s beam.Scope, c Config) []beam.PCollection {

Review comment:
       Done.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Issue Time Tracking
-------------------

    Worklog Id:     (was: 631829)
    Time Spent: 1h 20m  (was: 1h 10m)

> [Go SDK] Implement TestStream primitive
> ---------------------------------------
>
>                 Key: BEAM-11088
>                 URL: https://issues.apache.org/jira/browse/BEAM-11088
>             Project: Beam
>          Issue Type: New Feature
>          Components: sdk-go
>            Reporter: Robert Burke
>            Assignee: Jack McCluskey
>            Priority: P3
>          Time Spent: 1h 20m
>  Remaining Estimate: 0h
>
> TestStream is a Test-Only primitive to help in verifying streaming SDK and 
> Runner semantics.
> It's a Known URN in the Beam Pipeline proto which Runners (like the Python 
> Portable Runner) can implement, and SDKs can configure to achieve desired 
> behavior.
> This task is to implement Test Stream so it can be added to SDK and user test 
> pipelines to simplify validating various SDK semantics. 
> This task is *not* to implement and support it in the Go Direct Runner at 
> this time, though a separate Jira can be filed for that. At a minimum, the direct 
> runner is expected to fail, clearly stating that it doesn't support the TestStream 
> primitive. (Further improving the Direct Runner is in issue 
> https://issues.apache.org/jira/browse/BEAM-11076)
> Implementing further allows large sections of tests already implemented in 
> [Python|https://github.com/apache/beam/search?l=Python&q=TestStream] and 
> [Java|https://github.com/apache/beam/search?l=Java&q=TestStream] to be 
> replicated in Go, to further improve confidence in the SDK implementation. 
> Care needs to be taken with this, though: many of those tests also validate the 
> "runner" implementations of test stream itself, as Python and Java already 
> have runner implementations.
> To implement the Go SDK side of Test Stream look to the following:
>  [The original TestStream Blog 
> post|https://beam.apache.org/blog/2016/10/20/test-stream.html] describing 
> its overall purpose.
> [TestStreamPayload|https://github.com/apache/beam/blob/43c97d811b9ec85116dbde49cde3f0718c2498ce/model/pipeline/src/main/proto/beam_runner_api.proto#L568]
>  message for configuring test streams. 
> [TEST_STREAM 
> Urn|https://github.com/apache/beam/blob/43c97d811b9ec85116dbde49cde3f0718c2498ce/model/pipeline/src/main/proto/beam_runner_api.proto#L266]
>  for adding the test stream primitive to the pipeline graph.
> Look at how [Reshuffle was added to the Go 
> SDK.|https://github.com/apache/beam/pull/11197/files] While Reshuffle isn't a 
> well-known URN like TestStream is (and thus was more work), the same code ends 
> up needing to be modified to allow user side specification.
>  * A [new TestStream 
> edge|https://github.com/apache/beam/pull/11197/files#diff-ad3762b94450801cd205383673b76f0cc7c7aebd5f55da4e1bd61aac6512fc2e]
>  needs to be added to the core/graph package. 
>  * Handling translation of that edge into the TestStreamPayload needs to 
> happen in the [runtime/graphx 
> package|https://github.com/apache/beam/pull/11197/files#diff-ba723a9194fc9c7dd64d4b22c76b83c55c92c006cfa2dd1c7e4072c5650f71b3]
>  * A user facing [beam package entry 
> point|https://github.com/apache/beam/pull/11197/files#diff-ba19ec6c6322550c7ee60adec55cf212ca9d40fcc8909be98231427da32e4710]
>  needs to be added with documentation so users can add TestStream to their 
> pipelines. 
>  * At that point, [integration 
> tests|https://github.com/apache/beam/tree/master/sdks/go/test/integration/primitives]
>  can begin to be added using the primitive, for supported runners. 
>  
> Included in this work are various convenience helper functions or libraries to 
> make test stream simple to use for end users. This may include a new 
> user facing package with various options depended on by the beam, graph, and 
> graphx packages for configuration. 
> Like the rest of the SDK implementation, it's strongly recommended that Beam 
> Pipeline Protos are handled in the graphx package to avoid overly coupling on 
> a specific implementation of beam, should that change in the future.
>  
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to