[ 
https://issues.apache.org/jira/browse/BEAM-9746?focusedWorklogId=422409&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-422409
 ]

ASF GitHub Bot logged work on BEAM-9746:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 14/Apr/20 22:55
            Start Date: 14/Apr/20 22:55
    Worklog Time Spent: 10m 
      Work Description: youngoli commented on pull request #11413: [BEAM-9746] 
check for 0 length copies from state
URL: https://github.com/apache/beam/pull/11413#discussion_r408477556
 
 

 ##########
 File path: sdks/go/pkg/beam/core/runtime/harness/statemgr_test.go
 ##########
 @@ -258,6 +261,167 @@ func TestStateChannel(t *testing.T) {
        }
 }
 
+// TestStateKeyReader validates ordinary Read cases
+func TestStateKeyReader(t *testing.T) {
+       const readLen = 4
+       tests := []struct {
+               name     string
+               buflens  []int // sizes of the buffers received on the state 
channel.
+               numReads int
+               closed   bool // tries to read from closed reader
+               noGet    bool // tries to read from nil get response reader
+       }{
+               {
+                       name:     "emptyData",
+                       buflens:  []int{-1},
+                       numReads: 1,
+               }, {
+                       name:     "singleBufferSingleRead",
+                       buflens:  []int{readLen},
+                       numReads: 2,
+               }, {
+                       name:     "singleBufferMultipleReads",
+                       buflens:  []int{2 * readLen},
+                       numReads: 3,
+               }, {
+                       name:     "singleBufferShortRead",
+                       buflens:  []int{readLen - 1},
+                       numReads: 2,
+               }, {
+                       name:     "multiBuffer",
+                       buflens:  []int{readLen, readLen},
+                       numReads: 3,
+               }, {
+                       name:     "multiBuffer-short-reads",
+                       buflens:  []int{readLen - 1, readLen - 1, readLen - 2},
+                       numReads: 4,
+               }, {
+                       name:     "emptyDataFirst", // Shouldn't happen, but 
not unreasonable to handle.
+                       buflens:  []int{-1, readLen, readLen},
+                       numReads: 4,
+               }, {
+                       name:     "emptyDataMid", // Shouldn't happen, but not 
unreasonable to handle.
+                       buflens:  []int{readLen, readLen, -1, readLen},
+                       numReads: 5,
+               }, {
+                       name:     "emptyDataLast", // Shouldn't happen, but not 
unreasonable to handle.
+                       buflens:  []int{readLen, readLen, -1},
+                       numReads: 3,
+               }, {
+                       name:     "emptyDataLast-short",
+                       buflens:  []int{3*readLen - 2, -1},
+                       numReads: 4,
+               }, {
+                       name:     "closed",
+                       buflens:  []int{-1, -1},
+                       numReads: 1,
+                       closed:   true,
+               }, {
+                       name:     "noGet",
+                       buflens:  []int{-1},
+                       numReads: 1,
+                       noGet:    true,
+               },
+       }
+       for _, test := range tests {
+               t.Run(test.name, func(t *testing.T) {
+                       ctx, cancelFn := 
context.WithCancel(context.Background())
+                       ch := &StateChannel{
+                               id:        "test",
+                               requests:  make(chan *fnpb.StateRequest),
+                               responses: make(map[string]chan<- 
*fnpb.StateResponse),
+                               cancelFn:  cancelFn,
+                               DoneCh:    ctx.Done(),
+                       }
+
+                       // Handle the channel behavior asynchronously.
+                       go func() {
+                               for i, buflen := range test.buflens {
+                                       token := []byte(strconv.Itoa(i))
+                                       var buf []byte
+                                       if buflen >= 0 {
+                                               buf = bytes.Repeat([]byte{42}, 
buflen)
+                                       }
+                                       // On the last request response pair, 
send no token.
+                                       if i+1 == len(test.buflens) {
+                                               token = nil
+                                       }
+
+                                       req := <-ch.requests
+
+                                       if test.noGet {
+                                               ch.responses[req.Id] <- 
&fnpb.StateResponse{
+                                                       Id: req.Id,
+                                               }
+                                               return
+                                       }
+
+                                       ch.responses[req.Id] <- 
&fnpb.StateResponse{
+                                               Id: req.Id,
+                                               Response: 
&fnpb.StateResponse_Get{
+                                                       Get: 
&fnpb.StateGetResponse{
+                                                               
ContinuationToken: token,
+                                                               Data:           
   buf,
+                                                       },
+                                               },
+                                       }
+                               }
+                       }()
+
+                       r := stateKeyReader{
+                               ch: ch,
+                       }
+
+                       if test.closed {
+                               err := r.Close()
+                               if err != nil {
+                                       t.Errorf("unexpected error on Close(), 
got %v", err)
+                               }
+                       }
+
+                       var totalBytes int
+                       for _, l := range test.buflens {
+                               if l > 0 {
+                                       totalBytes += l
+                               }
+                       }
+                       var finalerr error
+                       var count, reads int
+
+                       // Read all the bytes.
+                       for count <= totalBytes {
+                               reads++
+                               b := make([]byte, readLen) // io.Read is keyed 
off of length, not capacity.
+                               n, err := r.Read(b)
+                               if err != nil {
+                                       finalerr = err
+                                       break
+                               }
+                               count += n
+                               // Special check to avoid spurious zero 
elements.
+                               if count == totalBytes && n == 0 {
+                                       t.Error("expected byte count read, last 
read is 0, but no EOF")
+                               }
+                       }
+                       if got, want := reads, test.numReads; got != want {
+                               t.Errorf("read %d times, want %d", got, want)
+                       }
+                       if got, want := count, totalBytes; got != want {
+                               t.Errorf("read %v bytes, want %v", got, want)
+                       }
+                       if test.closed {
+                               if got, want := finalerr, errors.New("side 
input closed"); !contains(got, want) {
+                                       t.Errorf("got err %v, want to contain 
%v", got, want)
 
 Review comment:
   Nit: This error message could benefit from some escaped quotation marks 
around the %v, to make it explicit where the quoted error begins and ends. 
Alternatively, surrounding the quoted error with some newlines and tabs might 
work well too, like so:
   
   ```
   got err:
       <error message>
   want to contain:
       <error message>
   ```
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


Issue Time Tracking
-------------------

    Worklog Id:     (was: 422409)
    Time Spent: 2.5h  (was: 2h 20m)

> [Go SDK] Empty side inputs causing spurious zero elements
> ---------------------------------------------------------
>
>                 Key: BEAM-9746
>                 URL: https://issues.apache.org/jira/browse/BEAM-9746
>             Project: Beam
>          Issue Type: Improvement
>          Components: sdk-go
>            Reporter: Robert Burke
>            Assignee: Robert Burke
>            Priority: Major
>          Time Spent: 2.5h
>  Remaining Estimate: 0h
>
> A user discovered that empty side inputs would spuriously provide a single 
> zero element.
> The error was narrowed down to the Go SDK's state manager code: copying the 
> stateGetResponse data wasn't checking that the original data source even had 
> any bytes in it, leading it in particular to interpret length prefixed data 
> as having 0 length, which would cause zero value elements to be generated. 
> Notably, this caused empty strings.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to