fishy commented on a change in pull request #2292:
URL: https://github.com/apache/thrift/pull/2292#discussion_r540629906



##########
File path: lib/go/thrift/binary_protocol.go
##########
@@ -479,38 +486,21 @@ func (p *TBinaryProtocol) readAll(ctx context.Context, 
buf []byte) (err error) {
        return NewTProtocolException(err)
 }
 
-const readLimit = 32768
-
 func (p *TBinaryProtocol) readStringBody(size int32) (value string, err error) 
{
-       if size < 0 {
-               return "", nil
-       }
-
-       var (
-               buf bytes.Buffer
-               e   error
-               b   []byte
-       )
+       buf, err := safeReadBytes(size, p.trans)
+       return string(buf), NewTProtocolException(err)
+}
 
-       switch {
-       case int(size) <= len(p.buffer):
-               b = p.buffer[:size] // avoids allocation for small reads
-       case int(size) < readLimit:
-               b = make([]byte, size)
-       default:
-               b = make([]byte, readLimit)
+// This function is shared between TBinaryProtocol and TCompactProtocol.
+//
+// It tries to read size bytes from trans, in a way that prevents large
+// allocations when size is insanely large (mostly caused by malformed 
message).
+func safeReadBytes(size int32, trans io.Reader) ([]byte, error) {
+       if size < 0 {
+               return nil, nil
        }
 
-       for size > 0 {
-               _, e = io.ReadFull(p.trans, b)
-               buf.Write(b)
-               if e != nil {
-                       break
-               }
-               size -= readLimit
-               if size < readLimit && size > 0 {
-                       b = b[:size]
-               }
-       }
-       return buf.String(), NewTProtocolException(e)
+       buf := new(bytes.Buffer)

Review comment:
       What you suggested IS the large allocation when size is insanely large.

##########
File path: lib/go/thrift/binary_protocol.go
##########
@@ -432,6 +432,15 @@ func (p *TBinaryProtocol) ReadString(ctx context.Context) 
(value string, err err
                err = invalidDataLength
                return
        }
+       if size == 0 {
+               return "", nil
+       }
+       if size < int32(len(p.buffer)) {
+               // Avoid allocation on small reads
+               buf := p.buffer[:size]
+               read, e := io.ReadFull(p.trans, buf)
+               return string(buf[:read]), NewTProtocolException(e)
+       }

Review comment:
       This avoids the extra allocation (we reused `p.buffer`)
   
   `readStringBody` is a function attached to type `TBinaryProtocol`, so it 
cannot be used by `TCompactProtocol`. we cannot move it to top level function 
either as it's also used by other parts of `TBinaryProtocol`. I mean we can do 
that refactor, but consider it's just 4 lines it's not really worth it.

##########
File path: lib/go/thrift/binary_protocol.go
##########
@@ -479,38 +486,21 @@ func (p *TBinaryProtocol) readAll(ctx context.Context, 
buf []byte) (err error) {
        return NewTProtocolException(err)
 }
 
-const readLimit = 32768
-
 func (p *TBinaryProtocol) readStringBody(size int32) (value string, err error) 
{
-       if size < 0 {
-               return "", nil
-       }
-
-       var (
-               buf bytes.Buffer
-               e   error
-               b   []byte
-       )
+       buf, err := safeReadBytes(size, p.trans)
+       return string(buf), NewTProtocolException(err)
+}
 
-       switch {
-       case int(size) <= len(p.buffer):
-               b = p.buffer[:size] // avoids allocation for small reads
-       case int(size) < readLimit:
-               b = make([]byte, size)
-       default:
-               b = make([]byte, readLimit)
+// This function is shared between TBinaryProtocol and TCompactProtocol.
+//
+// It tries to read size bytes from trans, in a way that prevents large
+// allocations when size is insanely large (mostly caused by malformed 
message).

Review comment:
       It's in the details of the implementation of `io.CopyN` and 
`bytes.Buffer`. what they did is similar to the previous version, that they 
will do the copy chunk by chunk (and grow the buffer when necessary), instead 
of allocate the whole size up front.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to