joechenrh commented on code in PR #485:
URL: https://github.com/apache/arrow-go/pull/485#discussion_r2310638476
##########
parquet/file/column_reader.go:
##########
@@ -437,16 +446,27 @@ func (c *columnChunkReader) initDataDecoder(page Page,
lvlByteLen int64) error {
format.Encoding_DELTA_LENGTH_BYTE_ARRAY,
format.Encoding_DELTA_BINARY_PACKED,
format.Encoding_BYTE_STREAM_SPLIT:
- c.curDecoder =
c.decoderTraits.Decoder(parquet.Encoding(encoding), c.descr, false, c.mem)
- c.decoders[encoding] = c.curDecoder
+ c.curDecoder =
c.decoderTraits.Decoder(parquet.Encoding(enc), c.descr, false, c.mem)
+ c.decoders[enc] = c.curDecoder
case format.Encoding_RLE_DICTIONARY:
return errors.New("parquet: dictionary page must be
before data page")
default:
- return fmt.Errorf("parquet: unknown encoding type %s",
encoding)
+ return fmt.Errorf("parquet: unknown encoding type %s",
enc)
+ }
+ }
+
+ switch c.descr.PhysicalType() {
+ case parquet.Types.FixedLenByteArray:
+ c.curDecoder = &encoding.FixedLenByteArrayDecoderWrapper{
+ FixedLenByteArrayDecoder:
c.curDecoder.(encoding.FixedLenByteArrayDecoder),
+ }
+ case parquet.Types.ByteArray:
+ c.curDecoder = &encoding.ByteArrayDecoderWrapper{
+ ByteArrayDecoder:
c.curDecoder.(encoding.ByteArrayDecoder),
Review Comment:
```diff
From eec53776b4aba9af1011a5a419472768c7c4dd0e Mon Sep 17 00:00:00 2001
From: Ruihao Chen <[email protected]>
Date: Fri, 29 Aug 2025 12:41:17 -0400
Subject: [PATCH] Add simple test case
Signed-off-by: Ruihao Chen <[email protected]>
---
parquet/file/column_writer_test.go | 36 ++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/parquet/file/column_writer_test.go
b/parquet/file/column_writer_test.go
index 90b239e4..3670de55 100644
--- a/parquet/file/column_writer_test.go
+++ b/parquet/file/column_writer_test.go
@@ -56,6 +56,41 @@ const (
DictionaryPageSize = 1024 * 1024
)
+type simpleAllocator struct {
+ q chan []byte
+}
+
+func (a *simpleAllocator) Allocate(n int) []byte {
+ l := len(a.q)
+ for range l {
+ b := <-a.q
+ if cap(b) >= n {
+ return b[:n]
+ }
+ a.q <- b
+ }
+ return make([]byte, n)
+}
+
+func (a *simpleAllocator) Free(b []byte) {
+ if b == nil {
+ return
+ }
+ select {
+ case a.q <- b:
+ default: // discard if full
+ }
+}
+
+func (a *simpleAllocator) Reallocate(n int, old []byte) []byte {
+ a.Free(old)
+ return a.Allocate(n)
+}
+
+func newSimpleAllocator() *simpleAllocator {
+ return &simpleAllocator{q: make(chan []byte, 64)}
+}
+
type mockpagewriter struct {
mock.Mock
}
@@ -258,6 +293,7 @@ func (p *PrimitiveWriterTestSuite) TearDownTest() {
}
func (p *PrimitiveWriterTestSuite) buildReader(nrows int64, compression
compress.Compression) file.ColumnChunkReader {
+ mem := newSimpleAllocator()
p.readbuffer = p.sink.Finish()
pagereader, _ :=
file.NewPageReader(arrutils.NewByteReader(p.readbuffer.Bytes()), nrows,
compression, mem, nil)
return file.NewColumnReader(p.descr, pagereader, mem, &p.bufferPool)
--
2.47.3
```
I've crafted a custom allocator that can make the unit tests in
column_writer_test fail on the main branch.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]