pitrou commented on code in PR #38272:
URL: https://github.com/apache/arrow/pull/38272#discussion_r1400930191


##########
cpp/src/arrow/util/compression_zlib.cc:
##########
@@ -392,40 +395,46 @@ class GZipCodec : public Codec {
       return 0;
     }
 
-    // Reset the stream for this block
-    if (inflateReset(&stream_) != Z_OK) {
-      return ZlibErrorPrefix("zlib inflateReset failed: ", stream_.msg);
-    }
+    // inflate() will not automatically decode concatenated gzip members, keep calling
+    // inflate until reading all input data

Review Comment:
   ```suggestion
       // inflate until reading all input data (GH-38271)
   ```



##########
cpp/src/parquet/reader_test.cc:
##########
@@ -778,6 +782,24 @@ TEST_F(TestCheckDataPageCrc, CorruptDict) {
   }
 }
 
+TEST(TestGzipMembersRead, TwoConcatenatedMembers) {
+  auto file_reader = ParquetFileReader::OpenFile(concatenated_gzip_members(),
+                                                 /*memory_map=*/false);
+  auto col_reader = std::dynamic_pointer_cast<TypedColumnReader<Int64Type>>(
+      file_reader->RowGroup(0)->Column(0));
+  int64_t num_values = 0;
+  int64_t num_repdef = 0;
+  std::vector<int16_t> reps(1024);
+  std::vector<int16_t> defs(1024);
+  std::vector<int64_t> vals(1024);
+
+  num_repdef = 
+      col_reader->ReadBatch(1024, defs.data(), reps.data(), vals.data(), &num_values);

Review Comment:
   Can you assert the value of `num_repdef`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to