mapleFU commented on code in PR #15241:
URL: https://github.com/apache/arrow/pull/15241#discussion_r1066046136


##########
cpp/src/parquet/encoding_test.cc:
##########
@@ -1427,5 +1429,66 @@ TYPED_TEST(TestDeltaBitPackEncoding, BasicRoundTrip) {
   }
 }
 
+TEST(DeltaBitPackEncoding, MalfordMiniblockBitWidth) {
+  std::shared_ptr<ColumnDescriptor> descr = ExampleDescr<Int32Type>();
+  auto decoder = MakeTypedDecoder<Int32Type>(Encoding::DELTA_BINARY_PACKED, 
descr.get());
+  using c_type = parquet::Int32Type::c_type;
+
+  int encode_buffer_size = 274;
+  int num_values = 65;
+  c_type* decode_buf = nullptr;
+  std::vector<uint8_t> output_bytes;
+  int values_decoded = 0;
+
+  // Both good_data and bad_data is a DELTA_BINARY_PACKED INT32 Page with 65 
values.
+  // For needed miniblocks, there bit-widths are all 32.
+  // There bit-widths for unneeded miniblocks are different:
+  // * good_data's unneeded bit-width is 0.
+  // * bad_data's unneeded bit-width is 165.
+  // We use Hex to represent good_data and bad_data.
+  {
+    char good_data_hex[] =
+        
"80010441BDA08DB708A488C0F1012020000089F79E53946D93CA242D48B21478591FDE6E4855CC03"
+        
"7024A4E8E56E1D20687C8BFB4DE880E73CE2E49E873BEB0B1DAD0E0169C7951C411CA76AD949D8ED"
+        
"FD9E07A485DF4410B4486253A6280335BB87494B7D61E40A8D487731BBC83A37E1E8EE2F5431150C"
+        
"F46D7224B3C7C271AC3220F2C6663289427E1CE3B00F8C34D5DDA845664AF36F56ACF6FED2339911"
+        
"7AB54F6667A80EA78D08DC61B4262CE28960B46B2D93785ED9933175A910415B50B30E9889C70ECD"
+        
"84C8856531E78B4E05B2AB27B1233E4BAB722869D785F5B8B1049955EFE200000000C6C2201E1836"
+        "C0579B202A6A7395C761044AD151D5CEE992CBBA5AE937E54F29D28F86CC0627B9D1";
+    unsigned char good_data[274];
+    for (int i = 0; i < 274; ++i) {
+      auto s = ::arrow::ParseHexValue(&good_data_hex[i * 2], &good_data[i]);
+      ASSERT_TRUE(s.ok());
+    }
+
+    output_bytes = std::vector<uint8_t>(num_values * sizeof(c_type));
+    decode_buf = reinterpret_cast<c_type*>(output_bytes.data());
+    decoder->SetData(num_values, &good_data[0], encode_buffer_size);
+    values_decoded = decoder->Decode(decode_buf, num_values);
+    ASSERT_EQ(num_values, values_decoded);
+  }
+
+  {
+    char bad_data_hex[] =
+        
"80010441BDA08DB708A488C0F1012020A5A589F79E53946D93CA242D48B21478591FDE6E4855CC03"
+        
"7024A4E8E56E1D20687C8BFB4DE880E73CE2E49E873BEB0B1DAD0E0169C7951C411CA76AD949D8ED"
+        
"FD9E07A485DF4410B4486253A6280335BB87494B7D61E40A8D487731BBC83A37E1E8EE2F5431150C"
+        
"F46D7224B3C7C271AC3220F2C6663289427E1CE3B00F8C34D5DDA845664AF36F56ACF6FED2339911"
+        
"7AB54F6667A80EA78D08DC61B4262CE28960B46B2D93785ED9933175A910415B50B30E9889C70ECD"
+        
"84C8856531E78B4E05B2AB27B1233E4BAB722869D785F5B8B1049955EFE200000000C6C2201E1836"
+        "C0579B202A6A7395C761044AD151D5CEE992CBBA5AE937E54F29D28F86CC0627B9D1";

Review Comment:
   Other data is equal. (In fact, they're all generated by `this->Execute(65, 
1)` )



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to