pitrou commented on a change in pull request #10627:
URL: https://github.com/apache/arrow/pull/10627#discussion_r692035158



##########
File path: cpp/src/arrow/util/bit_util_test.cc
##########
@@ -1939,24 +1939,61 @@ TEST(BitUtil, RoundUpToPowerOf2) {
 #undef U64
 #undef S64
 
-static void TestZigZag(int32_t v) {
+static void TestZigZag(int32_t v, std::array<uint8_t, 5> buffer_expect) {
   uint8_t buffer[BitUtil::BitReader::kMaxVlqByteLength] = {};
   BitUtil::BitWriter writer(buffer, sizeof(buffer));
   BitUtil::BitReader reader(buffer, sizeof(buffer));
   writer.PutZigZagVlqInt(v);
+  EXPECT_EQ(buffer_expect[0], buffer[0]);
+  EXPECT_EQ(buffer_expect[1], buffer[1]);
+  EXPECT_EQ(buffer_expect[2], buffer[2]);
+  EXPECT_EQ(buffer_expect[3], buffer[3]);
+  EXPECT_EQ(buffer_expect[4], buffer[4]);

Review comment:
       Nit: can probably write `EXPECT_THAT(buffer, 
testing::ElementsAreArray(buffer_expect))`

##########
File path: cpp/src/parquet/arrow/arrow_reader_writer_test.cc
##########
@@ -4056,5 +4061,34 @@ TEST(TestArrowWriteDictionaries, NestedSubfield) {
   ::arrow::AssertTablesEqual(*table, *actual);
 }
 
+#ifdef ARROW_CSV
+TEST(TestArrowReadDeltaEncoding, DeltaBinaryPacked) {
+  auto file = test::get_data_file("delta_binary_packed.parquet");
+  auto expect_file = test::get_data_file("delta_binary_packed_expect.csv");
+  auto pool = ::arrow::default_memory_pool();
+  std::unique_ptr<FileReader> parquet_reader;
+  std::shared_ptr<::arrow::Table> table;
+  ASSERT_OK(
+      FileReader::Make(pool, ParquetFileReader::OpenFile(file, false), 
&parquet_reader));
+  ASSERT_OK(parquet_reader->ReadTable(&table));
+
+  ASSERT_OK_AND_ASSIGN(auto input_file, 
::arrow::io::ReadableFile::Open(expect_file));
+  auto convert_options = ::arrow::csv::ConvertOptions::Defaults();
+  for (int i = 0; i <= 64; ++i) {
+    std::string column_name = "bitwidth" + std::to_string(i);
+    convert_options.column_types[column_name] = ::arrow::int64();
+  }
+  convert_options.column_types["int_value"] = ::arrow::int32();
+  ASSERT_OK_AND_ASSIGN(auto csv_reader,
+                       ::arrow::csv::TableReader::Make(
+                           ::arrow::io::default_io_context(), input_file,
+                           ::arrow::csv::ReadOptions::Defaults(),
+                           ::arrow::csv::ParseOptions::Defaults(), 
convert_options));
+  ASSERT_OK_AND_ASSIGN(auto expect_table, csv_reader->Read());
+
+  ::arrow::AssertTablesEqual(*table, *expect_table);
+}
+#endif
+

Review comment:
       When ARROW_CSV is not defined, can probably skip the test instead of not 
defining it, e.g.:
   ```c++
   TEST(TestArrowReadDeltaEncoding, DeltaBinaryPacked) {
     GTEST_SKIP() << "Test needs CSV reader";
   }
   ```

##########
File path: cpp/src/parquet/CMakeLists.txt
##########
@@ -407,3 +407,7 @@ endif()
 if(ARROW_WITH_ZSTD)
   add_definitions(-DARROW_WITH_ZSTD)
 endif()
+
+if(ARROW_CSV)

Review comment:
       This seems ok to me currently.

##########
File path: cpp/src/arrow/util/bit_stream_utils.h
##########
@@ -418,14 +450,59 @@ inline bool BitReader::GetVlqInt(uint32_t* v) {
 }
 
 inline bool BitWriter::PutZigZagVlqInt(int32_t v) {
-  auto u_v = ::arrow::util::SafeCopy<uint32_t>(v);
-  return PutVlqInt((u_v << 1) ^ (u_v >> 31));
+  uint32_t u_v = ::arrow::util::SafeCopy<uint32_t>(v);
+  u_v = (u_v << 1) ^ static_cast<uint32_t>(v >> 31);
+  return PutVlqInt(u_v);

Review comment:
       Ah, thanks. You're right indeed.

##########
File path: cpp/src/arrow/util/bit_stream_utils.h
##########
@@ -313,7 +337,18 @@ inline int BitReader::GetBatch(int num_bits, T* v, int 
batch_size) {
                            reinterpret_cast<uint32_t*>(v + i), batch_size - i, 
num_bits);
     i += num_unpacked;
     byte_offset += num_unpacked * num_bits / 8;
+  } else if (sizeof(T) == 8 && num_bits > 32) {
+    // Use unpack64 only if num_bits is larger then 32

Review comment:
       I hadn't noticed previously, but why? Is it just a performance issue?

##########
File path: cpp/src/arrow/util/bit_util_test.cc
##########
@@ -1939,24 +1939,61 @@ TEST(BitUtil, RoundUpToPowerOf2) {
 #undef U64
 #undef S64
 
-static void TestZigZag(int32_t v) {
+static void TestZigZag(int32_t v, std::array<uint8_t, 5> buffer_expect) {
   uint8_t buffer[BitUtil::BitReader::kMaxVlqByteLength] = {};
   BitUtil::BitWriter writer(buffer, sizeof(buffer));
   BitUtil::BitReader reader(buffer, sizeof(buffer));
   writer.PutZigZagVlqInt(v);
+  EXPECT_EQ(buffer_expect[0], buffer[0]);
+  EXPECT_EQ(buffer_expect[1], buffer[1]);
+  EXPECT_EQ(buffer_expect[2], buffer[2]);
+  EXPECT_EQ(buffer_expect[3], buffer[3]);
+  EXPECT_EQ(buffer_expect[4], buffer[4]);

Review comment:
       You'll need to include `<gmock/gmock-matchers.h>` for that.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to