AntoinePrv commented on code in PR #47294:
URL: https://github.com/apache/arrow/pull/47294#discussion_r2334113286


##########
cpp/src/arrow/util/bit_util_test.cc:
##########
@@ -1997,11 +1997,119 @@ TEST(BitUtil, RoundUpToPowerOf2) {
 #undef U64
 #undef S64
 
+/// Test the maximum number of bytes needed to write a LEB128 of a given size.
+TEST(BitStreamUtil, MaxLEB128ByteLenFor) {
+  EXPECT_EQ(bit_util::MaxLEB128ByteLenFor<int16_t>, 3);
+  EXPECT_EQ(bit_util::MaxLEB128ByteLenFor<int32_t>, 5);
+  EXPECT_EQ(bit_util::MaxLEB128ByteLenFor<int64_t>, 10);
+}
+
+/// Utility function to test LEB128 encoding with known input value and 
expected byte
+/// array
+template <typename Int, std::size_t N>
+void TestLEB128Encode(Int input_value, std::array<uint8_t, N> const& 
expected_data,
+                      int32_t expected_bytes_written) {
+  std::array<uint8_t, N> buffer{};
+  auto bytes_written = bit_util::WriteLEB128(input_value, buffer.data(),
+                                             
static_cast<int32_t>(buffer.size()));
+  EXPECT_EQ(bytes_written, expected_bytes_written);
+  if (bytes_written > 0) {
+    EXPECT_EQ(buffer, expected_data);
+  }
+}
+
+/// Test encoding to known LEB128 byte sequences
+TEST(WriteLEB128Test, KnownArrayValues) {
+  // Single byte value 0
+  TestLEB128Encode(0U, std::array<uint8_t, 1>{0x00}, 1);
+  // Single byte value 127
+  TestLEB128Encode(127U, std::array<uint8_t, 1>{0x7F}, 1);
+  // Two byte value 128
+  TestLEB128Encode(128U, std::array<uint8_t, 2>{0x80, 0x01}, 2);
+  // Two byte value 300
+  TestLEB128Encode(300U, std::array<uint8_t, 2>{0xAC, 0x02}, 2);
+  // Three byte value 16384
+  TestLEB128Encode(16384U, std::array<uint8_t, 3>{0x80, 0x80, 0x01}, 3);
+  // Four byte value 268435455
+  TestLEB128Encode(268435455U, std::array<uint8_t, 4>{0xFF, 0xFF, 0xFF, 0x7F}, 
4);
+  // Five byte uint32_t max value
+  TestLEB128Encode(4294967295U, std::array<uint8_t, 5>{0xFF, 0xFF, 0xFF, 0xFF, 
0x0F}, 5);
+  // uint64_t value requiring 10 bytes
+  TestLEB128Encode(
+      18446744073709551615ULL,
+      std::array<uint8_t, 10>{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0x01},
+      10);
+  // Edge case: Exact buffer size match
+  TestLEB128Encode(16384U, std::array<uint8_t, 3>{0x80, 0x80, 0x01}, 3);
+  // Various single byte values
+  TestLEB128Encode(1U, std::array<uint8_t, 1>{0x01}, 1);
+  TestLEB128Encode(63U, std::array<uint8_t, 1>{0x3F}, 1);
+  TestLEB128Encode(64U, std::array<uint8_t, 1>{0x40}, 1);
+  // Two byte boundary values
+  TestLEB128Encode(129U, std::array<uint8_t, 2>{0x81, 0x01}, 2);
+  TestLEB128Encode(16383U, std::array<uint8_t, 2>{0xFF, 0x7F}, 2);
+  // Error case: Buffer too small for value 128 (needs 2 bytes but only 1 
provided)
+  TestLEB128Encode(128U, std::array<uint8_t, 1>{}, 0);
+  // Error case: Buffer too small for uint32_t max (needs 5 bytes but only 4 
provided)
+  TestLEB128Encode(4294967295U, std::array<uint8_t, 4>{}, 0);
+  // Error case: Zero buffer size
+  TestLEB128Encode(52U, std::array<uint8_t, 0>{}, 0);
+}
+
+/// Utility function to test LEB128 decoding with known byte array and 
expected result
+template <typename Int, std::size_t N>
+void TestLEB128Decode(std::array<uint8_t, N> const& data, Int expected_value,
+                      int32_t expected_bytes_read) {
+  Int result = 0;
+  auto bytes_read = bit_util::ParseLeadingLEB128(
+      data.data(), static_cast<int32_t>(data.size()), &result);
+  EXPECT_EQ(bytes_read, expected_bytes_read);
+  if (expected_bytes_read > 0) {
+    EXPECT_EQ(result, expected_value);
+  }
+}
+
+/// Test decoding from known LEB128 byte sequences
+TEST(BitStreamUtil, LEB128) {
+  // Single byte value 0
+  TestLEB128Decode(std::array<uint8_t, 1>{0x00}, 0U, 1);
+  // Single byte value 127
+  TestLEB128Decode(std::array<uint8_t, 1>{0x7F}, 127U, 1);
+  // Two byte value 128
+  TestLEB128Decode(std::array<uint8_t, 2>{0x80, 0x01}, 128U, 2);
+  // Two byte value 300
+  TestLEB128Decode(std::array<uint8_t, 2>{0xAC, 0x02}, 300U, 2);
+  // Three byte value 16384
+  TestLEB128Decode(std::array<uint8_t, 3>{0x80, 0x80, 0x01}, 16384U, 3);
+  // Three byte value 16384, with remaining data
+  TestLEB128Decode(std::array<uint8_t, 5>{0x80, 0x80, 0x01, 0x80, 0x00}, 
16384U, 3);
+  // Four byte value 268435455
+  TestLEB128Decode(std::array<uint8_t, 4>{0xFF, 0xFF, 0xFF, 0x7F}, 268435455U, 
4);
+  // Five byte uint32_t max value
+  TestLEB128Decode(std::array<uint8_t, 5>{0xFF, 0xFF, 0xFF, 0xFF, 0x0F}, 
4294967295U, 5);
+  // uint64_t value requiring 10 bytes
+  TestLEB128Decode(
+      std::array<uint8_t, 10>{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0x01},
+      18446744073709551615ULL, 10);
+  // int32_t with maximum size (31 bits of 1)
+  TestLEB128Decode(std::array<uint8_t, 5>{0xFF, 0xFF, 0xFF, 0xFF, 0x7},
+                   std::numeric_limits<int32_t>::max(), 5);
+
+  // Error case: Truncated sequence (continuation bit set but no more data)
+  TestLEB128Decode(std::array<uint8_t, 1>{0x80}, 0U, 0);
+  // Error case: Input within the maximum number of bytes for an int32_t (5), but
+  // that overflows nonetheless (7 * 5 = 35 bits of data).
+  TestLEB128Decode(std::array<uint8_t, 5>{0xFF, 0xFF, 0xFF, 0xFF, 0x7F}, 
int32_t{}, 0);

Review Comment:
   These tests are already here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to