AntoinePrv commented on code in PR #47294:
URL: https://github.com/apache/arrow/pull/47294#discussion_r2355358556


##########
cpp/src/arrow/util/rle_encoding_test.cc:
##########
@@ -602,20 +1038,126 @@ struct GetBatchSpacedTestCase {
   int bit_width;
 };
 
-TEST(RleDecoder, GetBatchSpaced) {
-  uint32_t kSeed = 1337;
-  ::arrow::random::RandomArrayGenerator rand(kSeed);
-
-  std::vector<GetBatchSpacedTestCase<int32_t>> int32_cases{
-      {1, 100000, 0.01, 1}, {1, 100000, 0.1, 1},    {1, 100000, 0.5, 1},
-      {4, 100000, 0.05, 3}, {100, 100000, 0.05, 7},
+template <typename T>
+void DoTestGetBatchSpacedRoundtrip() {
+  using Data = DataTestRleBitPacked<T>;
+  using ArrowType = typename Data::ArrowType;
+  using RandomPart = typename Data::RandomPart;
+  using NullPart = typename Data::NullPart;
+  using RepeatPart = typename Data::RepeatPart;
+
+  std::vector<Data> test_cases = {
+      {
+          {RandomPart{/* max=*/1, /* size=*/400, /* null_proba= */ 0.1}},
+          /* bit_width= */ 1,
+      },
+      {
+          {
+              RandomPart{/* max=*/7, /* size=*/10037, /* null_proba= */ 0.0},
+              NullPart{/* size= */ 1153},
+              RandomPart{/* max=*/7, /* size=*/800, /* null_proba= */ 0.5},
+          },
+          /* bit_width= */ 3,
+      },
+      {
+          {
+              NullPart{/* size= */ 80},
+              RandomPart{/* max=*/static_cast<T>(1023), /* size=*/800,
+                         /* null_proba= */ 0.01},
+              NullPart{/* size= */ 1023},
+          },
+          /* bit_width= */ 11,
+      },
+      {
+          {RepeatPart{/* value=*/13, /* size=*/100000, /* null_proba= */ 
0.01}},
+          /* bit_width= */ 10,
+      },
+      {
+          {
+              NullPart{/* size= */ 1024},
+              RepeatPart{/* value=*/static_cast<T>(10000), /* size=*/100000,
+                         /* null_proba= */ 0.1},
+              NullPart{/* size= */ 77},
+          },
+          /* bit_width= */ 23,
+      },
+      {
+          {
+              RepeatPart{/* value=*/13, /* size=*/100000, /* null_proba= */ 
0.0},
+              NullPart{/* size= */ 1153},
+              RepeatPart{/* value=*/72, /* size=*/100799, /* null_proba= */ 
0.5},
+          },
+          /* bit_width= */ 10,
+      },
+      {
+          {
+              RandomPart{/* max=*/1, /* size=*/1013, /* null_proba= */ 0.01},
+              NullPart{/* size=*/8},
+              RepeatPart{1, /* size= */ 256, /* null_proba= */ 0.1},
+              NullPart{/* size=*/128},
+              RepeatPart{0, /* size= */ 256, /* null_proba= */ 0.0},
+              NullPart{/* size=*/15},
+              RandomPart{/* max=*/1, /* size=*/8 * 1024, /* null_proba= */ 
0.01},
+          },
+          /* bit_width= */ 1,
+      },
   };
-  for (auto case_ : int32_cases) {
-    auto arr = rand.Int32(case_.size, /*min=*/0, case_.max_value, 
case_.null_probability);
-    CheckRoundTripSpaced<Int32Type>(*arr, case_.bit_width);
-    CheckRoundTripSpaced<Int32Type>(*arr->Slice(1), case_.bit_width);
+
+  ::arrow::random::RandomArrayGenerator rand(/* seed= */ 12);
+  // FRAGILE: we create a dictionary large enough so that any encoded value 
from the
+  // previous test cases can be used as an index in the dictionary.
+  // Its size must be increased accordingly if larger values are encoded in 
the test
+  // cases.
+  auto dict = std::static_pointer_cast<arrow::FloatArray>(rand.Float32(20000, 
-1.0, 1.0));
+
+  // Number of bits available in T to write a positive integer.
+  constexpr int kBitsAvailable = 8 * sizeof(T) - (std::is_signed_v<T> ? 1 : 0);

Review Comment:
   No, for a given bit_width, we test with all possible integer types that can
contain it.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to