hhr293 commented on code in PR #12299:
URL: https://github.com/apache/gluten/pull/12299#discussion_r3412388183


##########
cpp/core/utils/tac/FForCodec.cc:
##########
@@ -57,4 +58,46 @@ FForCodec::decompress(const uint8_t* input, int64_t 
inputSize, uint8_t* output,
   return static_cast<int64_t>(nDecoded);
 }
 
+int64_t FForCodec::maxCompressedLength128(int64_t inputSize) {
+  if (inputSize % sizeof(__int128_t) != 0) {
+    return 0;
+  }
+  size_t numValues = inputSize / sizeof(__int128_t);
+  return static_cast<int64_t>(ffor::compress128Bound(numValues));
+}
+
+arrow::Result<int64_t>
+FForCodec::compress128(const uint8_t* input, int64_t inputSize, uint8_t* 
output, int64_t outputSize) {
+  if (inputSize == 0) {
+    return 0;
+  }
+  if (inputSize % sizeof(__int128_t) != 0) {
+    return arrow::Status::Invalid("FForCodec: input size ", inputSize, " is 
not a multiple of ", sizeof(__int128_t), ".");
+  }
+
+  size_t numValues = inputSize / sizeof(__int128_t);
+  auto maxLen = static_cast<int64_t>(ffor::compress128Bound(numValues));
+  if (outputSize < maxLen) {
+    return arrow::Status::Invalid(
+        "FForCodec: output buffer too small for 128-bit compression (need ",
+        maxLen, " bytes, got ", outputSize, ").");
+  }
+
+  auto written = ffor::compress128(input, numValues, output);
+  return static_cast<int64_t>(written);
+}
+
+arrow::Result<int64_t>
+FForCodec::decompress128(const uint8_t* input, int64_t inputSize, uint8_t* 
output, int64_t outputSize) {
+  if (outputSize == 0) {
+    return 0;
+  }
+  if (outputSize % sizeof(__int128_t) != 0) {
+    return arrow::Status::Invalid("FForCodec: output size ", outputSize, " is 
not a multiple of ", sizeof(__int128_t), ".");
+  }
+
+  auto nDecoded = ffor::decompress128(input, inputSize, output, 
static_cast<size_t>(outputSize));
+  return static_cast<int64_t>(nDecoded);
+}

Review Comment:
   When outputSize == 0, the caller is saying "I expect zero values." Returning 0
   decoded values is the only correct answer regardless of input content.
   Validating the input stream format in this case would be wasted work — even if
   the stream is corrupt, producing 0 values into a 0-sized buffer is harmless and
   matches caller expectations.
   
   The caller already knows the expected output size from shuffle metadata. If the
   stream is actually corrupt, the mismatch will be caught at a higher level when
   the actual (non-zero-sized) data fails to decode correctly in subsequent calls.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to