Copilot commented on code in PR #18436:
URL: https://github.com/apache/pinot/pull/18436#discussion_r3200700621


##########
pinot-plugins/pinot-input-format/pinot-thrift/src/main/java/org/apache/pinot/plugin/inputformat/thrift/ThriftRecordExtractor.java:
##########
@@ -63,50 +66,97 @@ public GenericRow extract(TBase from, GenericRow to) {
     if (_extractAll) {
       for (Map.Entry<String, Integer> nameToId : _fieldIds.entrySet()) {
         Object value = 
from.getFieldValue(from.fieldForId(nameToId.getValue()));
-        if (value != null) {
-          value = convert(value);
-        }
-        to.putValue(nameToId.getKey(), value);
+        to.putValue(nameToId.getKey(), value != null ? convert(value) : null);
       }
     } else {
       for (String fieldName : _fields) {
         Integer fieldId = _fieldIds.get(fieldName);
         if (fieldId != null) {
           Object value = from.getFieldValue(from.fieldForId(fieldId));
-          if (value != null) {
-            value = convert(value);
-          }
-          to.putValue(fieldName, value);
+          to.putValue(fieldName, value != null ? convert(value) : null);
         }
       }
     }
     return to;
   }
 
-  /**
-   * Returns whether the object is a Thrift object.
-   */
-  @Override
-  protected boolean isRecord(Object value) {
-    return value instanceof TBase;
+  /// Dispatches a non-null Thrift value off its runtime Java type: `Object[]` 
for `Collection` (list / set),
+  /// `Map<String, Object>` for `Map` and for nested `TBase` records, 
single-value normalization for scalars.
+  private static Object convert(Object value) {
+    // List
+    if (value instanceof Collection) {
+      return convertCollection((Collection<Object>) value);
+    }
+    // Map
+    if (value instanceof Map) {
+      return convertMap((Map<Object, Object>) value);
+    }
+    // Record
+    if (value instanceof TBase) {
+      return convertRecord((TBase) value);
+    }
+    // Single value
+    return convertSingleValue(value);
   }
 
-  /**
-   * Handles the conversion of each field of a Thrift object.
-   *
-   * @param value should be verified to be a Thrift TBase type prior to 
calling this method as it will be casted
-   *              without checking
-   */
-  @Override
-  protected Map<String, Object> convertRecord(Object value) {
-    TBase record = (TBase) value;
+  private static Object[] convertCollection(Collection<Object> collection) {
+    Object[] result = new Object[collection.size()];
+    int i = 0;
+    for (Object value : collection) {
+      result[i++] = value != null ? convert(value) : null;
+    }
+    return result;
+  }
+
+  /// Converts a `map<K, V>`. Keys flow through `convertSingleValue` (Thrift's 
allowed key types — bool /
+  /// i8 / i16 / i32 / i64 / double / string / binary — cover the same 
matrix), then are stringified via
+  /// [BaseRecordExtractor#stringifyMapKey] per the `Map<String, Object>` 
contract.
+  private static Map<String, Object> convertMap(Map<Object, Object> map) {
+    Map<String, Object> result = Maps.newHashMapWithExpectedSize(map.size());
+    for (Map.Entry<Object, Object> entry : map.entrySet()) {
+      Object key = entry.getKey();
+      if (key == null) {
+        continue;
+      }
+      Object convertedKey = convertSingleValue(key);
+      if (convertedKey == null) {
+        continue;
+      }
+      Object value = entry.getValue();
+      result.put(stringifyMapKey(convertedKey), value != null ? convert(value) 
: null);
+    }
+    return result;
+  }
+
+  private static Map<String, Object> convertRecord(TBase record) {
     Set<TFieldIdEnum> fields = 
FieldMetaData.getStructMetaDataMap(record.getClass()).keySet();
-    Map<String, Object> convertedRecord = 
Maps.newHashMapWithExpectedSize(fields.size());
+    Map<String, Object> result = 
Maps.newHashMapWithExpectedSize(fields.size());
     for (TFieldIdEnum field : fields) {
-      Object fieldValue = record.getFieldValue(field);
-      Object convertedValue = fieldValue != null ? convert(fieldValue) : null;
-      convertedRecord.put(field.getFieldName(), convertedValue);
+      Object value = record.getFieldValue(field);
+      result.put(field.getFieldName(), value != null ? convert(value) : null);
+    }
+    return result;
+  }
+
+  /// Single-value normalization for Thrift's scalar Java types: `Byte` / 
`Short` widen to `Integer`,
+  /// `ByteBuffer` materializes to `byte[]` (slice-safely so the source 
buffer's position is not advanced),
+  /// other `Number` / `Boolean` / `byte[]` pass through, anything else (e.g. 
`TEnum`) → `toString()`.
+  private static Object convertSingleValue(Object value) {
+    if (value instanceof Number) {
+      if (value instanceof Byte || value instanceof Short) {
+        return ((Number) value).intValue();
+      }
+      return value;
+    }
+    if (value instanceof Boolean || value instanceof byte[]) {
+      return value;
+    }
+    if (value instanceof ByteBuffer) {
+      ByteBuffer slice = ((ByteBuffer) value).slice();
+      byte[] bytes = new byte[slice.limit()];
+      slice.get(bytes);
+      return bytes;

Review Comment:
   The ByteBuffer→byte[] conversion path is now implemented inside 
ThriftRecordExtractor (instead of being covered by BaseRecordExtractorTest 
previously), but there is no Thrift unit test exercising binary fields / 
ByteBuffer position-safety. Please add a test that feeds a ByteBuffer 
(including a non-zero position) through the extractor and asserts it returns 
the expected byte[] without advancing the original buffer position.



##########
pinot-plugins/pinot-input-format/pinot-clp-log/src/main/java/org/apache/pinot/plugin/inputformat/clplog/CLPLogRecordExtractor.java:
##########
@@ -165,6 +151,47 @@ public GenericRow extract(Map<String, Object> from, 
GenericRow to) {
     return to;
   }
 
+  /// Walks a non-null Jackson-parsed value and produces the contract shape: 
`BigDecimal` for `BigInteger`
+  /// (oversized ints), `Object[]` for JSON arrays, `Map<String, Object>` for 
JSON objects, pass-through for
+  /// the other Jackson scalar types.
+  private static Object convert(Object value) {
+    // BigInteger widens (Pinot has no BigInteger type)
+    if (value instanceof BigInteger) {
+      return new BigDecimal((BigInteger) value);

Review Comment:
   The new JSON-like conversion helper (BigInteger→BigDecimal, List→Object[], 
nested Map recursion) isn’t exercised by the existing CLPLogRecordExtractorTest 
(which currently only feeds String values). Consider adding a small unit test 
that passes representative non-String values through an unencoded field and 
asserts the contract-shaped output, so future changes don’t silently break 
non-CLP fields.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to