cloud-fan commented on code in PR #45575:
URL: https://github.com/apache/spark/pull/45575#discussion_r1537123309


##########
common/variant/src/main/java/org/apache/spark/types/variant/VariantUtil.java:
##########
@@ -141,4 +148,295 @@ public static byte arrayHeader(boolean largeSize, int 
offsetSize) {
     return (byte) (((largeSize ? 1 : 0) << (BASIC_TYPE_BITS + 2)) |
         ((offsetSize - 1) << BASIC_TYPE_BITS) | ARRAY);
   }
+
+  // An exception indicating that the variant value or metadata is malformed.
+  static SparkRuntimeException malformedVariant() {
+    return new SparkRuntimeException("MALFORMED_VARIANT",
+        Map$.MODULE$.<String, String>empty(), null, new QueryContext[]{}, "");
+  }
+
+  // An exception indicating that an external caller tried to call the Variant 
constructor with
+  // value or metadata exceeding the 16MiB size limit. We will never construct 
a Variant this large,
+  // so it should only be possible to encounter this exception when reading a 
Variant produced by
+  // another tool.
+  static SparkRuntimeException variantConstructorSizeLimit() {
+    return new SparkRuntimeException("VARIANT_CONSTRUCTOR_SIZE_LIMIT",
+        Map$.MODULE$.<String, String>empty(), null, new QueryContext[]{}, "");
+  }
+
+  // Check the validity of an array index `pos`. Throw `MALFORMED_VARIANT` if 
it is out of bounds,
+  // meaning that the variant is malformed.
+  static void checkIndex(int pos, int length) {
+    if (pos < 0 || pos >= length) throw malformedVariant();
+  }
+
+  // Read a little-endian signed long value from `bytes[pos, pos + numBytes)`.
+  static long readLong(byte[] bytes, int pos, int numBytes) {
+    checkIndex(pos, bytes.length);
+    checkIndex(pos + numBytes - 1, bytes.length);
+    long result = 0;
+    // All bytes except the most significant byte should be zero-extended and shifted (so we need
+    // `& 0xFF`). The most significant byte should be sign-extended and is 
handled after the loop.
+    for (int i = 0; i < numBytes - 1; ++i) {
+      long unsignedByteValue = bytes[pos + i] & 0xFF;
+      result |= unsignedByteValue << (8 * i);
+    }
+    long signedByteValue = bytes[pos + numBytes - 1];
+    result |= signedByteValue << (8 * (numBytes - 1));
+    return result;
+  }
+
+  // Read a little-endian unsigned int value from `bytes[pos, pos + 
numBytes)`. The value must fit
+  // into a non-negative int (`[0, Integer.MAX_VALUE]`).
+  static int readUnsigned(byte[] bytes, int pos, int numBytes) {
+    checkIndex(pos, bytes.length);
+    checkIndex(pos + numBytes - 1, bytes.length);
+    int result = 0;
+    // Similar to the `readLong` loop, but all bytes should be zero-extended.
+    for (int i = 0; i < numBytes; ++i) {
+      int unsignedByteValue = bytes[pos + i] & 0xFF;
+      result |= unsignedByteValue << (8 * i);
+    }
+    if (result < 0) throw malformedVariant();
+    return result;
+  }
+
+  // The value type of variant value. It is determined by the header byte but 
not a 1:1 mapping
+  // (for example, INT1/2/4/8 all map to `Type.LONG`).
+  public enum Type {
+    OBJECT,
+    ARRAY,
+    NULL,
+    BOOLEAN,
+    LONG,
+    STRING,
+    DOUBLE,
+    DECIMAL,
+  }
+
+  // Get the value type of variant value `value[pos...]`. It is only legal to 
call `get*` if
+  // `getType` returns this type (for example, it is only legal to call 
`getLong` if `getType`
+  // returns `Type.LONG`).
+  // Throw `MALFORMED_VARIANT` if the variant is malformed.
+  public static Type getType(byte[] value, int pos) {
+    checkIndex(pos, value.length);
+    int basicType = value[pos] & BASIC_TYPE_MASK;
+    int typeInfo = (value[pos] >> BASIC_TYPE_BITS) & TYPE_INFO_MASK;
+    switch (basicType) {
+      case SHORT_STR:
+        return Type.STRING;
+      case OBJECT:
+        return Type.OBJECT;
+      case ARRAY:
+        return Type.ARRAY;
+      default:
+        switch (typeInfo) {
+          case NULL:
+            return Type.NULL;
+          case TRUE:
+          case FALSE:
+            return Type.BOOLEAN;
+          case INT1:
+          case INT2:
+          case INT4:
+          case INT8:
+            return Type.LONG;
+          case DOUBLE:
+            return Type.DOUBLE;
+          case DECIMAL4:
+          case DECIMAL8:
+          case DECIMAL16:
+            return Type.DECIMAL;
+          case LONG_STR:
+            return Type.STRING;
+          default:
+            throw malformedVariant();
+        }
+    }
+  }
+
+  static IllegalStateException unexpectedType(Type type) {
+    return new IllegalStateException("Expect type to be " + type);

Review Comment:
   We can address this minor issue in the next VARIANT PR.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to