chenhao-db commented on code in PR #45989:
URL: https://github.com/apache/spark/pull/45989#discussion_r1565141311
##########
common/variant/src/main/java/org/apache/spark/types/variant/VariantBuilder.java:
##########
@@ -100,6 +105,272 @@ private Variant result() {
return new Variant(Arrays.copyOfRange(writeBuffer, 0, writePos), metadata);
}
+ public void appendString(String str) {
+ byte[] text = str.getBytes(StandardCharsets.UTF_8);
+ boolean longStr = text.length > MAX_SHORT_STR_SIZE;
+ checkCapacity((longStr ? 1 + U32_SIZE : 1) + text.length);
+ if (longStr) {
+ writeBuffer[writePos++] = primitiveHeader(LONG_STR);
+ writeLong(writeBuffer, writePos, text.length, U32_SIZE);
+ writePos += U32_SIZE;
+ } else {
+ writeBuffer[writePos++] = shortStrHeader(text.length);
+ }
+ System.arraycopy(text, 0, writeBuffer, writePos, text.length);
+ writePos += text.length;
+ }
+
+ public void appendNull() {
+ checkCapacity(1);
+ writeBuffer[writePos++] = primitiveHeader(NULL);
+ }
+
+ public void appendBoolean(boolean b) {
+ checkCapacity(1);
+ writeBuffer[writePos++] = primitiveHeader(b ? TRUE : FALSE);
+ }
+
+ // Append a long value to the variant builder. The actual used integer type depends on the value
+ // range of the long value.
+ public void appendLong(long l) {
+ checkCapacity(1 + 8);
+ if (l == (byte) l) {
+ writeBuffer[writePos++] = primitiveHeader(INT1);
+ writeLong(writeBuffer, writePos, l, 1);
+ writePos += 1;
+ } else if (l == (short) l) {
+ writeBuffer[writePos++] = primitiveHeader(INT2);
+ writeLong(writeBuffer, writePos, l, 2);
+ writePos += 2;
+ } else if (l == (int) l) {
+ writeBuffer[writePos++] = primitiveHeader(INT4);
+ writeLong(writeBuffer, writePos, l, 4);
+ writePos += 4;
+ } else {
+ writeBuffer[writePos++] = primitiveHeader(INT8);
+ writeLong(writeBuffer, writePos, l, 8);
+ writePos += 8;
+ }
+ }
+
+ public void appendDouble(double d) {
+ checkCapacity(1 + 8);
+ writeBuffer[writePos++] = primitiveHeader(DOUBLE);
+ writeLong(writeBuffer, writePos, Double.doubleToLongBits(d), 8);
+ writePos += 8;
+ }
+
+ // Append a decimal value to the variant builder. The caller should guarantee that its precision
+ // and scale fit into `MAX_DECIMAL16_PRECISION`.
+ public void appendDecimal(BigDecimal d) {
+ checkCapacity(2 + 16);
+ BigInteger unscaled = d.unscaledValue();
+ if (d.scale() <= MAX_DECIMAL4_PRECISION && d.precision() <=
MAX_DECIMAL4_PRECISION) {
+ writeBuffer[writePos++] = primitiveHeader(DECIMAL4);
+ writeBuffer[writePos++] = (byte) d.scale();
+ writeLong(writeBuffer, writePos, unscaled.intValueExact(), 4);
+ writePos += 4;
+ } else if (d.scale() <= MAX_DECIMAL8_PRECISION && d.precision() <=
MAX_DECIMAL8_PRECISION) {
+ writeBuffer[writePos++] = primitiveHeader(DECIMAL8);
+ writeBuffer[writePos++] = (byte) d.scale();
+ writeLong(writeBuffer, writePos, unscaled.longValueExact(), 8);
+ writePos += 8;
+ } else {
+ assert d.scale() <= MAX_DECIMAL16_PRECISION && d.precision() <=
MAX_DECIMAL16_PRECISION;
+ writeBuffer[writePos++] = primitiveHeader(DECIMAL16);
+ writeBuffer[writePos++] = (byte) d.scale();
+ // `toByteArray` returns a big-endian representation. We need to copy it
reversely and sign
+ // extend it to 16 bytes.
+ byte[] bytes = unscaled.toByteArray();
+ for (int i = 0; i < bytes.length; ++i) {
+ writeBuffer[writePos + i] = bytes[bytes.length - 1 - i];
+ }
+ byte sign = (byte) (bytes[0] < 0 ? -1 : 0);
+ for (int i = bytes.length; i < 16; ++i) {
+ writeBuffer[writePos + i] = sign;
+ }
+ writePos += 16;
+ }
+ }
+
+ public void appendDate(int daysSinceEpoch) {
Review Comment:
We do need it. Suppose a date value and an integer value share the same
underlying physical integer value, and we cast both of them to the variant
type. The resulting variant bytes must be different so that we can recover the
original data type of each value when we read it back. This can affect the
result of subsequent operations; e.g., the `to_json` representations will be
different (date format vs. integer format). This is also why the variant spec
defines the datetime types as dedicated scalar types.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]