difin commented on code in PR #6200:
URL: https://github.com/apache/hive/pull/6200#discussion_r2550015530
##########
ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedDummyColumnReader.java:
##########
@@ -18,25 +18,112 @@
package org.apache.hadoop.hive.ql.io.parquet.vector;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import java.io.IOException;
+import java.nio.charset.StandardCharsets;
import java.util.Arrays;
/**
- * A dummy vectorized parquet reader to support schema evolution.
+ * A dummy vectorized parquet reader used for schema evolution.
+ * If a default value is provided, it returns that value for the entire batch.
+ * Otherwise, it returns nulls.
*/
public class VectorizedDummyColumnReader extends BaseVectorizedColumnReader {
-  public VectorizedDummyColumnReader() {
+  private final Object defaultValue;
+
+  public VectorizedDummyColumnReader(Object defaultValue) {
    super();
+    this.defaultValue = defaultValue;
  }
  @Override
-  public void readBatch(int total, ColumnVector column, TypeInfo columnType) throws IOException {
-    Arrays.fill(column.isNull, true);
-    column.isRepeating = true;
-    column.noNulls = false;
+  public void readBatch(int total, ColumnVector col, TypeInfo typeInfo) throws IOException {
+
+    // Case 1: No default → all nulls
+    if (defaultValue == null) {
+      Arrays.fill(col.isNull, true);
+      col.noNulls = false;
+      col.isRepeating = true;
+      return;
+    }
+
+    // Case 2: We have a default → fill with constant value
+    col.isRepeating = true;
+    col.noNulls = true;
+    col.isNull[0] = false;
+
+    if (typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE) {
+      fillPrimitive(col, (PrimitiveTypeInfo) typeInfo, defaultValue);
+    } else {
+      throw new IOException("Unsupported type category in DummyColumnReader: " + typeInfo.getCategory());
+    }
+  }
+
+  /* -------------------------
+     Primitive/leaf-type filler
+     ------------------------- */
+  private void fillPrimitive(ColumnVector col, PrimitiveTypeInfo ti, Object value) throws IOException {
+
+    switch (ti.getPrimitiveCategory()) {
+
+    case BOOLEAN:
+      ((LongColumnVector) col).vector[0] = ((Boolean) value) ? 1 : 0;
+      return;
+
+    case BYTE:
+    case SHORT:
+    case INT:
+    case LONG:
+      ((LongColumnVector) col).vector[0] = ((Number) value).longValue();
+      return;
+
+    case FLOAT:
+    case DOUBLE:
+      ((DoubleColumnVector) col).vector[0] = ((Number) value).doubleValue();
+      return;
+
+    case STRING:
+    case VARCHAR:
+    case CHAR:
+      byte[] bytes = value.toString().getBytes(StandardCharsets.UTF_8);
+      ((BytesColumnVector) col).setRef(0, bytes, 0, bytes.length);
+      return;
+
+    case DECIMAL:
+      DecimalColumnVector dcv = (DecimalColumnVector) col;
+      dcv.set(0, HiveDecimal.create(value.toString()));
+      return;
+
+    case TIMESTAMP: {
+      TimestampColumnVector tcv = (TimestampColumnVector) col;
+
+      long micros = (Long) value;
+      long seconds = micros / 1_000_000L;
+      long nanos = (micros % 1_000_000L) * 1000L;
+      tcv.time[0] = seconds * 1000L;
+      tcv.nanos[0] = (int) nanos;
+
+      return;
+    }
+
+    case DATE: {
+      LongColumnVector lcv = (LongColumnVector) col;
+      lcv.vector[0] = ((Number) value).intValue();
+      return;
+    }
Review Comment:
The indentation of the `case` blocks is off here; they are missing 2 spaces.
It's inconsistent: the `default` block is indented correctly, but the `case` blocks are not.
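
For example (just a sketch of the intended layout, reusing the cases from this hunk), the `case` labels and their bodies would get one more level of indentation so they line up with the `default` block:

```java
switch (ti.getPrimitiveCategory()) {

  case BOOLEAN:
    ((LongColumnVector) col).vector[0] = ((Boolean) value) ? 1 : 0;
    return;

  case BYTE:
  case SHORT:
  case INT:
  case LONG:
    ((LongColumnVector) col).vector[0] = ((Number) value).longValue();
    return;

  // ... remaining cases and the `default` block indented the same way ...
}
```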