Copilot commented on code in PR #6527:
URL: https://github.com/apache/hive/pull/6527#discussion_r3367079997


##########
ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java:
##########
@@ -344,14 +366,180 @@ protected static void writeData(ParquetWriter<Group> 
writer, boolean isDictionar
   }
 
   protected static void initialVectorizedRowBatchCtx(Configuration conf) 
throws HiveException {
+    initialVectorizedRowBatchCtx(conf, null);
+  }
+
+  protected static void initialVectorizedRowBatchCtx(Configuration conf,
+      DataTypePhysicalVariation[] rowDataTypePhysicalVariations) throws 
HiveException {
     MapWork mapWork = new MapWork();
     VectorizedRowBatchCtx rbCtx = new VectorizedRowBatchCtx();
     rbCtx.init(createStructObjectInspector(conf), new String[0]);
+    if (rowDataTypePhysicalVariations != null) {
+      rbCtx.setRowDataTypePhysicalVariations(rowDataTypePhysicalVariations);
+    }
     mapWork.setVectorMode(true);
     mapWork.setVectorizedRowBatchCtx(rbCtx);
     Utilities.setMapWork(conf, mapWork);
   }
 
+  /**
+   * Verifies the Decimal64 read path: when the decimal column is tagged 
DECIMAL_64 (as the
+   * vectorizer does once {@code MapredParquetInputFormat} advertises it), the 
reader must fill a
+   * {@link Decimal64ColumnVector} (long-backed) with the correct unscaled 
values.
+   */
+  protected void decimal64Read(boolean isDictionaryEncoding) throws Exception {
+    Configuration readerConf = new Configuration();
+    readerConf.set(IOConstants.COLUMNS, "value");
+    readerConf.set(IOConstants.COLUMNS_TYPES, "decimal(5,2)");
+    readerConf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+    readerConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+    VectorizedParquetRecordReader reader = createTestParquetReader(
+        "message hive_schema { required value (DECIMAL(5,2));}", readerConf,
+        new DataTypePhysicalVariation[] { DataTypePhysicalVariation.DECIMAL_64 
});
+    VectorizedRowBatch previous = reader.createValue();
+    try {
+      int c = 0;
+      while (reader.next(NullWritable.get(), previous)) {
+        assertTrue("expected Decimal64ColumnVector but got " + 
previous.cols[0].getClass().getSimpleName(),
+            previous.cols[0] instanceof Decimal64ColumnVector);
+        Decimal64ColumnVector vector = (Decimal64ColumnVector) 
previous.cols[0];
+        assertTrue(vector.noNulls);
+        assertEquals((short) 5, vector.precision);
+        assertEquals((short) 2, vector.scale);
+        for (int i = 0; i < vector.vector.length; i++) {
+          if (c == nElements) {
+            break;
+          }
+          long expected =
+              new HiveDecimalWritable(getDecimal(isDictionaryEncoding, 
c).setScale(2)).serialize64(2);
+          assertEquals("Check failed at pos " + c, expected, vector.vector[i]);
+          assertFalse(vector.isNull[i]);
+          c++;
+        }
+      }
+      assertEquals(nElements, c);
+    } finally {
+      reader.close();
+    }
+  }
+
+  // Unscaled values (scale=2) used by the INT32/INT64-backed Decimal64 tests. 
Index 3 is null.
+  // 1001 -> 10.01, 1234 -> 12.34, -550 -> -5.50, 0 -> 0.00. These would all 
decode to a WRONG
+  // value (truncated integer part, e.g. 1001 -> 10) on the buggy 
readInteger()/readLong() path,
+  // and to the correct unscaled long on the fixed readDecimal()/serialize64 
path. For scale 2,
+  // serialize64(2) of any of these equals the literal long itself (e.g. 
1001), so the asserted

Review Comment:
   The comment describing `DECIMAL64_UNSCALED` is internally inconsistent: it 
says index 3 is null, yet it still lists `0 -> 0.00` as a decoded value. Because 
rows at `DECIMAL64_NULL_INDEX` are written as null, either clarify that the array 
element at the null index is ignored, or adjust the example values/null index so 
that the two statements agree.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to