[GitHub] [orc] dongjoon-hyun commented on a change in pull request #568: ORC-672: FIX ORC type conversion within arrays where array.length > 1…

GitBox Mon, 09 Nov 2020 09:42:45 -0800


dongjoon-hyun commented on a change in pull request #568:
URL: https://github.com/apache/orc/pull/568#discussion_r519998678




##########
File path: java/core/src/test/org/apache/orc/impl/TestConvertTreeReaderFactory.java
##########
@@ -32,29 +43,96 @@
 import org.apache.orc.OrcFile;
 import org.apache.orc.Reader;
 import org.apache.orc.RecordReader;
-import org.apache.orc.TestVectorOrcFile;
+import org.apache.orc.TestProlepticConversions;
 import org.apache.orc.TypeDescription;
+import org.apache.orc.Writer;
+import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.TestName;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 public class TestConvertTreeReaderFactory {
 
-  @Test
-  public void testArraySizeBiggerThan1024AndConvertToDecimal() throws 
Exception {
-    Decimal64ColumnVector columnVector = 
testArraySizeBiggerThan1024("decimal(6,1)", Decimal64ColumnVector.class);
-    assertEquals(columnVector.vector.length, 1025);
+  private Path workDir =
+      new Path(System.getProperty("test.tmp.dir", "target" + File.separator + 
"test" + File.separator + "tmp"));
+
+  private Configuration conf;
+  private FileSystem fs;
+  private Path testFilePath;
+  private int LARGE_BATCH_SIZE;
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void setupPath() throws Exception {
+    // Default CV length is 1024
+    this.LARGE_BATCH_SIZE = 1030;
+    this.conf = new Configuration();
+    this.fs = FileSystem.getLocal(conf);
+    this.testFilePath = new Path(workDir, TestWriterImpl.class.getSimpleName() 
+ testCaseName.getMethodName().
+        replaceFirst("\\[[0-9]+]", "") + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  public <TExpectedColumnVector extends ColumnVector> TExpectedColumnVector 
createORCFileWithLargeArray(
+      TypeDescription schema, Class<TExpectedColumnVector> expectedColumnType, 
boolean useDecimal64)
+      throws IOException, ParseException {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    fs.setWorkingDirectory(workDir);
+    Writer w = OrcFile.createWriter(testFilePath, 
OrcFile.writerOptions(conf).setSchema(schema));
+
+    SimpleDateFormat dateFormat = 
TestProlepticConversions.createParser("yyyy-MM-dd", new GregorianCalendar());
+    VectorizedRowBatch batch = schema.createRowBatch(
+        useDecimal64 ? TypeDescription.RowBatchVersion.USE_DECIMAL64 : 
TypeDescription.RowBatchVersion.ORIGINAL,
+        LARGE_BATCH_SIZE);
+
+    ListColumnVector listCol = (ListColumnVector) batch.cols[0];
+    TExpectedColumnVector dcv = (TExpectedColumnVector) (listCol).child;
+    batch.size = 1;
+    for (int row = 0; row < LARGE_BATCH_SIZE; ++row) {
+      if (dcv instanceof DecimalColumnVector) {
+        ((DecimalColumnVector) dcv).set(row, HiveDecimal.create(row * 2 + 1));
+      } else if (dcv instanceof DoubleColumnVector) {
+        ((DoubleColumnVector) dcv).vector[row] = row * 2 + 1;
+      } else if (dcv instanceof BytesColumnVector) {
+        ((BytesColumnVector) dcv).setVal(row, ((row * 2 + 1) + 
"").getBytes(StandardCharsets.UTF_8));
+      } else if (dcv instanceof LongColumnVector) {
+        ((LongColumnVector) dcv).vector[row] = row * 2 + 1;
+      } else if (dcv instanceof TimestampColumnVector) {
+        ((TimestampColumnVector) dcv).set(row, Timestamp.valueOf((1900 + row) 
+ "-04-01 12:34:56.9"));
+      } else if (dcv instanceof DateColumnVector) {
+        String date = String.format("%04d-01-23", row * 2 + 1);
+        ((DateColumnVector) dcv).vector[row] = 
TimeUnit.MILLISECONDS.toDays(dateFormat.parse(date).getTime());
+      } else {
+        throw new IllegalStateException("Writing File with a large array of: 
"+ expectedColumnType + " not supported!");

Review comment:
   In the exception message:
   `of:` -> `of`
   `not supported!` -> `is not supported!`




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [orc] dongjoon-hyun commented on a change in pull request #568: ORC-672: FIX ORC type conversion within arrays where array.length > 1…

Reply via email to