This is an automated email from the ASF dual-hosted git repository.

abti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new 493064b75 [GOBBLIN-1690] Added logging to ORC writer
493064b75 is described below

commit 493064b751da99d031ef79ced765c3bd61b719b5
Author: Ratandeep <[email protected]>
AuthorDate: Thu Aug 25 01:07:50 2022 -0400

    [GOBBLIN-1690] Added logging to ORC writer
    
    Closes #3543 from rdsr/master
---
 .../org/apache/gobblin/writer/GenericRecordToOrcValueWriter.java  | 8 ++++++--
 .../main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java | 5 +++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GenericRecordToOrcValueWriter.java b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GenericRecordToOrcValueWriter.java
index 32b63b464..e764a14db 100644
--- a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GenericRecordToOrcValueWriter.java
+++ b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GenericRecordToOrcValueWriter.java
@@ -95,6 +95,7 @@ public class GenericRecordToOrcValueWriter implements OrcValueWriter<GenericReco
     this(typeDescription, avroSchema);
     this.enabledSmartSizing = state.getPropAsBoolean(ENABLE_SMART_ARRAY_ENLARGE, DEFAULT_ENABLE_SMART_ARRAY_ENLARGE);
     this.enlargeFactor = state.getPropAsInt(ENLARGE_FACTOR_KEY, DEFAULT_ENLARGE_FACTOR);
+    log.info("enabledSmartSizing: {}, enlargeFactor: {}", enabledSmartSizing, enlargeFactor);
   }
 
   @Override
@@ -302,6 +303,7 @@ public class GenericRecordToOrcValueWriter implements OrcValueWriter<GenericReco
       // If seeing child array being saturated, will need to expand with a reasonable amount.
       if (cv.childCount > cv.child.isNull.length) {
         int resizedLength = resize(rowsAdded, cv.isNull.length, cv.childCount);
+        log.info("Column vector: {}, resizing to: {}, child count: {}", cv.child, resizedLength, cv.childCount);
         cv.child.ensureSize(resizedLength, true);
       }
 
@@ -344,7 +346,9 @@ public class GenericRecordToOrcValueWriter implements OrcValueWriter<GenericReco
       // make sure the child is big enough
       if (cv.childCount > cv.keys.isNull.length) {
         int resizedLength = resize(rowsAdded, cv.isNull.length, cv.childCount);
+        log.info("Column vector: {}, resizing to: {}, child count: {}", cv.keys, resizedLength, cv.childCount);
         cv.keys.ensureSize(resizedLength, true);
+        log.info("Column vector: {}, resizing to: {}, child count: {}", cv.values, resizedLength, cv.childCount);
         cv.values.ensureSize(resizedLength, true);
       }
       // Add each element
@@ -377,10 +381,10 @@ public class GenericRecordToOrcValueWriter implements OrcValueWriter<GenericReco
    * If there's further resize requested, it will add delta again to be conservative, but chances of adding delta
    * for multiple times should be low, unless the container size is fluctuating too much.
    */
-  private int resize(int rowsAdded, int batchSize, int currentSize) {
+  private int resize(int rowsAdded, int batchSize, int requestedSize) {
     resizeCount += 1;
     log.info(String.format("It has been resized %s times in current writer", resizeCount));
-    return enabledSmartSizing ? currentSize + (currentSize / rowsAdded + 1) * batchSize : enlargeFactor * currentSize;
+    return enabledSmartSizing ? requestedSize + (requestedSize / rowsAdded + 1) * batchSize : enlargeFactor * requestedSize;
   }
 
   private Converter buildConverter(TypeDescription schema, Schema avroSchema) {
diff --git a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java
index 62b6ff8ca..b6400d59b 100644
--- a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java
+++ b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java
@@ -159,8 +159,9 @@ public abstract class GobblinBaseOrcWriter<S, D> extends FsDataWriter<D> {
     this.rowBatch = typeDescription.createRowBatch(this.batchSize);
     this.deepCleanBatch = properties.getPropAsBoolean(ORC_WRITER_DEEP_CLEAN_EVERY_BATCH, false);
 
-    log.info("Start to construct a ORC-Native Writer, with batchSize:" + batchSize + ", enable batchDeepClean:"
-        + deepCleanBatch + "\n, schema in input format:" + this.inputSchema);
+    log.info("Created ORC writer, batch size: {}, {}: {}",
+            batchSize, OrcConf.ROWS_BETWEEN_CHECKS.name(), properties.getProp(OrcConf.ROWS_BETWEEN_CHECKS.name(),
+                    OrcConf.ROWS_BETWEEN_CHECKS.getDefaultValue().toString()));
 
     // Create file-writer
     Configuration conf = new Configuration();

Reply via email to