This is an automated email from the ASF dual-hosted git repository.
abti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new 493064b75 [GOBBLIN-1690] Added logging to ORC writer
493064b75 is described below
commit 493064b751da99d031ef79ced765c3bd61b719b5
Author: Ratandeep <[email protected]>
AuthorDate: Thu Aug 25 01:07:50 2022 -0400
[GOBBLIN-1690] Added logging to ORC writer
Closes #3543 from rdsr/master
---
.../org/apache/gobblin/writer/GenericRecordToOrcValueWriter.java | 8 ++++++--
.../main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java | 5 +++--
2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GenericRecordToOrcValueWriter.java b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GenericRecordToOrcValueWriter.java
index 32b63b464..e764a14db 100644
--- a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GenericRecordToOrcValueWriter.java
+++ b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GenericRecordToOrcValueWriter.java
@@ -95,6 +95,7 @@ public class GenericRecordToOrcValueWriter implements OrcValueWriter<GenericReco
this(typeDescription, avroSchema);
this.enabledSmartSizing =
state.getPropAsBoolean(ENABLE_SMART_ARRAY_ENLARGE,
DEFAULT_ENABLE_SMART_ARRAY_ENLARGE);
this.enlargeFactor = state.getPropAsInt(ENLARGE_FACTOR_KEY,
DEFAULT_ENLARGE_FACTOR);
+ log.info("enabledSmartSizing: {}, enlargeFactor: {}", enabledSmartSizing,
enlargeFactor);
}
@Override
@@ -302,6 +303,7 @@ public class GenericRecordToOrcValueWriter implements OrcValueWriter<GenericReco
// If seeing child array being saturated, will need to expand with a
reasonable amount.
if (cv.childCount > cv.child.isNull.length) {
int resizedLength = resize(rowsAdded, cv.isNull.length, cv.childCount);
+ log.info("Column vector: {}, resizing to: {}, child count: {}",
cv.child, resizedLength, cv.childCount);
cv.child.ensureSize(resizedLength, true);
}
@@ -344,7 +346,9 @@ public class GenericRecordToOrcValueWriter implements OrcValueWriter<GenericReco
// make sure the child is big enough
if (cv.childCount > cv.keys.isNull.length) {
int resizedLength = resize(rowsAdded, cv.isNull.length, cv.childCount);
+ log.info("Column vector: {}, resizing to: {}, child count: {}",
cv.keys, resizedLength, cv.childCount);
cv.keys.ensureSize(resizedLength, true);
+ log.info("Column vector: {}, resizing to: {}, child count: {}",
cv.values, resizedLength, cv.childCount);
cv.values.ensureSize(resizedLength, true);
}
// Add each element
@@ -377,10 +381,10 @@ public class GenericRecordToOrcValueWriter implements OrcValueWriter<GenericReco
* If there's further resize requested, it will add delta again to be
conservative, but chances of adding delta
* for multiple times should be low, unless the container size is
fluctuating too much.
*/
- private int resize(int rowsAdded, int batchSize, int currentSize) {
+ private int resize(int rowsAdded, int batchSize, int requestedSize) {
resizeCount += 1;
log.info(String.format("It has been resized %s times in current writer",
resizeCount));
- return enabledSmartSizing ? currentSize + (currentSize / rowsAdded + 1) *
batchSize : enlargeFactor * currentSize;
+ return enabledSmartSizing ? requestedSize + (requestedSize / rowsAdded +
1) * batchSize : enlargeFactor * requestedSize;
}
private Converter buildConverter(TypeDescription schema, Schema avroSchema) {
diff --git a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java
index 62b6ff8ca..b6400d59b 100644
--- a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java
+++ b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java
@@ -159,8 +159,9 @@ public abstract class GobblinBaseOrcWriter<S, D> extends FsDataWriter<D> {
this.rowBatch = typeDescription.createRowBatch(this.batchSize);
this.deepCleanBatch =
properties.getPropAsBoolean(ORC_WRITER_DEEP_CLEAN_EVERY_BATCH, false);
- log.info("Start to construct a ORC-Native Writer, with batchSize:" +
batchSize + ", enable batchDeepClean:"
- + deepCleanBatch + "\n, schema in input format:" + this.inputSchema);
+ log.info("Created ORC writer, batch size: {}, {}: {}",
+ batchSize, OrcConf.ROWS_BETWEEN_CHECKS.name(),
properties.getProp(OrcConf.ROWS_BETWEEN_CHECKS.name(),
+ OrcConf.ROWS_BETWEEN_CHECKS.getDefaultValue().toString()));
// Create file-writer
Configuration conf = new Configuration();