This is an automated email from the ASF dual-hosted git repository.
htowaileb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
new 18c6f803d9 [ASTERIXDB-3509]: COPY TO CSV Perf Issue
18c6f803d9 is described below
commit 18c6f803d9057770532f06394bb75e1f59f97fdf
Author: utsavCbase <[email protected]>
AuthorDate: Wed Jan 22 20:46:11 2025 +0530
[ASTERIXDB-3509]: COPY TO CSV Perf Issue
Details: An instance of the CSV printerFactory was being created for every
piece of data in the records. Changed this to a single instance creation per query.
Change-Id: I55b114c81707e57307b52a1828952fa56847b2dc
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19354
Reviewed-by: Hussain Towaileb <[email protected]>
Integration-Tests: Jenkins <[email protected]>
Tested-by: Jenkins <[email protected]>
---
.../printers/csv/ANullPrinterFactory.java | 6 +-----
.../printers/csv/AObjectPrinterFactory.java | 22 +++++++++++-----------
.../printers/csv/AStringPrinterFactory.java | 5 +----
.../om/pointables/printer/csv/APrintVisitor.java | 6 +++++-
4 files changed, 18 insertions(+), 21 deletions(-)
diff --git
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java
index 3d3ca3e888..101d7c2f07 100644
---
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java
+++
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java
@@ -19,7 +19,6 @@
package org.apache.asterix.dataflow.data.nontagged.printers.csv;
import java.io.PrintStream;
-import java.util.concurrent.ConcurrentHashMap;
import org.apache.hyracks.algebricks.data.IPrinter;
import org.apache.hyracks.algebricks.data.IPrinterFactory;
@@ -27,8 +26,6 @@ import org.apache.hyracks.algebricks.data.IPrinterFactory;
public class ANullPrinterFactory implements IPrinterFactory {
private static final long serialVersionUID = 1L;
private static final String DEFAULT_NULL_STRING = "";
- // Store the information about the instance based on the parameters
- private static final ConcurrentHashMap<String, ANullPrinterFactory>
instanceCache = new ConcurrentHashMap<>();
private String nullString;
private ANullPrinterFactory(String nullString) {
@@ -36,8 +33,7 @@ public class ANullPrinterFactory implements IPrinterFactory {
}
public static ANullPrinterFactory createInstance(String nullString) {
- String key = CSVUtils.generateKey(nullString);
- return instanceCache.computeIfAbsent(key, k -> new
ANullPrinterFactory(nullString));
+ return new ANullPrinterFactory(nullString);
}
private final IPrinter PRINTER = (byte[] b, int s, int l, PrintStream ps)
-> {
diff --git
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java
index 20b1ef0316..fdde82cd96 100644
---
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java
+++
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java
@@ -27,7 +27,6 @@ import static
org.apache.asterix.dataflow.data.nontagged.printers.csv.CSVUtils.K
import java.io.PrintStream;
import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
import org.apache.asterix.om.pointables.ARecordVisitablePointable;
import org.apache.asterix.om.pointables.base.DefaultOpenFieldType;
@@ -43,7 +42,8 @@ import org.apache.hyracks.api.exceptions.HyracksDataException;
public class AObjectPrinterFactory implements IPrinterFactory {
private static final long serialVersionUID = 1L;
- private static final ConcurrentHashMap<String, AObjectPrinterFactory>
instanceCache = new ConcurrentHashMap<>();
+ private final IPrinter nullPrinter;
+ private final IPrinter stringPrinter;
private ARecordType itemType;
private Map<String, String> configuration;
private boolean emptyFieldAsNull;
@@ -53,12 +53,15 @@ public class AObjectPrinterFactory implements
IPrinterFactory {
this.configuration = configuration;
String emptyFieldAsNullStr =
configuration.get(KEY_EMPTY_FIELD_AS_NULL);
this.emptyFieldAsNull = emptyFieldAsNullStr != null &&
Boolean.parseBoolean(emptyFieldAsNullStr);
+ this.nullPrinter =
ANullPrinterFactory.createInstance(configuration.get(KEY_NULL)).createPrinter();
+ this.stringPrinter =
+
AStringPrinterFactory.createInstance(configuration.get(KEY_QUOTE),
configuration.get(KEY_FORCE_QUOTE),
+ configuration.get(KEY_ESCAPE),
configuration.get(KEY_DELIMITER)).createPrinter();
+
}
public static AObjectPrinterFactory createInstance(ARecordType itemType,
Map<String, String> configuration) {
- // generate a unique identifier based on the parameters and hash the
instance corresponding to it.
- String key = CSVUtils.generateKey(itemType, configuration);
- return instanceCache.computeIfAbsent(key, k -> new
AObjectPrinterFactory(itemType, configuration));
+ return new AObjectPrinterFactory(itemType, configuration);
}
public boolean printFlatValue(ATypeTag typeTag, byte[] b, int s, int l,
PrintStream ps)
@@ -78,7 +81,7 @@ public class AObjectPrinterFactory implements IPrinterFactory
{
return true;
case MISSING:
case NULL:
-
ANullPrinterFactory.createInstance(configuration.get(KEY_NULL)).createPrinter().print(b,
s, l, ps);
+ nullPrinter.print(b, s, l, ps);
return true;
case BOOLEAN:
ABooleanPrinterFactory.PRINTER.print(b, s, l, ps);
@@ -130,12 +133,9 @@ public class AObjectPrinterFactory implements
IPrinterFactory {
return true;
case STRING:
if (emptyFieldAsNull && CSVUtils.isEmptyString(b, s, l)) {
-
ANullPrinterFactory.createInstance(configuration.get(KEY_NULL)).createPrinter().print(b,
s, l, ps);
+ nullPrinter.print(b, s, l, ps);
} else {
- AStringPrinterFactory
- .createInstance(configuration.get(KEY_QUOTE),
configuration.get(KEY_FORCE_QUOTE),
- configuration.get(KEY_ESCAPE),
configuration.get(KEY_DELIMITER))
- .createPrinter().print(b, s, l, ps);
+ stringPrinter.print(b, s, l, ps);
}
return true;
case BINARY:
diff --git
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java
index ae368bdf11..d17b7cbfa7 100644
---
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java
+++
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java
@@ -25,7 +25,6 @@ import static
org.apache.asterix.dataflow.data.nontagged.printers.csv.CSVUtils.K
import java.io.IOException;
import java.io.PrintStream;
-import java.util.concurrent.ConcurrentHashMap;
import org.apache.asterix.dataflow.data.nontagged.printers.PrintTools;
import org.apache.hyracks.algebricks.data.IPrinter;
@@ -34,7 +33,6 @@ import org.apache.hyracks.api.exceptions.HyracksDataException;
public class AStringPrinterFactory implements IPrinterFactory {
private static final long serialVersionUID = 1L;
- private static final ConcurrentHashMap<String, AStringPrinterFactory>
instanceCache = new ConcurrentHashMap<>();
private static final String NONE = "none";
private String quote;
private Boolean forceQuote;
@@ -51,8 +49,7 @@ public class AStringPrinterFactory implements IPrinterFactory
{
public static AStringPrinterFactory createInstance(String quote, String
forceQuoteStr, String escape,
String delimiter) {
boolean forceQuote = forceQuoteStr == null ||
Boolean.parseBoolean(forceQuoteStr);
- String key = CSVUtils.generateKey(quote, forceQuoteStr, escape,
delimiter);
- return instanceCache.computeIfAbsent(key, k -> new
AStringPrinterFactory(quote, forceQuote, escape, delimiter));
+ return new AStringPrinterFactory(quote, forceQuote, escape, delimiter);
}
private final IPrinter PRINTER = (byte[] b, int s, int l, PrintStream ps)
-> {
diff --git
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java
index 22c502bc84..bbe5286445 100644
---
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java
+++
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java
@@ -44,6 +44,7 @@ import org.apache.hyracks.api.exceptions.HyracksDataException;
public class APrintVisitor extends AbstractPrintVisitor {
private final ARecordType itemType;
private final Map<String, String> configuration;
+ private AObjectPrinterFactory objectPrinterFactory;
public APrintVisitor(ARecordType itemType, Map<String, String>
configuration) {
super();
@@ -67,6 +68,9 @@ public class APrintVisitor extends AbstractPrintVisitor {
@Override
protected boolean printFlatValue(ATypeTag typeTag, byte[] b, int s, int l,
PrintStream ps)
throws HyracksDataException {
- return AObjectPrinterFactory.createInstance(itemType,
configuration).printFlatValue(typeTag, b, s, l, ps);
+ if (objectPrinterFactory == null) {
+ objectPrinterFactory =
AObjectPrinterFactory.createInstance(itemType, configuration);
+ }
+ return objectPrinterFactory.printFlatValue(typeTag, b, s, l, ps);
}
}