This is an automated email from the ASF dual-hosted git repository.

htowaileb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 18c6f803d9 [ASTERIXDB-3509]: COPY TO CSV Perf Issue
18c6f803d9 is described below

commit 18c6f803d9057770532f06394bb75e1f59f97fdf
Author: utsavCbase <[email protected]>
AuthorDate: Wed Jan 22 20:46:11 2025 +0530

    [ASTERIXDB-3509]: COPY TO CSV Perf Issue
    
    Details: An instance of the CSV printer factory was being created for
    every value in the records. Changed this to create a single instance per query.
    
    Change-Id: I55b114c81707e57307b52a1828952fa56847b2dc
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19354
    Reviewed-by: Hussain Towaileb <[email protected]>
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Jenkins <[email protected]>
---
 .../printers/csv/ANullPrinterFactory.java          |  6 +-----
 .../printers/csv/AObjectPrinterFactory.java        | 22 +++++++++++-----------
 .../printers/csv/AStringPrinterFactory.java        |  5 +----
 .../om/pointables/printer/csv/APrintVisitor.java   |  6 +++++-
 4 files changed, 18 insertions(+), 21 deletions(-)

diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java
index 3d3ca3e888..101d7c2f07 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/ANullPrinterFactory.java
@@ -19,7 +19,6 @@
 package org.apache.asterix.dataflow.data.nontagged.printers.csv;
 
 import java.io.PrintStream;
-import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.hyracks.algebricks.data.IPrinter;
 import org.apache.hyracks.algebricks.data.IPrinterFactory;
@@ -27,8 +26,6 @@ import org.apache.hyracks.algebricks.data.IPrinterFactory;
 public class ANullPrinterFactory implements IPrinterFactory {
     private static final long serialVersionUID = 1L;
     private static final String DEFAULT_NULL_STRING = "";
-    // Store the information about the instance based on the parameters
-    private static final ConcurrentHashMap<String, ANullPrinterFactory> 
instanceCache = new ConcurrentHashMap<>();
     private String nullString;
 
     private ANullPrinterFactory(String nullString) {
@@ -36,8 +33,7 @@ public class ANullPrinterFactory implements IPrinterFactory {
     }
 
     public static ANullPrinterFactory createInstance(String nullString) {
-        String key = CSVUtils.generateKey(nullString);
-        return instanceCache.computeIfAbsent(key, k -> new 
ANullPrinterFactory(nullString));
+        return new ANullPrinterFactory(nullString);
     }
 
     private final IPrinter PRINTER = (byte[] b, int s, int l, PrintStream ps) 
-> {
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java
index 20b1ef0316..fdde82cd96 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AObjectPrinterFactory.java
@@ -27,7 +27,6 @@ import static 
org.apache.asterix.dataflow.data.nontagged.printers.csv.CSVUtils.K
 
 import java.io.PrintStream;
 import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.asterix.om.pointables.ARecordVisitablePointable;
 import org.apache.asterix.om.pointables.base.DefaultOpenFieldType;
@@ -43,7 +42,8 @@ import org.apache.hyracks.api.exceptions.HyracksDataException;
 
 public class AObjectPrinterFactory implements IPrinterFactory {
     private static final long serialVersionUID = 1L;
-    private static final ConcurrentHashMap<String, AObjectPrinterFactory> 
instanceCache = new ConcurrentHashMap<>();
+    private final IPrinter nullPrinter;
+    private final IPrinter stringPrinter;
     private ARecordType itemType;
     private Map<String, String> configuration;
     private boolean emptyFieldAsNull;
@@ -53,12 +53,15 @@ public class AObjectPrinterFactory implements 
IPrinterFactory {
         this.configuration = configuration;
         String emptyFieldAsNullStr = 
configuration.get(KEY_EMPTY_FIELD_AS_NULL);
         this.emptyFieldAsNull = emptyFieldAsNullStr != null && 
Boolean.parseBoolean(emptyFieldAsNullStr);
+        this.nullPrinter = 
ANullPrinterFactory.createInstance(configuration.get(KEY_NULL)).createPrinter();
+        this.stringPrinter =
+                
AStringPrinterFactory.createInstance(configuration.get(KEY_QUOTE), 
configuration.get(KEY_FORCE_QUOTE),
+                        configuration.get(KEY_ESCAPE), 
configuration.get(KEY_DELIMITER)).createPrinter();
+
     }
 
     public static AObjectPrinterFactory createInstance(ARecordType itemType, 
Map<String, String> configuration) {
-        // generate a unique identifier based on the parameters and hash the 
instance corresponding to it.
-        String key = CSVUtils.generateKey(itemType, configuration);
-        return instanceCache.computeIfAbsent(key, k -> new 
AObjectPrinterFactory(itemType, configuration));
+        return new AObjectPrinterFactory(itemType, configuration);
     }
 
     public boolean printFlatValue(ATypeTag typeTag, byte[] b, int s, int l, 
PrintStream ps)
@@ -78,7 +81,7 @@ public class AObjectPrinterFactory implements IPrinterFactory 
{
                 return true;
             case MISSING:
             case NULL:
-                
ANullPrinterFactory.createInstance(configuration.get(KEY_NULL)).createPrinter().print(b,
 s, l, ps);
+                nullPrinter.print(b, s, l, ps);
                 return true;
             case BOOLEAN:
                 ABooleanPrinterFactory.PRINTER.print(b, s, l, ps);
@@ -130,12 +133,9 @@ public class AObjectPrinterFactory implements 
IPrinterFactory {
                 return true;
             case STRING:
                 if (emptyFieldAsNull && CSVUtils.isEmptyString(b, s, l)) {
-                    
ANullPrinterFactory.createInstance(configuration.get(KEY_NULL)).createPrinter().print(b,
 s, l, ps);
+                    nullPrinter.print(b, s, l, ps);
                 } else {
-                    AStringPrinterFactory
-                            .createInstance(configuration.get(KEY_QUOTE), 
configuration.get(KEY_FORCE_QUOTE),
-                                    configuration.get(KEY_ESCAPE), 
configuration.get(KEY_DELIMITER))
-                            .createPrinter().print(b, s, l, ps);
+                    stringPrinter.print(b, s, l, ps);
                 }
                 return true;
             case BINARY:
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java
index ae368bdf11..d17b7cbfa7 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/csv/AStringPrinterFactory.java
@@ -25,7 +25,6 @@ import static 
org.apache.asterix.dataflow.data.nontagged.printers.csv.CSVUtils.K
 
 import java.io.IOException;
 import java.io.PrintStream;
-import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.asterix.dataflow.data.nontagged.printers.PrintTools;
 import org.apache.hyracks.algebricks.data.IPrinter;
@@ -34,7 +33,6 @@ import org.apache.hyracks.api.exceptions.HyracksDataException;
 
 public class AStringPrinterFactory implements IPrinterFactory {
     private static final long serialVersionUID = 1L;
-    private static final ConcurrentHashMap<String, AStringPrinterFactory> 
instanceCache = new ConcurrentHashMap<>();
     private static final String NONE = "none";
     private String quote;
     private Boolean forceQuote;
@@ -51,8 +49,7 @@ public class AStringPrinterFactory implements IPrinterFactory 
{
     public static AStringPrinterFactory createInstance(String quote, String 
forceQuoteStr, String escape,
             String delimiter) {
         boolean forceQuote = forceQuoteStr == null || 
Boolean.parseBoolean(forceQuoteStr);
-        String key = CSVUtils.generateKey(quote, forceQuoteStr, escape, 
delimiter);
-        return instanceCache.computeIfAbsent(key, k -> new 
AStringPrinterFactory(quote, forceQuote, escape, delimiter));
+        return new AStringPrinterFactory(quote, forceQuote, escape, delimiter);
     }
 
     private final IPrinter PRINTER = (byte[] b, int s, int l, PrintStream ps) 
-> {
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java
index 22c502bc84..bbe5286445 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/pointables/printer/csv/APrintVisitor.java
@@ -44,6 +44,7 @@ import org.apache.hyracks.api.exceptions.HyracksDataException;
 public class APrintVisitor extends AbstractPrintVisitor {
     private final ARecordType itemType;
     private final Map<String, String> configuration;
+    private AObjectPrinterFactory objectPrinterFactory;
 
     public APrintVisitor(ARecordType itemType, Map<String, String> 
configuration) {
         super();
@@ -67,6 +68,9 @@ public class APrintVisitor extends AbstractPrintVisitor {
     @Override
     protected boolean printFlatValue(ATypeTag typeTag, byte[] b, int s, int l, 
PrintStream ps)
             throws HyracksDataException {
-        return AObjectPrinterFactory.createInstance(itemType, 
configuration).printFlatValue(typeTag, b, s, l, ps);
+        if (objectPrinterFactory == null) {
+            objectPrinterFactory = 
AObjectPrinterFactory.createInstance(itemType, configuration);
+        }
+        return objectPrinterFactory.printFlatValue(typeTag, b, s, l, ps);
     }
 }

Reply via email to