This is an automated email from the ASF dual-hosted git repository.

wangchao316 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 93444b53bc [IOTDB-4244] Optimize csv tool, add Options '-typeInfer' , 
'-linesPer… (#7145)
93444b53bc is described below

commit 93444b53bc939548254a757b0127b97139c6b4c1
Author: cmlmakahts <[email protected]>
AuthorDate: Wed Aug 31 09:27:41 2022 +0800

    [IOTDB-4244] Optimize csv tool, add Options '-typeInfer' , '-linesPer… 
(#7145)
    
    [IOTDB-4244] Optimize csv tool, add Options '-typeInfer' , '-linesPer… 
(#7145)
---
 .../org/apache/iotdb/tool/AbstractCsvTool.java     |  72 ++++++--
 .../main/java/org/apache/iotdb/tool/ExportCsv.java | 124 +++++++------
 .../main/java/org/apache/iotdb/tool/ImportCsv.java | 191 +++++++++++++++++----
 .../tests/tools/importCsv/ExportCsvTestIT.java     |   6 +-
 .../tests/tools/importCsv/ImportCsvTestIT.java     |   2 +-
 docs/UserGuide/Write-And-Delete-Data/CSV-Tool.md   |  41 ++++-
 .../zh/UserGuide/Write-And-Delete-Data/CSV-Tool.md |  34 +++-
 7 files changed, 356 insertions(+), 114 deletions(-)

diff --git a/cli/src/main/java/org/apache/iotdb/tool/AbstractCsvTool.java 
b/cli/src/main/java/org/apache/iotdb/tool/AbstractCsvTool.java
index 9ef2d6bea6..0d3ef345ef 100644
--- a/cli/src/main/java/org/apache/iotdb/tool/AbstractCsvTool.java
+++ b/cli/src/main/java/org/apache/iotdb/tool/AbstractCsvTool.java
@@ -214,26 +214,74 @@ public abstract class AbstractCsvTool {
   public static Boolean writeCsvFile(
       List<String> headerNames, List<List<Object>> records, String filePath) {
     try {
-      CSVPrinter printer =
-          CSVFormat.Builder.create(CSVFormat.DEFAULT)
-              .setHeader()
-              .setSkipHeaderRecord(true)
-              .setEscape('\\')
-              .setQuoteMode(QuoteMode.NONE)
-              .build()
-              .print(new PrintWriter(filePath));
+      final CSVPrinterWrapper csvPrinterWrapper = new 
CSVPrinterWrapper(filePath);
       if (headerNames != null) {
-        printer.printRecord(headerNames);
+        csvPrinterWrapper.printRecord(headerNames);
       }
       for (List record : records) {
-        printer.printRecord(record);
+        csvPrinterWrapper.printRecord(record);
       }
-      printer.flush();
-      printer.close();
+      csvPrinterWrapper.flush();
+      csvPrinterWrapper.close();
       return true;
     } catch (IOException e) {
       e.printStackTrace();
       return false;
     }
   }
+
+  static class CSVPrinterWrapper {
+    private final String filePath;
+    private final CSVFormat csvFormat;
+    private CSVPrinter csvPrinter;
+
+    public CSVPrinterWrapper(String filePath) {
+      this.filePath = filePath;
+      this.csvFormat =
+          CSVFormat.Builder.create(CSVFormat.DEFAULT)
+              .setHeader()
+              .setSkipHeaderRecord(true)
+              .setEscape('\\')
+              .setQuoteMode(QuoteMode.NONE)
+              .build();
+    }
+
+    public void printRecord(final Iterable<?> values) throws IOException {
+      if (csvPrinter == null) {
+        csvPrinter = csvFormat.print(new PrintWriter(filePath));
+      }
+      csvPrinter.printRecord(values);
+    }
+
+    public void print(Object value) {
+      if (csvPrinter == null) {
+        try {
+          csvPrinter = csvFormat.print(new PrintWriter(filePath));
+        } catch (IOException e) {
+          e.printStackTrace();
+        }
+      }
+      try {
+        csvPrinter.print(value);
+      } catch (IOException e) {
+        e.printStackTrace();
+      }
+    }
+
+    public void println() throws IOException {
+      csvPrinter.println();
+    }
+
+    public void close() throws IOException {
+      if (csvPrinter != null) {
+        csvPrinter.close();
+      }
+    }
+
+    public void flush() throws IOException {
+      if (csvPrinter != null) {
+        csvPrinter.flush();
+      }
+    }
+  }
 }
diff --git a/cli/src/main/java/org/apache/iotdb/tool/ExportCsv.java 
b/cli/src/main/java/org/apache/iotdb/tool/ExportCsv.java
index fc8b6e2107..3a18261cb1 100644
--- a/cli/src/main/java/org/apache/iotdb/tool/ExportCsv.java
+++ b/cli/src/main/java/org/apache/iotdb/tool/ExportCsv.java
@@ -36,9 +36,6 @@ import org.apache.commons.cli.HelpFormatter;
 import org.apache.commons.cli.Option;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
-import org.apache.commons.csv.CSVFormat;
-import org.apache.commons.csv.CSVPrinter;
-import org.apache.commons.csv.QuoteMode;
 import org.apache.thrift.TException;
 import org.jline.reader.LineReader;
 
@@ -46,7 +43,6 @@ import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileReader;
 import java.io.IOException;
-import java.io.PrintWriter;
 import java.time.Instant;
 import java.time.ZonedDateTime;
 import java.time.format.DateTimeFormatter;
@@ -75,6 +71,9 @@ public class ExportCsv extends AbstractCsvTool {
   private static final String QUERY_COMMAND_ARGS = "q";
   private static final String QUERY_COMMAND_NAME = "queryCommand";
 
+  private static final String LINES_PER_FILE_ARGS = "linesPerFile";
+  private static final String LINES_PER_FILE_ARGS_NAME = "Lines Per File";
+
   private static final String TSFILEDB_CLI_PREFIX = "ExportCsv";
 
   private static final String DUMP_FILE_NAME_DEFAULT = "dump";
@@ -88,6 +87,8 @@ public class ExportCsv extends AbstractCsvTool {
 
   private static String timestampPrecision;
 
+  private static int linesPerFile = 10000;
+
   private static final int EXPORT_PER_LINE_COUNT = 10000;
 
   /** main function of export csv tool. */
@@ -194,6 +195,9 @@ public class ExportCsv extends AbstractCsvTool {
     if (!targetDirectory.endsWith("/") && !targetDirectory.endsWith("\\")) {
       targetDirectory += File.separator;
     }
+    if (commandLine.getOptionValue(LINES_PER_FILE_ARGS) != null) {
+      linesPerFile = 
Integer.parseInt(commandLine.getOptionValue(LINES_PER_FILE_ARGS));
+    }
   }
 
   /**
@@ -267,6 +271,14 @@ public class ExportCsv extends AbstractCsvTool {
             .build();
     options.addOption(opQuery);
 
+    Option opLinesPerFile =
+        Option.builder(LINES_PER_FILE_ARGS)
+            .argName(LINES_PER_FILE_ARGS_NAME)
+            .hasArg()
+            .desc("Lines per dump file.")
+            .build();
+    options.addOption(opLinesPerFile);
+
     Option opHelp =
         Option.builder(HELP_ARGS)
             .longOpt(HELP_ARGS)
@@ -302,10 +314,24 @@ public class ExportCsv extends AbstractCsvTool {
    * @param index used to create dump file name
    */
   private static void dumpResult(String sql, int index) {
-    final String path = targetDirectory + targetFile + index + ".csv";
+    final String path = targetDirectory + targetFile + index;
     try {
       SessionDataSet sessionDataSet = session.executeQueryStatement(sql);
-      writeCsvFile(sessionDataSet, path);
+      List<Object> headers = new ArrayList<>();
+      List<String> names = sessionDataSet.getColumnNames();
+      List<String> types = sessionDataSet.getColumnTypes();
+      if (needDataTypePrinted) {
+        for (int i = 0; i < names.size(); i++) {
+          if (!"Time".equals(names.get(i)) && !"Device".equals(names.get(i))) {
+            headers.add(String.format("%s(%s)", names.get(i), types.get(i)));
+          } else {
+            headers.add(names.get(i));
+          }
+        }
+      } else {
+        headers.addAll(names);
+      }
+      writeCsvFile(sessionDataSet, path, headers, linesPerFile);
       sessionDataSet.closeOperationHandle();
       System.out.println("Export completely!");
     } catch (StatementExecutionException | IoTDBConnectionException | 
IOException e) {
@@ -328,60 +354,46 @@ public class ExportCsv extends AbstractCsvTool {
     }
   }
 
-  public static Boolean writeCsvFile(SessionDataSet sessionDataSet, String 
filePath)
+  public static void writeCsvFile(
+      SessionDataSet sessionDataSet, String filePath, List<Object> headers, 
int linesPerFile)
       throws IOException, IoTDBConnectionException, 
StatementExecutionException {
-    CSVPrinter printer =
-        CSVFormat.Builder.create(CSVFormat.DEFAULT)
-            .setHeader()
-            .setSkipHeaderRecord(true)
-            .setEscape('\\')
-            .setQuoteMode(QuoteMode.NONE)
-            .build()
-            .print(new PrintWriter(filePath));
-
-    List<Object> headers = new ArrayList<>();
-    List<String> names = sessionDataSet.getColumnNames();
-    List<String> types = sessionDataSet.getColumnTypes();
-
-    if (needDataTypePrinted) {
-      for (int i = 0; i < names.size(); i++) {
-        if (!"Time".equals(names.get(i)) && !"Device".equals(names.get(i))) {
-          headers.add(String.format("%s(%s)", names.get(i), types.get(i)));
+    int fileIndex = 0;
+    boolean hasNext = true;
+    while (hasNext) {
+      int i = 0;
+      final String finalFilePath = filePath + "_" + fileIndex + ".csv";
+      final CSVPrinterWrapper csvPrinterWrapper = new 
CSVPrinterWrapper(finalFilePath);
+      csvPrinterWrapper.printRecord(headers);
+      while (i++ < linesPerFile) {
+        if (sessionDataSet.hasNext()) {
+          RowRecord rowRecord = sessionDataSet.next();
+          if (rowRecord.getTimestamp() != 0) {
+            csvPrinterWrapper.print(timeTrans(rowRecord.getTimestamp()));
+          }
+          rowRecord
+              .getFields()
+              .forEach(
+                  field -> {
+                    String fieldStringValue = field.getStringValue();
+                    if (!"null".equals(field.getStringValue())) {
+                      if (field.getDataType() == TSDataType.TEXT
+                          && !fieldStringValue.startsWith("root.")) {
+                        fieldStringValue = "\"" + fieldStringValue + "\"";
+                      }
+                      csvPrinterWrapper.print(fieldStringValue);
+                    } else {
+                      csvPrinterWrapper.print("");
+                    }
+                  });
+          csvPrinterWrapper.println();
         } else {
-          headers.add(names.get(i));
+          hasNext = false;
+          break;
         }
       }
-    } else {
-      headers.addAll(names);
+      fileIndex++;
+      csvPrinterWrapper.flush();
+      csvPrinterWrapper.close();
     }
-    printer.printRecord(headers);
-
-    while (sessionDataSet.hasNext()) {
-      RowRecord rowRecord = sessionDataSet.next();
-      ArrayList<String> record = new ArrayList<>();
-      if (rowRecord.getTimestamp() != 0) {
-        record.add(timeTrans(rowRecord.getTimestamp()));
-      }
-      rowRecord
-          .getFields()
-          .forEach(
-              field -> {
-                String fieldStringValue = field.getStringValue();
-                if (!"null".equals(field.getStringValue())) {
-                  if (field.getDataType() == TSDataType.TEXT
-                      && !fieldStringValue.startsWith("root.")) {
-                    fieldStringValue = "\"" + fieldStringValue + "\"";
-                  }
-                  record.add(fieldStringValue);
-                } else {
-                  record.add("");
-                }
-              });
-      printer.printRecord(record);
-    }
-
-    printer.flush();
-    printer.close();
-    return true;
   }
 }
diff --git a/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java 
b/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java
index 89dfa4f176..b7535a5838 100644
--- a/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java
+++ b/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java
@@ -19,12 +19,14 @@
 
 package org.apache.iotdb.tool;
 
+import org.apache.iotdb.db.qp.constant.SQLConstant;
 import org.apache.iotdb.db.qp.utils.DatetimeUtils;
 import org.apache.iotdb.exception.ArgsErrorException;
 import org.apache.iotdb.rpc.IoTDBConnectionException;
 import org.apache.iotdb.rpc.StatementExecutionException;
 import org.apache.iotdb.session.Session;
 import org.apache.iotdb.session.SessionDataSet;
+import org.apache.iotdb.tsfile.common.constant.TsFileConstant;
 import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
 
 import org.apache.commons.cli.CommandLine;
@@ -43,12 +45,12 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
-import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -85,10 +87,17 @@ public class ImportCsv extends AbstractCsvTool {
   private static final String TIMESTAMP_PRECISION_ARGS = "tp";
   private static final String TIMESTAMP_PRECISION_NAME = "timestamp precision 
(ms/us/ns)";
 
+  private static final String TYPE_INFER_ARGS = "typeInfer";
+  private static final String TYPE_INFER_ARGS_NAME = "type infer";
+
+  private static final String LINES_PER_FAILED_FILE_ARGS = 
"linesPerFailedFile";
+  private static final String LINES_PER_FAILED_FILE_ARGS_NAME = "Lines Per 
FailedFile";
+
   private static final String TSFILEDB_CLI_PREFIX = "ImportCsv";
 
   private static String targetPath;
   private static String failedFileDirectory = null;
+  private static int linesPerFailedFile = 10000;
   private static Boolean aligned = false;
 
   private static String timeColumn = "Time";
@@ -98,6 +107,28 @@ public class ImportCsv extends AbstractCsvTool {
 
   private static String timestampPrecision = "ms";
 
+  private static final Map<String, TSDataType> TYPE_INFER_KEY_DICT = new 
HashMap<>();
+
+  static {
+    TYPE_INFER_KEY_DICT.put("boolean", TSDataType.BOOLEAN);
+    TYPE_INFER_KEY_DICT.put("int", TSDataType.FLOAT);
+    TYPE_INFER_KEY_DICT.put("long", TSDataType.DOUBLE);
+    TYPE_INFER_KEY_DICT.put("float", TSDataType.FLOAT);
+    TYPE_INFER_KEY_DICT.put("double", TSDataType.DOUBLE);
+    TYPE_INFER_KEY_DICT.put("NaN", TSDataType.DOUBLE);
+  }
+
+  private static final Map<String, TSDataType> TYPE_INFER_VALUE_DICT = new 
HashMap<>();
+
+  static {
+    TYPE_INFER_VALUE_DICT.put("boolean", TSDataType.BOOLEAN);
+    TYPE_INFER_VALUE_DICT.put("int", TSDataType.INT32);
+    TYPE_INFER_VALUE_DICT.put("long", TSDataType.INT64);
+    TYPE_INFER_VALUE_DICT.put("float", TSDataType.FLOAT);
+    TYPE_INFER_VALUE_DICT.put("double", TSDataType.DOUBLE);
+    TYPE_INFER_VALUE_DICT.put("text", TSDataType.TEXT);
+  }
+
   /**
    * create the commandline options.
    *
@@ -160,12 +191,31 @@ public class ImportCsv extends AbstractCsvTool {
 
     Option opTimestampPrecision =
         Option.builder(TIMESTAMP_PRECISION_ARGS)
-            .argName(TIMESTAMP_PRECISION_ARGS)
+            .argName(TIMESTAMP_PRECISION_NAME)
             .hasArg()
             .desc("Timestamp precision (ms/us/ns)")
             .build();
+
     options.addOption(opTimestampPrecision);
 
+    Option opTypeInfer =
+        Option.builder(TYPE_INFER_ARGS)
+            .argName(TYPE_INFER_ARGS_NAME)
+            .numberOfArgs(5)
+            .hasArgs()
+            .valueSeparator(',')
+            .desc("Define type info by option:\"boolean=text,int=long, ...")
+            .build();
+    options.addOption(opTypeInfer);
+
+    Option opFailedLinesPerFile =
+        Option.builder(LINES_PER_FAILED_FILE_ARGS)
+            .argName(LINES_PER_FAILED_FILE_ARGS_NAME)
+            .hasArgs()
+            .desc("Lines per failedfile")
+            .build();
+    options.addOption(opFailedLinesPerFile);
+
     return options;
   }
 
@@ -174,7 +224,7 @@ public class ImportCsv extends AbstractCsvTool {
    *
    * @param commandLine
    */
-  private static void parseSpecialParams(CommandLine commandLine) {
+  private static void parseSpecialParams(CommandLine commandLine) throws 
ArgsErrorException {
     timeZoneID = commandLine.getOptionValue(TIME_ZONE_ARGS);
     targetPath = commandLine.getOptionValue(FILE_ARGS);
     if (commandLine.getOptionValue(BATCH_POINT_SIZE_ARGS) != null) {
@@ -195,6 +245,42 @@ public class ImportCsv extends AbstractCsvTool {
     if (commandLine.getOptionValue(TIMESTAMP_PRECISION_ARGS) != null) {
       timestampPrecision = 
commandLine.getOptionValue(TIMESTAMP_PRECISION_ARGS);
     }
+    final String[] opTypeInferValues = 
commandLine.getOptionValues(TYPE_INFER_ARGS);
+    if (opTypeInferValues != null && opTypeInferValues.length > 0) {
+      for (String opTypeInferValue : opTypeInferValues) {
+        if (opTypeInferValue.contains("=")) {
+          final String[] typeInfoExpressionArr = opTypeInferValue.split("=");
+          final String key = typeInfoExpressionArr[0];
+          final String value = typeInfoExpressionArr[1];
+          applyTypeInferArgs(key, value);
+        }
+      }
+    }
+    if (commandLine.getOptionValue(LINES_PER_FAILED_FILE_ARGS) != null) {
+      linesPerFailedFile = 
Integer.parseInt(commandLine.getOptionValue(LINES_PER_FAILED_FILE_ARGS));
+    }
+  }
+
+  private static void applyTypeInferArgs(String key, String value) throws 
ArgsErrorException {
+    if (!TYPE_INFER_KEY_DICT.containsKey(key)) {
+      throw new ArgsErrorException("Unknown type infer key: " + key);
+    }
+    if (!TYPE_INFER_VALUE_DICT.containsKey(value)) {
+      throw new ArgsErrorException("Unknown type infer value: " + value);
+    }
+    if (key.equals("NaN")
+        && !(value.equals("float") || value.equals("double") || 
value.equals("text"))) {
+      throw new ArgsErrorException("NaN can not convert to " + value);
+    }
+    if (key.equals("boolean") && !(value.equals("boolean") || 
value.equals("text"))) {
+      throw new ArgsErrorException("Boolean can not convert to " + value);
+    }
+    final TSDataType srcType = TYPE_INFER_VALUE_DICT.get(key);
+    final TSDataType dstType = TYPE_INFER_VALUE_DICT.get(value);
+    if (dstType.getType() < srcType.getType()) {
+      throw new ArgsErrorException(key + " can not convert to " + value);
+    }
+    TYPE_INFER_KEY_DICT.put(key, TYPE_INFER_VALUE_DICT.get(value));
   }
 
   public static void main(String[] args) throws IoTDBConnectionException {
@@ -435,7 +521,7 @@ public class ImportCsv extends AbstractCsvTool {
     }
 
     if (!failedRecords.isEmpty()) {
-      writeCsvFile(headerNames, failedRecords, failedFilePath);
+      writeFailedLinesFile(headerNames, failedFilePath, failedRecords);
     }
     if (hasStarted.get()) {
       System.out.println("Import completely!");
@@ -457,7 +543,6 @@ public class ImportCsv extends AbstractCsvTool {
     HashMap<String, String> headerNameMap = new HashMap<>();
     parseHeaders(headerNames, null, headerTypeMap, headerNameMap);
 
-    AtomicReference<SimpleDateFormat> timeFormatter = new 
AtomicReference<>(null);
     AtomicReference<String> deviceName = new AtomicReference<>(null);
 
     HashSet<String> typeQueriedDevice = new HashSet<>();
@@ -477,7 +562,6 @@ public class ImportCsv extends AbstractCsvTool {
           // only run in first record
           if (deviceName.get() == null) {
             deviceName.set(record.get(1));
-            // timeFormatter.set(formatterInit(record.get(0)));
           } else if (!Objects.equals(deviceName.get(), record.get(1))) {
             // if device changed
             writeAndEmptyDataSet(
@@ -549,11 +633,7 @@ public class ImportCsv extends AbstractCsvTool {
             failedRecords.add(record.stream().collect(Collectors.toList()));
           }
           if (!measurements.isEmpty()) {
-            if (timeFormatter.get() == null) {
-              times.add(Long.valueOf(record.get(timeColumn)));
-            } else {
-              times.add(parseTimestamp(record.get(timeColumn)));
-            }
+            times.add(parseTimestamp(record.get(timeColumn)));
             typesList.add(types);
             valuesList.add(values);
             measurementsList.add(measurements);
@@ -564,11 +644,28 @@ public class ImportCsv extends AbstractCsvTool {
       pointSize.set(0);
     }
     if (!failedRecords.isEmpty()) {
-      writeCsvFile(headerNames, failedRecords, failedFilePath);
+      writeFailedLinesFile(headerNames, failedFilePath, failedRecords);
     }
     System.out.println("Import completely!");
   }
 
+  private static void writeFailedLinesFile(
+      List<String> headerNames, String failedFilePath, ArrayList<List<Object>> 
failedRecords) {
+    int fileIndex = 0;
+    int from = 0;
+    int failedRecordsSize = failedRecords.size();
+    int restFailedRecords = failedRecordsSize;
+    while (from < failedRecordsSize) {
+      int step = Math.min(restFailedRecords, linesPerFailedFile);
+      writeCsvFile(
+          headerNames,
+          failedRecords.subList(from, from + step),
+          failedFilePath + "_" + fileIndex++);
+      from += step;
+      restFailedRecords -= step;
+    }
+  }
+
   private static void writeAndEmptyDataSet(
       String device,
       List<Long> times,
@@ -767,33 +864,55 @@ public class ImportCsv extends AbstractCsvTool {
    * if data type of timeseries is not defined in headers of schema, this 
method will be called to
    * do type inference
    *
-   * @param value
+   * @param strValue
    * @return
    */
-  private static TSDataType typeInfer(String value) {
-    if (value.contains("\"")) {
+  private static TSDataType typeInfer(String strValue) {
+    if (strValue.contains("\"")) {
       return TEXT;
-    } else if (value.equals("true") || value.equals("false")) {
-      return BOOLEAN;
-    } else if (value.equals("NaN")) {
-      return DOUBLE;
-    } else if (!value.contains(".")) {
-      try {
-        Integer.valueOf(value);
-        return INT32;
-      } catch (Exception e) {
-        try {
-          Long.valueOf(value);
-          return INT64;
-        } catch (Exception exception) {
-          return null;
+    }
+    if (isBoolean(strValue)) {
+      return TYPE_INFER_KEY_DICT.get("boolean");
+    } else if (isNumber(strValue)) {
+      if (!strValue.contains(TsFileConstant.PATH_SEPARATOR)) {
+        if (isConvertFloatPrecisionLack(StringUtils.trim(strValue))) {
+          return TYPE_INFER_KEY_DICT.get("long");
         }
+        return TYPE_INFER_KEY_DICT.get("int");
+      } else {
+        return TYPE_INFER_KEY_DICT.get("float");
       }
+    } else if ("null".equals(strValue) || "NULL".equals(strValue)) {
+      return null;
+      // "NaN" is returned if the NaN Literal is given in Parser
+    } else if ("NaN".equals(strValue)) {
+      return TYPE_INFER_KEY_DICT.get("NaN");
     } else {
-      return DOUBLE;
+      return TSDataType.TEXT;
     }
   }
 
+  static boolean isNumber(String s) {
+    if (s == null || s.equals("NaN")) {
+      return false;
+    }
+    try {
+      Double.parseDouble(s);
+    } catch (NumberFormatException e) {
+      return false;
+    }
+    return true;
+  }
+
+  private static boolean isBoolean(String s) {
+    return s.equalsIgnoreCase(SQLConstant.BOOLEAN_TRUE)
+        || s.equalsIgnoreCase(SQLConstant.BOOLEAN_FALSE);
+  }
+
+  private static boolean isConvertFloatPrecisionLack(String s) {
+    return Long.parseLong(s) > (2 << 24);
+  }
+
   /**
    * @param value
    * @param type
@@ -808,18 +927,18 @@ public class ImportCsv extends AbstractCsvTool {
           }
           return value;
         case BOOLEAN:
-          if (!"true".equals(value) && !"false".equals(value)) {
+          if (!"true".equalsIgnoreCase(value) && 
!"false".equalsIgnoreCase(value)) {
             return null;
           }
-          return Boolean.valueOf(value);
+          return Boolean.parseBoolean(value);
         case INT32:
-          return Integer.valueOf(value);
+          return Integer.parseInt(value);
         case INT64:
-          return Long.valueOf(value);
+          return Long.parseLong(value);
         case FLOAT:
-          return Float.valueOf(value);
+          return Float.parseFloat(value);
         case DOUBLE:
-          return Double.valueOf(value);
+          return Double.parseDouble(value);
         default:
           return null;
       }
diff --git 
a/cross-tests/src/test/java/org/apache/iotdb/cross/tests/tools/importCsv/ExportCsvTestIT.java
 
b/cross-tests/src/test/java/org/apache/iotdb/cross/tests/tools/importCsv/ExportCsvTestIT.java
index 57c7d84f31..a492dd5505 100644
--- 
a/cross-tests/src/test/java/org/apache/iotdb/cross/tests/tools/importCsv/ExportCsvTestIT.java
+++ 
b/cross-tests/src/test/java/org/apache/iotdb/cross/tests/tools/importCsv/ExportCsvTestIT.java
@@ -70,7 +70,7 @@ public class ExportCsvTestIT extends AbstractScript {
     String[] params = {"-td", "target/", "-q", "select c1,c2,c3 from 
root.test.t1"};
     prepareData();
     testMethod(params, null);
-    CSVParser parser = readCsvFile("target/dump0.csv");
+    CSVParser parser = readCsvFile("target/dump0_0.csv");
     String[] realRecords = {
       "root.test.t1.c1,root.test.t1.c2,root.test.t1.c3", 
"1.0,\"\"abc\",aa\",\"abbe's\""
     };
@@ -90,7 +90,7 @@ public class ExportCsvTestIT extends AbstractScript {
     };
     prepareData();
     testMethod(params, null);
-    CSVParser parser = readCsvFile("target/dump0.csv");
+    CSVParser parser = readCsvFile("target/dump0_0.csv");
     String[] realRecords = {
       "root.test.t1.c1(FLOAT),root.test.t1.c2(TEXT),root.test.t1.c3(TEXT)",
       "1.0,\"\"abc\",aa\",\"abbe's\""
@@ -111,7 +111,7 @@ public class ExportCsvTestIT extends AbstractScript {
     };
     prepareData();
     testMethod(params, null);
-    CSVParser parser = readCsvFile("target/dump0.csv");
+    CSVParser parser = readCsvFile("target/dump0_0.csv");
     String[] realRecords = {
       "count(root.test.t1.c1),count(root.test.t1.c2),count(root.test.t1.c3)", 
"1,1,1"
     };
diff --git 
a/cross-tests/src/test/java/org/apache/iotdb/cross/tests/tools/importCsv/ImportCsvTestIT.java
 
b/cross-tests/src/test/java/org/apache/iotdb/cross/tests/tools/importCsv/ImportCsvTestIT.java
index c419909bb0..13fb2e0315 100644
--- 
a/cross-tests/src/test/java/org/apache/iotdb/cross/tests/tools/importCsv/ImportCsvTestIT.java
+++ 
b/cross-tests/src/test/java/org/apache/iotdb/cross/tests/tools/importCsv/ImportCsvTestIT.java
@@ -309,7 +309,7 @@ public class ImportCsvTestIT extends AbstractScript {
       file.delete();
     }
     // check the failed file
-    List<CSVRecord> records = readCsvFile(CSV_FILE + ".failed").getRecords();
+    List<CSVRecord> records = readCsvFile(CSV_FILE + ".failed_0").getRecords();
     String[] realRecords = {
       
"Time,root.fit.d1.s1(INT32),root.fit.d1.s2(TEXT),root.fit.d2.s1(INT32),root.fit.d2.s3(INT32),root.fit.p.s1(INT32)",
       "1,100,\"hello\",200,\"300\",400"
diff --git a/docs/UserGuide/Write-And-Delete-Data/CSV-Tool.md 
b/docs/UserGuide/Write-And-Delete-Data/CSV-Tool.md
index 1f4f9c6cc1..6f933e6c5a 100644
--- a/docs/UserGuide/Write-And-Delete-Data/CSV-Tool.md
+++ b/docs/UserGuide/Write-And-Delete-Data/CSV-Tool.md
@@ -29,10 +29,10 @@ The CSV tool can help you import data in CSV format to 
IoTDB or export data from
 
 ```shell
 # Unix/OS X
-> tools/export-csv.sh  -h <ip> -p <port> -u <username> -pw <password> -td 
<directory> [-tf <time-format> -datatype <true/false> -q <query command> -s 
<sql file>]
+> tools/export-csv.sh  -h <ip> -p <port> -u <username> -pw <password> -td 
<directory> [-tf <time-format> -datatype <true/false> -q <query command> -s 
<sql file> -linesPerFile <int>]
 
 # Windows
-> tools\export-csv.bat -h <ip> -p <port> -u <username> -pw <password> -td 
<directory> [-tf <time-format> -datatype <true/false> -q <query command> -s 
<sql file>]
+> tools\export-csv.bat -h <ip> -p <port> -u <username> -pw <password> -td 
<directory> [-tf <time-format> -datatype <true/false> -q <query command> -s 
<sql file> -linesPerFile <int>]
 ```
 
 Description:
@@ -50,6 +50,10 @@ Description:
 * `-tf <time-format>`:
   - specifying a time format that you want. The time format have to obey [ISO 
8601](https://calendars.wikia.org/wiki/ISO_8601) standard. If you want to save 
the time as the timestamp, then setting `-tf timestamp`
   - example: `-tf yyyy-MM-dd\ HH:mm:ss` or `-tf timestamp`
+* `-linesPerFile <int>`:
+  - Specifying lines of each dump file, `10000` is default.
+  - example: `-linesPerFile 1`
+
 
 More, if you don't use one of `-s` and `-q`, you need to enter some queries 
after running the export script. The results of the different query will be 
saved to different CSV files.
 
@@ -66,6 +70,8 @@ More, if you don't use one of `-s` and `-q`, you need to 
enter some queries afte
 > tools/export-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -s sql.txt
 # Or
 > tools/export-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -tf 
 > yyyy-MM-dd\ HH:mm:ss -s sql.txt
+# Or
+> tools/export-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -tf 
yyyy-MM-dd\ HH:mm:ss -s sql.txt -linesPerFile 10
 
 # Windows
 > tools/export-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -td ./
@@ -77,6 +83,8 @@ More, if you don't use one of `-s` and `-q`, you need to 
enter some queries afte
 > tools/export-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -s sql.txt
 # Or
 > tools/export-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -tf 
 > yyyy-MM-dd\ HH:mm:ss -s sql.txt
+# Or
+> tools/export-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -tf 
yyyy-MM-dd\ HH:mm:ss -s sql.txt -linesPerFile 10
 ```
 
 ### Sample SQL file
@@ -174,9 +182,9 @@ Time,Device,str(TEXT),int(INT32)
 
 ```shell
 # Unix/OS X
-> tools/import-csv.sh -h <ip> -p <port> -u <username> -pw <password> -f 
<xxx.csv> [-fd <./failedDirectory>] [-aligned <true>] [-tp <ms/ns/us>]
+> tools/import-csv.sh -h <ip> -p <port> -u <username> -pw <password> -f 
<xxx.csv> [-fd <./failedDirectory>] [-aligned <true>] [-tp <ms/ns/us>] 
[-typeInfer <boolean=text,float=double...>]
 # Windows
-> tools\import-csv.bat -h <ip> -p <port> -u <username> -pw <password> -f 
<xxx.csv> [-fd <./failedDirectory>] [-aligned <true>] [-tp <ms/ns/us>]
+> tools\import-csv.bat -h <ip> -p <port> -u <username> -pw <password> -f 
<xxx.csv> [-fd <./failedDirectory>] [-aligned <true>] [-tp <ms/ns/us>] 
[-typeInfer <boolean=text,float=double...>]
 ```
 
 Description:
@@ -200,6 +208,19 @@ Description:
 * `-tp <time-precision>`:
   - specifying a time precision. Options includes `ms`(millisecond), 
`ns`(nanosecond), and `us`(microsecond), `ms` is default.
 
+* `-typeInfer 
<srcTsDataType1=dstTsDataType1,srcTsDataType2=dstTsDataType2,...>`:
+  - Specifying rules of type inference.
+  - Option `srcTsDataType` includes 
`boolean`,`int`,`long`,`float`,`double`,`NaN`.
+  - Option `dstTsDataType` includes 
`boolean`,`int`,`long`,`float`,`double`,`text`.
+  - When `srcTsDataType` is `boolean`, `dstTsDataType` should be either 
`boolean` or `text`.
+  - When `srcTsDataType` is `NaN`, `dstTsDataType` should be among `float`, 
`double` and `text`.
+  - When `srcTsDataType` is a numeric type, the precision of `dstTsDataType` 
should be greater than that of `srcTsDataType`.
+  - example: `-typeInfer boolean=text,float=double`
+  
+* `-linesPerFailedFile <int>`:
+  - Specifying lines of each failed file, `10000` is default.
+  - example: `-linesPerFailedFile 1`
+
 ### Example
 
 ```sh
@@ -207,12 +228,24 @@ Description:
 > tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f 
 > example-filename.csv
 # or
 > tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f 
 > example-filename.csv -fd ./failed
+# or
> tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f 
example-filename.csv -fd ./failed -tp ns
+# or
> tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f 
example-filename.csv -fd ./failed -tp ns -typeInfer boolean=text,float=double
+# or
> tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f 
example-filename.csv -fd ./failed -tp ns -typeInfer boolean=text,float=double 
-linesPerFailedFile 10
+
 # Windows
 > tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f 
 > example-filename.csv
 # or
 > tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f 
 > example-filename.csv -fd .\failed
 # or
 > tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f 
 > example-filename.csv -fd .\failed -tp ns
+# or
+> tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f 
example-filename.csv -fd .\failed -tp ns -typeInfer boolean=text,float=double
+# or
+> tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f 
example-filename.csv -fd .\failed -tp ns -typeInfer boolean=text,float=double 
-linesPerFailedFile 10
+
 ```
 
 ### Note
diff --git a/docs/zh/UserGuide/Write-And-Delete-Data/CSV-Tool.md 
b/docs/zh/UserGuide/Write-And-Delete-Data/CSV-Tool.md
index 10028269f2..a023c3896a 100644
--- a/docs/zh/UserGuide/Write-And-Delete-Data/CSV-Tool.md
+++ b/docs/zh/UserGuide/Write-And-Delete-Data/CSV-Tool.md
@@ -50,6 +50,9 @@ CSV 工具可帮您将 CSV 格式的数据导入到 IoTDB 或者将数据从 IoT
 * `-tf <time-format>`:
   - 指定一个你想要得到的时间格式。时间格式必须遵守[ISO 
8601](https://calendars.wikia.org/wiki/ISO_8601)标准。如果说你想要以时间戳来保存时间,那就设置为`-tf 
timestamp`。
   - 例如: `-tf yyyy-MM-dd\ HH:mm:ss` or `-tf timestamp`
+* `-linesPerFile <int>`:
+  - 指定导出的dump文件最大行数,默认值为`10000`。
+  - 例如: `-linesPerFile 1`
 
 除此之外,如果你没有使用`-s`和`-q`参数,在导出脚本被启动之后你需要按照程序提示输入查询语句,不同的查询结果会被保存到不同的CSV文件中。
 
@@ -66,6 +69,8 @@ CSV 工具可帮您将 CSV 格式的数据导入到 IoTDB 或者将数据从 IoT
 > tools/export-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -s sql.txt
 # Or
 > tools/export-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -tf 
 > yyyy-MM-dd\ HH:mm:ss -s sql.txt
+# Or
+> tools/export-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -tf 
yyyy-MM-dd\ HH:mm:ss -s sql.txt -linesPerFile 10
 
 # Windows
 > tools/export-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -td ./
@@ -77,6 +82,8 @@ CSV 工具可帮您将 CSV 格式的数据导入到 IoTDB 或者将数据从 IoT
 > tools/export-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -s sql.txt
 # Or
 > tools/export-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -tf 
 > yyyy-MM-dd\ HH:mm:ss -s sql.txt
+# Or
+> tools/export-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -tf 
yyyy-MM-dd\ HH:mm:ss -s sql.txt -linesPerFile 10
 ```
 
 ### SQL 文件示例
@@ -175,9 +182,9 @@ Time,Device,str(TEXT),int(INT32)
 
 ```shell
 # Unix/OS X
->tools/import-csv.sh -h <ip> -p <port> -u <username> -pw <password> -f 
<xxx.csv> [-fd <./failedDirectory>] [-aligned <true>] [-tp <ms/ns/us>]
+>tools/import-csv.sh -h <ip> -p <port> -u <username> -pw <password> -f 
<xxx.csv> [-fd <./failedDirectory>] [-aligned <true>] [-tp <ms/ns/us>] 
[-typeInfer <boolean=text,float=double...>] [-linesPerFailedFile <int_value>]
 # Windows
->tools\import-csv.bat -h <ip> -p <port> -u <username> -pw <password> -f 
<xxx.csv> [-fd <./failedDirectory>] [-aligned <true>] [-tp <ms/ns/us>]
+>tools\import-csv.bat -h <ip> -p <port> -u <username> -pw <password> -f 
<xxx.csv> [-fd <./failedDirectory>] [-aligned <true>] [-tp <ms/ns/us>] 
[-typeInfer <boolean=text,float=double...>] [-linesPerFailedFile <int_value>]
 ```
 
 参数:
@@ -201,6 +208,19 @@ Time,Device,str(TEXT),int(INT32)
 * `-tp`:
   - 用于指定时间精度,可选值包括`ms`(毫秒),`ns`(纳秒),`us`(微秒),默认值为`ms`。
 
+* `-typeInfer 
<srcTsDataType1=dstTsDataType1,srcTsDataType2=dstTsDataType2,...>`:
+  - 用于指定类型推断规则。
+  - `srcTsDataType` 包括 `boolean`,`int`,`long`,`float`,`double`,`NaN`。
+  - `dstTsDataType` 包括 `boolean`,`int`,`long`,`float`,`double`,`text`。
+  - 当`srcTsDataType`为`boolean`,`dstTsDataType`只能为`boolean`或`text`。
+  - 当`srcTsDataType`为`NaN`,`dstTsDataType`只能为`float`,`double`或`text`。
+  - 当`srcTsDataType`为数值类型,`dstTsDataType`的精度需要高于`srcTsDataType`。
+  - 例如:`-typeInfer boolean=text,float=double`
+
+* `-linesPerFailedFile <int>`:
+  - 用于指定每个导入失败文件写入数据的行数,默认值为10000。
+  - 例如:`-linesPerFailedFile 1`
+
 ### 运行示例
 
 ```sh
@@ -208,12 +228,22 @@ Time,Device,str(TEXT),int(INT32)
 >tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f 
 >example-filename.csv
 # or
 >tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f 
 >example-filename.csv -fd ./failed
+# or
> tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f 
example-filename.csv -fd ./failed -tp ns
+# or
> tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f 
example-filename.csv -fd ./failed -tp ns -typeInfer boolean=text,float=double
+# or
> tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f 
example-filename.csv -fd ./failed -tp ns -typeInfer boolean=text,float=double 
-linesPerFailedFile 10
 # Windows
 >tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f 
 >example-filename.csv
 # or
 >tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f 
 >example-filename.csv -fd .\failed
 # or
 > tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f 
 > example-filename.csv -fd .\failed -tp ns
+# or
+> tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f 
example-filename.csv -fd .\failed -tp ns -typeInfer boolean=text,float=double
+# or
+> tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f 
example-filename.csv -fd .\failed -tp ns -typeInfer boolean=text,float=double 
-linesPerFailedFile 10
 ```
 
 ### 注意

Reply via email to