This is an automated email from the ASF dual-hosted git repository.
wangchao316 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/master by this push:
new 93444b53bc [IOTDB-4244] Optimize csv tool, add Options '-typeInfer' ,
'-linesPer… (#7145)
93444b53bc is described below
commit 93444b53bc939548254a757b0127b97139c6b4c1
Author: cmlmakahts <[email protected]>
AuthorDate: Wed Aug 31 09:27:41 2022 +0800
[IOTDB-4244] Optimize csv tool, add Options '-typeInfer' , '-linesPer…
(#7145)
[IOTDB-4244] Optimize csv tool, add Options '-typeInfer' , '-linesPer…
(#7145)
---
.../org/apache/iotdb/tool/AbstractCsvTool.java | 72 ++++++--
.../main/java/org/apache/iotdb/tool/ExportCsv.java | 124 +++++++------
.../main/java/org/apache/iotdb/tool/ImportCsv.java | 191 +++++++++++++++++----
.../tests/tools/importCsv/ExportCsvTestIT.java | 6 +-
.../tests/tools/importCsv/ImportCsvTestIT.java | 2 +-
docs/UserGuide/Write-And-Delete-Data/CSV-Tool.md | 41 ++++-
.../zh/UserGuide/Write-And-Delete-Data/CSV-Tool.md | 34 +++-
7 files changed, 356 insertions(+), 114 deletions(-)
diff --git a/cli/src/main/java/org/apache/iotdb/tool/AbstractCsvTool.java
b/cli/src/main/java/org/apache/iotdb/tool/AbstractCsvTool.java
index 9ef2d6bea6..0d3ef345ef 100644
--- a/cli/src/main/java/org/apache/iotdb/tool/AbstractCsvTool.java
+++ b/cli/src/main/java/org/apache/iotdb/tool/AbstractCsvTool.java
@@ -214,26 +214,74 @@ public abstract class AbstractCsvTool {
public static Boolean writeCsvFile(
List<String> headerNames, List<List<Object>> records, String filePath) {
try {
- CSVPrinter printer =
- CSVFormat.Builder.create(CSVFormat.DEFAULT)
- .setHeader()
- .setSkipHeaderRecord(true)
- .setEscape('\\')
- .setQuoteMode(QuoteMode.NONE)
- .build()
- .print(new PrintWriter(filePath));
+ final CSVPrinterWrapper csvPrinterWrapper = new
CSVPrinterWrapper(filePath);
if (headerNames != null) {
- printer.printRecord(headerNames);
+ csvPrinterWrapper.printRecord(headerNames);
}
for (List record : records) {
- printer.printRecord(record);
+ csvPrinterWrapper.printRecord(record);
}
- printer.flush();
- printer.close();
+ csvPrinterWrapper.flush();
+ csvPrinterWrapper.close();
return true;
} catch (IOException e) {
e.printStackTrace();
return false;
}
}
+
+ static class CSVPrinterWrapper {
+ private final String filePath;
+ private final CSVFormat csvFormat;
+ private CSVPrinter csvPrinter;
+
+ public CSVPrinterWrapper(String filePath) {
+ this.filePath = filePath;
+ this.csvFormat =
+ CSVFormat.Builder.create(CSVFormat.DEFAULT)
+ .setHeader()
+ .setSkipHeaderRecord(true)
+ .setEscape('\\')
+ .setQuoteMode(QuoteMode.NONE)
+ .build();
+ }
+
+ public void printRecord(final Iterable<?> values) throws IOException {
+ if (csvPrinter == null) {
+ csvPrinter = csvFormat.print(new PrintWriter(filePath));
+ }
+ csvPrinter.printRecord(values);
+ }
+
+ public void print(Object value) {
+ if (csvPrinter == null) {
+ try {
+ csvPrinter = csvFormat.print(new PrintWriter(filePath));
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ try {
+ csvPrinter.print(value);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ public void println() throws IOException {
+ csvPrinter.println();
+ }
+
+ public void close() throws IOException {
+ if (csvPrinter != null) {
+ csvPrinter.close();
+ }
+ }
+
+ public void flush() throws IOException {
+ if (csvPrinter != null) {
+ csvPrinter.flush();
+ }
+ }
+ }
}
diff --git a/cli/src/main/java/org/apache/iotdb/tool/ExportCsv.java
b/cli/src/main/java/org/apache/iotdb/tool/ExportCsv.java
index fc8b6e2107..3a18261cb1 100644
--- a/cli/src/main/java/org/apache/iotdb/tool/ExportCsv.java
+++ b/cli/src/main/java/org/apache/iotdb/tool/ExportCsv.java
@@ -36,9 +36,6 @@ import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
-import org.apache.commons.csv.CSVFormat;
-import org.apache.commons.csv.CSVPrinter;
-import org.apache.commons.csv.QuoteMode;
import org.apache.thrift.TException;
import org.jline.reader.LineReader;
@@ -46,7 +43,6 @@ import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
-import java.io.PrintWriter;
import java.time.Instant;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
@@ -75,6 +71,9 @@ public class ExportCsv extends AbstractCsvTool {
private static final String QUERY_COMMAND_ARGS = "q";
private static final String QUERY_COMMAND_NAME = "queryCommand";
+ private static final String LINES_PER_FILE_ARGS = "linesPerFile";
+ private static final String LINES_PER_FILE_ARGS_NAME = "Lines Per File";
+
private static final String TSFILEDB_CLI_PREFIX = "ExportCsv";
private static final String DUMP_FILE_NAME_DEFAULT = "dump";
@@ -88,6 +87,8 @@ public class ExportCsv extends AbstractCsvTool {
private static String timestampPrecision;
+ private static int linesPerFile = 10000;
+
private static final int EXPORT_PER_LINE_COUNT = 10000;
/** main function of export csv tool. */
@@ -194,6 +195,9 @@ public class ExportCsv extends AbstractCsvTool {
if (!targetDirectory.endsWith("/") && !targetDirectory.endsWith("\\")) {
targetDirectory += File.separator;
}
+ if (commandLine.getOptionValue(LINES_PER_FILE_ARGS) != null) {
+ linesPerFile =
Integer.parseInt(commandLine.getOptionValue(LINES_PER_FILE_ARGS));
+ }
}
/**
@@ -267,6 +271,14 @@ public class ExportCsv extends AbstractCsvTool {
.build();
options.addOption(opQuery);
+ Option opLinesPerFile =
+ Option.builder(LINES_PER_FILE_ARGS)
+ .argName(LINES_PER_FILE_ARGS_NAME)
+ .hasArg()
+ .desc("Lines per dump file.")
+ .build();
+ options.addOption(opLinesPerFile);
+
Option opHelp =
Option.builder(HELP_ARGS)
.longOpt(HELP_ARGS)
@@ -302,10 +314,24 @@ public class ExportCsv extends AbstractCsvTool {
* @param index used to create dump file name
*/
private static void dumpResult(String sql, int index) {
- final String path = targetDirectory + targetFile + index + ".csv";
+ final String path = targetDirectory + targetFile + index;
try {
SessionDataSet sessionDataSet = session.executeQueryStatement(sql);
- writeCsvFile(sessionDataSet, path);
+ List<Object> headers = new ArrayList<>();
+ List<String> names = sessionDataSet.getColumnNames();
+ List<String> types = sessionDataSet.getColumnTypes();
+ if (needDataTypePrinted) {
+ for (int i = 0; i < names.size(); i++) {
+ if (!"Time".equals(names.get(i)) && !"Device".equals(names.get(i))) {
+ headers.add(String.format("%s(%s)", names.get(i), types.get(i)));
+ } else {
+ headers.add(names.get(i));
+ }
+ }
+ } else {
+ headers.addAll(names);
+ }
+ writeCsvFile(sessionDataSet, path, headers, linesPerFile);
sessionDataSet.closeOperationHandle();
System.out.println("Export completely!");
} catch (StatementExecutionException | IoTDBConnectionException |
IOException e) {
@@ -328,60 +354,46 @@ public class ExportCsv extends AbstractCsvTool {
}
}
- public static Boolean writeCsvFile(SessionDataSet sessionDataSet, String
filePath)
+ public static void writeCsvFile(
+ SessionDataSet sessionDataSet, String filePath, List<Object> headers,
int linesPerFile)
throws IOException, IoTDBConnectionException,
StatementExecutionException {
- CSVPrinter printer =
- CSVFormat.Builder.create(CSVFormat.DEFAULT)
- .setHeader()
- .setSkipHeaderRecord(true)
- .setEscape('\\')
- .setQuoteMode(QuoteMode.NONE)
- .build()
- .print(new PrintWriter(filePath));
-
- List<Object> headers = new ArrayList<>();
- List<String> names = sessionDataSet.getColumnNames();
- List<String> types = sessionDataSet.getColumnTypes();
-
- if (needDataTypePrinted) {
- for (int i = 0; i < names.size(); i++) {
- if (!"Time".equals(names.get(i)) && !"Device".equals(names.get(i))) {
- headers.add(String.format("%s(%s)", names.get(i), types.get(i)));
+ int fileIndex = 0;
+ boolean hasNext = true;
+ while (hasNext) {
+ int i = 0;
+ final String finalFilePath = filePath + "_" + fileIndex + ".csv";
+ final CSVPrinterWrapper csvPrinterWrapper = new
CSVPrinterWrapper(finalFilePath);
+ csvPrinterWrapper.printRecord(headers);
+ while (i++ < linesPerFile) {
+ if (sessionDataSet.hasNext()) {
+ RowRecord rowRecord = sessionDataSet.next();
+ if (rowRecord.getTimestamp() != 0) {
+ csvPrinterWrapper.print(timeTrans(rowRecord.getTimestamp()));
+ }
+ rowRecord
+ .getFields()
+ .forEach(
+ field -> {
+ String fieldStringValue = field.getStringValue();
+ if (!"null".equals(field.getStringValue())) {
+ if (field.getDataType() == TSDataType.TEXT
+ && !fieldStringValue.startsWith("root.")) {
+ fieldStringValue = "\"" + fieldStringValue + "\"";
+ }
+ csvPrinterWrapper.print(fieldStringValue);
+ } else {
+ csvPrinterWrapper.print("");
+ }
+ });
+ csvPrinterWrapper.println();
} else {
- headers.add(names.get(i));
+ hasNext = false;
+ break;
}
}
- } else {
- headers.addAll(names);
+ fileIndex++;
+ csvPrinterWrapper.flush();
+ csvPrinterWrapper.close();
}
- printer.printRecord(headers);
-
- while (sessionDataSet.hasNext()) {
- RowRecord rowRecord = sessionDataSet.next();
- ArrayList<String> record = new ArrayList<>();
- if (rowRecord.getTimestamp() != 0) {
- record.add(timeTrans(rowRecord.getTimestamp()));
- }
- rowRecord
- .getFields()
- .forEach(
- field -> {
- String fieldStringValue = field.getStringValue();
- if (!"null".equals(field.getStringValue())) {
- if (field.getDataType() == TSDataType.TEXT
- && !fieldStringValue.startsWith("root.")) {
- fieldStringValue = "\"" + fieldStringValue + "\"";
- }
- record.add(fieldStringValue);
- } else {
- record.add("");
- }
- });
- printer.printRecord(record);
- }
-
- printer.flush();
- printer.close();
- return true;
}
}
diff --git a/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java
b/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java
index 89dfa4f176..b7535a5838 100644
--- a/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java
+++ b/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java
@@ -19,12 +19,14 @@
package org.apache.iotdb.tool;
+import org.apache.iotdb.db.qp.constant.SQLConstant;
import org.apache.iotdb.db.qp.utils.DatetimeUtils;
import org.apache.iotdb.exception.ArgsErrorException;
import org.apache.iotdb.rpc.IoTDBConnectionException;
import org.apache.iotdb.rpc.StatementExecutionException;
import org.apache.iotdb.session.Session;
import org.apache.iotdb.session.SessionDataSet;
+import org.apache.iotdb.tsfile.common.constant.TsFileConstant;
import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
import org.apache.commons.cli.CommandLine;
@@ -43,12 +45,12 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
-import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
@@ -85,10 +87,17 @@ public class ImportCsv extends AbstractCsvTool {
private static final String TIMESTAMP_PRECISION_ARGS = "tp";
private static final String TIMESTAMP_PRECISION_NAME = "timestamp precision
(ms/us/ns)";
+ private static final String TYPE_INFER_ARGS = "typeInfer";
+ private static final String TYPE_INFER_ARGS_NAME = "type infer";
+
+ private static final String LINES_PER_FAILED_FILE_ARGS =
"linesPerFailedFile";
+ private static final String LINES_PER_FAILED_FILE_ARGS_NAME = "Lines Per
FailedFile";
+
private static final String TSFILEDB_CLI_PREFIX = "ImportCsv";
private static String targetPath;
private static String failedFileDirectory = null;
+ private static int linesPerFailedFile = 10000;
private static Boolean aligned = false;
private static String timeColumn = "Time";
@@ -98,6 +107,28 @@ public class ImportCsv extends AbstractCsvTool {
private static String timestampPrecision = "ms";
+ private static final Map<String, TSDataType> TYPE_INFER_KEY_DICT = new
HashMap<>();
+
+ static {
+ TYPE_INFER_KEY_DICT.put("boolean", TSDataType.BOOLEAN);
+ TYPE_INFER_KEY_DICT.put("int", TSDataType.FLOAT);
+ TYPE_INFER_KEY_DICT.put("long", TSDataType.DOUBLE);
+ TYPE_INFER_KEY_DICT.put("float", TSDataType.FLOAT);
+ TYPE_INFER_KEY_DICT.put("double", TSDataType.DOUBLE);
+ TYPE_INFER_KEY_DICT.put("NaN", TSDataType.DOUBLE);
+ }
+
+ private static final Map<String, TSDataType> TYPE_INFER_VALUE_DICT = new
HashMap<>();
+
+ static {
+ TYPE_INFER_VALUE_DICT.put("boolean", TSDataType.BOOLEAN);
+ TYPE_INFER_VALUE_DICT.put("int", TSDataType.INT32);
+ TYPE_INFER_VALUE_DICT.put("long", TSDataType.INT64);
+ TYPE_INFER_VALUE_DICT.put("float", TSDataType.FLOAT);
+ TYPE_INFER_VALUE_DICT.put("double", TSDataType.DOUBLE);
+ TYPE_INFER_VALUE_DICT.put("text", TSDataType.TEXT);
+ }
+
/**
* create the commandline options.
*
@@ -160,12 +191,31 @@ public class ImportCsv extends AbstractCsvTool {
Option opTimestampPrecision =
Option.builder(TIMESTAMP_PRECISION_ARGS)
- .argName(TIMESTAMP_PRECISION_ARGS)
+ .argName(TIMESTAMP_PRECISION_NAME)
.hasArg()
.desc("Timestamp precision (ms/us/ns)")
.build();
+
options.addOption(opTimestampPrecision);
+ Option opTypeInfer =
+ Option.builder(TYPE_INFER_ARGS)
+ .argName(TYPE_INFER_ARGS_NAME)
+ .numberOfArgs(5)
+ .hasArgs()
+ .valueSeparator(',')
+ .desc("Define type info by option:\"boolean=text,int=long, ...")
+ .build();
+ options.addOption(opTypeInfer);
+
+ Option opFailedLinesPerFile =
+ Option.builder(LINES_PER_FAILED_FILE_ARGS)
+ .argName(LINES_PER_FAILED_FILE_ARGS_NAME)
+ .hasArgs()
+ .desc("Lines per failedfile")
+ .build();
+ options.addOption(opFailedLinesPerFile);
+
return options;
}
@@ -174,7 +224,7 @@ public class ImportCsv extends AbstractCsvTool {
*
* @param commandLine
*/
- private static void parseSpecialParams(CommandLine commandLine) {
+ private static void parseSpecialParams(CommandLine commandLine) throws
ArgsErrorException {
timeZoneID = commandLine.getOptionValue(TIME_ZONE_ARGS);
targetPath = commandLine.getOptionValue(FILE_ARGS);
if (commandLine.getOptionValue(BATCH_POINT_SIZE_ARGS) != null) {
@@ -195,6 +245,42 @@ public class ImportCsv extends AbstractCsvTool {
if (commandLine.getOptionValue(TIMESTAMP_PRECISION_ARGS) != null) {
timestampPrecision =
commandLine.getOptionValue(TIMESTAMP_PRECISION_ARGS);
}
+ final String[] opTypeInferValues =
commandLine.getOptionValues(TYPE_INFER_ARGS);
+ if (opTypeInferValues != null && opTypeInferValues.length > 0) {
+ for (String opTypeInferValue : opTypeInferValues) {
+ if (opTypeInferValue.contains("=")) {
+ final String[] typeInfoExpressionArr = opTypeInferValue.split("=");
+ final String key = typeInfoExpressionArr[0];
+ final String value = typeInfoExpressionArr[1];
+ applyTypeInferArgs(key, value);
+ }
+ }
+ }
+ if (commandLine.getOptionValue(LINES_PER_FAILED_FILE_ARGS) != null) {
+ linesPerFailedFile =
Integer.parseInt(commandLine.getOptionValue(LINES_PER_FAILED_FILE_ARGS));
+ }
+ }
+
+ private static void applyTypeInferArgs(String key, String value) throws
ArgsErrorException {
+ if (!TYPE_INFER_KEY_DICT.containsKey(key)) {
+ throw new ArgsErrorException("Unknown type infer key: " + key);
+ }
+ if (!TYPE_INFER_VALUE_DICT.containsKey(value)) {
+ throw new ArgsErrorException("Unknown type infer value: " + value);
+ }
+ if (key.equals("NaN")
+ && !(value.equals("float") || value.equals("double") ||
value.equals("text"))) {
+ throw new ArgsErrorException("NaN can not convert to " + value);
+ }
+ if (key.equals("boolean") && !(value.equals("boolean") ||
value.equals("text"))) {
+ throw new ArgsErrorException("Boolean can not convert to " + value);
+ }
+ final TSDataType srcType = TYPE_INFER_VALUE_DICT.get(key);
+ final TSDataType dstType = TYPE_INFER_VALUE_DICT.get(value);
+ if (dstType.getType() < srcType.getType()) {
+ throw new ArgsErrorException(key + " can not convert to " + value);
+ }
+ TYPE_INFER_KEY_DICT.put(key, TYPE_INFER_VALUE_DICT.get(value));
}
public static void main(String[] args) throws IoTDBConnectionException {
@@ -435,7 +521,7 @@ public class ImportCsv extends AbstractCsvTool {
}
if (!failedRecords.isEmpty()) {
- writeCsvFile(headerNames, failedRecords, failedFilePath);
+ writeFailedLinesFile(headerNames, failedFilePath, failedRecords);
}
if (hasStarted.get()) {
System.out.println("Import completely!");
@@ -457,7 +543,6 @@ public class ImportCsv extends AbstractCsvTool {
HashMap<String, String> headerNameMap = new HashMap<>();
parseHeaders(headerNames, null, headerTypeMap, headerNameMap);
- AtomicReference<SimpleDateFormat> timeFormatter = new
AtomicReference<>(null);
AtomicReference<String> deviceName = new AtomicReference<>(null);
HashSet<String> typeQueriedDevice = new HashSet<>();
@@ -477,7 +562,6 @@ public class ImportCsv extends AbstractCsvTool {
// only run in first record
if (deviceName.get() == null) {
deviceName.set(record.get(1));
- // timeFormatter.set(formatterInit(record.get(0)));
} else if (!Objects.equals(deviceName.get(), record.get(1))) {
// if device changed
writeAndEmptyDataSet(
@@ -549,11 +633,7 @@ public class ImportCsv extends AbstractCsvTool {
failedRecords.add(record.stream().collect(Collectors.toList()));
}
if (!measurements.isEmpty()) {
- if (timeFormatter.get() == null) {
- times.add(Long.valueOf(record.get(timeColumn)));
- } else {
- times.add(parseTimestamp(record.get(timeColumn)));
- }
+ times.add(parseTimestamp(record.get(timeColumn)));
typesList.add(types);
valuesList.add(values);
measurementsList.add(measurements);
@@ -564,11 +644,28 @@ public class ImportCsv extends AbstractCsvTool {
pointSize.set(0);
}
if (!failedRecords.isEmpty()) {
- writeCsvFile(headerNames, failedRecords, failedFilePath);
+ writeFailedLinesFile(headerNames, failedFilePath, failedRecords);
}
System.out.println("Import completely!");
}
+ private static void writeFailedLinesFile(
+ List<String> headerNames, String failedFilePath, ArrayList<List<Object>>
failedRecords) {
+ int fileIndex = 0;
+ int from = 0;
+ int failedRecordsSize = failedRecords.size();
+ int restFailedRecords = failedRecordsSize;
+ while (from < failedRecordsSize) {
+ int step = Math.min(restFailedRecords, linesPerFailedFile);
+ writeCsvFile(
+ headerNames,
+ failedRecords.subList(from, from + step),
+ failedFilePath + "_" + fileIndex++);
+ from += step;
+ restFailedRecords -= step;
+ }
+ }
+
private static void writeAndEmptyDataSet(
String device,
List<Long> times,
@@ -767,33 +864,55 @@ public class ImportCsv extends AbstractCsvTool {
* if data type of timeseries is not defined in headers of schema, this
method will be called to
* do type inference
*
- * @param value
+ * @param strValue
* @return
*/
- private static TSDataType typeInfer(String value) {
- if (value.contains("\"")) {
+ private static TSDataType typeInfer(String strValue) {
+ if (strValue.contains("\"")) {
return TEXT;
- } else if (value.equals("true") || value.equals("false")) {
- return BOOLEAN;
- } else if (value.equals("NaN")) {
- return DOUBLE;
- } else if (!value.contains(".")) {
- try {
- Integer.valueOf(value);
- return INT32;
- } catch (Exception e) {
- try {
- Long.valueOf(value);
- return INT64;
- } catch (Exception exception) {
- return null;
+ }
+ if (isBoolean(strValue)) {
+ return TYPE_INFER_KEY_DICT.get("boolean");
+ } else if (isNumber(strValue)) {
+ if (!strValue.contains(TsFileConstant.PATH_SEPARATOR)) {
+ if (isConvertFloatPrecisionLack(StringUtils.trim(strValue))) {
+ return TYPE_INFER_KEY_DICT.get("long");
}
+ return TYPE_INFER_KEY_DICT.get("int");
+ } else {
+ return TYPE_INFER_KEY_DICT.get("float");
}
+ } else if ("null".equals(strValue) || "NULL".equals(strValue)) {
+ return null;
+ // "NaN" is returned if the NaN Literal is given in Parser
+ } else if ("NaN".equals(strValue)) {
+ return TYPE_INFER_KEY_DICT.get("NaN");
} else {
- return DOUBLE;
+ return TSDataType.TEXT;
}
}
+ static boolean isNumber(String s) {
+ if (s == null || s.equals("NaN")) {
+ return false;
+ }
+ try {
+ Double.parseDouble(s);
+ } catch (NumberFormatException e) {
+ return false;
+ }
+ return true;
+ }
+
+ private static boolean isBoolean(String s) {
+ return s.equalsIgnoreCase(SQLConstant.BOOLEAN_TRUE)
+ || s.equalsIgnoreCase(SQLConstant.BOOLEAN_FALSE);
+ }
+
+ private static boolean isConvertFloatPrecisionLack(String s) {
+ return Long.parseLong(s) > (2 << 24);
+ }
+
/**
* @param value
* @param type
@@ -808,18 +927,18 @@ public class ImportCsv extends AbstractCsvTool {
}
return value;
case BOOLEAN:
- if (!"true".equals(value) && !"false".equals(value)) {
+ if (!"true".equalsIgnoreCase(value) &&
!"false".equalsIgnoreCase(value)) {
return null;
}
- return Boolean.valueOf(value);
+ return Boolean.parseBoolean(value);
case INT32:
- return Integer.valueOf(value);
+ return Integer.parseInt(value);
case INT64:
- return Long.valueOf(value);
+ return Long.parseLong(value);
case FLOAT:
- return Float.valueOf(value);
+ return Float.parseFloat(value);
case DOUBLE:
- return Double.valueOf(value);
+ return Double.parseDouble(value);
default:
return null;
}
diff --git
a/cross-tests/src/test/java/org/apache/iotdb/cross/tests/tools/importCsv/ExportCsvTestIT.java
b/cross-tests/src/test/java/org/apache/iotdb/cross/tests/tools/importCsv/ExportCsvTestIT.java
index 57c7d84f31..a492dd5505 100644
---
a/cross-tests/src/test/java/org/apache/iotdb/cross/tests/tools/importCsv/ExportCsvTestIT.java
+++
b/cross-tests/src/test/java/org/apache/iotdb/cross/tests/tools/importCsv/ExportCsvTestIT.java
@@ -70,7 +70,7 @@ public class ExportCsvTestIT extends AbstractScript {
String[] params = {"-td", "target/", "-q", "select c1,c2,c3 from
root.test.t1"};
prepareData();
testMethod(params, null);
- CSVParser parser = readCsvFile("target/dump0.csv");
+ CSVParser parser = readCsvFile("target/dump0_0.csv");
String[] realRecords = {
"root.test.t1.c1,root.test.t1.c2,root.test.t1.c3",
"1.0,\"\"abc\",aa\",\"abbe's\""
};
@@ -90,7 +90,7 @@ public class ExportCsvTestIT extends AbstractScript {
};
prepareData();
testMethod(params, null);
- CSVParser parser = readCsvFile("target/dump0.csv");
+ CSVParser parser = readCsvFile("target/dump0_0.csv");
String[] realRecords = {
"root.test.t1.c1(FLOAT),root.test.t1.c2(TEXT),root.test.t1.c3(TEXT)",
"1.0,\"\"abc\",aa\",\"abbe's\""
@@ -111,7 +111,7 @@ public class ExportCsvTestIT extends AbstractScript {
};
prepareData();
testMethod(params, null);
- CSVParser parser = readCsvFile("target/dump0.csv");
+ CSVParser parser = readCsvFile("target/dump0_0.csv");
String[] realRecords = {
"count(root.test.t1.c1),count(root.test.t1.c2),count(root.test.t1.c3)",
"1,1,1"
};
diff --git
a/cross-tests/src/test/java/org/apache/iotdb/cross/tests/tools/importCsv/ImportCsvTestIT.java
b/cross-tests/src/test/java/org/apache/iotdb/cross/tests/tools/importCsv/ImportCsvTestIT.java
index c419909bb0..13fb2e0315 100644
---
a/cross-tests/src/test/java/org/apache/iotdb/cross/tests/tools/importCsv/ImportCsvTestIT.java
+++
b/cross-tests/src/test/java/org/apache/iotdb/cross/tests/tools/importCsv/ImportCsvTestIT.java
@@ -309,7 +309,7 @@ public class ImportCsvTestIT extends AbstractScript {
file.delete();
}
// check the failed file
- List<CSVRecord> records = readCsvFile(CSV_FILE + ".failed").getRecords();
+ List<CSVRecord> records = readCsvFile(CSV_FILE + ".failed_0").getRecords();
String[] realRecords = {
"Time,root.fit.d1.s1(INT32),root.fit.d1.s2(TEXT),root.fit.d2.s1(INT32),root.fit.d2.s3(INT32),root.fit.p.s1(INT32)",
"1,100,\"hello\",200,\"300\",400"
diff --git a/docs/UserGuide/Write-And-Delete-Data/CSV-Tool.md
b/docs/UserGuide/Write-And-Delete-Data/CSV-Tool.md
index 1f4f9c6cc1..6f933e6c5a 100644
--- a/docs/UserGuide/Write-And-Delete-Data/CSV-Tool.md
+++ b/docs/UserGuide/Write-And-Delete-Data/CSV-Tool.md
@@ -29,10 +29,10 @@ The CSV tool can help you import data in CSV format to
IoTDB or export data from
```shell
# Unix/OS X
-> tools/export-csv.sh -h <ip> -p <port> -u <username> -pw <password> -td
<directory> [-tf <time-format> -datatype <true/false> -q <query command> -s
<sql file>]
+> tools/export-csv.sh -h <ip> -p <port> -u <username> -pw <password> -td
<directory> [-tf <time-format> -datatype <true/false> -q <query command> -s
<sql file> -linesPerFile <int>]
# Windows
-> tools\export-csv.bat -h <ip> -p <port> -u <username> -pw <password> -td
<directory> [-tf <time-format> -datatype <true/false> -q <query command> -s
<sql file>]
+> tools\export-csv.bat -h <ip> -p <port> -u <username> -pw <password> -td
<directory> [-tf <time-format> -datatype <true/false> -q <query command> -s
<sql file> -linesPerFile <int>]
```
Description:
@@ -50,6 +50,10 @@ Description:
* `-tf <time-format>`:
- specifying a time format that you want. The time format have to obey [ISO
8601](https://calendars.wikia.org/wiki/ISO_8601) standard. If you want to save
the time as the timestamp, then setting `-tf timestamp`
- example: `-tf yyyy-MM-dd\ HH:mm:ss` or `-tf timestamp`
+* `-linesPerFile <int>`:
+ - Specifying lines of each dump file, `10000` is default.
+ - example: `-linesPerFile 1`
+
More, if you don't use one of `-s` and `-q`, you need to enter some queries
after running the export script. The results of the different query will be
saved to different CSV files.
@@ -66,6 +70,8 @@ More, if you don't use one of `-s` and `-q`, you need to
enter some queries afte
> tools/export-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -s sql.txt
# Or
> tools/export-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -tf
> yyyy-MM-dd\ HH:mm:ss -s sql.txt
+# Or
+> tools/export-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -tf
yyyy-MM-dd\ HH:mm:ss -s sql.txt -linesPerFile 10
# Windows
> tools/export-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -td ./
@@ -77,6 +83,8 @@ More, if you don't use one of `-s` and `-q`, you need to
enter some queries afte
> tools/export-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -s sql.txt
# Or
> tools/export-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -tf
> yyyy-MM-dd\ HH:mm:ss -s sql.txt
+# Or
+> tools/export-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -tf
yyyy-MM-dd\ HH:mm:ss -s sql.txt -linesPerFile 10
```
### Sample SQL file
@@ -174,9 +182,9 @@ Time,Device,str(TEXT),int(INT32)
```shell
# Unix/OS X
-> tools/import-csv.sh -h <ip> -p <port> -u <username> -pw <password> -f
<xxx.csv> [-fd <./failedDirectory>] [-aligned <true>] [-tp <ms/ns/us>]
+> tools/import-csv.sh -h <ip> -p <port> -u <username> -pw <password> -f
<xxx.csv> [-fd <./failedDirectory>] [-aligned <true>] [-tp <ms/ns/us>]
[-typeInfer <boolean=text,float=double...>]
# Windows
-> tools\import-csv.bat -h <ip> -p <port> -u <username> -pw <password> -f
<xxx.csv> [-fd <./failedDirectory>] [-aligned <true>] [-tp <ms/ns/us>]
+> tools\import-csv.bat -h <ip> -p <port> -u <username> -pw <password> -f
<xxx.csv> [-fd <./failedDirectory>] [-aligned <true>] [-tp <ms/ns/us>]
[-typeInfer <boolean=text,float=double...>]
```
Description:
@@ -200,6 +208,19 @@ Description:
* `-tp <time-precision>`:
- specifying a time precision. Options include `ms`(millisecond),
`ns`(nanosecond), and `us`(microsecond), `ms` is default.
+* `-typeInfer
<srcTsDataType1=dstTsDataType1,srcTsDataType2=dstTsDataType2,...>`:
+ - specifying rules of type inference.
+ - Option `srcTsDataType` includes
`boolean`,`int`,`long`,`float`,`double`,`NaN`.
+ - Option `dstTsDataType` includes
`boolean`,`int`,`long`,`float`,`double`,`text`.
+ - When `srcTsDataType` is `boolean`, `dstTsDataType` should be between
`boolean` and `text`.
+ - When `srcTsDataType` is `NaN`, `dstTsDataType` should be among `float`,
`double` and `text`.
+ - When `srcTsDataType` is Numeric type, `dstTsDataType` precision should be
greater than `srcTsDataType`.
+ - example: `-typeInfer boolean=text,float=double`
+
+* `-linesPerFailedFile <int>`:
+ - Specifying lines of each failed file, `10000` is default.
+ - example: `-linesPerFailedFile 1`
+
### Example
```sh
@@ -207,12 +228,24 @@ Description:
> tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f
> example-filename.csv -fd ./failed
# or
> tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f
> example-filename.csv -fd ./failed
+# or
> tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f
example-filename.csv -fd ./failed -tp ns
# or
> tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f
example-filename.csv -fd ./failed -tp ns -typeInfer boolean=text,float=double
# or
> tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f
example-filename.csv -fd ./failed -tp ns -typeInfer boolean=text,float=double
-linesPerFailedFile 10
+
# Windows
> tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f
> example-filename.csv
# or
> tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f
> example-filename.csv -fd .\failed
# or
> tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f
> example-filename.csv -fd .\failed -tp ns
+# or
+> tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f
example-filename.csv -fd .\failed -tp ns -typeInfer boolean=text,float=double
+# or
+> tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f
example-filename.csv -fd .\failed -tp ns -typeInfer boolean=text,float=double
-linesPerFailedFile 10
+
```
### Note
diff --git a/docs/zh/UserGuide/Write-And-Delete-Data/CSV-Tool.md
b/docs/zh/UserGuide/Write-And-Delete-Data/CSV-Tool.md
index 10028269f2..a023c3896a 100644
--- a/docs/zh/UserGuide/Write-And-Delete-Data/CSV-Tool.md
+++ b/docs/zh/UserGuide/Write-And-Delete-Data/CSV-Tool.md
@@ -50,6 +50,9 @@ CSV 工具可帮您将 CSV 格式的数据导入到 IoTDB 或者将数据从 IoT
* `-tf <time-format>`:
- 指定一个你想要得到的时间格式。时间格式必须遵守[ISO
8601](https://calendars.wikia.org/wiki/ISO_8601)标准。如果说你想要以时间戳来保存时间,那就设置为`-tf
timestamp`。
- 例如: `-tf yyyy-MM-dd\ HH:mm:ss` or `-tf timestamp`
+* `-linesPerFile <int>`:
+ - 指定导出的dump文件最大行数,默认值为`10000`。
+ - 例如: `-linesPerFile 1`
除此之外,如果你没有使用`-s`和`-q`参数,在导出脚本被启动之后你需要按照程序提示输入查询语句,不同的查询结果会被保存到不同的CSV文件中。
@@ -66,6 +69,8 @@ CSV 工具可帮您将 CSV 格式的数据导入到 IoTDB 或者将数据从 IoT
> tools/export-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -s sql.txt
# Or
> tools/export-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -tf
> yyyy-MM-dd\ HH:mm:ss -s sql.txt
+# Or
+> tools/export-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -tf
yyyy-MM-dd\ HH:mm:ss -s sql.txt -linesPerFile 10
# Windows
> tools/export-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -td ./
@@ -77,6 +82,8 @@ CSV 工具可帮您将 CSV 格式的数据导入到 IoTDB 或者将数据从 IoT
> tools/export-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -s sql.txt
# Or
> tools/export-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -tf
> yyyy-MM-dd\ HH:mm:ss -s sql.txt
+# Or
+> tools/export-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -td ./ -tf
yyyy-MM-dd\ HH:mm:ss -s sql.txt -linesPerFile 10
```
### SQL 文件示例
@@ -175,9 +182,9 @@ Time,Device,str(TEXT),int(INT32)
```shell
# Unix/OS X
->tools/import-csv.sh -h <ip> -p <port> -u <username> -pw <password> -f
<xxx.csv> [-fd <./failedDirectory>] [-aligned <true>] [-tp <ms/ns/us>]
+>tools/import-csv.sh -h <ip> -p <port> -u <username> -pw <password> -f
<xxx.csv> [-fd <./failedDirectory>] [-aligned <true>] [-tp <ms/ns/us>]
[-typeInfer <boolean=text,float=double...>] [-linesPerFailedFile <int_value>]
# Windows
->tools\import-csv.bat -h <ip> -p <port> -u <username> -pw <password> -f
<xxx.csv> [-fd <./failedDirectory>] [-aligned <true>] [-tp <ms/ns/us>]
+>tools\import-csv.bat -h <ip> -p <port> -u <username> -pw <password> -f
<xxx.csv> [-fd <./failedDirectory>] [-aligned <true>] [-tp <ms/ns/us>]
[-typeInfer <boolean=text,float=double...>] [-linesPerFailedFile <int_value>]
```
参数:
@@ -201,6 +208,19 @@ Time,Device,str(TEXT),int(INT32)
* `-tp`:
- 用于指定时间精度,可选值包括`ms`(毫秒),`ns`(纳秒),`us`(微秒),默认值为`ms`。
+* `-typeInfer
<srcTsDataType1=dstTsDataType1,srcTsDataType2=dstTsDataType2,...>`:
+ - 用于指定类型推断规则.
+ - `srcTsDataType` 包括 `boolean`,`int`,`long`,`float`,`double`,`NaN`.
+ - `dstTsDataType` 包括 `boolean`,`int`,`long`,`float`,`double`,`text`.
+ - 当`srcTsDataType`为`boolean`, `dstTsDataType`只能为`boolean`或`text`.
+ - 当`srcTsDataType`为`NaN`, `dstTsDataType`只能为`float`, `double`或`text`.
+ - 当`srcTsDataType`为数值类型, `dstTsDataType`的精度需要高于`srcTsDataType`.
+ - 例如:`-typeInfer boolean=text,float=double`
+
+* `-linesPerFailedFile <int>`:
+ - 用于指定每个导入失败文件写入数据的行数,默认值为10000。
+ - 例如:`-linesPerFailedFile 1`
+
### 运行示例
```sh
@@ -208,12 +228,22 @@ Time,Device,str(TEXT),int(INT32)
>tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f
>example-filename.csv -fd ./failed
# or
>tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f
>example-filename.csv -fd ./failed
+# or
> tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f
example-filename.csv -fd ./failed -tp ns
# or
> tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f
example-filename.csv -fd ./failed -tp ns -typeInfer boolean=text,float=double
# or
> tools/import-csv.sh -h 127.0.0.1 -p 6667 -u root -pw root -f
example-filename.csv -fd ./failed -tp ns -typeInfer boolean=text,float=double
-linesPerFailedFile 10
# Windows
>tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f
>example-filename.csv
# or
>tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f
>example-filename.csv -fd .\failed
# or
> tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f
> example-filename.csv -fd .\failed -tp ns
+# or
+> tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f
example-filename.csv -fd .\failed -tp ns -typeInfer boolean=text,float=double
+# or
+> tools\import-csv.bat -h 127.0.0.1 -p 6667 -u root -pw root -f
example-filename.csv -fd .\failed -tp ns -typeInfer boolean=text,float=double
-linesPerFailedFile 10
```
### 注意