This is an automated email from the ASF dual-hosted git repository.

qiaojialin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-iotdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 3463fa6  [IOTDB-708] add config for inferring data type from string value  (#1264)
3463fa6 is described below

commit 3463fa624c418b1405c6297ae76719869051ebca
Author: Jialin Qiao <[email protected]>
AuthorDate: Tue May 26 15:35:50 2020 +0800

    [IOTDB-708] add config for inferring data type from string value  (#1264)
    
    * add infer type config
---
 .../resources/conf/iotdb-engine.properties         | 12 ++++++
 .../java/org/apache/iotdb/db/conf/IoTDBConfig.java | 43 ++++++++++++++++++++++
 .../org/apache/iotdb/db/conf/IoTDBDescriptor.java  |  7 ++++
 .../apache/iotdb/db/utils/TypeInferenceUtils.java  | 41 +++++++++++++++++----
 .../iotdb/db/integration/IoTDBRestartIT.java       | 18 ++++-----
 .../iotdb/db/utils/TypeInferenceUtilsTest.java     | 13 +++++--
 6 files changed, 113 insertions(+), 21 deletions(-)

diff --git a/server/src/assembly/resources/conf/iotdb-engine.properties b/server/src/assembly/resources/conf/iotdb-engine.properties
index 6ecb307..e892740 100644
--- a/server/src/assembly/resources/conf/iotdb-engine.properties
+++ b/server/src/assembly/resources/conf/iotdb-engine.properties
@@ -380,6 +380,18 @@ enable_auto_create_schema=true
 #      we will set root.sg0 as the storage group if storage group level is 1
 default_storage_group_level=1
 
+# ALL data types: BOOLEAN, INT32, INT64, FLOAT, DOUBLE, TEXT
+
+# register time series as which type when receiving boolean string "true" or "false"
+boolean_string_infer_type=BOOLEAN
+
+# register time series as which type when receiving an integer string "67"
+integer_string_infer_type=FLOAT
+
+# register time series as which type when receiving a floating number string "6.7"
+floating_string_infer_type=FLOAT
+
+
 # BOOLEAN encoding when creating schema automatically is enabled
 default_boolean_encoding=RLE
 
diff --git a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java
index aa0f6d2..64d87fa 100644
--- a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java
+++ b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java
@@ -27,6 +27,7 @@ import org.apache.iotdb.db.exception.LoadConfigurationException;
 import org.apache.iotdb.db.metadata.MManager;
 import org.apache.iotdb.db.service.TSServiceImpl;
 import org.apache.iotdb.tsfile.common.conf.TSFileDescriptor;
+import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
 import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
 import org.apache.iotdb.tsfile.fileSystem.FSType;
 import org.slf4j.Logger;
@@ -352,6 +353,21 @@ public class IoTDBConfig {
   private boolean enableAutoCreateSchema = true;
 
   /**
+   * register time series as which type when receiving boolean string "true" or "false"
+   */
+  private TSDataType booleanStringInferType = TSDataType.BOOLEAN;
+
+  /**
+   * register time series as which type when receiving an integer string "67"
+   */
+  private TSDataType integerStringInferType = TSDataType.FLOAT;
+
+  /**
+   * register time series as which type when receiving a floating number string "6.7"
+   */
+  private TSDataType floatingStringInferType = TSDataType.FLOAT;
+
+  /**
    * Storage group level when creating schema automatically is enabled
    */
   private int defaultStorageGroupLevel = 1;
@@ -1225,6 +1241,33 @@ public class IoTDBConfig {
     this.enableAutoCreateSchema = enableAutoCreateSchema;
   }
 
+  public TSDataType getBooleanStringInferType() {
+    return booleanStringInferType;
+  }
+
+  public void setBooleanStringInferType(
+      TSDataType booleanStringInferType) {
+    this.booleanStringInferType = booleanStringInferType;
+  }
+
+  public TSDataType getIntegerStringInferType() {
+    return integerStringInferType;
+  }
+
+  public void setIntegerStringInferType(
+      TSDataType integerStringInferType) {
+    this.integerStringInferType = integerStringInferType;
+  }
+
+  public TSDataType getFloatingStringInferType() {
+    return floatingStringInferType;
+  }
+
+  public void setFloatingStringInferType(
+      TSDataType floatingNumberStringInferType) {
+    this.floatingStringInferType = floatingNumberStringInferType;
+  }
+
   public int getDefaultStorageGroupLevel() {
     return defaultStorageGroupLevel;
   }
diff --git a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java
index 05117c2..40599ff 100644
--- a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java
+++ b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java
@@ -36,6 +36,7 @@ import org.apache.iotdb.db.conf.directories.DirectoryManager;
 import org.apache.iotdb.db.exception.query.QueryProcessException;
 import org.apache.iotdb.db.utils.FilePathUtils;
 import org.apache.iotdb.tsfile.common.conf.TSFileDescriptor;
+import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
 import org.apache.iotdb.tsfile.fileSystem.FSType;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -471,6 +472,12 @@ public class IoTDBDescriptor {
     conf.setAutoCreateSchemaEnabled(
         Boolean.parseBoolean(properties.getProperty("enable_auto_create_schema",
             Boolean.toString(conf.isAutoCreateSchemaEnabled()).trim())));
+    conf.setBooleanStringInferType(TSDataType.valueOf(properties.getProperty("boolean_string_infer_type",
+        conf.getBooleanStringInferType().toString())));
+    conf.setIntegerStringInferType(TSDataType.valueOf(properties.getProperty("integer_string_infer_type",
+        conf.getIntegerStringInferType().toString())));
+    conf.setFloatingStringInferType(TSDataType.valueOf(properties.getProperty("floating_string_infer_type",
+        conf.getFloatingStringInferType().toString())));
     conf.setDefaultStorageGroupLevel(
         Integer.parseInt(properties.getProperty("default_storage_group_level",
             Integer.toString(conf.getDefaultStorageGroupLevel()))));
diff --git a/server/src/main/java/org/apache/iotdb/db/utils/TypeInferenceUtils.java b/server/src/main/java/org/apache/iotdb/db/utils/TypeInferenceUtils.java
index 63c79a3..9e2bba1 100644
--- a/server/src/main/java/org/apache/iotdb/db/utils/TypeInferenceUtils.java
+++ b/server/src/main/java/org/apache/iotdb/db/utils/TypeInferenceUtils.java
@@ -19,11 +19,19 @@
 
 package org.apache.iotdb.db.utils;
 
+import org.apache.iotdb.db.conf.IoTDBDescriptor;
 import org.apache.iotdb.db.qp.constant.SQLConstant;
 import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
+import org.apache.iotdb.tsfile.utils.Binary;
 
 public class TypeInferenceUtils {
 
+  private static TSDataType booleanStringInferType = IoTDBDescriptor.getInstance().getConfig().getBooleanStringInferType();
+
+  private static TSDataType integerStringInferType = IoTDBDescriptor.getInstance().getConfig().getIntegerStringInferType();
+
+  private static TSDataType floatingStringInferType = IoTDBDescriptor.getInstance().getConfig().getFloatingStringInferType();
+
   private TypeInferenceUtils() {
 
   }
@@ -46,17 +54,34 @@ public class TypeInferenceUtils {
    * Get predicted DataType of the given value
    */
   public static TSDataType getPredictedDataType(Object value) {
-    if (value instanceof Boolean || (value instanceof String && isBoolean((String) value))) {
-      return TSDataType.BOOLEAN;
-    } else if (value instanceof Number || (value instanceof String && isNumber((String) value))) {
-      String v = String.valueOf(value);
-      if (!v.contains(".")) {
-        return TSDataType.INT64;
+
+    if (value instanceof String) {
+      String strValue = (String) value;
+      if (isBoolean(strValue)) {
+        return booleanStringInferType;
+      } else if (isNumber(strValue)){
+        if (!strValue.contains(".")) {
+          return integerStringInferType;
+        } else {
+          return floatingStringInferType;
+        }
       } else {
-        return TSDataType.DOUBLE;
+        return TSDataType.TEXT;
       }
-    } else {
+    } else if (value instanceof Boolean) {
+      return TSDataType.BOOLEAN;
+    } else if (value instanceof Integer) {
+      return TSDataType.INT32;
+    } else if (value instanceof Long) {
+      return TSDataType.INT64;
+    } else if (value instanceof Float) {
+      return TSDataType.FLOAT;
+    } else if (value instanceof Double) {
+      return TSDataType.DOUBLE;
+    } else if (value instanceof Binary) {
       return TSDataType.TEXT;
     }
+
+    return TSDataType.TEXT;
   }
 }
diff --git a/server/src/test/java/org/apache/iotdb/db/integration/IoTDBRestartIT.java b/server/src/test/java/org/apache/iotdb/db/integration/IoTDBRestartIT.java
index e74b252..e0a657e 100644
--- a/server/src/test/java/org/apache/iotdb/db/integration/IoTDBRestartIT.java
+++ b/server/src/test/java/org/apache/iotdb/db/integration/IoTDBRestartIT.java
@@ -45,7 +45,7 @@ public class IoTDBRestartIT {
         .getConnection(Config.IOTDB_URL_PREFIX + "127.0.0.1:6667/", "root",
             "root");
         Statement statement = connection.createStatement()){
-      statement.execute("insert into root.turbine.d1(timestamp,s1) values(1,1)");
+      statement.execute("insert into root.turbine.d1(timestamp,s1) values(1,1.0)");
       statement.execute("flush");
     }
 
@@ -55,7 +55,7 @@ public class IoTDBRestartIT {
         .getConnection(Config.IOTDB_URL_PREFIX + "127.0.0.1:6667/", "root",
             "root");
         Statement statement = connection.createStatement()){
-      statement.execute("insert into root.turbine.d1(timestamp,s1) values(2,1)");
+      statement.execute("insert into root.turbine.d1(timestamp,s1) values(2,1.0)");
     }
 
     EnvironmentUtils.restartDaemon();
@@ -64,14 +64,14 @@ public class IoTDBRestartIT {
         .getConnection(Config.IOTDB_URL_PREFIX + "127.0.0.1:6667/", "root",
             "root");
         Statement statement = connection.createStatement()){
-      statement.execute("insert into root.turbine.d1(timestamp,s1) values(3,1)");
+      statement.execute("insert into root.turbine.d1(timestamp,s1) values(3,1.0)");
 
       boolean hasResultSet = statement.execute("SELECT s1 FROM root.turbine.d1");
       assertTrue(hasResultSet);
       String[] exp = new String[]{
-          "1,1",
-          "2,1",
-          "3,1"
+          "1,1.0",
+          "2,1.0",
+          "3,1.0"
       };
       ResultSet resultSet = statement.getResultSet();
       int cnt = 0;
@@ -112,8 +112,8 @@ public class IoTDBRestartIT {
       boolean hasResultSet = statement.execute("SELECT s1 FROM root.turbine.d1");
       assertTrue(hasResultSet);
       String[] exp = new String[]{
-          "2,2",
-          "3,3"
+          "2,2.0",
+          "3,3.0"
       };
       ResultSet resultSet = statement.getResultSet();
       int cnt = 0;
@@ -129,7 +129,7 @@ public class IoTDBRestartIT {
       hasResultSet = statement.execute("SELECT s1 FROM root.turbine.d1");
       assertTrue(hasResultSet);
       exp = new String[]{
-          "3,3"
+          "3,3.0"
       };
       resultSet = statement.getResultSet();
       cnt = 0;
diff --git a/server/src/test/java/org/apache/iotdb/db/utils/TypeInferenceUtilsTest.java b/server/src/test/java/org/apache/iotdb/db/utils/TypeInferenceUtilsTest.java
index 9f46666..69b4e5c 100644
--- a/server/src/test/java/org/apache/iotdb/db/utils/TypeInferenceUtilsTest.java
+++ b/server/src/test/java/org/apache/iotdb/db/utils/TypeInferenceUtilsTest.java
@@ -19,6 +19,7 @@
 
 package org.apache.iotdb.db.utils;
 
+import org.apache.iotdb.db.conf.IoTDBDescriptor;
 import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
 import org.junit.Test;
 
@@ -36,18 +37,22 @@ public class TypeInferenceUtilsTest {
         false, true, false, false, false};
 
     for (int i = 0; i < values.length; i++) {
-      System.out.println(values[i]);
       assertEquals(TypeInferenceUtils.isNumber(values[i]), results[i]);
     }
   }
 
   @Test
   public void getPredictedDataTypeTest() {
-    Object[] values = {123, "abc", 123.123, true};
-    TSDataType[] encodings = {TSDataType.INT64, TSDataType.TEXT, TSDataType.DOUBLE, TSDataType.BOOLEAN};
+    Object[] values = {123, "abc", 123.123d, true, 123.1f, "123", "12.2", "true"};
+    TSDataType[] encodings = {TSDataType.INT32, TSDataType.TEXT, TSDataType.DOUBLE,
+        TSDataType.BOOLEAN, TSDataType.FLOAT,
+        IoTDBDescriptor.getInstance().getConfig().getIntegerStringInferType(),
+        IoTDBDescriptor.getInstance().getConfig().getFloatingStringInferType(),
+        IoTDBDescriptor.getInstance().getConfig().getBooleanStringInferType()
+    };
 
     for (int i = 0; i < values.length; i++) {
-      assertEquals(TypeInferenceUtils.getPredictedDataType(values[i]), encodings[i]);
+      assertEquals(encodings[i], TypeInferenceUtils.getPredictedDataType(values[i]));
     }
   }
 }

Reply via email to