This is an automated email from the ASF dual-hosted git repository.

jiangtian pushed a commit to branch support_string_dictionart_200
in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit c8f4b50bd5a5099ccdf0a8809d3333fca3ef4544 Author: Tian Jiang <[email protected]> AuthorDate: Wed Sep 11 11:20:03 2024 +0800 Support dictionary encoding for STRING & refactor encoding support condition --- .../org/apache/iotdb/db/it/IoTDBEncodingIT.java | 100 +++++++++++++++++++-- .../iotdb/session/it/IoTDBSessionSimpleIT.java | 22 ++++- .../org/apache/iotdb/db/utils/SchemaUtils.java | 51 +---------- pom.xml | 2 +- 4 files changed, 115 insertions(+), 60 deletions(-) diff --git a/integration-test/src/test/java/org/apache/iotdb/db/it/IoTDBEncodingIT.java b/integration-test/src/test/java/org/apache/iotdb/db/it/IoTDBEncodingIT.java index aa7b7024677..6bcbc762fc5 100644 --- a/integration-test/src/test/java/org/apache/iotdb/db/it/IoTDBEncodingIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/db/it/IoTDBEncodingIT.java @@ -19,13 +19,17 @@ package org.apache.iotdb.db.it; +import org.apache.iotdb.isession.ISession; import org.apache.iotdb.it.env.EnvFactory; import org.apache.iotdb.it.framework.IoTDBTestRunner; import org.apache.iotdb.itbase.category.LocalStandaloneIT; import org.apache.iotdb.rpc.TSStatusCode; +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.file.metadata.enums.TSEncoding; import org.junit.After; -import org.junit.Before; +import org.junit.AfterClass; +import org.junit.BeforeClass; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; @@ -36,22 +40,37 @@ import java.sql.SQLException; import java.sql.Statement; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.fail; @RunWith(IoTDBTestRunner.class) @Category({LocalStandaloneIT.class}) public class IoTDBEncodingIT { - @Before - public void setUp() throws Exception { + private static final String[] databasesToClear = + new String[] {"root.db_0", "root.db1", "root.turbine1"}; + + @BeforeClass + public static void setUpClass() throws Exception { 
EnvFactory.getEnv().initClusterEnvironment(); } - @After - public void tearDown() throws Exception { + @AfterClass + public static void tearDownClass() throws Exception { EnvFactory.getEnv().cleanClusterEnvironment(); } + @After + public void tearDown() { + for (String database : databasesToClear) { + try (ISession session = EnvFactory.getEnv().getSessionConnection()) { + session.executeNonQueryStatement("DELETE DATABASE " + database); + } catch (Exception ignored) { + + } + } + } + @Test public void testSetEncodingRegularFailed() { try (Connection connection = EnvFactory.getEnv().getConnection(); @@ -432,4 +451,75 @@ public class IoTDBEncodingIT { fail(); } } + + @Test + public void testCreateNewTypes() throws Exception { + String currDB = "root.db1"; + int seriesCnt = 0; + TSDataType[] dataTypes = + new TSDataType[] { + TSDataType.STRING, TSDataType.BLOB, TSDataType.TIMESTAMP, TSDataType.DATE + }; + + // supported encodings + try (Connection connection = EnvFactory.getEnv().getConnection(); + Statement statement = connection.createStatement()) { + for (TSDataType dataType : dataTypes) { + for (TSEncoding encoding : TSEncoding.values()) { + if (encoding.isSupported(dataType)) { + statement.execute( + "create timeseries " + + currDB + + ".d1.s" + + seriesCnt + + " with datatype=" + + dataType + + ", encoding=" + + encoding + + ", compression=SNAPPY"); + seriesCnt++; + } + } + } + + ResultSet resultSet = statement.executeQuery("SHOW TIMESERIES"); + + while (resultSet.next()) { + seriesCnt--; + } + assertEquals(0, seriesCnt); + statement.execute("DROP DATABASE " + currDB); + } + + // unsupported encodings + try (Connection connection = EnvFactory.getEnv().getConnection(); + Statement statement = connection.createStatement()) { + for (TSDataType dataType : dataTypes) { + for (TSEncoding encoding : TSEncoding.values()) { + if (!encoding.isSupported(dataType)) { + try { + statement.execute( + "create timeseries " + + currDB + + ".d1.s" + + seriesCnt + + " with 
datatype=" + + dataType + + ", encoding=" + + encoding + + ", compression=SNAPPY"); + fail("Should have thrown an exception"); + } catch (SQLException e) { + assertEquals( + "507: encoding " + encoding + " does not support " + dataType, e.getMessage()); + } + seriesCnt++; + } + } + } + + ResultSet resultSet = statement.executeQuery("SHOW TIMESERIES"); + assertFalse(resultSet.next()); + } + } } diff --git a/integration-test/src/test/java/org/apache/iotdb/session/it/IoTDBSessionSimpleIT.java b/integration-test/src/test/java/org/apache/iotdb/session/it/IoTDBSessionSimpleIT.java index 18a918070b7..c277435bb84 100644 --- a/integration-test/src/test/java/org/apache/iotdb/session/it/IoTDBSessionSimpleIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/session/it/IoTDBSessionSimpleIT.java @@ -46,8 +46,9 @@ import org.apache.tsfile.write.record.Tablet; import org.apache.tsfile.write.schema.IMeasurementSchema; import org.apache.tsfile.write.schema.MeasurementSchema; import org.junit.After; +import org.junit.AfterClass; import org.junit.Assert; -import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; @@ -76,13 +77,26 @@ public class IoTDBSessionSimpleIT { private static Logger LOGGER = LoggerFactory.getLogger(IoTDBSessionSimpleIT.class); - @Before - public void setUp() throws Exception { + private static final String[] databasesToClear = new String[] {"root.sg", "root.sg1"}; + + @BeforeClass + public static void setUpClass() throws Exception { EnvFactory.getEnv().initClusterEnvironment(); } @After - public void tearDown() throws Exception { + public void tearDown() { + for (String database : databasesToClear) { + try (ISession session = EnvFactory.getEnv().getSessionConnection()) { + session.executeNonQueryStatement("DELETE DATABASE " + database); + } catch (Exception ignored) { + + } + } + } + + @AfterClass + public static void tearDownClass() throws Exception 
{ EnvFactory.getEnv().cleanClusterEnvironment(); } diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/SchemaUtils.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/SchemaUtils.java index 74d753da9d5..29e2fcbc6bc 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/SchemaUtils.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/SchemaUtils.java @@ -28,11 +28,7 @@ import org.apache.tsfile.file.metadata.enums.TSEncoding; import java.util.Arrays; import java.util.Collections; -import java.util.EnumMap; -import java.util.HashSet; import java.util.List; -import java.util.Map; -import java.util.Set; import static org.apache.iotdb.db.queryengine.execution.operator.AggregationUtil.addPartialSuffix; @@ -40,51 +36,6 @@ public class SchemaUtils { private SchemaUtils() {} - private static final Map<TSDataType, Set<TSEncoding>> schemaChecker = - new EnumMap<>(TSDataType.class); - - static { - Set<TSEncoding> booleanSet = new HashSet<>(); - booleanSet.add(TSEncoding.PLAIN); - booleanSet.add(TSEncoding.RLE); - schemaChecker.put(TSDataType.BOOLEAN, booleanSet); - - Set<TSEncoding> intSet = new HashSet<>(); - intSet.add(TSEncoding.PLAIN); - intSet.add(TSEncoding.RLE); - intSet.add(TSEncoding.TS_2DIFF); - intSet.add(TSEncoding.GORILLA); - intSet.add(TSEncoding.ZIGZAG); - intSet.add(TSEncoding.CHIMP); - intSet.add(TSEncoding.SPRINTZ); - intSet.add(TSEncoding.RLBE); - - schemaChecker.put(TSDataType.INT32, intSet); - schemaChecker.put(TSDataType.INT64, intSet); - schemaChecker.put(TSDataType.TIMESTAMP, intSet); - schemaChecker.put(TSDataType.DATE, intSet); - - Set<TSEncoding> floatSet = new HashSet<>(); - floatSet.add(TSEncoding.PLAIN); - floatSet.add(TSEncoding.RLE); - floatSet.add(TSEncoding.TS_2DIFF); - floatSet.add(TSEncoding.GORILLA_V1); - floatSet.add(TSEncoding.GORILLA); - floatSet.add(TSEncoding.CHIMP); - floatSet.add(TSEncoding.SPRINTZ); - floatSet.add(TSEncoding.RLBE); - - 
schemaChecker.put(TSDataType.FLOAT, floatSet); - schemaChecker.put(TSDataType.DOUBLE, floatSet); - - Set<TSEncoding> textSet = new HashSet<>(); - textSet.add(TSEncoding.PLAIN); - textSet.add(TSEncoding.DICTIONARY); - schemaChecker.put(TSDataType.TEXT, textSet); - schemaChecker.put(TSDataType.BLOB, textSet); - schemaChecker.put(TSDataType.STRING, textSet); - } - /** * If the datatype of 'aggregation' depends on 'measurementDataType' (min_value, max_value), * return 'measurementDataType' directly, or return a list whose elements are all the datatype of @@ -256,7 +207,7 @@ public class SchemaUtils { public static void checkDataTypeWithEncoding(TSDataType dataType, TSEncoding encoding) throws MetadataException { - if (!schemaChecker.get(dataType).contains(encoding)) { + if (!encoding.isSupported(dataType)) { throw new MetadataException( String.format("encoding %s does not support %s", encoding, dataType), true); } diff --git a/pom.xml b/pom.xml index f1f2b3f9c62..526b3b6602d 100644 --- a/pom.xml +++ b/pom.xml @@ -166,7 +166,7 @@ <thrift.version>0.14.1</thrift.version> <xz.version>1.9</xz.version> <zstd-jni.version>1.5.6-3</zstd-jni.version> - <tsfile.version>1.2.0-240904-SNAPSHOT</tsfile.version> + <tsfile.version>1.2.0-240911-SNAPSHOT</tsfile.version> </properties> <!-- if we claim dependencies in dependencyManagement, then we do not claim
