This is an automated email from the ASF dual-hosted git repository.
jiangtian pushed a commit to branch rc/1.1
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/rc/1.1 by this push:
new f36fc747 Support dictionary encoding for STRING data type. Move
datatype-encoding map into TsFile.
f36fc747 is described below
commit f36fc74708a0a8ad040ed6837792a9ed90e091e7
Author: Tian Jiang <[email protected]>
AuthorDate: Wed Sep 11 09:48:27 2024 +0800
Support dictionary encoding for STRING data type.
Move datatype-encoding map into TsFile.
(cherry picked from commit fdc362e846d63f768ac698b36edaf77fae2a6512)
---
.../tsfile/encoding/encoder/TSEncodingBuilder.java | 2 +-
.../tsfile/file/metadata/enums/TSEncoding.java | 60 ++++++++++++++++++++++
2 files changed, 61 insertions(+), 1 deletion(-)
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/TSEncodingBuilder.java
b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/TSEncodingBuilder.java
index 01b1daa8..68c7e56b 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/TSEncodingBuilder.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/TSEncodingBuilder.java
@@ -370,7 +370,7 @@ public abstract class TSEncodingBuilder {
@Override
public Encoder getEncoder(TSDataType type) {
- if (type == TSDataType.TEXT) {
+ if (type == TSDataType.TEXT || type == TSDataType.STRING) {
return new DictionaryEncoder();
}
throw new UnSupportedDataTypeException("DICTIONARY doesn't support data
type: " + type);
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java
b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java
index 43de4b8f..77482c6e 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java
@@ -19,6 +19,13 @@
package org.apache.tsfile.file.metadata.enums;
+import org.apache.tsfile.enums.TSDataType;
+
+import java.util.EnumMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
public enum TSEncoding {
PLAIN((byte) 0),
DICTIONARY((byte) 1),
@@ -37,6 +44,55 @@ public enum TSEncoding {
RLBE((byte) 13);
private final byte type;
+  /**
+   * Lookup table from each data type to the set of encodings registered for it.
+   *
+   * <p>Data types that share the same capabilities map to the very same {@link Set} instance
+   * (for example INT32, INT64, TIMESTAMP and DATE), so mutating one of those sets is visible
+   * through every key that refers to it.
+   */
+  @SuppressWarnings("java:S2386") // used by other projects
+  public static final Map<TSDataType, Set<TSEncoding>> TYPE_SUPPORTED_ENCODINGS =
+      new EnumMap<>(TSDataType.class);
+
+  static {
+    // java.util.Collections is written fully qualified so this block needs no extra import.
+
+    // Boolean values.
+    Set<TSEncoding> booleanEncodings = new HashSet<>();
+    java.util.Collections.addAll(booleanEncodings, PLAIN, RLE);
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.BOOLEAN, booleanEncodings);
+
+    // Integer-backed types all share one set instance.
+    Set<TSEncoding> integerEncodings = new HashSet<>();
+    java.util.Collections.addAll(
+        integerEncodings, PLAIN, RLE, TS_2DIFF, GORILLA, ZIGZAG, CHIMP, SPRINTZ, RLBE);
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.INT32, integerEncodings);
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.INT64, integerEncodings);
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.TIMESTAMP, integerEncodings);
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.DATE, integerEncodings);
+
+    // Floating-point types share one set instance; GORILLA_V1 is listed here but not ZIGZAG.
+    Set<TSEncoding> floatingEncodings = new HashSet<>();
+    java.util.Collections.addAll(
+        floatingEncodings, PLAIN, RLE, TS_2DIFF, GORILLA_V1, GORILLA, CHIMP, SPRINTZ, RLBE);
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.FLOAT, floatingEncodings);
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.DOUBLE, floatingEncodings);
+
+    // TEXT and STRING share one set instance.
+    Set<TSEncoding> textualEncodings = new HashSet<>();
+    java.util.Collections.addAll(textualEncodings, PLAIN, DICTIONARY);
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.TEXT, textualEncodings);
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.STRING, textualEncodings);
+
+    // BLOB is registered with PLAIN only.
+    Set<TSEncoding> blobEncodings = new HashSet<>();
+    blobEncodings.add(PLAIN);
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.BLOB, blobEncodings);
+  }
+
/**
 * Stores the byte supplied in the constant declaration (e.g. {@code PLAIN((byte) 0)}) in the
 * {@code type} field of this enum constant.
 *
 * @param type byte value associated with this encoding constant
 */
TSEncoding(byte type) {
this.type = type;
}
@@ -84,6 +140,10 @@ public enum TSEncoding {
}
}
+  /**
+   * Reports whether {@code encoding} is registered for {@code type} in
+   * {@link #TYPE_SUPPORTED_ENCODINGS}.
+   *
+   * <p>The receiver is not consulted; the answer depends only on the two arguments.
+   *
+   * @param type data type to look up
+   * @param encoding encoding to test against the set registered for {@code type}
+   * @return {@code true} if {@code type} has an entry containing {@code encoding};
+   *     {@code false} otherwise, including when {@code type} has no entry in the map
+   */
+  public boolean isSupported(TSDataType type, TSEncoding encoding) {
+    // The lookup yields null for a data type that was never registered (or a null type);
+    // report that as "not supported" instead of failing with a NullPointerException.
+    Set<TSEncoding> supported = TYPE_SUPPORTED_ENCODINGS.get(type);
+    return supported != null && supported.contains(encoding);
+  }
+
/**
 * Returns {@link Byte#BYTES}.
 *
 * <p>NOTE(review): presumably the number of bytes one serialized encoding occupies, matching
 * the byte-typed {@code type} field — confirm against the serialization code.
 *
 * @return the constant {@code Byte.BYTES}
 */
public static int getSerializedSize() {
return Byte.BYTES;
}