This is an automated email from the ASF dual-hosted git repository. jackietien pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/master by this push:
new af8f6afaad [IOTDB-5683] Add special case process for aggregation
function Mode
af8f6afaad is described below
commit af8f6afaad222640c69df887f7997f31f8a2aee9
Author: Weihao Li <[email protected]>
AuthorDate: Wed Mar 29 16:49:58 2023 +0800
[IOTDB-5683] Add special case process for aggregation function Mode
---
docs/UserGuide/Operators-Functions/Aggregation.md | 30 ++++++-------
.../UserGuide/Operators-Functions/Aggregation.md | 30 ++++++-------
.../iotdb/db/it/aggregation/IoTDBModeIT.java | 24 ++++++++++-
.../src/main/codegen/templates/ModeAccumulator.ftl | 49 ++++++++++++++++------
4 files changed, 89 insertions(+), 44 deletions(-)
diff --git a/docs/UserGuide/Operators-Functions/Aggregation.md
b/docs/UserGuide/Operators-Functions/Aggregation.md
index a6c0e07126..1a73578784 100644
--- a/docs/UserGuide/Operators-Functions/Aggregation.md
+++ b/docs/UserGuide/Operators-Functions/Aggregation.md
@@ -27,21 +27,21 @@ All aggregate functions except `COUNT()`, `COUNT_IF()`
ignore null values and re
The aggregate functions supported by IoTDB are as follows:
-| Function Name | Function Description
| Allowed Input Data Types | required attributes
[...]
-|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------|
------------------------
|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
[...]
-| SUM | Summation.
| INT32 INT64 FLOAT DOUBLE | No
[...]
-| COUNT | Counts the number of data points.
| All types | No
[...]
-| AVG | Average.
| INT32 INT64 FLOAT DOUBLE | No
[...]
-| EXTREME | Finds the value with the largest absolute value. Returns a
positive value if the maximum absolute value of positive and negative values is
equal. | INT32 INT64 FLOAT DOUBLE | No
[...]
-| MAX_VALUE | Find the maximum value.
| INT32 INT64 FLOAT DOUBLE | No
[...]
-| MIN_VALUE | Find the minimum value.
| INT32 INT64 FLOAT DOUBLE | No
[...]
-| FIRST_VALUE | Find the value with the smallest timestamp.
| All data types | No
[...]
-| LAST_VALUE | Find the value with the largest timestamp.
| All data types | No
[...]
-| MAX_TIME | Find the maximum timestamp.
| All data Types | No
[...]
-| MIN_TIME | Find the minimum timestamp.
| All data Types | No
[...]
-| COUNT_IF | Find the number of data points that continuously meet a
given condition and the number of data points that meet the condition
(represented by keep) meet the specified threshold. | BOOLEAN
| `[keep >=/>/=/!=/</<=]threshold`:The specified threshold or threshold
condition, it is equivalent to `keep >= threshold` if `threshold` is used
alone, type of `threshold` is `INT64`<br/> `ignoreNull`:Optional, default value
is `true`;If the value is `true`, null valu [...]
-| TIME_DURATION | Find the difference between the timestamp of the largest
non-null value and the timestamp of the smallest non-null value in a column
| All data Types | No
[...]
-| MODE | Find the mode. Note: Having too many different values in the
input series risks a memory exception.
| All data Types | No
[...]
+| Function Name | Function Description
|
Allowed Input Data Types | required attributes
[...]
+|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
------------------------
|---------------------------------------------------------------------------------------------------------------------------------------------------------
[...]
+| SUM | Summation.
| INT32
INT64 FLOAT DOUBLE | No
[...]
+| COUNT | Counts the number of data points.
| All
types | No
[...]
+| AVG | Average.
| INT32
INT64 FLOAT DOUBLE | No
[...]
+| EXTREME | Finds the value with the largest absolute value. Returns a
positive value if the maximum absolute value of positive and negative values is
equal.
|
INT32 INT64 FLOAT DOUBLE | No
[...]
+| MAX_VALUE | Find the maximum value.
| INT32
INT64 FLOAT DOUBLE | No
[...]
+| MIN_VALUE | Find the minimum value.
| INT32
INT64 FLOAT DOUBLE | No
[...]
+| FIRST_VALUE | Find the value with the smallest timestamp.
| All
data types | No
[...]
+| LAST_VALUE | Find the value with the largest timestamp.
| All
data types | No
[...]
+| MAX_TIME | Find the maximum timestamp.
| All
data Types | No
[...]
+| MIN_TIME | Find the minimum timestamp.
| All
data Types | No
[...]
+| COUNT_IF | Find the number of data points that continuously meet a
given condition and the number of data points that meet the condition
(represented by keep) meet the specified threshold.
| BOOLEAN | `[keep >=/>/=/!=/</<=]threshold`:The
specified threshold or threshold condition, it is equivalent to `keep >=
threshold` if `threshold` is used alone, t [...]
+| TIME_DURATION | Find the difference between the timestamp of the largest
non-null value and the timestamp of the smallest non-null value in a column
|
All data Types | No
[...]
+| MODE | Find the mode. Note: <br/>1. Having too many different values
in the input series risks a memory exception; <br/>2. If all the elements have
the same number of occurrences, i.e. there is no mode, the value with the earliest
time is returned; <br/>3. If there are multiple modes, the mode with the earliest time is returned. | All
data Types | No
[...]
## COUNT
### example
diff --git a/docs/zh/UserGuide/Operators-Functions/Aggregation.md
b/docs/zh/UserGuide/Operators-Functions/Aggregation.md
index 995aef81fa..87177d6145 100644
--- a/docs/zh/UserGuide/Operators-Functions/Aggregation.md
+++ b/docs/zh/UserGuide/Operators-Functions/Aggregation.md
@@ -27,21 +27,21 @@
IoTDB 支持的聚合函数如下:
-| 函数名 | 功能描述 | 允许的输入类型
| 必要的属性参数
| 输出类型
|
-|---------------|-----------------------------------------------|--------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------|
-| SUM | 求和。 | INT32 INT64
FLOAT DOUBLE | 无
|
DOUBLE |
-| COUNT | 计算数据点数。 | 所有类型
| 无
|
INT64 |
-| AVG | 求平均值。 | INT32 INT64
FLOAT DOUBLE | 无
|
DOUBLE |
-| EXTREME | 求具有最大绝对值的值。如果正值和负值的最大绝对值相等,则返回正值。 | INT32 INT64
FLOAT DOUBLE | 无
|
与输入类型一致 |
-| MAX_VALUE | 求最大值。 | INT32 INT64
FLOAT DOUBLE | 无
|
与输入类型一致 |
-| MIN_VALUE | 求最小值。 | INT32 INT64
FLOAT DOUBLE | 无
|
与输入类型一致 |
-| FIRST_VALUE | 求时间戳最小的值。 | 所有类型
| 无
|
与输入类型一致 |
-| LAST_VALUE | 求时间戳最大的值。 | 所有类型
| 无
|
与输入类型一致 |
-| MAX_TIME | 求最大时间戳。 | 所有类型
| 无
|
Timestamp |
-| MIN_TIME | 求最小时间戳。 | 所有类型
| 无
|
Timestamp |
-| COUNT_IF | 求数据点连续满足某一给定条件,且满足条件的数据点个数(用keep表示)满足指定阈值的次数。 | BOOLEAN
| `[keep
>=/>/=/!=/</<=]threshold`:被指定的阈值或阈值条件,若只使用`threshold`则等价于`keep >=
threshold`,`threshold`类型为`INT64`<br/>
`ignoreNull`:可选,默认为`true`;为`true`表示忽略null值,即如果中间出现null值,直接忽略,不会打断连续性;为`false`表示不忽略null值,即如果中间出现null值,会打断连续性
| INT64 |
-| TIME_DURATION | 求某一列最大一个不为NULL的值所在时间戳与最小一个不为NULL的值所在时间戳的时间戳差 | 所有类型
| 无
|
INT64 |
-| MODE | 求众数。注意:输入序列的不同值个数过多时会有内存异常风险。 | 所有类型
| 无
|
INT64 |
+| 函数名 | 功能描述
| 允许的输入类型 |
必要的属性参数
| 输出类型 |
+|---------------|-------------------------------------------------------------------------------------------------------------|--------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------|
+| SUM | 求和。
| INT32 INT64 FLOAT DOUBLE | 无
| DOUBLE |
+| COUNT | 计算数据点数。
| 所有类型 | 无
| INT64 |
+| AVG | 求平均值。
| INT32 INT64 FLOAT DOUBLE | 无
| DOUBLE |
+| EXTREME | 求具有最大绝对值的值。如果正值和负值的最大绝对值相等,则返回正值。
| INT32 INT64 FLOAT DOUBLE | 无
| 与输入类型一致 |
+| MAX_VALUE | 求最大值。
| INT32 INT64 FLOAT DOUBLE | 无
| 与输入类型一致 |
+| MIN_VALUE | 求最小值。
| INT32 INT64 FLOAT DOUBLE | 无
| 与输入类型一致 |
+| FIRST_VALUE | 求时间戳最小的值。
| 所有类型 | 无
| 与输入类型一致 |
+| LAST_VALUE | 求时间戳最大的值。
| 所有类型 | 无
| 与输入类型一致 |
+| MAX_TIME | 求最大时间戳。
| 所有类型 | 无
| Timestamp |
+| MIN_TIME | 求最小时间戳。
| 所有类型 | 无
| Timestamp |
+| COUNT_IF | 求数据点连续满足某一给定条件,且满足条件的数据点个数(用keep表示)满足指定阈值的次数。
| BOOLEAN |
`[keep >=/>/=/!=/</<=]threshold`:被指定的阈值或阈值条件,若只使用`threshold`则等价于`keep >=
threshold`,`threshold`类型为`INT64`<br/>
`ignoreNull`:可选,默认为`true`;为`true`表示忽略null值,即如果中间出现null值,直接忽略,不会打断连续性;为`false`表示不忽略null值,即如果中间出现null值,会打断连续性
| INT64 |
+| TIME_DURATION | 求某一列最大一个不为NULL的值所在时间戳与最小一个不为NULL的值所在时间戳的时间戳差
| 所有类型 | 无
| INT64 |
+| MODE | 求众数。注意:<br/>1.输入序列的不同值个数过多时会有内存异常风险;
<br/>2.如果所有元素出现的频次相同,即没有众数,则返回对应时间戳最小的值; <br/>3.如果有多个众数,则返回对应时间戳最小的众数。 | 所有类型
| 无
| 与输入类型一致 |
### COUNT_IF
#### 语法
diff --git
a/integration-test/src/test/java/org/apache/iotdb/db/it/aggregation/IoTDBModeIT.java
b/integration-test/src/test/java/org/apache/iotdb/db/it/aggregation/IoTDBModeIT.java
index 37e8883182..49bee98459 100644
---
a/integration-test/src/test/java/org/apache/iotdb/db/it/aggregation/IoTDBModeIT.java
+++
b/integration-test/src/test/java/org/apache/iotdb/db/it/aggregation/IoTDBModeIT.java
@@ -66,6 +66,13 @@ public class IoTDBModeIT {
"INSERT INTO root.db.d2(timestamp,s1,s2,s3,s4,s5,s6)
values(10000000001, 2, 2, false, 2, 2, \"2\")",
"INSERT INTO root.db.d2(timestamp,s1,s2,s3,s4,s5,s6)
values(10000000002, 2, 2, false, 2, 2, \"2\")",
"INSERT INTO root.db.d2(timestamp,s1,s2,s3,s4,s5,s6)
values(10000000003, 2, 2, false, 2, 2, \"2\")",
+ "INSERT INTO root.test.d1(timestamp,s1) values(1, 5)",
+ "INSERT INTO root.test.d1(timestamp,s1) values(2, 5)",
+ "INSERT INTO root.test.d1(timestamp,s1) values(3, 2)",
+ "INSERT INTO root.test.d1(timestamp,s1) values(4, 2)",
+ "INSERT INTO root.test.d1(timestamp,s1) values(5, 8)",
+ "INSERT INTO root.test.d1(timestamp,s1) values(6, 8)",
+ "INSERT INTO root.test.d1(timestamp,s1) values(7, 1)",
"flush"
};
@@ -145,7 +152,7 @@ public class IoTDBModeIT {
};
String[] retArray = new String[] {"2,2,false,2.0,2.0,2,"};
resultSetEqualTest(
- "select mode(s1),mode(s2),mode(s3),mode(s4),mode(s5),mode(s6) from
root.** group by level = 0",
+ "select mode(s1),mode(s2),mode(s3),mode(s4),mode(s5),mode(s6) from
root.db.* group by level = 0",
expectedHeader,
retArray);
}
@@ -157,4 +164,19 @@ public class IoTDBModeIT {
TSStatusCode.EXECUTE_STATEMENT_ERROR.getStatusCode()
+ ": MODE with slidingWindow is not supported now");
}
+
+ @Test
+ public void testNoMode() {
+ String[] expectedHeader = new String[] {mode("root.test.d1.s1")};
+ String[] retArray = new String[] {"5.0,"};
+ resultSetEqualTest(
+ "select mode(s1) from root.test.d1 where time <= 6", expectedHeader,
retArray);
+ }
+
+ @Test
+ public void testManyModes() {
+ String[] expectedHeader = new String[] {mode("root.test.d1.s1")};
+ String[] retArray = new String[] {"5.0,"};
+ resultSetEqualTest("select mode(s1) from root.test.d1", expectedHeader,
retArray);
+ }
}
diff --git a/server/src/main/codegen/templates/ModeAccumulator.ftl
b/server/src/main/codegen/templates/ModeAccumulator.ftl
index 62175bf628..e81ae32d15 100644
--- a/server/src/main/codegen/templates/ModeAccumulator.ftl
+++ b/server/src/main/codegen/templates/ModeAccumulator.ftl
@@ -25,6 +25,8 @@
package org.apache.iotdb.db.mpp.aggregation;
+import com.google.common.collect.ImmutableList;
+import org.apache.commons.collections4.comparators.ComparatorChain;
import org.apache.iotdb.db.conf.IoTDBDescriptor;
import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
import org.apache.iotdb.tsfile.file.metadata.statistics.Statistics;
@@ -32,6 +34,7 @@ import
org.apache.iotdb.tsfile.read.common.block.column.Column;
import org.apache.iotdb.tsfile.read.common.block.column.ColumnBuilder;
import org.apache.iotdb.tsfile.utils.Binary;
import org.apache.iotdb.tsfile.utils.BitMap;
+import org.apache.iotdb.tsfile.utils.Pair;
import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils;
import java.io.ByteArrayInputStream;
@@ -49,7 +52,8 @@ import static
com.google.common.base.Preconditions.checkArgument;
*/
@SuppressWarnings("unused")
public class ${className} implements Accumulator {
- private final Map<${type.javaBoxName}, Long> countMap = new HashMap<>();
+ // Pair.left: number of occurrences of the element; Pair.right: minimum time at which the element was seen
+ private final Map<${type.javaBoxName}, Pair<Long, Long>> countMap = new
HashMap<>();
<#if type.dataType != "boolean">
private final int MAP_SIZE_THRESHOLD =
IoTDBDescriptor.getInstance().getConfig().getModeMapSizeThreshold();
@@ -62,7 +66,11 @@ public class ${className} implements Accumulator {
continue;
}
if (!column[1].isNull(i)) {
- countMap.compute(column[1].get${type.dataType?cap_first}(i), (k, v) ->
v == null ? 1 : v + 1);
+ final long time = column[0].getLong(i);
+ countMap.compute(
+ column[1].get${type.dataType?cap_first}(i),
+ (k, v) ->
+ v == null ? new Pair<>(1L, time) : new Pair<>(v.left + 1,
Math.min(v.right, time)));
<#if type.dataType != "boolean">
if (countMap.size() > MAP_SIZE_THRESHOLD) {
@@ -96,7 +104,7 @@ public class ${className} implements Accumulator {
// Step of ModeAccumulator is STATIC,
// countMap only need to record one entry which key is finalResult
- countMap.put(finalResult.get${type.dataType?cap_first}(0), 0L);
+ countMap.put(finalResult.get${type.dataType?cap_first}(0), new Pair<>(0L,
Long.MIN_VALUE));
}
@Override
@@ -110,7 +118,14 @@ public class ${className} implements Accumulator {
tsBlockBuilder.appendNull();
} else {
tsBlockBuilder.write${type.dataType?cap_first}(
- Collections.max(countMap.entrySet(),
Map.Entry.comparingByValue()).getKey());
+ Collections.max(
+ countMap.entrySet(),
+ Map.Entry.comparingByValue(
+ new ComparatorChain<>(
+ ImmutableList.of(
+ (v1, v2) -> v1.left.compareTo(v2.left),
+ (v1, v2) -> v2.right.compareTo(v1.right)))))
+ .getKey());
}
}
@@ -138,9 +153,10 @@ public class ${className} implements Accumulator {
ByteArrayOutputStream stream = new ByteArrayOutputStream();
try {
ReadWriteIOUtils.write(countMap.size(), stream);
- for (Map.Entry<${type.javaBoxName}, Long> entry : countMap.entrySet()) {
+ for (Map.Entry<${type.javaBoxName}, Pair<Long, Long>> entry :
countMap.entrySet()) {
ReadWriteIOUtils.write(entry.getKey(), stream);
- ReadWriteIOUtils.write(entry.getValue(), stream);
+ ReadWriteIOUtils.write(entry.getValue().left, stream);
+ ReadWriteIOUtils.write(entry.getValue().right, stream);
}
} catch (IOException e) {
// Totally memory operation. This case won't happen.
@@ -153,13 +169,20 @@ public class ${className} implements Accumulator {
try {
int size = ReadWriteIOUtils.readInt(stream);
for (int i = 0; i < size; i++) {
-
countMap.compute(ReadWriteIOUtils.read${type.dataType?cap_first}(stream), (k,
v) -> {
- try {
- return v == null ? ReadWriteIOUtils.readLong(stream) : v +
ReadWriteIOUtils.readLong(stream);
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- });
+ countMap.compute(
+ ReadWriteIOUtils.read${type.dataType?cap_first}(stream),
+ (k, v) -> {
+ try {
+ return v == null
+ ? new Pair<>(
+ ReadWriteIOUtils.readLong(stream),
ReadWriteIOUtils.readLong(stream))
+ : new Pair<>(
+ v.left + ReadWriteIOUtils.readLong(stream),
+ Math.min(v.right, ReadWriteIOUtils.readLong(stream)));
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ });
<#if type.dataType != "boolean">
if (countMap.size() > MAP_SIZE_THRESHOLD) {
