This is an automated email from the ASF dual-hosted git repository.
jackietien pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/develop by this push:
new 676117fb Optimize time and value in filter by TsFile statistics.
676117fb is described below
commit 676117fbf91db0dc9b2e2badf51c42517b240884
Author: Zhihao Shen <[email protected]>
AuthorDate: Thu Jul 18 09:23:59 2024 +0800
Optimize time and value in filter by TsFile statistics.
---
.../read/filter/operator/TimeFilterOperators.java | 32 ++++++-
.../read/filter/operator/ValueFilterOperators.java | 101 ++++++++++++++++++++-
.../tsfile/read/filter/StatisticsFilterTest.java | 68 ++++++++++++++
3 files changed, 194 insertions(+), 7 deletions(-)
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/read/filter/operator/TimeFilterOperators.java
b/java/tsfile/src/main/java/org/apache/tsfile/read/filter/operator/TimeFilterOperators.java
index 2c3214b4..ed0d0c57 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/read/filter/operator/TimeFilterOperators.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/read/filter/operator/TimeFilterOperators.java
@@ -20,7 +20,6 @@
package org.apache.tsfile.read.filter.operator;
import org.apache.tsfile.read.common.TimeRange;
-import org.apache.tsfile.read.filter.basic.DisableStatisticsTimeFilter;
import org.apache.tsfile.read.filter.basic.Filter;
import org.apache.tsfile.read.filter.basic.OperatorType;
import org.apache.tsfile.read.filter.basic.TimeFilter;
@@ -492,16 +491,22 @@ public final class TimeFilterOperators {
}
// base class for TimeIn, TimeNotIn
- abstract static class TimeColumnSetFilter extends
DisableStatisticsTimeFilter {
+ abstract static class TimeColumnSetFilter extends TimeFilter {
protected final Set<Long> candidates;
+ protected final Long candidatesMin;
+ protected final Long candidatesMax;
protected TimeColumnSetFilter(Set<Long> candidates) {
this.candidates = Objects.requireNonNull(candidates, "candidates cannot
be null");
+ this.candidatesMin = Collections.min(candidates);
+ this.candidatesMax = Collections.max(candidates);
}
protected TimeColumnSetFilter(ByteBuffer buffer) {
this.candidates = ReadWriteIOUtils.readLongSet(buffer);
+ this.candidatesMin = Collections.min(candidates);
+ this.candidatesMax = Collections.max(candidates);
}
@Override
@@ -548,6 +553,17 @@ public final class TimeFilterOperators {
return candidates.contains(time);
}
+ @Override
+ public boolean satisfyStartEndTime(long startTime, long endTime) {
+ return startTime <= candidatesMax && endTime >= candidatesMin;
+ }
+
+ @Override
+ public boolean containStartEndTime(long startTime, long endTime) {
+ // Make `allSatisfy` always return false
+ return false;
+ }
+
@Override
public List<TimeRange> getTimeRanges() {
List<TimeRange> res = new ArrayList<>();
@@ -583,6 +599,18 @@ public final class TimeFilterOperators {
return !candidates.contains(time);
}
+ @Override
+ public boolean satisfyStartEndTime(long startTime, long endTime) {
+ // Make `canSkip` always return false
+ return true;
+ }
+
+ @Override
+ public boolean containStartEndTime(long startTime, long endTime) {
+ // Make `allSatisfy` always return false
+ return false;
+ }
+
@Override
public List<TimeRange> getTimeRanges() {
return Collections.singletonList(new TimeRange(Long.MIN_VALUE,
Long.MAX_VALUE));
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/read/filter/operator/ValueFilterOperators.java
b/java/tsfile/src/main/java/org/apache/tsfile/read/filter/operator/ValueFilterOperators.java
index c2b7284c..905c364b 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/read/filter/operator/ValueFilterOperators.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/read/filter/operator/ValueFilterOperators.java
@@ -35,6 +35,7 @@ import java.io.DataOutputStream;
import java.io.IOException;
import java.io.Serializable;
import java.nio.ByteBuffer;
+import java.util.Collections;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
@@ -706,18 +707,24 @@ public final class ValueFilterOperators {
}
// base class for ValueIn, ValueNotIn
- abstract static class ValueColumnSetFilter<T> extends
DisableStatisticsValueFilter {
+ abstract static class ValueColumnSetFilter<T extends Comparable<T>> extends
ValueFilter {
protected final Set<T> candidates;
+ protected final T candidatesMin;
+ protected final T candidatesMax;
protected ValueColumnSetFilter(int measurementIndex, Set<T> candidates) {
super(measurementIndex);
this.candidates = Objects.requireNonNull(candidates, "candidates cannot
be null");
+ this.candidatesMin = candidates.size() != 0 ?
Collections.min(candidates) : null;
+ this.candidatesMax = candidates.size() != 0 ?
Collections.max(candidates) : null;
}
protected ValueColumnSetFilter(ByteBuffer buffer) {
super(buffer);
candidates = ReadWriteIOUtils.readObjectSet(buffer);
+ this.candidatesMin = candidates.size() != 0 ?
Collections.min(candidates) : null;
+ this.candidatesMax = candidates.size() != 0 ?
Collections.max(candidates) : null;
}
@Override
@@ -753,8 +760,7 @@ public final class ValueFilterOperators {
}
}
- public static final class ValueIn<T> extends ValueColumnSetFilter<T> {
-
+ public static final class ValueIn<T extends Comparable<T>> extends
ValueColumnSetFilter<T> {
public ValueIn(int measurementIndex, Set<T> candidates) {
super(measurementIndex, candidates);
}
@@ -765,7 +771,78 @@ public final class ValueFilterOperators {
@Override
public boolean valueSatisfy(Object value) {
- return candidates.contains(value);
+ return candidates.contains((T) value);
+ }
+
+ @Override
+ public boolean canSkip(IMetadata metadata) {
+ Optional<Statistics<? extends Serializable>> statistics =
+ metadata.getMeasurementStatistics(measurementIndex);
+
+ // All values are null, but candidates do not contain null
+ if ((!statistics.isPresent() || isAllNulls(statistics.get())) &&
candidates.size() != 0) {
+ return true;
+ }
+
+ // All values are not null, but candidate is one null value
+ if (!metadata.hasNullValue(measurementIndex) && candidates.size() == 0) {
+ return true;
+ }
+
+ if (statistics.isPresent()) {
+ T valuesMin = (T) statistics.get().getMinValue();
+ T valuesMax = (T) statistics.get().getMaxValue();
+ // All values are same
+ if (valuesMin.compareTo(valuesMax) == 0) {
+ return !candidates.contains(valuesMin);
+ } else {
+ if (candidates.size() != 0) {
+ // All values are less than min, or greater than max
+ if (candidatesMin.compareTo(valuesMax) > 0) {
+ return true;
+ }
+ if (candidatesMax.compareTo(valuesMin) < 0) {
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+ }
+
+ @Override
+ protected boolean canSkip(Statistics<? extends Serializable> statistics) {
+ throw new NotImplementedException();
+ }
+
+ @Override
+ public boolean allSatisfy(IMetadata metadata) {
+ Optional<Statistics<? extends Serializable>> statistics =
+ metadata.getMeasurementStatistics(measurementIndex);
+
+ // All values are null, and candidate contains null
+ // Note null value cannot be added to set
+ if ((!statistics.isPresent() || isAllNulls(statistics.get())) &&
candidates.size() == 0) {
+ return true;
+ }
+
+ // All values are same
+ if (statistics.isPresent()) {
+ T valuesMin = (T) statistics.get().getMinValue();
+ T valuesMax = (T) statistics.get().getMaxValue();
+ // All values are same
+ if (valuesMin.compareTo(valuesMax) == 0) {
+ return candidates.contains(valuesMin);
+ }
+ }
+
+ return false;
+ }
+
+ @Override
+ protected boolean allSatisfy(Statistics<? extends Serializable>
statistics) {
+ throw new NotImplementedException();
}
@Override
@@ -777,9 +854,13 @@ public final class ValueFilterOperators {
public OperatorType getOperatorType() {
return OperatorType.VALUE_IN;
}
+
+ private boolean isAllNulls(Statistics<? extends Serializable> statistics) {
+ return statistics.getCount() == 0;
+ }
}
- public static final class ValueNotIn<T> extends ValueColumnSetFilter<T> {
+ public static final class ValueNotIn<T extends Comparable<T>> extends
ValueColumnSetFilter<T> {
public ValueNotIn(int measurementIndex, Set<T> candidates) {
super(measurementIndex, candidates);
@@ -794,6 +875,16 @@ public final class ValueFilterOperators {
return !candidates.contains(value);
}
+ @Override
+ protected boolean canSkip(Statistics<? extends Serializable> statistics) {
+ return false;
+ }
+
+ @Override
+ protected boolean allSatisfy(Statistics<? extends Serializable>
statistics) {
+ return false;
+ }
+
@Override
public Filter reverse() {
return new ValueIn<>(measurementIndex, candidates);
diff --git
a/java/tsfile/src/test/java/org/apache/tsfile/read/filter/StatisticsFilterTest.java
b/java/tsfile/src/test/java/org/apache/tsfile/read/filter/StatisticsFilterTest.java
index 2acf3e35..d967756f 100644
---
a/java/tsfile/src/test/java/org/apache/tsfile/read/filter/StatisticsFilterTest.java
+++
b/java/tsfile/src/test/java/org/apache/tsfile/read/filter/StatisticsFilterTest.java
@@ -30,6 +30,9 @@ import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
+import java.util.HashSet;
+import java.util.Set;
+
import static org.apache.tsfile.read.filter.FilterTestUtil.newAlignedMetadata;
import static org.apache.tsfile.read.filter.FilterTestUtil.newMetadata;
import static org.apache.tsfile.read.filter.operator.Not.CONTAIN_NOT_ERR_MSG;
@@ -280,4 +283,69 @@ public class StatisticsFilterTest {
Assert.assertFalse(valueIsNotNull.allSatisfy(alignedMetadata2));
Assert.assertFalse(valueIsNotNull.allSatisfy(alignedMetadata3));
}
+
+ @Test
+ public void testIn() {
+ // Non-null candidates
+ Set<Long> candidates = new HashSet<>();
+ candidates.add(1L);
+ candidates.add(10L);
+
+ // Time
+ Filter timeIn = TimeFilterApi.in(candidates);
+ Assert.assertFalse(timeIn.canSkip(metadata1));
+ Assert.assertTrue(timeIn.canSkip(metadata2));
+ Assert.assertFalse(timeIn.canSkip(metadata3));
+ Assert.assertFalse(timeIn.allSatisfy(metadata1));
+ Assert.assertFalse(timeIn.allSatisfy(metadata2));
+ Assert.assertFalse(timeIn.allSatisfy(metadata3));
+ // Value
+ Filter valueIn = ValueFilterApi.in(candidates);
+ Assert.assertFalse(valueIn.canSkip(metadata1));
+ Assert.assertTrue(valueIn.canSkip(metadata2));
+ Assert.assertFalse(valueIn.canSkip(metadata3));
+ Assert.assertTrue(valueIn.canSkip(alignedMetadata3));
+ Assert.assertFalse(valueIn.allSatisfy(metadata1));
+ Assert.assertFalse(valueIn.allSatisfy(metadata2));
+ Assert.assertTrue(valueIn.allSatisfy(metadata3));
+ Assert.assertFalse(valueIn.allSatisfy(alignedMetadata3));
+
+ // Null candidate
+ Set<Long> nullCandidate = new HashSet<>();
+ candidates.add(null);
+
+ // Time cannot be null
+ Filter valueIn2 = ValueFilterApi.in(nullCandidate);
+ Assert.assertTrue(valueIn2.canSkip(metadata1));
+ Assert.assertTrue(valueIn2.canSkip(metadata2));
+ Assert.assertTrue(valueIn2.canSkip(metadata3));
+ Assert.assertFalse(valueIn2.canSkip(alignedMetadata3));
+ Assert.assertFalse(valueIn2.allSatisfy(metadata1));
+ Assert.assertFalse(valueIn2.allSatisfy(metadata2));
+ Assert.assertFalse(valueIn2.allSatisfy(metadata3));
+ Assert.assertTrue(valueIn2.allSatisfy(alignedMetadata3));
+ }
+
+ @Test
+ public void testNotIn() {
+ Set<Long> candidates = new HashSet<>();
+ candidates.add(1L);
+ candidates.add(10L);
+
+ Filter timeNotIn = TimeFilterApi.notIn(candidates);
+ Assert.assertFalse(timeNotIn.canSkip(metadata1));
+ Assert.assertFalse(timeNotIn.canSkip(metadata2));
+ Assert.assertFalse(timeNotIn.canSkip(metadata3));
+ Assert.assertFalse(timeNotIn.allSatisfy(metadata1));
+ Assert.assertFalse(timeNotIn.allSatisfy(metadata2));
+ Assert.assertFalse(timeNotIn.allSatisfy(metadata3));
+
+ Filter valueNotIn = ValueFilterApi.notIn(candidates);
+ Assert.assertFalse(valueNotIn.canSkip(metadata1));
+ Assert.assertFalse(valueNotIn.canSkip(metadata2));
+ Assert.assertFalse(valueNotIn.canSkip(metadata3));
+ Assert.assertFalse(valueNotIn.allSatisfy(metadata1));
+ Assert.assertFalse(valueNotIn.allSatisfy(metadata2));
+ Assert.assertFalse(valueNotIn.allSatisfy(metadata3));
+ }
}