jon-wei closed pull request #6307: add PrefixFilteredDimensionSpec for
multi-value dimensions
URL: https://github.com/apache/incubator-druid/pull/6307
This is a PR merged from a forked repository.
Because GitHub hides the original diff of a foreign pull request (one made
from a fork) once it is merged, the diff is reproduced below for the sake of
provenance:
diff --git a/docs/content/querying/dimensionspecs.md
b/docs/content/querying/dimensionspecs.md
index b70fa68583a..e9a41e00fd1 100644
--- a/docs/content/querying/dimensionspecs.md
+++ b/docs/content/querying/dimensionspecs.md
@@ -66,6 +66,12 @@ Following filtered dimension spec retains only the values
matching regex. Note t
{ "type" : "regexFiltered", "delegate" : <dimensionSpec>, "pattern": <java
regex pattern> }
```
+Following filtered dimension spec retains only the values starting with the
given prefix.
+
+```json
+{ "type" : "prefixFiltered", "delegate" : <dimensionSpec>, "prefix": <prefix
string> }
+```
+
For more details and examples, see [multi-value
dimensions](multi-value-dimensions.html).
### Lookup DimensionSpecs
diff --git
a/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java
b/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java
index fa12358f38f..44d0a807b9f 100644
---
a/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java
+++
b/processing/src/main/java/org/apache/druid/query/dimension/DimensionSpec.java
@@ -35,7 +35,8 @@
@JsonSubTypes.Type(name = "default", value = DefaultDimensionSpec.class),
@JsonSubTypes.Type(name = "extraction", value =
ExtractionDimensionSpec.class),
@JsonSubTypes.Type(name = "regexFiltered", value =
RegexFilteredDimensionSpec.class),
- @JsonSubTypes.Type(name = "listFiltered", value =
ListFilteredDimensionSpec.class)
+ @JsonSubTypes.Type(name = "listFiltered", value =
ListFilteredDimensionSpec.class),
+ @JsonSubTypes.Type(name = "prefixFiltered", value =
PrefixFilteredDimensionSpec.class)
})
public interface DimensionSpec extends Cacheable
{
diff --git
a/processing/src/main/java/org/apache/druid/query/dimension/PrefixFilteredDimensionSpec.java
b/processing/src/main/java/org/apache/druid/query/dimension/PrefixFilteredDimensionSpec.java
new file mode 100644
index 00000000000..d4904b5c5c0
--- /dev/null
+++
b/processing/src/main/java/org/apache/druid/query/dimension/PrefixFilteredDimensionSpec.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.dimension;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Predicate;
+import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.query.filter.DimFilterUtils;
+import org.apache.druid.segment.DimensionSelector;
+import it.unimi.dsi.fastutil.ints.Int2IntMap;
+import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
+import java.nio.ByteBuffer;
+import javax.annotation.Nullable;
+
+/**
+ */
+public class PrefixFilteredDimensionSpec extends BaseFilteredDimensionSpec
+{
+
+ private static final byte CACHE_TYPE_ID = 0x4;
+
+ private final String prefix;
+
+ public PrefixFilteredDimensionSpec(
+ @JsonProperty("delegate") DimensionSpec delegate,
+ @JsonProperty("prefix") String prefix //rows not starting with the
prefix will be discarded
+ )
+ {
+ super(delegate);
+ this.prefix = Preconditions.checkNotNull(prefix, "prefix must not be
null");
+ }
+
+ @JsonProperty
+ public String getPrefix()
+ {
+ return prefix;
+ }
+
+ @Override
+ public DimensionSelector decorate(final DimensionSelector selector)
+ {
+ if (selector == null) {
+ return null;
+ }
+
+ final int selectorCardinality = selector.getValueCardinality();
+ if (selectorCardinality < 0 || !selector.nameLookupPossibleInAdvance()) {
+ return new PredicateFilteredDimensionSelector(
+ selector,
+ new Predicate<String>()
+ {
+ @Override
+ public boolean apply(@Nullable String input)
+ {
+ String val = NullHandling.nullToEmptyIfNeeded(input);
+ return val == null ? false : val.startsWith(prefix);
+ }
+ }
+ );
+ }
+
+ int count = 0;
+ final Int2IntOpenHashMap forwardMapping = new Int2IntOpenHashMap();
+ forwardMapping.defaultReturnValue(-1);
+ for (int i = 0; i < selectorCardinality; i++) {
+ String val = NullHandling.nullToEmptyIfNeeded(selector.lookupName(i));
+ if (val != null && val.startsWith(prefix)) {
+ forwardMapping.put(i, count++);
+ }
+ }
+
+ final int[] reverseMapping = new int[forwardMapping.size()];
+ for (Int2IntMap.Entry e : forwardMapping.int2IntEntrySet()) {
+ reverseMapping[e.getIntValue()] = e.getIntKey();
+ }
+ return new ForwardingFilteredDimensionSelector(selector, forwardMapping,
reverseMapping);
+ }
+
+ @Override
+ public byte[] getCacheKey()
+ {
+ byte[] delegateCacheKey = delegate.getCacheKey();
+ byte[] prefixBytes = StringUtils.toUtf8(prefix);
+ return ByteBuffer.allocate(2 + delegateCacheKey.length +
prefixBytes.length)
+ .put(CACHE_TYPE_ID)
+ .put(delegateCacheKey)
+ .put(DimFilterUtils.STRING_SEPARATOR)
+ .put(prefixBytes)
+ .array();
+ }
+
+ @Override
+ public boolean equals(Object o)
+ {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ PrefixFilteredDimensionSpec that = (PrefixFilteredDimensionSpec) o;
+
+ if (!delegate.equals(that.delegate)) {
+ return false;
+ }
+ return prefix.equals(that.prefix);
+ }
+
+ @Override
+ public int hashCode()
+ {
+ int result = delegate.hashCode();
+ result = 31 * result + prefix.hashCode();
+ return result;
+ }
+
+ @Override
+ public String toString()
+ {
+ return "PrefixFilteredDimensionSpec{" +
+ "Prefix='" + prefix + '\'' +
+ '}';
+ }
+}
diff --git
a/processing/src/main/java/org/apache/druid/query/dimension/RegexFilteredDimensionSpec.java
b/processing/src/main/java/org/apache/druid/query/dimension/RegexFilteredDimensionSpec.java
index 12746c7b6e7..fc708451a3a 100644
---
a/processing/src/main/java/org/apache/druid/query/dimension/RegexFilteredDimensionSpec.java
+++
b/processing/src/main/java/org/apache/druid/query/dimension/RegexFilteredDimensionSpec.java
@@ -76,7 +76,8 @@ public DimensionSelector decorate(final DimensionSelector
selector)
@Override
public boolean apply(@Nullable String input)
{
- return
compiledRegex.matcher(NullHandling.nullToEmptyIfNeeded(input)).matches();
+ String val = NullHandling.nullToEmptyIfNeeded(input);
+ return val == null ? false :
compiledRegex.matcher(val).matches();
}
}
);
diff --git
a/processing/src/test/java/org/apache/druid/query/dimension/PrefixFilteredDimensionSpecTest.java
b/processing/src/test/java/org/apache/druid/query/dimension/PrefixFilteredDimensionSpecTest.java
new file mode 100644
index 00000000000..2dc8fecb025
--- /dev/null
+++
b/processing/src/test/java/org/apache/druid/query/dimension/PrefixFilteredDimensionSpecTest.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.dimension;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.TestHelper;
+import org.apache.druid.segment.data.IndexedInts;
+import java.util.Arrays;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ */
+public class PrefixFilteredDimensionSpecTest
+{
+
+ @Test
+ public void testSerde() throws Exception
+ {
+ ObjectMapper mapper = TestHelper.makeJsonMapper();
+
+ String jsonStr = "{\n"
+ + " \"type\": \"prefixFiltered\",\n"
+ + " \"delegate\": {\n"
+ + " \"type\": \"default\",\n"
+ + " \"dimension\": \"foo\",\n"
+ + " \"outputName\": \"bar\"\n"
+ + " },\n"
+ + " \"prefix\": \"xxx\"\n"
+ + "}";
+
+ PrefixFilteredDimensionSpec actual = (PrefixFilteredDimensionSpec)
mapper.readValue(
+ mapper.writeValueAsString(mapper.readValue(jsonStr,
DimensionSpec.class)),
+ DimensionSpec.class);
+
+ PrefixFilteredDimensionSpec expected = new PrefixFilteredDimensionSpec(
+ new DefaultDimensionSpec("foo", "bar"),
+ "xxx"
+ );
+
+ Assert.assertEquals(expected, actual);
+ }
+
+ @Test
+ public void testGetCacheKey()
+ {
+ PrefixFilteredDimensionSpec spec1 = new PrefixFilteredDimensionSpec(
+ new DefaultDimensionSpec("foo", "bar"),
+ "xxx"
+ );
+
+ PrefixFilteredDimensionSpec spec2 = new PrefixFilteredDimensionSpec(
+ new DefaultDimensionSpec("foo", "bar"),
+ "xyz"
+ );
+
+ Assert.assertFalse(Arrays.equals(spec1.getCacheKey(),
spec2.getCacheKey()));
+ }
+
+ @Test
+ public void testDecorator()
+ {
+ PrefixFilteredDimensionSpec spec = new PrefixFilteredDimensionSpec(
+ new DefaultDimensionSpec("foo", "far"),
+ "c"
+ );
+
+ DimensionSelector selector = spec.decorate(TestDimensionSelector.instance);
+
+ Assert.assertEquals(1, selector.getValueCardinality());
+
+ IndexedInts row = selector.getRow();
+ Assert.assertEquals(1, row.size());
+ Assert.assertEquals(0, row.get(0));
+
+ Assert.assertEquals("c", selector.lookupName(0));
+
+ Assert.assertEquals(0, selector.idLookup().lookupId("c"));
+ }
+}
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]