This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new f25f889230 Extract json individual array elements from json index for
the transform function jsonExtractIndex (#12466)
f25f889230 is described below
commit f25f889230741cf9a1f6837206bc27b95836c966
Author: Xuanyi Li <[email protected]>
AuthorDate: Tue Mar 12 12:01:26 2024 -0700
Extract json individual array elements from json index for the transform
function jsonExtractIndex (#12466)
---
.../JsonExtractIndexTransformFunction.java | 5 +
.../function/BaseTransformFunctionTest.java | 9 +-
.../JsonExtractIndexTransformFunctionTest.java | 25 +++
.../realtime/impl/json/MutableJsonIndexImpl.java | 100 +++++++----
.../readers/json/ImmutableJsonIndexReader.java | 188 ++++++++++++---------
.../segment/local/segment/index/JsonIndexTest.java | 58 +++++++
6 files changed, 265 insertions(+), 120 deletions(-)
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java
b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java
index 46441a3155..fac2ab5fdb 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java
@@ -90,9 +90,14 @@ public class JsonExtractIndexTransformFunction extends
BaseTransformFunction {
}
String resultsType = ((LiteralTransformFunction)
thirdArgument).getStringLiteral().toUpperCase();
boolean isSingleValue = !resultsType.endsWith("_ARRAY");
+ // TODO: will support array type; the underlying
jsonIndexReader.getMatchingDocsMap supports the json path [*]
if (!isSingleValue) {
throw new IllegalArgumentException("jsonExtractIndex only supports
single value type");
}
+ if (isSingleValue && inputJsonPath.contains("[*]")) {
+ throw new IllegalArgumentException("[*] syntax in json path is
unsupported as json_extract_index"
+ + "currently does not support returning array types");
+ }
DataType dataType = DataType.valueOf(resultsType);
if (arguments.size() == 4) {
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java
index 2c685a82bc..5ad67f5681 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java
@@ -24,7 +24,6 @@ import java.sql.Timestamp;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
@@ -116,6 +115,7 @@ public abstract class BaseTransformFunctionTest {
protected final BigDecimal[] _bigDecimalSVValues = new BigDecimal[NUM_ROWS];
protected final String[] _stringSVValues = new String[NUM_ROWS];
protected final String[] _jsonSVValues = new String[NUM_ROWS];
+ protected final String[] _jsonArrayValues = new String[NUM_ROWS];
protected final String[] _stringAlphaNumericSVValues = new String[NUM_ROWS];
protected final byte[][] _bytesSVValues = new byte[NUM_ROWS][];
protected final int[][] _intMVValues = new int[NUM_ROWS][];
@@ -155,7 +155,10 @@ public abstract class BaseTransformFunctionTest {
_stringSVValues[i] = df.format(_intSVValues[i] * RANDOM.nextDouble());
_jsonSVValues[i] = String.format(
"{\"intVal\":%s, \"longVal\":%s, \"floatVal\":%s, \"doubleVal\":%s,
\"bigDecimalVal\":%s, "
- + "\"stringVal\":\"%s\"}", RANDOM.nextInt(), RANDOM.nextLong(),
RANDOM.nextFloat(), RANDOM.nextDouble(),
+ + "\"stringVal\":\"%s\", "
+ + "\"intVals\":[0,1], \"longVals\":[0,1],
\"floatVals\":[0.0,1.0], \"doubleVals\":[0.0,1.0], "
+ + "\"bigDecimalVals\":[0.0,1.0], \"stringVals\":[\"0\",\"1\"]}",
+ RANDOM.nextInt(), RANDOM.nextLong(), RANDOM.nextFloat(),
RANDOM.nextDouble(),
BigDecimal.valueOf(RANDOM.nextDouble()).multiply(BigDecimal.valueOf(RANDOM.nextInt())),
df.format(RANDOM.nextInt() * RANDOM.nextDouble()));
_stringAlphaNumericSVValues[i] =
RandomStringUtils.randomAlphanumeric(26);
@@ -302,7 +305,7 @@ public abstract class BaseTransformFunctionTest {
.addTime(new TimeGranularitySpec(FieldSpec.DataType.LONG,
TimeUnit.MILLISECONDS, TIME_COLUMN), null).build();
TableConfig tableConfig =
new
TableConfigBuilder(TableType.OFFLINE).setTableName("test").setTimeColumnName(TIME_COLUMN)
-
.setJsonIndexColumns(Collections.singletonList(JSON_STRING_SV_COLUMN))
+ .setJsonIndexColumns(List.of(JSON_STRING_SV_COLUMN))
.setNullHandlingEnabled(true).build();
SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig,
schema);
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java
index c7510bea58..6704c303f3 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java
@@ -125,6 +125,31 @@ public class JsonExtractIndexTransformFunctionTest extends
BaseTransformFunction
String.format("jsonExtractIndex(%s,'%s','STRING')",
JSON_STRING_SV_COLUMN,
"$.stringVal"), "$.stringVal", DataType.STRING, true
});
+
+ testArguments.add(new Object[]{
+ String.format("jsonExtractIndex(%s,'%s','INT')",
JSON_STRING_SV_COLUMN,
+ "$.intVals[0]"), "$.intVals[0]", DataType.INT, true
+ });
+ testArguments.add(new Object[]{
+ String.format("jsonExtractIndex(%s,'%s','LONG')",
JSON_STRING_SV_COLUMN,
+ "$.longVals[1]"), "$.longVals[1]", DataType.LONG, true
+ });
+ testArguments.add(new Object[]{
+ String.format("jsonExtractIndex(%s,'%s','FLOAT')",
JSON_STRING_SV_COLUMN,
+ "$.floatVals[0]"), "$.floatVals[0]", DataType.FLOAT, true
+ });
+ testArguments.add(new Object[]{
+ String.format("jsonExtractIndex(%s,'%s','DOUBLE')",
JSON_STRING_SV_COLUMN,
+ "$.doubleVals[1]"), "$.doubleVals[1]", DataType.DOUBLE,
true
+ });
+ testArguments.add(new Object[]{
+ String.format("jsonExtractIndex(%s,'%s','BIG_DECIMAL')",
JSON_STRING_SV_COLUMN,
+ "$.bigDecimalVals[0]"), "$.bigDecimalVals[0]",
DataType.BIG_DECIMAL, true
+ });
+ testArguments.add(new Object[]{
+ String.format("jsonExtractIndex(%s,'%s','STRING')",
JSON_STRING_SV_COLUMN,
+ "$.stringVals[1]"), "$.stringVals[1]", DataType.STRING,
true
+ });
return testArguments.toArray(new Object[0][]);
}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java
index 78925a55e2..d072314d75 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java
@@ -29,6 +29,7 @@ import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.regex.Pattern;
+import org.apache.commons.lang3.tuple.Pair;
import org.apache.pinot.common.request.context.ExpressionContext;
import org.apache.pinot.common.request.context.FilterContext;
import org.apache.pinot.common.request.context.RequestContextUtils;
@@ -213,41 +214,11 @@ public class MutableJsonIndexImpl implements
MutableJsonIndex {
} else {
key = JsonUtils.KEY_SEPARATOR + key;
}
-
- // Process the array index within the key if exists
- // E.g. "[*]"=1 -> "."='1'
- // E.g. "[0]"=1 -> ".$index"='0' && "."='1'
- // E.g. "[0][1]"=1 -> ".$index"='0' && "..$index"='1' && ".."='1'
- // E.g. ".foo[*].bar[*].foobar"='abc' -> ".foo..bar..foobar"='abc'
- // E.g. ".foo[0].bar[1].foobar"='abc' -> ".foo.$index"='0' &&
".foo..bar.$index"='1' && ".foo..bar..foobar"='abc'
- // E.g. ".foo[0][1].bar"='abc' -> ".foo.$index"='0' && ".foo..$index"='1'
&& ".foo...bar"='abc'
- RoaringBitmap matchingDocIds = null;
- int leftBracketIndex;
- while ((leftBracketIndex = key.indexOf('[')) >= 0) {
- int rightBracketIndex = key.indexOf(']', leftBracketIndex + 2);
- Preconditions.checkArgument(rightBracketIndex > 0, "Missing right
bracket in key: %s", key);
-
- String leftPart = key.substring(0, leftBracketIndex);
- String arrayIndex = key.substring(leftBracketIndex + 1,
rightBracketIndex);
- String rightPart = key.substring(rightBracketIndex + 1);
-
- if (!arrayIndex.equals(JsonUtils.WILDCARD)) {
- // "[0]"=1 -> ".$index"='0' && "."='1'
- // ".foo[1].bar"='abc' -> ".foo.$index"=1 && ".foo..bar"='abc'
- String searchKey = leftPart + JsonUtils.ARRAY_INDEX_KEY +
JsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
- RoaringBitmap docIds = _postingListMap.get(searchKey);
- if (docIds != null) {
- if (matchingDocIds == null) {
- matchingDocIds = docIds.clone();
- } else {
- matchingDocIds.and(docIds);
- }
- } else {
- return new RoaringBitmap();
- }
- }
-
- key = leftPart + JsonUtils.KEY_SEPARATOR + rightPart;
+ Pair<String, RoaringBitmap> pair = getKeyAndFlattenDocId(key);
+ key = pair.getLeft();
+ RoaringBitmap matchingDocIds = pair.getRight();
+ if (matchingDocIds != null && matchingDocIds.isEmpty()) {
+ return new RoaringBitmap();
}
Predicate.Type predicateType = predicate.getType();
@@ -384,10 +355,20 @@ public class MutableJsonIndexImpl implements
MutableJsonIndex {
Map<String, RoaringBitmap> matchingDocsMap = new HashMap<>();
_readLock.lock();
try {
+ Pair<String, RoaringBitmap> result = getKeyAndFlattenDocId(key);
+ key = result.getLeft();
+ RoaringBitmap arrayIndexFlattenDocIds = result.getRight();
+ if (arrayIndexFlattenDocIds != null &&
arrayIndexFlattenDocIds.isEmpty()) {
+ return matchingDocsMap;
+ }
Map<String, RoaringBitmap> subMap = getMatchingKeysMap(key);
for (Map.Entry<String, RoaringBitmap> entry : subMap.entrySet()) {
- MutableRoaringBitmap flattenedDocIds =
entry.getValue().toMutableRoaringBitmap();
- PeekableIntIterator it = flattenedDocIds.getIntIterator();
+ RoaringBitmap kvPairFlattenedDocIds = entry.getValue();
+ PeekableIntIterator it = arrayIndexFlattenDocIds == null ?
kvPairFlattenedDocIds.getIntIterator()
+ : RoaringBitmap.and(arrayIndexFlattenDocIds,
kvPairFlattenedDocIds).getIntIterator();
+ if (!it.hasNext()) {
+ continue;
+ }
MutableRoaringBitmap postingList = new MutableRoaringBitmap();
while (it.hasNext()) {
postingList.add(_docIdMapping.getInt(it.next()));
@@ -423,6 +404,51 @@ public class MutableJsonIndexImpl implements
MutableJsonIndex {
return values;
}
+ /**
+ * If the key contains no concrete (non-wildcard) array index, return <key with any [*] rewritten, null
bitmap>
+ * Else if an array index in the key (i.e. the JSON path provided by the user) doesn't match any
data, return <null, empty bitmap>
+ * Else, return the JSON path that is generated by replacing each array index
with . in the original key
+ * and the associated flattened doc id bitmap
+ */
+ private Pair<String, RoaringBitmap> getKeyAndFlattenDocId(String key) {
+ // Process the array index within the key if exists
+ // E.g. "[*]"=1 -> "."='1'
+ // E.g. "[0]"=1 -> ".$index"='0' && "."='1'
+ // E.g. "[0][1]"=1 -> ".$index"='0' && "..$index"='1' && ".."='1'
+ // E.g. ".foo[*].bar[*].foobar"='abc' -> ".foo..bar..foobar"='abc'
+ // E.g. ".foo[0].bar[1].foobar"='abc' -> ".foo.$index"='0' &&
".foo..bar.$index"='1' && ".foo..bar..foobar"='abc'
+ // E.g. ".foo[0][1].bar"='abc' -> ".foo.$index"='0' && ".foo..$index"='1'
&& ".foo...bar"='abc'
+ RoaringBitmap matchingDocIds = null;
+ int leftBracketIndex;
+ while ((leftBracketIndex = key.indexOf('[')) >= 0) {
+ int rightBracketIndex = key.indexOf(']', leftBracketIndex + 2);
+ Preconditions.checkArgument(rightBracketIndex > 0, "Missing right
bracket in key: %s", key);
+
+ String leftPart = key.substring(0, leftBracketIndex);
+ String arrayIndex = key.substring(leftBracketIndex + 1,
rightBracketIndex);
+ String rightPart = key.substring(rightBracketIndex + 1);
+
+ if (!arrayIndex.equals(JsonUtils.WILDCARD)) {
+ // "[0]"=1 -> ".$index"='0' && "."='1'
+ // ".foo[1].bar"='abc' -> ".foo.$index"=1 && ".foo..bar"='abc'
+ String searchKey = leftPart + JsonUtils.ARRAY_INDEX_KEY +
JsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
+ RoaringBitmap docIds = _postingListMap.get(searchKey);
+ if (docIds != null) {
+ if (matchingDocIds == null) {
+ matchingDocIds = docIds.clone();
+ } else {
+ matchingDocIds.and(docIds);
+ }
+ } else {
+ return Pair.of(null, new RoaringBitmap());
+ }
+ }
+
+ key = leftPart + JsonUtils.KEY_SEPARATOR + rightPart;
+ }
+ return Pair.of(key, matchingDocIds);
+ }
+
private Map<String, RoaringBitmap> getMatchingKeysMap(String key) {
return _postingListMap.subMap(key + JsonIndexCreator.KEY_VALUE_SEPARATOR,
false,
key + JsonIndexCreator.KEY_VALUE_SEPARATOR_NEXT_CHAR, false);
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java
index ee3dc5bcad..d692a978b6 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java
@@ -25,6 +25,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
+import org.apache.commons.lang3.tuple.Pair;
import org.apache.pinot.common.request.context.ExpressionContext;
import org.apache.pinot.common.request.context.FilterContext;
import org.apache.pinot.common.request.context.RequestContextUtils;
@@ -165,94 +166,26 @@ public class ImmutableJsonIndexReader implements
JsonIndexReader {
"Left-hand side of the predicate must be an identifier, got: %s (%s).
Put double quotes around the identifier"
+ " if needed.", lhs, lhs.getType());
String key = lhs.getIdentifier();
-
- MutableRoaringBitmap matchingDocIds = null;
+ // Support 2 formats:
+ // - JSONPath format (e.g. "$.a[1].b"='abc', "$[0]"=1, "$"='abc')
+ // - Legacy format (e.g. "a[1].b"='abc')
if (_version == BaseJsonIndexCreator.VERSION_2) {
- // Support 2 formats:
- // - JSONPath format (e.g. "$.a[1].b"='abc', "$[0]"=1, "$"='abc')
- // - Legacy format (e.g. "a[1].b"='abc')
- if (key.charAt(0) == '$') {
+ if (key.startsWith("$")) {
key = key.substring(1);
} else {
key = JsonUtils.KEY_SEPARATOR + key;
}
-
- // Process the array index within the key if exists
- // E.g. "[*]"=1 -> "."='1'
- // E.g. "[0]"=1 -> ".$index"='0' && "."='1'
- // E.g. "[0][1]"=1 -> ".$index"='0' && "..$index"='1' && ".."='1'
- // E.g. ".foo[*].bar[*].foobar"='abc' -> ".foo..bar..foobar"='abc'
- // E.g. ".foo[0].bar[1].foobar"='abc' -> ".foo.$index"='0' &&
".foo..bar.$index"='1' && ".foo..bar..foobar"='abc'
- // E.g. ".foo[0][1].bar"='abc' -> ".foo.$index"='0' &&
".foo..$index"='1' && ".foo...bar"='abc'
- int leftBracketIndex;
- while ((leftBracketIndex = key.indexOf('[')) >= 0) {
- int rightBracketIndex = key.indexOf(']', leftBracketIndex + 2);
- Preconditions.checkArgument(rightBracketIndex > 0, "Missing right
bracket in key: %s", key);
-
- String leftPart = key.substring(0, leftBracketIndex);
- String arrayIndex = key.substring(leftBracketIndex + 1,
rightBracketIndex);
- String rightPart = key.substring(rightBracketIndex + 1);
-
- if (!arrayIndex.equals(JsonUtils.WILDCARD)) {
- // "[0]"=1 -> ".$index"='0' && "."='1'
- // ".foo[1].bar"='abc' -> ".foo.$index"=1 && ".foo..bar"='abc'
- String searchKey =
- leftPart + JsonUtils.ARRAY_INDEX_KEY +
JsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
- int dictId = _dictionary.indexOf(searchKey);
- if (dictId >= 0) {
- ImmutableRoaringBitmap docIds = _invertedIndex.getDocIds(dictId);
- if (matchingDocIds == null) {
- matchingDocIds = docIds.toMutableRoaringBitmap();
- } else {
- matchingDocIds.and(docIds);
- }
- } else {
- return new MutableRoaringBitmap();
- }
- }
-
- key = leftPart + JsonUtils.KEY_SEPARATOR + rightPart;
- }
} else {
// For V1 backward-compatibility
-
- // Support 2 formats:
- // - JSONPath format (e.g. "$.a[1].b"='abc', "$[0]"=1, "$"='abc')
- // - Legacy format (e.g. "a[1].b"='abc')
if (key.startsWith("$.")) {
key = key.substring(2);
}
-
- // Process the array index within the key if exists
- // E.g. "foo[0].bar[1].foobar"='abc' -> "foo.$index"=0 &&
"foo.bar.$index"=1 && "foo.bar.foobar"='abc'
- int leftBracketIndex;
- while ((leftBracketIndex = key.indexOf('[')) > 0) {
- int rightBracketIndex = key.indexOf(']', leftBracketIndex + 2);
- Preconditions.checkArgument(rightBracketIndex > 0, "Missing right
bracket in key: %s", key);
-
- String leftPart = key.substring(0, leftBracketIndex);
- String arrayIndex = key.substring(leftBracketIndex + 1,
rightBracketIndex);
- String rightPart = key.substring(rightBracketIndex + 1);
-
- if (!arrayIndex.equals(JsonUtils.WILDCARD)) {
- // "foo[1].bar"='abc' -> "foo.$index"=1 && "foo.bar"='abc'
- String searchKey =
- leftPart + JsonUtils.ARRAY_INDEX_KEY +
JsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
- int dictId = _dictionary.indexOf(searchKey);
- if (dictId >= 0) {
- ImmutableRoaringBitmap docIds = _invertedIndex.getDocIds(dictId);
- if (matchingDocIds == null) {
- matchingDocIds = docIds.toMutableRoaringBitmap();
- } else {
- matchingDocIds.and(docIds);
- }
- } else {
- return new MutableRoaringBitmap();
- }
- }
-
- key = leftPart + rightPart;
- }
+ }
+ Pair<String, MutableRoaringBitmap> pair = getKeyAndFlattenDocId(key);
+ key = pair.getLeft();
+ MutableRoaringBitmap matchingDocIds = pair.getRight();
+ if (matchingDocIds != null && matchingDocIds.isEmpty()) {
+ return matchingDocIds;
}
Predicate.Type predicateType = predicate.getType();
@@ -387,12 +320,21 @@ public class ImmutableJsonIndexReader implements
JsonIndexReader {
@Override
public Map<String, RoaringBitmap> getMatchingDocsMap(String key) {
Map<String, RoaringBitmap> matchingDocsMap = new HashMap<>();
+ Pair<String, MutableRoaringBitmap> result = getKeyAndFlattenDocId(key);
+ key = result.getLeft();
+ MutableRoaringBitmap arrayIndexFlattenDocIds = result.getRight();
+ if (arrayIndexFlattenDocIds != null && arrayIndexFlattenDocIds.isEmpty()) {
+ return matchingDocsMap;
+ }
int[] dictIds = getDictIdRangeForKey(key);
-
for (int dictId = dictIds[0]; dictId < dictIds[1]; dictId++) {
// get docIds from posting list, convert these to the actual docIds
ImmutableRoaringBitmap flattenedDocIds =
_invertedIndex.getDocIds(dictId);
- PeekableIntIterator it = flattenedDocIds.getIntIterator();
+ PeekableIntIterator it = arrayIndexFlattenDocIds == null ?
flattenedDocIds.getIntIterator()
+ : intersect(arrayIndexFlattenDocIds.clone(), flattenedDocIds);
+ if (!it.hasNext()) {
+ continue;
+ }
RoaringBitmap realDocIds = new RoaringBitmap();
while (it.hasNext()) {
realDocIds.add(getDocId(it.next()));
@@ -448,6 +390,92 @@ public class ImmutableJsonIndexReader implements
JsonIndexReader {
return new int[]{minDictId, maxDictId};
}
+ /**
+ * If the key contains no concrete (non-wildcard) array index, return <key with any [*] rewritten, null
bitmap>
+ * Else if an array index in the key (i.e. the JSON path provided by the user) doesn't match any
data, return <null, empty bitmap>
+ * Else, return the JSON path that is generated by replacing each array index
with . (V2) or removing it (V1) in the original key
+ * and the associated flattened doc id bitmap
+ */
+ private Pair<String, MutableRoaringBitmap> getKeyAndFlattenDocId(String key)
{
+ MutableRoaringBitmap matchingDocIds = null;
+ if (_version == BaseJsonIndexCreator.VERSION_2) {
+ // Process the array index within the key if exists
+ // E.g. "[*]"=1 -> "."='1'
+ // E.g. "[0]"=1 -> ".$index"='0' && "."='1'
+ // E.g. "[0][1]"=1 -> ".$index"='0' && "..$index"='1' && ".."='1'
+ // E.g. ".foo[*].bar[*].foobar"='abc' -> ".foo..bar..foobar"='abc'
+ // E.g. ".foo[0].bar[1].foobar"='abc' -> ".foo.$index"='0' &&
".foo..bar.$index"='1' && ".foo..bar..foobar"='abc'
+ // E.g. ".foo[0][1].bar"='abc' -> ".foo.$index"='0' &&
".foo..$index"='1' && ".foo...bar"='abc'
+ int leftBracketIndex;
+ while ((leftBracketIndex = key.indexOf('[')) >= 0) {
+ int rightBracketIndex = key.indexOf(']', leftBracketIndex + 2);
+ Preconditions.checkArgument(rightBracketIndex > 0, "Missing right
bracket in key: %s", key);
+
+ String leftPart = key.substring(0, leftBracketIndex);
+ String arrayIndex = key.substring(leftBracketIndex + 1,
rightBracketIndex);
+ String rightPart = key.substring(rightBracketIndex + 1);
+
+ if (!arrayIndex.equals(JsonUtils.WILDCARD)) {
+ // "[0]"=1 -> ".$index"='0' && "."='1'
+ // ".foo[1].bar"='abc' -> ".foo.$index"=1 && ".foo..bar"='abc'
+ String searchKey =
+ leftPart + JsonUtils.ARRAY_INDEX_KEY +
BaseJsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
+ int dictId = _dictionary.indexOf(searchKey);
+ if (dictId >= 0) {
+ ImmutableRoaringBitmap docIds = _invertedIndex.getDocIds(dictId);
+ if (matchingDocIds == null) {
+ matchingDocIds = docIds.toMutableRoaringBitmap();
+ } else {
+ matchingDocIds.and(docIds);
+ }
+ } else {
+ return Pair.of(null, new MutableRoaringBitmap());
+ }
+ }
+
+ key = leftPart + JsonUtils.KEY_SEPARATOR + rightPart;
+ }
+ } else {
+ // For V1 backward-compatibility
+ // Process the array index within the key if exists
+ // E.g. "foo[0].bar[1].foobar"='abc' -> "foo.$index"=0 &&
"foo.bar.$index"=1 && "foo.bar.foobar"='abc'
+ int leftBracketIndex;
+ while ((leftBracketIndex = key.indexOf('[')) > 0) {
+ int rightBracketIndex = key.indexOf(']', leftBracketIndex + 2);
+ Preconditions.checkArgument(rightBracketIndex > 0, "Missing right
bracket in key: %s", key);
+
+ String leftPart = key.substring(0, leftBracketIndex);
+ String arrayIndex = key.substring(leftBracketIndex + 1,
rightBracketIndex);
+ String rightPart = key.substring(rightBracketIndex + 1);
+
+ if (!arrayIndex.equals(JsonUtils.WILDCARD)) {
+ // "foo[1].bar"='abc' -> "foo.$index"=1 && "foo.bar"='abc'
+ String searchKey =
+ leftPart + JsonUtils.ARRAY_INDEX_KEY +
BaseJsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
+ int dictId = _dictionary.indexOf(searchKey);
+ if (dictId >= 0) {
+ ImmutableRoaringBitmap docIds = _invertedIndex.getDocIds(dictId);
+ if (matchingDocIds == null) {
+ matchingDocIds = docIds.toMutableRoaringBitmap();
+ } else {
+ matchingDocIds.and(docIds);
+ }
+ } else {
+ return Pair.of(null, new MutableRoaringBitmap());
+ }
+ }
+
+ key = leftPart + rightPart;
+ }
+ }
+ return Pair.of(key, matchingDocIds);
+ }
+
+ private PeekableIntIterator intersect(MutableRoaringBitmap a,
ImmutableRoaringBitmap b) {
+ a.and(b);
+ return a.getIntIterator();
+ }
+
@Override
public void close() {
// NOTE: DO NOT close the PinotDataBuffer here because it is tracked by
the caller and might be reused later. The
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java
index 63aa83b0e9..3b357e711a 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java
@@ -24,6 +24,7 @@ import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.io.FileUtils;
@@ -451,6 +452,63 @@ public class JsonIndexTest {
createIndex(true, jsonIndexConfig, records);
}
+
+ @Test
+ public void testGetMatchingValDocIdsPairForArrayPath() throws Exception {
+ String[] records = {
+
"{\"foo\":[{\"bar\":[\"x\",\"y\"]},{\"bar\":[\"a\",\"b\"]}],\"foo2\":[\"u\"]}",
+ "{\"foo\":[{\"bar\":[\"y\",\"z\"]}],\"foo2\":[\"u\"]}"
+ };
+ JsonIndexConfig jsonIndexConfig = new JsonIndexConfig();
+
+ createIndex(true, jsonIndexConfig, records);
+ File onHeapIndexFile = new File(INDEX_DIR, ON_HEAP_COLUMN_NAME +
V1Constants.Indexes.JSON_INDEX_FILE_EXTENSION);
+ Assert.assertTrue(onHeapIndexFile.exists());
+
+ createIndex(false, jsonIndexConfig, records);
+ File offHeapIndexFile = new File(INDEX_DIR, OFF_HEAP_COLUMN_NAME +
V1Constants.Indexes.JSON_INDEX_FILE_EXTENSION);
+ Assert.assertTrue(offHeapIndexFile.exists());
+
+ String[] keys = {".foo[0].bar[1]", ".foo[1].bar[0]", ".foo2[0]",
".foo[100].bar[100]", ".foo[0].bar[*]",
+ ".foo[*].bar[0]", ".foo[*].bar[*]"};
+ List<Map<String, RoaringBitmap>> expected = List.of(
+ Map.of("y", RoaringBitmap.bitmapOf(0), "z",
RoaringBitmap.bitmapOf(1)),
+ Map.of("a", RoaringBitmap.bitmapOf(0)),
+ Map.of("u", RoaringBitmap.bitmapOf(0, 1)),
+ Collections.emptyMap(),
+ Map.of("x", RoaringBitmap.bitmapOf(0),
+ "y", RoaringBitmap.bitmapOf(0, 1),
+ "z", RoaringBitmap.bitmapOf(1)),
+ Map.of("x", RoaringBitmap.bitmapOf(0),
+ "a", RoaringBitmap.bitmapOf(0),
+ "y", RoaringBitmap.bitmapOf(1)),
+ Map.of("x", RoaringBitmap.bitmapOf(0),
+ "y", RoaringBitmap.bitmapOf(0, 1),
+ "z", RoaringBitmap.bitmapOf(1),
+ "a", RoaringBitmap.bitmapOf(0),
+ "b", RoaringBitmap.bitmapOf(0))
+ );
+
+ try (PinotDataBuffer onHeapDataBuffer =
PinotDataBuffer.mapReadOnlyBigEndianFile(onHeapIndexFile);
+ PinotDataBuffer offHeapDataBuffer =
PinotDataBuffer.mapReadOnlyBigEndianFile(offHeapIndexFile);
+ JsonIndexReader onHeapIndexReader = new
ImmutableJsonIndexReader(onHeapDataBuffer, records.length);
+ JsonIndexReader offHeapIndexReader = new
ImmutableJsonIndexReader(offHeapDataBuffer, records.length);
+ MutableJsonIndexImpl mutableJsonIndex = new
MutableJsonIndexImpl(jsonIndexConfig)) {
+ for (String record : records) {
+ mutableJsonIndex.add(record);
+ }
+
+ for (int i = 0; i < keys.length; i++) {
+ Map<String, RoaringBitmap> onHeapRes =
onHeapIndexReader.getMatchingDocsMap(keys[i]);
+ Map<String, RoaringBitmap> offHeapRes =
offHeapIndexReader.getMatchingDocsMap(keys[i]);
+ Map<String, RoaringBitmap> mutableRes =
mutableJsonIndex.getMatchingDocsMap(keys[i]);
+ Assert.assertEquals(expected.get(i), onHeapRes);
+ Assert.assertEquals(expected.get(i), offHeapRes);
+ Assert.assertEquals(mutableRes, expected.get(i));
+ }
+ }
+ }
+
public static class ConfTest extends AbstractSerdeIndexContract {
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]