This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new f25f889230 Extract json individual array elements from json index for 
the transform function jsonExtractIndex (#12466)
f25f889230 is described below

commit f25f889230741cf9a1f6837206bc27b95836c966
Author: Xuanyi Li <[email protected]>
AuthorDate: Tue Mar 12 12:01:26 2024 -0700

    Extract json individual array elements from json index for the transform 
function jsonExtractIndex (#12466)
---
 .../JsonExtractIndexTransformFunction.java         |   5 +
 .../function/BaseTransformFunctionTest.java        |   9 +-
 .../JsonExtractIndexTransformFunctionTest.java     |  25 +++
 .../realtime/impl/json/MutableJsonIndexImpl.java   | 100 +++++++----
 .../readers/json/ImmutableJsonIndexReader.java     | 188 ++++++++++++---------
 .../segment/local/segment/index/JsonIndexTest.java |  58 +++++++
 6 files changed, 265 insertions(+), 120 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java
index 46441a3155..fac2ab5fdb 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java
@@ -90,9 +90,14 @@ public class JsonExtractIndexTransformFunction extends 
BaseTransformFunction {
     }
     String resultsType = ((LiteralTransformFunction) 
thirdArgument).getStringLiteral().toUpperCase();
     boolean isSingleValue = !resultsType.endsWith("_ARRAY");
+    // TODO: will support array type; the underlying 
jsonIndexReader.getMatchingDocsMap supports the json path [*]
     if (!isSingleValue) {
       throw new IllegalArgumentException("jsonExtractIndex only supports 
single value type");
     }
+    if (isSingleValue && inputJsonPath.contains("[*]")) {
+      throw new IllegalArgumentException("[*] syntax in json path is 
unsupported as json_extract_index"
+          + "currently does not support returning array types");
+    }
     DataType dataType = DataType.valueOf(resultsType);
 
     if (arguments.size() == 4) {
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java
index 2c685a82bc..5ad67f5681 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java
@@ -24,7 +24,6 @@ import java.sql.Timestamp;
 import java.text.DecimalFormat;
 import java.text.DecimalFormatSymbols;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
@@ -116,6 +115,7 @@ public abstract class BaseTransformFunctionTest {
   protected final BigDecimal[] _bigDecimalSVValues = new BigDecimal[NUM_ROWS];
   protected final String[] _stringSVValues = new String[NUM_ROWS];
   protected final String[] _jsonSVValues = new String[NUM_ROWS];
+  protected final String[] _jsonArrayValues = new String[NUM_ROWS];
   protected final String[] _stringAlphaNumericSVValues = new String[NUM_ROWS];
   protected final byte[][] _bytesSVValues = new byte[NUM_ROWS][];
   protected final int[][] _intMVValues = new int[NUM_ROWS][];
@@ -155,7 +155,10 @@ public abstract class BaseTransformFunctionTest {
       _stringSVValues[i] = df.format(_intSVValues[i] * RANDOM.nextDouble());
       _jsonSVValues[i] = String.format(
           "{\"intVal\":%s, \"longVal\":%s, \"floatVal\":%s, \"doubleVal\":%s, 
\"bigDecimalVal\":%s, "
-              + "\"stringVal\":\"%s\"}", RANDOM.nextInt(), RANDOM.nextLong(), 
RANDOM.nextFloat(), RANDOM.nextDouble(),
+              + "\"stringVal\":\"%s\", "
+              + "\"intVals\":[0,1], \"longVals\":[0,1], 
\"floatVals\":[0.0,1.0], \"doubleVals\":[0.0,1.0], "
+              + "\"bigDecimalVals\":[0.0,1.0], \"stringVals\":[\"0\",\"1\"]}",
+              RANDOM.nextInt(), RANDOM.nextLong(), RANDOM.nextFloat(), 
RANDOM.nextDouble(),
           
BigDecimal.valueOf(RANDOM.nextDouble()).multiply(BigDecimal.valueOf(RANDOM.nextInt())),
           df.format(RANDOM.nextInt() * RANDOM.nextDouble()));
       _stringAlphaNumericSVValues[i] = 
RandomStringUtils.randomAlphanumeric(26);
@@ -302,7 +305,7 @@ public abstract class BaseTransformFunctionTest {
         .addTime(new TimeGranularitySpec(FieldSpec.DataType.LONG, 
TimeUnit.MILLISECONDS, TIME_COLUMN), null).build();
     TableConfig tableConfig =
         new 
TableConfigBuilder(TableType.OFFLINE).setTableName("test").setTimeColumnName(TIME_COLUMN)
-            
.setJsonIndexColumns(Collections.singletonList(JSON_STRING_SV_COLUMN))
+            .setJsonIndexColumns(List.of(JSON_STRING_SV_COLUMN))
             .setNullHandlingEnabled(true).build();
 
     SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig, 
schema);
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java
index c7510bea58..6704c303f3 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java
@@ -125,6 +125,31 @@ public class JsonExtractIndexTransformFunctionTest extends 
BaseTransformFunction
         String.format("jsonExtractIndex(%s,'%s','STRING')", 
JSON_STRING_SV_COLUMN,
             "$.stringVal"), "$.stringVal", DataType.STRING, true
     });
+
+    testArguments.add(new Object[]{
+            String.format("jsonExtractIndex(%s,'%s','INT')", 
JSON_STRING_SV_COLUMN,
+                    "$.intVals[0]"), "$.intVals[0]", DataType.INT, true
+    });
+    testArguments.add(new Object[]{
+            String.format("jsonExtractIndex(%s,'%s','LONG')", 
JSON_STRING_SV_COLUMN,
+                    "$.longVals[1]"), "$.longVals[1]", DataType.LONG, true
+    });
+    testArguments.add(new Object[]{
+            String.format("jsonExtractIndex(%s,'%s','FLOAT')", 
JSON_STRING_SV_COLUMN,
+                    "$.floatVals[0]"), "$.floatVals[0]", DataType.FLOAT, true
+    });
+    testArguments.add(new Object[]{
+            String.format("jsonExtractIndex(%s,'%s','DOUBLE')", 
JSON_STRING_SV_COLUMN,
+                    "$.doubleVals[1]"), "$.doubleVals[1]", DataType.DOUBLE, 
true
+    });
+    testArguments.add(new Object[]{
+            String.format("jsonExtractIndex(%s,'%s','BIG_DECIMAL')", 
JSON_STRING_SV_COLUMN,
+                    "$.bigDecimalVals[0]"), "$.bigDecimalVals[0]", 
DataType.BIG_DECIMAL, true
+    });
+    testArguments.add(new Object[]{
+            String.format("jsonExtractIndex(%s,'%s','STRING')", 
JSON_STRING_SV_COLUMN,
+                    "$.stringVals[1]"), "$.stringVals[1]", DataType.STRING, 
true
+    });
     return testArguments.toArray(new Object[0][]);
   }
 
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java
index 78925a55e2..d072314d75 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java
@@ -29,6 +29,7 @@ import java.util.Map;
 import java.util.TreeMap;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.regex.Pattern;
+import org.apache.commons.lang3.tuple.Pair;
 import org.apache.pinot.common.request.context.ExpressionContext;
 import org.apache.pinot.common.request.context.FilterContext;
 import org.apache.pinot.common.request.context.RequestContextUtils;
@@ -213,41 +214,11 @@ public class MutableJsonIndexImpl implements 
MutableJsonIndex {
     } else {
       key = JsonUtils.KEY_SEPARATOR + key;
     }
-
-    // Process the array index within the key if exists
-    // E.g. "[*]"=1 -> "."='1'
-    // E.g. "[0]"=1 -> ".$index"='0' && "."='1'
-    // E.g. "[0][1]"=1 -> ".$index"='0' && "..$index"='1' && ".."='1'
-    // E.g. ".foo[*].bar[*].foobar"='abc' -> ".foo..bar..foobar"='abc'
-    // E.g. ".foo[0].bar[1].foobar"='abc' -> ".foo.$index"='0' && 
".foo..bar.$index"='1' && ".foo..bar..foobar"='abc'
-    // E.g. ".foo[0][1].bar"='abc' -> ".foo.$index"='0' && ".foo..$index"='1' 
&& ".foo...bar"='abc'
-    RoaringBitmap matchingDocIds = null;
-    int leftBracketIndex;
-    while ((leftBracketIndex = key.indexOf('[')) >= 0) {
-      int rightBracketIndex = key.indexOf(']', leftBracketIndex + 2);
-      Preconditions.checkArgument(rightBracketIndex > 0, "Missing right 
bracket in key: %s", key);
-
-      String leftPart = key.substring(0, leftBracketIndex);
-      String arrayIndex = key.substring(leftBracketIndex + 1, 
rightBracketIndex);
-      String rightPart = key.substring(rightBracketIndex + 1);
-
-      if (!arrayIndex.equals(JsonUtils.WILDCARD)) {
-        // "[0]"=1 -> ".$index"='0' && "."='1'
-        // ".foo[1].bar"='abc' -> ".foo.$index"=1 && ".foo..bar"='abc'
-        String searchKey = leftPart + JsonUtils.ARRAY_INDEX_KEY + 
JsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
-        RoaringBitmap docIds = _postingListMap.get(searchKey);
-        if (docIds != null) {
-          if (matchingDocIds == null) {
-            matchingDocIds = docIds.clone();
-          } else {
-            matchingDocIds.and(docIds);
-          }
-        } else {
-          return new RoaringBitmap();
-        }
-      }
-
-      key = leftPart + JsonUtils.KEY_SEPARATOR + rightPart;
+    Pair<String, RoaringBitmap> pair = getKeyAndFlattenDocId(key);
+    key = pair.getLeft();
+    RoaringBitmap matchingDocIds = pair.getRight();
+    if (matchingDocIds != null && matchingDocIds.isEmpty()) {
+      return new RoaringBitmap();
     }
 
     Predicate.Type predicateType = predicate.getType();
@@ -384,10 +355,20 @@ public class MutableJsonIndexImpl implements 
MutableJsonIndex {
     Map<String, RoaringBitmap> matchingDocsMap = new HashMap<>();
     _readLock.lock();
     try {
+      Pair<String, RoaringBitmap> result = getKeyAndFlattenDocId(key);
+      key = result.getLeft();
+      RoaringBitmap arrayIndexFlattenDocIds = result.getRight();
+      if (arrayIndexFlattenDocIds != null && 
arrayIndexFlattenDocIds.isEmpty()) {
+        return matchingDocsMap;
+      }
       Map<String, RoaringBitmap> subMap = getMatchingKeysMap(key);
       for (Map.Entry<String, RoaringBitmap> entry : subMap.entrySet()) {
-        MutableRoaringBitmap flattenedDocIds = 
entry.getValue().toMutableRoaringBitmap();
-        PeekableIntIterator it = flattenedDocIds.getIntIterator();
+        RoaringBitmap kvPairFlattenedDocIds = entry.getValue();
+        PeekableIntIterator it = arrayIndexFlattenDocIds == null ? 
kvPairFlattenedDocIds.getIntIterator()
+                : RoaringBitmap.and(arrayIndexFlattenDocIds, 
kvPairFlattenedDocIds).getIntIterator();
+        if (!it.hasNext()) {
+          continue;
+        }
         MutableRoaringBitmap postingList = new MutableRoaringBitmap();
         while (it.hasNext()) {
           postingList.add(_docIdMapping.getInt(it.next()));
@@ -423,6 +404,51 @@ public class MutableJsonIndexImpl implements 
MutableJsonIndex {
     return values;
   }
 
+  /**
+   * Rewrites every array subscript in {@code key} and intersects the posting lists of the explicit indexes.
+   * Returns (rewritten key, null) when the key has no explicit (non-wildcard) index, (null, empty bitmap) when
+   * an explicit index has no posting list, and otherwise (rewritten key, AND of the index posting lists),
+   * where the AND result may itself be empty. Each subscript is replaced by JsonUtils.KEY_SEPARATOR.
+   */
+  private Pair<String, RoaringBitmap> getKeyAndFlattenDocId(String key) {
+    // Process the array indexes within the key, if any
+    // E.g. "[*]"=1 -> "."='1'
+    // E.g. "[0]"=1 -> ".$index"='0' && "."='1'
+    // E.g. "[0][1]"=1 -> ".$index"='0' && "..$index"='1' && ".."='1'
+    // E.g. ".foo[*].bar[*].foobar"='abc' -> ".foo..bar..foobar"='abc'
+    // E.g. ".foo[0].bar[1].foobar"='abc' -> ".foo.$index"='0' && 
".foo..bar.$index"='1' && ".foo..bar..foobar"='abc'
+    // E.g. ".foo[0][1].bar"='abc' -> ".foo.$index"='0' && ".foo..$index"='1' 
&& ".foo...bar"='abc'
+    RoaringBitmap matchingDocIds = null;
+    int leftBracketIndex;
+    while ((leftBracketIndex = key.indexOf('[')) >= 0) {
+      int rightBracketIndex = key.indexOf(']', leftBracketIndex + 2); // search starts two past the left bracket
+      Preconditions.checkArgument(rightBracketIndex > 0, "Missing right 
bracket in key: %s", key);
+
+      String leftPart = key.substring(0, leftBracketIndex);
+      String arrayIndex = key.substring(leftBracketIndex + 1, 
rightBracketIndex);
+      String rightPart = key.substring(rightBracketIndex + 1);
+
+      if (!arrayIndex.equals(JsonUtils.WILDCARD)) {
+        // "[0]"=1 -> ".$index"='0' && "."='1'
+        // ".foo[1].bar"='abc' -> ".foo.$index"=1 && ".foo..bar"='abc'
+        String searchKey = leftPart + JsonUtils.ARRAY_INDEX_KEY + 
JsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
+        RoaringBitmap docIds = _postingListMap.get(searchKey);
+        if (docIds != null) {
+          if (matchingDocIds == null) {
+            matchingDocIds = docIds.clone(); // copy so the later and() does not mutate the stored posting list
+          } else {
+            matchingDocIds.and(docIds);
+          }
+        } else {
+          return Pair.of(null, new RoaringBitmap()); // no posting list for this explicit index: nothing can match
+        }
+      }
+
+      key = leftPart + JsonUtils.KEY_SEPARATOR + rightPart; // replace the whole subscript with the key separator
+    }
+    return Pair.of(key, matchingDocIds);
+  }
+
   private Map<String, RoaringBitmap> getMatchingKeysMap(String key) {
     return _postingListMap.subMap(key + JsonIndexCreator.KEY_VALUE_SEPARATOR, 
false,
         key + JsonIndexCreator.KEY_VALUE_SEPARATOR_NEXT_CHAR, false);
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java
index ee3dc5bcad..d692a978b6 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java
@@ -25,6 +25,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.regex.Pattern;
+import org.apache.commons.lang3.tuple.Pair;
 import org.apache.pinot.common.request.context.ExpressionContext;
 import org.apache.pinot.common.request.context.FilterContext;
 import org.apache.pinot.common.request.context.RequestContextUtils;
@@ -165,94 +166,26 @@ public class ImmutableJsonIndexReader implements 
JsonIndexReader {
         "Left-hand side of the predicate must be an identifier, got: %s (%s). 
Put double quotes around the identifier"
             + " if needed.", lhs, lhs.getType());
     String key = lhs.getIdentifier();
-
-    MutableRoaringBitmap matchingDocIds = null;
+    // Support 2 formats:
+    // - JSONPath format (e.g. "$.a[1].b"='abc', "$[0]"=1, "$"='abc')
+    // - Legacy format (e.g. "a[1].b"='abc')
     if (_version == BaseJsonIndexCreator.VERSION_2) {
-      // Support 2 formats:
-      // - JSONPath format (e.g. "$.a[1].b"='abc', "$[0]"=1, "$"='abc')
-      // - Legacy format (e.g. "a[1].b"='abc')
-      if (key.charAt(0) == '$') {
+      if (key.startsWith("$")) {
         key = key.substring(1);
       } else {
         key = JsonUtils.KEY_SEPARATOR + key;
       }
-
-      // Process the array index within the key if exists
-      // E.g. "[*]"=1 -> "."='1'
-      // E.g. "[0]"=1 -> ".$index"='0' && "."='1'
-      // E.g. "[0][1]"=1 -> ".$index"='0' && "..$index"='1' && ".."='1'
-      // E.g. ".foo[*].bar[*].foobar"='abc' -> ".foo..bar..foobar"='abc'
-      // E.g. ".foo[0].bar[1].foobar"='abc' -> ".foo.$index"='0' && 
".foo..bar.$index"='1' && ".foo..bar..foobar"='abc'
-      // E.g. ".foo[0][1].bar"='abc' -> ".foo.$index"='0' && 
".foo..$index"='1' && ".foo...bar"='abc'
-      int leftBracketIndex;
-      while ((leftBracketIndex = key.indexOf('[')) >= 0) {
-        int rightBracketIndex = key.indexOf(']', leftBracketIndex + 2);
-        Preconditions.checkArgument(rightBracketIndex > 0, "Missing right 
bracket in key: %s", key);
-
-        String leftPart = key.substring(0, leftBracketIndex);
-        String arrayIndex = key.substring(leftBracketIndex + 1, 
rightBracketIndex);
-        String rightPart = key.substring(rightBracketIndex + 1);
-
-        if (!arrayIndex.equals(JsonUtils.WILDCARD)) {
-          // "[0]"=1 -> ".$index"='0' && "."='1'
-          // ".foo[1].bar"='abc' -> ".foo.$index"=1 && ".foo..bar"='abc'
-          String searchKey =
-              leftPart + JsonUtils.ARRAY_INDEX_KEY + 
JsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
-          int dictId = _dictionary.indexOf(searchKey);
-          if (dictId >= 0) {
-            ImmutableRoaringBitmap docIds = _invertedIndex.getDocIds(dictId);
-            if (matchingDocIds == null) {
-              matchingDocIds = docIds.toMutableRoaringBitmap();
-            } else {
-              matchingDocIds.and(docIds);
-            }
-          } else {
-            return new MutableRoaringBitmap();
-          }
-        }
-
-        key = leftPart + JsonUtils.KEY_SEPARATOR + rightPart;
-      }
     } else {
       // For V1 backward-compatibility
-
-      // Support 2 formats:
-      // - JSONPath format (e.g. "$.a[1].b"='abc', "$[0]"=1, "$"='abc')
-      // - Legacy format (e.g. "a[1].b"='abc')
       if (key.startsWith("$.")) {
         key = key.substring(2);
       }
-
-      // Process the array index within the key if exists
-      // E.g. "foo[0].bar[1].foobar"='abc' -> "foo.$index"=0 && 
"foo.bar.$index"=1 && "foo.bar.foobar"='abc'
-      int leftBracketIndex;
-      while ((leftBracketIndex = key.indexOf('[')) > 0) {
-        int rightBracketIndex = key.indexOf(']', leftBracketIndex + 2);
-        Preconditions.checkArgument(rightBracketIndex > 0, "Missing right 
bracket in key: %s", key);
-
-        String leftPart = key.substring(0, leftBracketIndex);
-        String arrayIndex = key.substring(leftBracketIndex + 1, 
rightBracketIndex);
-        String rightPart = key.substring(rightBracketIndex + 1);
-
-        if (!arrayIndex.equals(JsonUtils.WILDCARD)) {
-          // "foo[1].bar"='abc' -> "foo.$index"=1 && "foo.bar"='abc'
-          String searchKey =
-              leftPart + JsonUtils.ARRAY_INDEX_KEY + 
JsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
-          int dictId = _dictionary.indexOf(searchKey);
-          if (dictId >= 0) {
-            ImmutableRoaringBitmap docIds = _invertedIndex.getDocIds(dictId);
-            if (matchingDocIds == null) {
-              matchingDocIds = docIds.toMutableRoaringBitmap();
-            } else {
-              matchingDocIds.and(docIds);
-            }
-          } else {
-            return new MutableRoaringBitmap();
-          }
-        }
-
-        key = leftPart + rightPart;
-      }
+    }
+    Pair<String, MutableRoaringBitmap> pair = getKeyAndFlattenDocId(key);
+    key = pair.getLeft();
+    MutableRoaringBitmap matchingDocIds = pair.getRight();
+    if (matchingDocIds != null && matchingDocIds.isEmpty()) {
+      return matchingDocIds;
     }
 
     Predicate.Type predicateType = predicate.getType();
@@ -387,12 +320,21 @@ public class ImmutableJsonIndexReader implements 
JsonIndexReader {
   @Override
   public Map<String, RoaringBitmap> getMatchingDocsMap(String key) {
     Map<String, RoaringBitmap> matchingDocsMap = new HashMap<>();
+    Pair<String, MutableRoaringBitmap> result = getKeyAndFlattenDocId(key);
+    key = result.getLeft();
+    MutableRoaringBitmap arrayIndexFlattenDocIds = result.getRight();
+    if (arrayIndexFlattenDocIds != null && arrayIndexFlattenDocIds.isEmpty()) {
+      return matchingDocsMap;
+    }
     int[] dictIds = getDictIdRangeForKey(key);
-
     for (int dictId = dictIds[0]; dictId < dictIds[1]; dictId++) {
       // get docIds from posting list, convert these to the actual docIds
       ImmutableRoaringBitmap flattenedDocIds = 
_invertedIndex.getDocIds(dictId);
-      PeekableIntIterator it = flattenedDocIds.getIntIterator();
+      PeekableIntIterator it = arrayIndexFlattenDocIds == null ? 
flattenedDocIds.getIntIterator()
+              : intersect(arrayIndexFlattenDocIds.clone(), flattenedDocIds);
+      if (!it.hasNext()) {
+        continue;
+      }
       RoaringBitmap realDocIds = new RoaringBitmap();
       while (it.hasNext()) {
         realDocIds.add(getDocId(it.next()));
@@ -448,6 +390,92 @@ public class ImmutableJsonIndexReader implements 
JsonIndexReader {
     return new int[]{minDictId, maxDictId};
   }
 
+  /**
+   * Rewrites every array subscript in {@code key} and intersects the posting lists of the explicit indexes.
+   * Returns (rewritten key, null) when the key has no explicit (non-wildcard) index, (null, empty bitmap) when
+   * an explicit index is not found in the dictionary, and otherwise (rewritten key, AND of the index posting
+   * lists), where the AND result may itself be empty. For VERSION_2 indexes each subscript is replaced by
+   * JsonUtils.KEY_SEPARATOR; for other versions the subscript is simply removed from the key.
+   */
+  private Pair<String, MutableRoaringBitmap> getKeyAndFlattenDocId(String key) 
{
+    MutableRoaringBitmap matchingDocIds = null;
+    if (_version == BaseJsonIndexCreator.VERSION_2) {
+      // Process the array indexes within the key, if any
+      // E.g. "[*]"=1 -> "."='1'
+      // E.g. "[0]"=1 -> ".$index"='0' && "."='1'
+      // E.g. "[0][1]"=1 -> ".$index"='0' && "..$index"='1' && ".."='1'
+      // E.g. ".foo[*].bar[*].foobar"='abc' -> ".foo..bar..foobar"='abc'
+      // E.g. ".foo[0].bar[1].foobar"='abc' -> ".foo.$index"='0' && 
".foo..bar.$index"='1' && ".foo..bar..foobar"='abc'
+      // E.g. ".foo[0][1].bar"='abc' -> ".foo.$index"='0' && 
".foo..$index"='1' && ".foo...bar"='abc'
+      int leftBracketIndex;
+      while ((leftBracketIndex = key.indexOf('[')) >= 0) {
+        int rightBracketIndex = key.indexOf(']', leftBracketIndex + 2); // search starts two past the left bracket
+        Preconditions.checkArgument(rightBracketIndex > 0, "Missing right 
bracket in key: %s", key);
+
+        String leftPart = key.substring(0, leftBracketIndex);
+        String arrayIndex = key.substring(leftBracketIndex + 1, 
rightBracketIndex);
+        String rightPart = key.substring(rightBracketIndex + 1);
+
+        if (!arrayIndex.equals(JsonUtils.WILDCARD)) {
+          // "[0]"=1 -> ".$index"='0' && "."='1'
+          // ".foo[1].bar"='abc' -> ".foo.$index"=1 && ".foo..bar"='abc'
+          String searchKey =
+                  leftPart + JsonUtils.ARRAY_INDEX_KEY + 
BaseJsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
+          int dictId = _dictionary.indexOf(searchKey);
+          if (dictId >= 0) {
+            ImmutableRoaringBitmap docIds = _invertedIndex.getDocIds(dictId);
+            if (matchingDocIds == null) {
+              matchingDocIds = docIds.toMutableRoaringBitmap();
+            } else {
+              matchingDocIds.and(docIds);
+            }
+          } else {
+            return Pair.of(null, new MutableRoaringBitmap()); // explicit index not in dictionary: nothing can match
+          }
+        }
+
+        key = leftPart + JsonUtils.KEY_SEPARATOR + rightPart; // replace the whole subscript with the key separator
+      }
+    } else {
+      // For V1 backward-compatibility
+      // Process the array indexes within the key, if any
+      // E.g. "foo[0].bar[1].foobar"='abc' -> "foo.$index"=0 && 
"foo.bar.$index"=1 && "foo.bar.foobar"='abc'
+      int leftBracketIndex;
+      while ((leftBracketIndex = key.indexOf('[')) > 0) { // unlike V2, a bracket at position 0 is not processed
+        int rightBracketIndex = key.indexOf(']', leftBracketIndex + 2);
+        Preconditions.checkArgument(rightBracketIndex > 0, "Missing right 
bracket in key: %s", key);
+
+        String leftPart = key.substring(0, leftBracketIndex);
+        String arrayIndex = key.substring(leftBracketIndex + 1, 
rightBracketIndex);
+        String rightPart = key.substring(rightBracketIndex + 1);
+
+        if (!arrayIndex.equals(JsonUtils.WILDCARD)) {
+          // "foo[1].bar"='abc' -> "foo.$index"=1 && "foo.bar"='abc'
+          String searchKey =
+                  leftPart + JsonUtils.ARRAY_INDEX_KEY + 
BaseJsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
+          int dictId = _dictionary.indexOf(searchKey);
+          if (dictId >= 0) {
+            ImmutableRoaringBitmap docIds = _invertedIndex.getDocIds(dictId);
+            if (matchingDocIds == null) {
+              matchingDocIds = docIds.toMutableRoaringBitmap();
+            } else {
+              matchingDocIds.and(docIds);
+            }
+          } else {
+            return Pair.of(null, new MutableRoaringBitmap()); // explicit index not in dictionary: nothing can match
+          }
+        }
+
+        key = leftPart + rightPart; // V1 drops the subscript without inserting a separator
+      }
+    }
+    return Pair.of(key, matchingDocIds);
+  }
+
+  private PeekableIntIterator intersect(MutableRoaringBitmap a, 
ImmutableRoaringBitmap b) {
+    a.and(b); // in-place AND: overwrites a with the intersection, so pass a copy if a must be kept
+    return a.getIntIterator(); // iterates the intersection now held in a
+  }
+
   @Override
   public void close() {
     // NOTE: DO NOT close the PinotDataBuffer here because it is tracked by 
the caller and might be reused later. The
diff --git 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java
 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java
index 63aa83b0e9..3b357e711a 100644
--- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java
+++ 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java
@@ -24,6 +24,7 @@ import java.io.File;
 import java.io.IOException;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
 import org.apache.commons.io.FileUtils;
@@ -451,6 +452,63 @@ public class JsonIndexTest {
     createIndex(true, jsonIndexConfig, records);
   }
 
+
+  @Test
+  public void testGetMatchingValDocIdsPairForArrayPath() throws Exception {
+    String[] records = {
+            
"{\"foo\":[{\"bar\":[\"x\",\"y\"]},{\"bar\":[\"a\",\"b\"]}],\"foo2\":[\"u\"]}",
+            "{\"foo\":[{\"bar\":[\"y\",\"z\"]}],\"foo2\":[\"u\"]}"
+    };
+    JsonIndexConfig jsonIndexConfig = new JsonIndexConfig();
+
+    createIndex(true, jsonIndexConfig, records);
+    File onHeapIndexFile = new File(INDEX_DIR, ON_HEAP_COLUMN_NAME + 
V1Constants.Indexes.JSON_INDEX_FILE_EXTENSION);
+    Assert.assertTrue(onHeapIndexFile.exists());
+
+    createIndex(false, jsonIndexConfig, records);
+    File offHeapIndexFile = new File(INDEX_DIR, OFF_HEAP_COLUMN_NAME + 
V1Constants.Indexes.JSON_INDEX_FILE_EXTENSION);
+    Assert.assertTrue(offHeapIndexFile.exists());
+
+    String[] keys = {".foo[0].bar[1]", ".foo[1].bar[0]", ".foo2[0]", 
".foo[100].bar[100]", ".foo[0].bar[*]",
+            ".foo[*].bar[0]", ".foo[*].bar[*]"};
+    List<Map<String, RoaringBitmap>> expected = List.of( // expected.get(i) is the value -> docIds map for keys[i]
+            Map.of("y", RoaringBitmap.bitmapOf(0), "z", 
RoaringBitmap.bitmapOf(1)),
+            Map.of("a", RoaringBitmap.bitmapOf(0)),
+            Map.of("u", RoaringBitmap.bitmapOf(0, 1)),
+            Collections.emptyMap(), // .foo[100].bar[100]: neither record has an element at that index
+            Map.of("x", RoaringBitmap.bitmapOf(0),
+                    "y", RoaringBitmap.bitmapOf(0, 1),
+                    "z", RoaringBitmap.bitmapOf(1)),
+            Map.of("x", RoaringBitmap.bitmapOf(0),
+                    "a", RoaringBitmap.bitmapOf(0),
+                    "y", RoaringBitmap.bitmapOf(1)),
+            Map.of("x", RoaringBitmap.bitmapOf(0),
+                    "y", RoaringBitmap.bitmapOf(0, 1),
+                    "z", RoaringBitmap.bitmapOf(1),
+                    "a", RoaringBitmap.bitmapOf(0),
+                    "b", RoaringBitmap.bitmapOf(0))
+    );
+
+    try (PinotDataBuffer onHeapDataBuffer = 
PinotDataBuffer.mapReadOnlyBigEndianFile(onHeapIndexFile);
+         PinotDataBuffer offHeapDataBuffer = 
PinotDataBuffer.mapReadOnlyBigEndianFile(offHeapIndexFile);
+         JsonIndexReader onHeapIndexReader = new 
ImmutableJsonIndexReader(onHeapDataBuffer, records.length);
+         JsonIndexReader offHeapIndexReader = new 
ImmutableJsonIndexReader(offHeapDataBuffer, records.length);
+         MutableJsonIndexImpl mutableJsonIndex = new 
MutableJsonIndexImpl(jsonIndexConfig)) {
+      for (String record : records) { // load the same records into the mutable index
+        mutableJsonIndex.add(record);
+      }
+
+      for (int i = 0; i < keys.length; i++) { // all three index implementations must agree with expected
+        Map<String, RoaringBitmap> onHeapRes = 
onHeapIndexReader.getMatchingDocsMap(keys[i]);
+        Map<String, RoaringBitmap> offHeapRes = 
offHeapIndexReader.getMatchingDocsMap(keys[i]);
+        Map<String, RoaringBitmap> mutableRes = 
mutableJsonIndex.getMatchingDocsMap(keys[i]);
+        Assert.assertEquals(expected.get(i), onHeapRes);
+        Assert.assertEquals(expected.get(i), offHeapRes);
+        Assert.assertEquals(mutableRes, expected.get(i)); // NOTE(review): argument order differs from the two asserts above - confirm intended order
+      }
+    }
+  }
+
   public static class ConfTest extends AbstractSerdeIndexContract {
 
     @Test


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to