This is an automated email from the ASF dual-hosted git repository.

gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new e273264  Fix two id-over-maxId errors in StringDimensionIndexer. 
(#10245)
e273264 is described below

commit e273264332f5d1e619c50c6ee9727058ccdfb9b2
Author: Gian Merlino <[email protected]>
AuthorDate: Tue Aug 11 20:32:10 2020 -0700

    Fix two id-over-maxId errors in StringDimensionIndexer. (#10245)
    
    1) lookupId could return IDs beyond maxId if called with a recently added 
value.
    2) getRow could return an ID for null beyond maxId, if null was recently
       encountered in a dimension that initially didn't appear at all. (In this 
case,
       the dictionary ID for null can be > 0).
    
    Also add a comment explaining how this stuff is supposed to work.
---
 .../druid/segment/StringDimensionIndexer.java      | 30 +++++++++++++++++++---
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git 
a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java 
b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
index bca0a5c..a6b1219 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
@@ -51,6 +51,7 @@ import org.apache.druid.segment.filter.BooleanValueMatcher;
 import org.apache.druid.segment.incremental.IncrementalIndex;
 import org.apache.druid.segment.incremental.IncrementalIndexRow;
 import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
+import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
 
 import javax.annotation.Nullable;
 import java.util.ArrayList;
@@ -87,7 +88,7 @@ public class StringDimensionIndexer implements 
DimensionIndexer<Integer, int[],
     public DimensionDictionary()
     {
       this.lock = new ReentrantReadWriteLock();
-      valueToId.defaultReturnValue(-1);
+      valueToId.defaultReturnValue(ABSENT_VALUE_ID);
     }
 
     public int getId(@Nullable String value)
@@ -513,11 +514,21 @@ public class StringDimensionIndexer implements 
DimensionIndexer<Integer, int[],
     final ExtractionFn extractionFn = spec.getExtractionFn();
 
     final int dimIndex = desc.getIndex();
+
+    // maxId is used in concert with getLastRowIndex() in IncrementalIndex to 
ensure that callers do not encounter
+    // rows that contain IDs over the initially-reported cardinality. The main 
idea is that IncrementalIndex establishes
+    // a watermark at the time a cursor is created, and doesn't allow the 
cursor to walk past that watermark.
+    //
+    // Additionally, this selector explicitly blocks knowledge of IDs past 
maxId that may occur from other causes
+    // (for example: nulls getting generated for empty arrays, or calls to 
lookupId).
     final int maxId = getCardinality();
 
     class IndexerDimensionSelector implements DimensionSelector, IdLookup
     {
       private final ArrayBasedIndexedInts indexedInts = new 
ArrayBasedIndexedInts();
+
+      @Nullable
+      @MonotonicNonNull
       private int[] nullIdIntArray;
 
       @Override
@@ -542,14 +553,15 @@ public class StringDimensionIndexer implements 
DimensionIndexer<Integer, int[],
             rowSize = 0;
           } else {
             final int nullId = getEncodedValue(null, false);
-            if (nullId > -1) {
+            if (nullId >= 0 && nullId < maxId) {
+              // null was added to the dictionary before this selector was 
created; return its ID.
               if (nullIdIntArray == null) {
                 nullIdIntArray = new int[]{nullId};
               }
               row = nullIdIntArray;
               rowSize = 1;
             } else {
-              // doesn't contain nullId, then empty array is used
+              // null doesn't exist in the dictionary; return an empty array.
               // Choose to use ArrayBasedIndexedInts later, instead of special 
"empty" IndexedInts, for monomorphism
               row = IntArrays.EMPTY_ARRAY;
               rowSize = 0;
@@ -668,6 +680,7 @@ public class StringDimensionIndexer implements 
DimensionIndexer<Integer, int[],
       public String lookupName(int id)
       {
         if (id >= maxId) {
+          // Sanity check; IDs beyond maxId should not be known to callers. 
(See comment above.)
           throw new ISE("id[%d] >= maxId[%d]", id, maxId);
         }
         final String strValue = getActualValue(id, false);
@@ -695,7 +708,16 @@ public class StringDimensionIndexer implements 
DimensionIndexer<Integer, int[],
               "cannot perform lookup when applying an extraction function"
           );
         }
-        return getEncodedValue(name, false);
+
+        final int id = getEncodedValue(name, false);
+
+        if (id < maxId) {
+          return id;
+        } else {
+          // Can happen if a value was added to our dimLookup after this 
selector was created. Act like it
+          // doesn't exist.
+          return ABSENT_VALUE_ID;
+        }
       }
 
       @SuppressWarnings("deprecation")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to