This is an automated email from the ASF dual-hosted git repository.
gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new e273264 Fix two id-over-maxId errors in StringDimensionIndexer.
(#10245)
e273264 is described below
commit e273264332f5d1e619c50c6ee9727058ccdfb9b2
Author: Gian Merlino <[email protected]>
AuthorDate: Tue Aug 11 20:32:10 2020 -0700
Fix two id-over-maxId errors in StringDimensionIndexer. (#10245)
1) lookupId could return IDs beyond maxId if called with a recently added
value.
2) getRow could return an ID for null beyond maxId, if null was recently
encountered in a dimension that initially didn't appear at all. (In this
case,
the dictionary ID for null can be > 0).
Also add a comment explaining how this stuff is supposed to work.
---
.../druid/segment/StringDimensionIndexer.java | 30 +++++++++++++++++++---
1 file changed, 26 insertions(+), 4 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
index bca0a5c..a6b1219 100644
---
a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
+++
b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
@@ -51,6 +51,7 @@ import org.apache.druid.segment.filter.BooleanValueMatcher;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexRow;
import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
+import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
import javax.annotation.Nullable;
import java.util.ArrayList;
@@ -87,7 +88,7 @@ public class StringDimensionIndexer implements
DimensionIndexer<Integer, int[],
public DimensionDictionary()
{
this.lock = new ReentrantReadWriteLock();
- valueToId.defaultReturnValue(-1);
+ valueToId.defaultReturnValue(ABSENT_VALUE_ID);
}
public int getId(@Nullable String value)
@@ -513,11 +514,21 @@ public class StringDimensionIndexer implements
DimensionIndexer<Integer, int[],
final ExtractionFn extractionFn = spec.getExtractionFn();
final int dimIndex = desc.getIndex();
+
+ // maxId is used in concert with getLastRowIndex() in IncrementalIndex to
ensure that callers do not encounter
+ // rows that contain IDs over the initially-reported cardinality. The main
idea is that IncrementalIndex establishes
+ // a watermark at the time a cursor is created, and doesn't allow the
cursor to walk past that watermark.
+ //
+ // Additionally, this selector explicitly blocks knowledge of IDs past
maxId that may occur from other causes
+ // (for example: nulls getting generated for empty arrays, or calls to
lookupId).
final int maxId = getCardinality();
class IndexerDimensionSelector implements DimensionSelector, IdLookup
{
private final ArrayBasedIndexedInts indexedInts = new
ArrayBasedIndexedInts();
+
+ @Nullable
+ @MonotonicNonNull
private int[] nullIdIntArray;
@Override
@@ -542,14 +553,15 @@ public class StringDimensionIndexer implements
DimensionIndexer<Integer, int[],
rowSize = 0;
} else {
final int nullId = getEncodedValue(null, false);
- if (nullId > -1) {
+ if (nullId >= 0 && nullId < maxId) {
+ // null was added to the dictionary before this selector was
created; return its ID.
if (nullIdIntArray == null) {
nullIdIntArray = new int[]{nullId};
}
row = nullIdIntArray;
rowSize = 1;
} else {
- // doesn't contain nullId, then empty array is used
+ // null doesn't exist in the dictionary; return an empty array.
// Choose to use ArrayBasedIndexedInts later, instead of special
"empty" IndexedInts, for monomorphism
row = IntArrays.EMPTY_ARRAY;
rowSize = 0;
@@ -668,6 +680,7 @@ public class StringDimensionIndexer implements
DimensionIndexer<Integer, int[],
public String lookupName(int id)
{
if (id >= maxId) {
+ // Sanity check; IDs beyond maxId should not be known to callers.
(See comment above.)
throw new ISE("id[%d] >= maxId[%d]", id, maxId);
}
final String strValue = getActualValue(id, false);
@@ -695,7 +708,16 @@ public class StringDimensionIndexer implements
DimensionIndexer<Integer, int[],
"cannot perform lookup when applying an extraction function"
);
}
- return getEncodedValue(name, false);
+
+ final int id = getEncodedValue(name, false);
+
+ if (id < maxId) {
+ return id;
+ } else {
+ // Can happen if a value was added to our dimLookup after this
selector was created. Act like it
+ // doesn't exist.
+ return ABSENT_VALUE_ID;
+ }
}
@SuppressWarnings("deprecation")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]