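Hello. Might there be a min-versus-max copy/paste mistake in SimpleTextDimensionalReader.getMaxPackedValue? In the diff below it returns bkdReader.getMinPackedValue(), whereas the corresponding method in Lucene60DimensionalReader returns bkdReader.getMaxPackedValue().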
I will try next to see if that fixes recent org.apache.lucene.index.TestDuelingCodecs test failures. Christine ----- Original Message ----- From: dev@lucene.apache.org To: comm...@lucene.apache.org At: Jan 8 2016 10:52:29 Author: mikemccand Date: Fri Jan 8 10:52:15 2016 New Revision: 1723682 URL: http://svn.apache.org/viewvc?rev=1723682&view=rev Log: LUCENE-6962: add min/max per dimension to dimensional values Modified: lucene/dev/trunk/lucene/CHANGES.txt lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalReader.java lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalWriter.java lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalFormat.java lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalWriter.java lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60DimensionalReader.java lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValues.java lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValuesWriter.java lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiDimensionalValues.java lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDimensionalValues.java lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java 
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDimensionalFormat.java Modified: lucene/dev/trunk/lucene/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1723682&r1=1723681&r2=1723682&view=diff ============================================================================== --- lucene/dev/trunk/lucene/CHANGES.txt (original) +++ lucene/dev/trunk/lucene/CHANGES.txt Fri Jan 8 10:52:15 2016 @@ -55,6 +55,9 @@ New Features * LUCENE-6837: Add N-best output support to JapaneseTokenizer. (Hiroharu Konno via Christian Moen) +* LUCENE-6962: Add per-dimension min/max to dimensional values + (Mike McCandless) + API Changes * LUCENE-3312: The API of oal.document was restructured to Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java?rev=1723682&r1=1723681&r2=1723682&view=diff ============================================================================== --- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java (original) +++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java Fri Jan 8 10:52:15 2016 @@ -33,8 +33,9 @@ import static org.apache.lucene.codecs.s class SimpleTextBKDReader extends BKDReader { - public SimpleTextBKDReader(IndexInput datIn, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues) throws IOException { - super(datIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues); + public SimpleTextBKDReader(IndexInput datIn, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues, + byte[] minPackedValue, byte[] maxPackedValue) throws IOException { + super(datIn, numDims, maxPointsInLeafNode, 
bytesPerDim, leafBlockFPs, splitPackedValues, minPackedValue, maxPackedValue); } @Override Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalReader.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalReader.java?rev=1723682&r1=1723681&r2=1723682&view=diff ============================================================================== --- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalReader.java (original) +++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalReader.java Fri Jan 8 10:52:15 2016 @@ -43,6 +43,8 @@ import static org.apache.lucene.codecs.s import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.FIELD_FP_NAME; import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.INDEX_COUNT; import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.MAX_LEAF_POINTS; +import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.MAX_VALUE; +import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.MIN_VALUE; import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.NUM_DIMS; import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.SPLIT_COUNT; import static org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.SPLIT_DIM; @@ -89,6 +91,17 @@ class SimpleTextDimensionalReader extend readLine(dataIn); int count = parseInt(INDEX_COUNT); + + readLine(dataIn); + assert startsWith(MIN_VALUE); + BytesRef minValue = SimpleTextUtil.fromBytesRefString(stripPrefix(MIN_VALUE)); + assert minValue.length == numDims*bytesPerDim; + + readLine(dataIn); + assert startsWith(MAX_VALUE); + BytesRef maxValue = SimpleTextUtil.fromBytesRefString(stripPrefix(MAX_VALUE)); + assert maxValue.length == 
numDims*bytesPerDim; + long[] leafBlockFPs = new long[count]; for(int i=0;i<count;i++) { readLine(dataIn); @@ -108,7 +121,7 @@ class SimpleTextDimensionalReader extend System.arraycopy(br.bytes, br.offset, splitPackedValues, (1 + bytesPerDim) * i + 1, bytesPerDim); } - return new SimpleTextBKDReader(dataIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues); + return new SimpleTextBKDReader(dataIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minValue.bytes, maxValue.bytes); } private void readLine(IndexInput in) throws IOException { @@ -133,17 +146,21 @@ class SimpleTextDimensionalReader extend return new String(scratch.bytes(), prefix.length, scratch.length() - prefix.length, StandardCharsets.UTF_8); } - /** Finds all documents and points matching the provided visitor */ - @Override - public void intersect(String field, IntersectVisitor visitor) throws IOException { - FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(field); + private BKDReader getBKDReader(String fieldName) { + FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(fieldName); if (fieldInfo == null) { - throw new IllegalArgumentException("field=\"" + field + "\" is unrecognized"); + throw new IllegalArgumentException("field=\"" + fieldName + "\" is unrecognized"); } if (fieldInfo.getDimensionCount() == 0) { - throw new IllegalArgumentException("field=\"" + field + "\" did not index dimensional values"); + throw new IllegalArgumentException("field=\"" + fieldName + "\" did not index dimensional values"); } - BKDReader bkdReader = readers.get(field); + return readers.get(fieldName); + } + + /** Finds all documents and points matching the provided visitor */ + @Override + public void intersect(String fieldName, IntersectVisitor visitor) throws IOException { + BKDReader bkdReader = getBKDReader(fieldName); if (bkdReader == null) { // Schema ghost corner case! 
This field did index dimensional values in the past, but // now all docs having this dimensional field were deleted in this segment: @@ -188,4 +205,48 @@ class SimpleTextDimensionalReader extend public String toString() { return "SimpleTextDimensionalReader(segment=" + readState.segmentInfo.name + " maxDoc=" + readState.segmentInfo.maxDoc() + ")"; } + + @Override + public byte[] getMinPackedValue(String fieldName) { + BKDReader bkdReader = getBKDReader(fieldName); + if (bkdReader == null) { + // Schema ghost corner case! This field did index dimensional values in the past, but + // now all docs having this dimensional field were deleted in this segment: + return null; + } + return bkdReader.getMinPackedValue(); + } + + @Override + public byte[] getMaxPackedValue(String fieldName) { + BKDReader bkdReader = getBKDReader(fieldName); + if (bkdReader == null) { + // Schema ghost corner case! This field did index dimensional values in the past, but + // now all docs having this dimensional field were deleted in this segment: + return null; + } + return bkdReader.getMinPackedValue(); + } + + @Override + public int getNumDimensions(String fieldName) { + BKDReader bkdReader = getBKDReader(fieldName); + if (bkdReader == null) { + // Schema ghost corner case! This field did index dimensional values in the past, but + // now all docs having this dimensional field were deleted in this segment: + return 0; + } + return bkdReader.getNumDimensions(); + } + + @Override + public int getBytesPerDimension(String fieldName) { + BKDReader bkdReader = getBKDReader(fieldName); + if (bkdReader == null) { + // Schema ghost corner case! 
This field did index dimensional values in the past, but + // now all docs having this dimensional field were deleted in this segment: + return 0; + } + return bkdReader.getBytesPerDimension(); + } } Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalWriter.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalWriter.java?rev=1723682&r1=1723681&r2=1723682&view=diff ============================================================================== --- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalWriter.java (original) +++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalWriter.java Fri Jan 8 10:52:15 2016 @@ -49,6 +49,8 @@ class SimpleTextDimensionalWriter extend final static BytesRef FIELD_COUNT = new BytesRef("field count "); final static BytesRef FIELD_FP_NAME = new BytesRef(" field fp name "); final static BytesRef FIELD_FP = new BytesRef(" field fp "); + final static BytesRef MIN_VALUE = new BytesRef("min value "); + final static BytesRef MAX_VALUE = new BytesRef("max value "); private IndexOutput dataOut; final BytesRefBuilder scratch = new BytesRefBuilder(); @@ -90,6 +92,14 @@ class SimpleTextDimensionalWriter extend writeInt(out, leafBlockFPs.length); newline(out); + write(out, MIN_VALUE); + BytesRef br = new BytesRef(minPackedValue, 0, minPackedValue.length); + write(out, br.toString()); + + write(out, MAX_VALUE); + br = new BytesRef(maxPackedValue, 0, maxPackedValue.length); + write(out, br.toString()); + for(int i=0;i<leafBlockFPs.length;i++) { write(out, BLOCK_FP); writeLong(out, leafBlockFPs[i]); @@ -109,7 +119,7 @@ class SimpleTextDimensionalWriter extend writeInt(out, splitPackedValues[i * (1 + fieldInfo.getDimensionNumBytes())] & 0xff); newline(out); write(out, SPLIT_VALUE); - BytesRef br = new 
BytesRef(splitPackedValues, 1+(i * (1+fieldInfo.getDimensionNumBytes())), fieldInfo.getDimensionNumBytes()); + br = new BytesRef(splitPackedValues, 1+(i * (1+fieldInfo.getDimensionNumBytes())), fieldInfo.getDimensionNumBytes()); write(out, br.toString()); newline(out); } Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalFormat.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalFormat.java?rev=1723682&r1=1723681&r2=1723682&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalFormat.java (original) +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalFormat.java Fri Jan 8 10:52:15 2016 @@ -75,6 +75,26 @@ public abstract class DimensionalFormat public void intersect(String fieldName, IntersectVisitor visitor) { throw new IllegalArgumentException("field=\"" + fieldName + "\" was not indexed with dimensional values"); } + + @Override + public byte[] getMinPackedValue(String fieldName) { + throw new IllegalArgumentException("field=\"" + fieldName + "\" was not indexed with dimensional values"); + } + + @Override + public byte[] getMaxPackedValue(String fieldName) { + throw new IllegalArgumentException("field=\"" + fieldName + "\" was not indexed with dimensional values"); + } + + @Override + public int getNumDimensions(String fieldName) { + throw new IllegalArgumentException("field=\"" + fieldName + "\" was not indexed with dimensional values"); + } + + @Override + public int getBytesPerDimension(String fieldName) { + throw new IllegalArgumentException("field=\"" + fieldName + "\" was not indexed with dimensional values"); + } }; } }; Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalWriter.java URL: 
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalWriter.java?rev=1723682&r1=1723681&r2=1723682&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalWriter.java (original) +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalWriter.java Fri Jan 8 10:52:15 2016 @@ -95,6 +95,26 @@ public abstract class DimensionalWriter @Override public void close() { } + + @Override + public byte[] getMinPackedValue(String fieldName) { + throw new UnsupportedOperationException(); + } + + @Override + public byte[] getMaxPackedValue(String fieldName) { + throw new UnsupportedOperationException(); + } + + @Override + public int getNumDimensions(String fieldName) { + throw new UnsupportedOperationException(); + } + + @Override + public int getBytesPerDimension(String fieldName) { + throw new UnsupportedOperationException(); + } }); } Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60DimensionalReader.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60DimensionalReader.java?rev=1723682&r1=1723681&r2=1723682&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60DimensionalReader.java (original) +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60DimensionalReader.java Fri Jan 8 10:52:15 2016 @@ -84,24 +84,29 @@ public class Lucene60DimensionalReader e } } - @Override - public void intersect(String field, IntersectVisitor visitor) throws IOException { - FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(field); + private BKDReader getBKDReader(String fieldName) { + FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(fieldName); if (fieldInfo == 
null) { - throw new IllegalArgumentException("field=\"" + field + "\" is unrecognized"); + throw new IllegalArgumentException("field=\"" + fieldName + "\" is unrecognized"); } if (fieldInfo.getDimensionCount() == 0) { - throw new IllegalArgumentException("field=\"" + field + "\" did not index dimensional values"); + throw new IllegalArgumentException("field=\"" + fieldName + "\" did not index dimensional values"); } - BKDReader reader = readers.get(fieldInfo.number); - if (reader == null) { + return readers.get(fieldInfo.number); + } + + @Override + public void intersect(String fieldName, IntersectVisitor visitor) throws IOException { + BKDReader bkdReader = getBKDReader(fieldName); + + if (bkdReader == null) { // Schema ghost corner case! This field did index dimensional values in the past, but // now all docs having this dimensional field were deleted in this segment: return; } - reader.intersect(visitor); + bkdReader.intersect(visitor); } @Override @@ -134,5 +139,51 @@ public class Lucene60DimensionalReader e // Free up heap: readers.clear(); } + + @Override + public byte[] getMinPackedValue(String fieldName) { + BKDReader bkdReader = getBKDReader(fieldName); + if (bkdReader == null) { + // Schema ghost corner case! This field did index dimensional values in the past, but + // now all docs having this dimensional field were deleted in this segment: + return null; + } + + return bkdReader.getMinPackedValue(); + } + + @Override + public byte[] getMaxPackedValue(String fieldName) { + BKDReader bkdReader = getBKDReader(fieldName); + if (bkdReader == null) { + // Schema ghost corner case! This field did index dimensional values in the past, but + // now all docs having this dimensional field were deleted in this segment: + return null; + } + + return bkdReader.getMaxPackedValue(); + } + + @Override + public int getNumDimensions(String fieldName) { + BKDReader bkdReader = getBKDReader(fieldName); + if (bkdReader == null) { + // Schema ghost corner case! 
This field did index dimensional values in the past, but + // now all docs having this dimensional field were deleted in this segment: + return 0; + } + return bkdReader.getNumDimensions(); + } + + @Override + public int getBytesPerDimension(String fieldName) { + BKDReader bkdReader = getBKDReader(fieldName); + if (bkdReader == null) { + // Schema ghost corner case! This field did index dimensional values in the past, but + // now all docs having this dimensional field were deleted in this segment: + return 0; + } + return bkdReader.getBytesPerDimension(); + } } Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValues.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValues.java?rev=1723682&r1=1723681&r2=1723682&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValues.java (original) +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValues.java Fri Jan 8 10:52:15 2016 @@ -39,7 +39,7 @@ public abstract class DimensionalValues /** Maximum number of dimensions */ public static final int MAX_DIMENSIONS = BKDWriter.MAX_DIMS; - /** Defautl constructor */ + /** Default constructor */ protected DimensionalValues() { } @@ -78,4 +78,16 @@ public abstract class DimensionalValues * This method does not enforce live docs, so it's up to the caller * to test whether each document is deleted, if necessary. 
*/ public abstract void intersect(String fieldName, IntersectVisitor visitor) throws IOException; + + /** Returns minimum value for each dimension, packed, or null if no points were indexed */ + public abstract byte[] getMinPackedValue(String fieldName) throws IOException; + + /** Returns maximum value for each dimension, packed, or null if no points were indexed */ + public abstract byte[] getMaxPackedValue(String fieldName) throws IOException; + + /** Returns how many dimensions were indexed */ + public abstract int getNumDimensions(String fieldName) throws IOException; + + /** Returns the number of bytes per dimension */ + public abstract int getBytesPerDimension(String fieldName) throws IOException; } Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValuesWriter.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValuesWriter.java?rev=1723682&r1=1723681&r2=1723682&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValuesWriter.java (original) +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValuesWriter.java Fri Jan 8 10:52:15 2016 @@ -63,8 +63,6 @@ class DimensionalValuesWriter { public void flush(SegmentWriteState state, DimensionalWriter writer) throws IOException { - final int maxDoc = state.segmentInfo.maxDoc(); - writer.writeField(fieldInfo, new DimensionalReader() { @Override @@ -91,6 +89,26 @@ class DimensionalValuesWriter { @Override public void close() { } + + @Override + public byte[] getMinPackedValue(String fieldName) { + throw new UnsupportedOperationException(); + } + + @Override + public byte[] getMaxPackedValue(String fieldName) { + throw new UnsupportedOperationException(); + } + + @Override + public int getNumDimensions(String fieldName) { + throw new UnsupportedOperationException(); + } + + @Override + public 
int getBytesPerDimension(String fieldName) { + throw new UnsupportedOperationException(); + } }); } } Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiDimensionalValues.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiDimensionalValues.java?rev=1723682&r1=1723681&r2=1723682&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiDimensionalValues.java (original) +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiDimensionalValues.java Fri Jan 8 10:52:15 2016 @@ -21,6 +21,8 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import org.apache.lucene.util.StringHelper; + class MultiDimensionalValues extends DimensionalValues { private final List<DimensionalValues> subs; @@ -95,4 +97,74 @@ class MultiDimensionalValues extends Dim b.append(')'); return b.toString(); } + + @Override + public byte[] getMinPackedValue(String fieldName) throws IOException { + byte[] result = null; + for(int i=0;i<subs.size();i++) { + byte[] minPackedValue = subs.get(i).getMinPackedValue(fieldName); + if (result == null) { + if (minPackedValue != null) { + result = minPackedValue.clone(); + } + } else { + int numDims = subs.get(0).getNumDimensions(fieldName); + int bytesPerDim = subs.get(0).getBytesPerDimension(fieldName); + for(int dim=0;dim<numDims;dim++) { + int offset = dim*bytesPerDim; + if (StringHelper.compare(bytesPerDim, minPackedValue, offset, result, offset) < 0) { + System.arraycopy(minPackedValue, offset, result, offset, bytesPerDim); + } + } + } + } + + return result; + } + + @Override + public byte[] getMaxPackedValue(String fieldName) throws IOException { + byte[] result = null; + for(int i=0;i<subs.size();i++) { + byte[] maxPackedValue = subs.get(i).getMaxPackedValue(fieldName); + if (result == null) { + if (maxPackedValue != null) { + 
result = maxPackedValue.clone(); + } + } else { + int numDims = subs.get(0).getNumDimensions(fieldName); + int bytesPerDim = subs.get(0).getBytesPerDimension(fieldName); + for(int dim=0;dim<numDims;dim++) { + int offset = dim*bytesPerDim; + if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, result, offset) > 0) { + System.arraycopy(maxPackedValue, offset, result, offset, bytesPerDim); + } + } + } + } + + return result; + } + + @Override + public int getNumDimensions(String fieldName) throws IOException { + for(int i=0;i<subs.size();i++) { + int result = subs.get(i).getNumDimensions(fieldName); + if (result != 0) { + return result; + } + } + return 0; + } + + @Override + public int getBytesPerDimension(String fieldName) throws IOException { + for(int i=0;i<subs.size();i++) { + int result = subs.get(i).getBytesPerDimension(fieldName); + if (result != 0) { + return result; + } + } + return 0; + } } Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java?rev=1723682&r1=1723681&r2=1723682&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java (original) +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java Fri Jan 8 10:52:15 2016 @@ -329,6 +329,58 @@ public class ParallelLeafReader extends } dimValues.intersect(fieldName, visitor); } + + @Override + public byte[] getMinPackedValue(String fieldName) throws IOException { + LeafReader reader = fieldToReader.get(fieldName); + if (reader == null) { + return null; + } + DimensionalValues dimValues = reader.getDimensionalValues(); + if (dimValues == null) { + return null; + } + return dimValues.getMinPackedValue(fieldName); + } + + @Override + public byte[] getMaxPackedValue(String fieldName) throws 
IOException { + LeafReader reader = fieldToReader.get(fieldName); + if (reader == null) { + return null; + } + DimensionalValues dimValues = reader.getDimensionalValues(); + if (dimValues == null) { + return null; + } + return dimValues.getMaxPackedValue(fieldName); + } + + @Override + public int getNumDimensions(String fieldName) throws IOException { + LeafReader reader = fieldToReader.get(fieldName); + if (reader == null) { + return 0; + } + DimensionalValues dimValues = reader.getDimensionalValues(); + if (dimValues == null) { + return 0; + } + return dimValues.getNumDimensions(fieldName); + } + + @Override + public int getBytesPerDimension(String fieldName) throws IOException { + LeafReader reader = fieldToReader.get(fieldName); + if (reader == null) { + return 0; + } + DimensionalValues dimValues = reader.getDimensionalValues(); + if (dimValues == null) { + return 0; + } + return dimValues.getBytesPerDimension(fieldName); + } }; } Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java?rev=1723682&r1=1723681&r2=1723682&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java (original) +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java Fri Jan 8 10:52:15 2016 @@ -152,6 +152,26 @@ public final class SlowCodecReaderWrappe public long ramBytesUsed() { return 0; } + + @Override + public byte[] getMinPackedValue(String fieldName) throws IOException { + return values.getMinPackedValue(fieldName); + } + + @Override + public byte[] getMaxPackedValue(String fieldName) throws IOException { + return values.getMaxPackedValue(fieldName); + } + + @Override + public int getNumDimensions(String fieldName) throws IOException { + return 
values.getNumDimensions(fieldName); + } + + @Override + public int getBytesPerDimension(String fieldName) throws IOException { + return values.getBytesPerDimension(fieldName); + } }; } Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java?rev=1723682&r1=1723681&r2=1723682&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java (original) +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java Fri Jan 8 10:52:15 2016 @@ -43,6 +43,8 @@ public class BKDReader implements Accoun final int bytesPerDim; final IndexInput in; final int maxPointsInLeafNode; + final byte[] minPackedValue; + final byte[] maxPackedValue; protected final int packedBytesLength; /** Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned */ @@ -58,6 +60,11 @@ public class BKDReader implements Accoun assert numLeaves > 0; leafNodeOffset = numLeaves; + minPackedValue = new byte[packedBytesLength]; + maxPackedValue = new byte[packedBytesLength]; + in.readBytes(minPackedValue, 0, packedBytesLength); + in.readBytes(maxPackedValue, 0, packedBytesLength); + splitPackedValues = new byte[(1+bytesPerDim)*numLeaves]; // TODO: don't write split packed values[0]! @@ -116,8 +123,9 @@ public class BKDReader implements Accoun this.in = in; } - /** Called by consumers that have their own on-disk format for the index */ - protected BKDReader(IndexInput in, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues) throws IOException { + /** Called by consumers that have their own on-disk format for the index (e.g. 
SimpleText) */ + protected BKDReader(IndexInput in, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues, + byte[] minPackedValue, byte[] maxPackedValue) throws IOException { this.in = in; this.numDims = numDims; this.maxPointsInLeafNode = maxPointsInLeafNode; @@ -126,6 +134,10 @@ public class BKDReader implements Accoun this.leafNodeOffset = leafBlockFPs.length; this.leafBlockFPs = leafBlockFPs; this.splitPackedValues = splitPackedValues; + this.minPackedValue = minPackedValue; + this.maxPackedValue = maxPackedValue; + assert minPackedValue.length == packedBytesLength; + assert maxPackedValue.length == packedBytesLength; } private static class VerifyVisitor implements IntersectVisitor { @@ -405,4 +417,20 @@ public class BKDReader implements Accoun return splitPackedValues.length + leafBlockFPs.length * RamUsageEstimator.NUM_BYTES_LONG; } + + public byte[] getMinPackedValue() { + return minPackedValue.clone(); + } + + public byte[] getMaxPackedValue() { + return maxPackedValue.clone(); + } + + public int getNumDimensions() { + return numDims; + } + + public int getBytesPerDimension() { + return bytesPerDim; + } } Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java?rev=1723682&r1=1723681&r2=1723682&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java (original) +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java Fri Jan 8 10:52:15 2016 @@ -119,6 +119,12 @@ public class BKDWriter implements Closea protected final int maxPointsInLeafNode; private final int maxPointsSortInHeap; + /** Minimum per-dim values, packed */ + protected final byte[] minPackedValue; + + /** Maximum per-dim values, packed */ + protected final byte[] 
maxPackedValue; + private long pointCount; public BKDWriter(Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim) throws IOException { @@ -142,6 +148,9 @@ public class BKDWriter implements Closea scratch2 = new byte[packedBytesLength]; commonPrefixLengths = new int[numDims]; + minPackedValue = new byte[packedBytesLength]; + maxPackedValue = new byte[packedBytesLength]; + // dimensional values (numDims * bytesPerDim) + ord (long) + docID (int) bytesPerDoc = packedBytesLength + RamUsageEstimator.NUM_BYTES_LONG + RamUsageEstimator.NUM_BYTES_INT; @@ -213,6 +222,22 @@ public class BKDWriter implements Closea heapPointWriter.append(packedValue, pointCount, docID); } + // TODO: we could specialize for the 1D case: + if (pointCount == 0) { + System.arraycopy(packedValue, 0, minPackedValue, 0, packedBytesLength); + System.arraycopy(packedValue, 0, maxPackedValue, 0, packedBytesLength); + } else { + for(int dim=0;dim<numDims;dim++) { + int offset = dim*bytesPerDim; + if (StringHelper.compare(bytesPerDim, packedValue, offset, minPackedValue, offset) < 0) { + System.arraycopy(packedValue, offset, minPackedValue, offset, bytesPerDim); + } + if (StringHelper.compare(bytesPerDim, packedValue, offset, maxPackedValue, offset) > 0) { + System.arraycopy(packedValue, offset, maxPackedValue, offset, bytesPerDim); + } + } + } + pointCount++; } @@ -398,6 +423,11 @@ public class BKDWriter implements Closea leafBlockDocIDs[leafCount] = reader.docIDBase + reader.docID; System.arraycopy(reader.state.scratchPackedValue, 0, leafBlockPackedValues[leafCount], 0, packedBytesLength); + if (valueCount == 0) { + System.arraycopy(reader.state.scratchPackedValue, 0, minPackedValue, 0, packedBytesLength); + } + System.arraycopy(reader.state.scratchPackedValue, 0, maxPackedValue, 0, packedBytesLength); + assert numDims > 1 || valueInOrder(valueCount++, lastPackedValue, reader.state.scratchPackedValue); if (leafCount == 0) { @@ -836,6 +866,8 @@ public class BKDWriter implements 
Closea assert leafBlockFPs.length > 0; out.writeVInt(leafBlockFPs.length); + out.writeBytes(minPackedValue, 0, packedBytesLength); + out.writeBytes(maxPackedValue, 0, packedBytesLength); // TODO: for 1D case, don't waste the first byte of each split value (it's always 0) Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDimensionalValues.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDimensionalValues.java?rev=1723682&r1=1723681&r2=1723682&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDimensionalValues.java (original) +++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDimensionalValues.java Fri Jan 8 10:52:15 2016 @@ -20,6 +20,7 @@ package org.apache.lucene.index; import java.io.IOException; import java.math.BigInteger; import java.util.ArrayList; +import java.util.Arrays; import java.util.BitSet; import java.util.List; @@ -46,6 +47,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.TestUtil; // TODO: factor out a BaseTestDimensionFormat @@ -906,6 +908,28 @@ public class TestDimensionalValues exten RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); DirectoryReader r = null; + // Compute actual min/max values: + byte[][] expectedMinValues = new byte[numDims][]; + byte[][] expectedMaxValues = new byte[numDims][]; + for(int ord=0;ord<docValues.length;ord++) { + for(int dim=0;dim<numDims;dim++) { + if (ord == 0) { + expectedMinValues[dim] = new byte[numBytesPerDim]; + System.arraycopy(docValues[ord][dim], 0, expectedMinValues[dim], 0, numBytesPerDim); + expectedMaxValues[dim] = new byte[numBytesPerDim]; + System.arraycopy(docValues[ord][dim], 0, 
expectedMaxValues[dim], 0, numBytesPerDim); + } else { + // TODO: it's cheating that we use StringHelper.compare for "truth": what if it's buggy? + if (StringHelper.compare(numBytesPerDim, docValues[ord][dim], 0, expectedMinValues[dim], 0) < 0) { + System.arraycopy(docValues[ord][dim], 0, expectedMinValues[dim], 0, numBytesPerDim); + } + if (StringHelper.compare(numBytesPerDim, docValues[ord][dim], 0, expectedMaxValues[dim], 0) > 0) { + System.arraycopy(docValues[ord][dim], 0, expectedMaxValues[dim], 0, numBytesPerDim); + } + } + } + } + // 20% of the time we add into a separate directory, then at some point use // addIndexes to bring the indexed dimensional values to the main directory: Directory saveDir; @@ -1036,6 +1060,19 @@ public class TestDimensionalValues exten NumericDocValues idValues = MultiDocValues.getNumericValues(r, "id"); Bits liveDocs = MultiFields.getLiveDocs(r); + // Verify min/max values are correct: + byte[] minValues = dimValues.getMinPackedValue("field"); + byte[] maxValues = dimValues.getMaxPackedValue("field"); + byte[] scratch = new byte[numBytesPerDim]; + for(int dim=0;dim<numDims;dim++) { + System.arraycopy(minValues, dim*numBytesPerDim, scratch, 0, scratch.length); + //System.out.println("dim=" + dim + " expectedMin=" + new BytesRef(expectedMinValues[dim]) + " min=" + new BytesRef(scratch)); + assertTrue(Arrays.equals(expectedMinValues[dim], scratch)); + System.arraycopy(maxValues, dim*numBytesPerDim, scratch, 0, scratch.length); + //System.out.println("dim=" + dim + " expectedMax=" + new BytesRef(expectedMaxValues[dim]) + " max=" + new BytesRef(scratch)); + assertTrue(Arrays.equals(expectedMaxValues[dim], scratch)); + } + int iters = atLeast(100); for(int iter=0;iter<iters;iter++) { if (VERBOSE) { Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java URL: 
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java?rev=1723682&r1=1723681&r2=1723682&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java (original) +++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java Fri Jan 8 10:52:15 2016 @@ -123,6 +123,10 @@ public class TestBKD extends LuceneTestC } int[][] docs = new int[numDocs][]; byte[] scratch = new byte[4*numDims]; + int[] minValue = new int[numDims]; + int[] maxValue = new int[numDims]; + Arrays.fill(minValue, Integer.MAX_VALUE); + Arrays.fill(maxValue, Integer.MIN_VALUE); for(int docID=0;docID<numDocs;docID++) { int[] values = new int[numDims]; if (VERBOSE) { @@ -130,6 +134,12 @@ public class TestBKD extends LuceneTestC } for(int dim=0;dim<numDims;dim++) { values[dim] = random().nextInt(); + if (values[dim] < minValue[dim]) { + minValue[dim] = values[dim]; + } + if (values[dim] > maxValue[dim]) { + maxValue[dim] = values[dim]; + } NumericUtils.intToBytes(values[dim], scratch, dim); if (VERBOSE) { System.out.println(" " + dim + " -> " + values[dim]); @@ -148,6 +158,13 @@ public class TestBKD extends LuceneTestC in.seek(indexFP); BKDReader r = new BKDReader(in); + byte[] minPackedValue = r.getMinPackedValue(); + byte[] maxPackedValue = r.getMaxPackedValue(); + for(int dim=0;dim<numDims;dim++) { + assertEquals(minValue[dim], NumericUtils.bytesToInt(minPackedValue, dim)); + assertEquals(maxValue[dim], NumericUtils.bytesToInt(maxPackedValue, dim)); + } + int iters = atLeast(100); for(int iter=0;iter<iters;iter++) { if (VERBOSE) { Modified: lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java?rev=1723682&r1=1723681&r2=1723682&view=diff 
============================================================================== --- lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java (original) +++ lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java Fri Jan 8 10:52:15 2016 @@ -265,8 +265,8 @@ public class SortingLeafReader extends F } @Override - public void intersect(String field, IntersectVisitor visitor) throws IOException { - in.intersect(field, + public void intersect(String fieldName, IntersectVisitor visitor) throws IOException { + in.intersect(fieldName, new IntersectVisitor() { @Override public void visit(int docID) throws IOException { @@ -284,6 +284,26 @@ public class SortingLeafReader extends F } }); } + + @Override + public byte[] getMinPackedValue(String fieldName) throws IOException { + return in.getMinPackedValue(fieldName); + } + + @Override + public byte[] getMaxPackedValue(String fieldName) throws IOException { + return in.getMaxPackedValue(fieldName); + } + + @Override + public int getNumDimensions(String fieldName) throws IOException { + return in.getNumDimensions(fieldName); + } + + @Override + public int getBytesPerDimension(String fieldName) throws IOException { + return in.getBytesPerDimension(fieldName); + } } private static class SortingSortedDocValues extends SortedDocValues { Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDimensionalFormat.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDimensionalFormat.java?rev=1723682&r1=1723681&r2=1723682&view=diff ============================================================================== --- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDimensionalFormat.java (original) +++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDimensionalFormat.java Fri Jan 8 
10:52:15 2016 @@ -98,6 +98,26 @@ public final class AssertingDimensionalF public String toString() { return getClass().getSimpleName() + "(" + in.toString() + ")"; } + + @Override + public byte[] getMinPackedValue(String fieldName) throws IOException { + return in.getMinPackedValue(fieldName); + } + + @Override + public byte[] getMaxPackedValue(String fieldName) throws IOException { + return in.getMaxPackedValue(fieldName); + } + + @Override + public int getNumDimensions(String fieldName) throws IOException { + return in.getNumDimensions(fieldName); + } + + @Override + public int getBytesPerDimension(String fieldName) throws IOException { + return in.getBytesPerDimension(fieldName); + } } static class AssertingDimensionalWriter extends DimensionalWriter {