Yes indeed, thanks for catching this, Christine! I'll fix ... Mike McCandless
http://blog.mikemccandless.com On Fri, Jan 8, 2016 at 8:22 AM, Christine Poerschke (BLOOMBERG/ LONDON) <cpoersc...@bloomberg.net> wrote: > Hello. > > Might there be a min-versus-max copy/paste mistake in getMaxPackedValue? > > I will try next to see if that fixes recent > org.apache.lucene.index.TestDuelingCodecs test failures. > > Christine > > ----- Original Message ----- > From: dev@lucene.apache.org > To: comm...@lucene.apache.org > At: Jan 8 2016 10:52:29 > > Author: mikemccand > Date: Fri Jan 8 10:52:15 2016 > New Revision: 1723682 > > URL: http://svn.apache.org/viewvc?rev=1723682&view=rev > Log: > LUCENE-6962: add min/max per dimension to dimensional values > > Modified: > lucene/dev/trunk/lucene/CHANGES.txt > > lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java > > lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalReader.java > > lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalWriter.java > > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalFormat.java > > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalWriter.java > > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60DimensionalReader.java > > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValues.java > > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValuesWriter.java > > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiDimensionalValues.java > > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java > > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java > > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java > > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java > > 
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDimensionalValues.java > > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java > > lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java > > lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDimensionalFormat.java > > Modified: lucene/dev/trunk/lucene/CHANGES.txt > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1723682&r1=1723681&r2=1723682&view=diff > ============================================================================== > --- lucene/dev/trunk/lucene/CHANGES.txt (original) > +++ lucene/dev/trunk/lucene/CHANGES.txt Fri Jan 8 10:52:15 2016 > @@ -55,6 +55,9 @@ New Features > * LUCENE-6837: Add N-best output support to JapaneseTokenizer. > (Hiroharu Konno via Christian Moen) > > +* LUCENE-6962: Add per-dimension min/max to dimensional values > + (Mike McCandless) > + > API Changes > > * LUCENE-3312: The API of oal.document was restructured to > > Modified: > lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java?rev=1723682&r1=1723681&r2=1723682&view=diff > ============================================================================== > --- > lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java > (original) > +++ > lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java > Fri Jan 8 10:52:15 2016 > @@ -33,8 +33,9 @@ import static org.apache.lucene.codecs.s > > class SimpleTextBKDReader extends BKDReader { > > - public SimpleTextBKDReader(IndexInput datIn, int numDims, int > maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] > splitPackedValues) throws IOException { > - super(datIn, 
numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, > splitPackedValues); > + public SimpleTextBKDReader(IndexInput datIn, int numDims, int > maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] > splitPackedValues, > + byte[] minPackedValue, byte[] maxPackedValue) > throws IOException { > + super(datIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, > splitPackedValues, minPackedValue, maxPackedValue); > } > > @Override > > Modified: > lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalReader.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalReader.java?rev=1723682&r1=1723681&r2=1723682&view=diff > ============================================================================== > --- > lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalReader.java > (original) > +++ > lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalReader.java > Fri Jan 8 10:52:15 2016 > @@ -43,6 +43,8 @@ import static org.apache.lucene.codecs.s > import static > org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.FIELD_FP_NAME; > import static > org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.INDEX_COUNT; > import static > org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.MAX_LEAF_POINTS; > +import static > org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.MAX_VALUE; > +import static > org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.MIN_VALUE; > import static > org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.NUM_DIMS; > import static > org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.SPLIT_COUNT; > import static > org.apache.lucene.codecs.simpletext.SimpleTextDimensionalWriter.SPLIT_DIM; > @@ -89,6 +91,17 @@ class SimpleTextDimensionalReader 
extend > > readLine(dataIn); > int count = parseInt(INDEX_COUNT); > + > + readLine(dataIn); > + assert startsWith(MIN_VALUE); > + BytesRef minValue = > SimpleTextUtil.fromBytesRefString(stripPrefix(MIN_VALUE)); > + assert minValue.length == numDims*bytesPerDim; > + > + readLine(dataIn); > + assert startsWith(MAX_VALUE); > + BytesRef maxValue = > SimpleTextUtil.fromBytesRefString(stripPrefix(MAX_VALUE)); > + assert maxValue.length == numDims*bytesPerDim; > + > long[] leafBlockFPs = new long[count]; > for(int i=0;i<count;i++) { > readLine(dataIn); > @@ -108,7 +121,7 @@ class SimpleTextDimensionalReader extend > System.arraycopy(br.bytes, br.offset, splitPackedValues, (1 + > bytesPerDim) * i + 1, bytesPerDim); > } > > - return new SimpleTextBKDReader(dataIn, numDims, maxPointsInLeafNode, > bytesPerDim, leafBlockFPs, splitPackedValues); > + return new SimpleTextBKDReader(dataIn, numDims, maxPointsInLeafNode, > bytesPerDim, leafBlockFPs, splitPackedValues, minValue.bytes, maxValue.bytes); > } > > private void readLine(IndexInput in) throws IOException { > @@ -133,17 +146,21 @@ class SimpleTextDimensionalReader extend > return new String(scratch.bytes(), prefix.length, scratch.length() - > prefix.length, StandardCharsets.UTF_8); > } > > - /** Finds all documents and points matching the provided visitor */ > - @Override > - public void intersect(String field, IntersectVisitor visitor) throws > IOException { > - FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(field); > + private BKDReader getBKDReader(String fieldName) { > + FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(fieldName); > if (fieldInfo == null) { > - throw new IllegalArgumentException("field=\"" + field + "\" is > unrecognized"); > + throw new IllegalArgumentException("field=\"" + fieldName + "\" is > unrecognized"); > } > if (fieldInfo.getDimensionCount() == 0) { > - throw new IllegalArgumentException("field=\"" + field + "\" did not > index dimensional values"); > + throw new 
IllegalArgumentException("field=\"" + fieldName + "\" did > not index dimensional values"); > } > - BKDReader bkdReader = readers.get(field); > + return readers.get(fieldName); > + } > + > + /** Finds all documents and points matching the provided visitor */ > + @Override > + public void intersect(String fieldName, IntersectVisitor visitor) throws > IOException { > + BKDReader bkdReader = getBKDReader(fieldName); > if (bkdReader == null) { > // Schema ghost corner case! This field did index dimensional values > in the past, but > // now all docs having this dimensional field were deleted in this > segment: > @@ -188,4 +205,48 @@ class SimpleTextDimensionalReader extend > public String toString() { > return "SimpleTextDimensionalReader(segment=" + > readState.segmentInfo.name + " maxDoc=" + readState.segmentInfo.maxDoc() + > ")"; > } > + > + @Override > + public byte[] getMinPackedValue(String fieldName) { > + BKDReader bkdReader = getBKDReader(fieldName); > + if (bkdReader == null) { > + // Schema ghost corner case! This field did index dimensional values > in the past, but > + // now all docs having this dimensional field were deleted in this > segment: > + return null; > + } > + return bkdReader.getMinPackedValue(); > + } > + > + @Override > + public byte[] getMaxPackedValue(String fieldName) { > + BKDReader bkdReader = getBKDReader(fieldName); > + if (bkdReader == null) { > + // Schema ghost corner case! This field did index dimensional values > in the past, but > + // now all docs having this dimensional field were deleted in this > segment: > + return null; > + } > + return bkdReader.getMinPackedValue(); > + } > + > + @Override > + public int getNumDimensions(String fieldName) { > + BKDReader bkdReader = getBKDReader(fieldName); > + if (bkdReader == null) { > + // Schema ghost corner case! 
This field did index dimensional values > in the past, but > + // now all docs having this dimensional field were deleted in this > segment: > + return 0; > + } > + return bkdReader.getNumDimensions(); > + } > + > + @Override > + public int getBytesPerDimension(String fieldName) { > + BKDReader bkdReader = getBKDReader(fieldName); > + if (bkdReader == null) { > + // Schema ghost corner case! This field did index dimensional values > in the past, but > + // now all docs having this dimensional field were deleted in this > segment: > + return 0; > + } > + return bkdReader.getBytesPerDimension(); > + } > } > > Modified: > lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalWriter.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalWriter.java?rev=1723682&r1=1723681&r2=1723682&view=diff > ============================================================================== > --- > lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalWriter.java > (original) > +++ > lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDimensionalWriter.java > Fri Jan 8 10:52:15 2016 > @@ -49,6 +49,8 @@ class SimpleTextDimensionalWriter extend > final static BytesRef FIELD_COUNT = new BytesRef("field count "); > final static BytesRef FIELD_FP_NAME = new BytesRef(" field fp name "); > final static BytesRef FIELD_FP = new BytesRef(" field fp "); > + final static BytesRef MIN_VALUE = new BytesRef("min value "); > + final static BytesRef MAX_VALUE = new BytesRef("max value "); > > private IndexOutput dataOut; > final BytesRefBuilder scratch = new BytesRefBuilder(); > @@ -90,6 +92,14 @@ class SimpleTextDimensionalWriter extend > writeInt(out, leafBlockFPs.length); > newline(out); > > + write(out, MIN_VALUE); > + BytesRef br = new BytesRef(minPackedValue, 0, > minPackedValue.length); > + 
write(out, br.toString()); > + > + write(out, MAX_VALUE); > + br = new BytesRef(maxPackedValue, 0, maxPackedValue.length); > + write(out, br.toString()); > + > for(int i=0;i<leafBlockFPs.length;i++) { > write(out, BLOCK_FP); > writeLong(out, leafBlockFPs[i]); > @@ -109,7 +119,7 @@ class SimpleTextDimensionalWriter extend > writeInt(out, splitPackedValues[i * (1 + > fieldInfo.getDimensionNumBytes())] & 0xff); > newline(out); > write(out, SPLIT_VALUE); > - BytesRef br = new BytesRef(splitPackedValues, 1+(i * > (1+fieldInfo.getDimensionNumBytes())), fieldInfo.getDimensionNumBytes()); > + br = new BytesRef(splitPackedValues, 1+(i * > (1+fieldInfo.getDimensionNumBytes())), fieldInfo.getDimensionNumBytes()); > write(out, br.toString()); > newline(out); > } > > Modified: > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalFormat.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalFormat.java?rev=1723682&r1=1723681&r2=1723682&view=diff > ============================================================================== > --- > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalFormat.java > (original) > +++ > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalFormat.java > Fri Jan 8 10:52:15 2016 > @@ -75,6 +75,26 @@ public abstract class DimensionalFormat > public void intersect(String fieldName, IntersectVisitor visitor) { > throw new IllegalArgumentException("field=\"" + fieldName + "\" > was not indexed with dimensional values"); > } > + > + @Override > + public byte[] getMinPackedValue(String fieldName) { > + throw new IllegalArgumentException("field=\"" + fieldName + "\" > was not indexed with dimensional values"); > + } > + > + @Override > + public byte[] getMaxPackedValue(String fieldName) { > + throw new IllegalArgumentException("field=\"" + fieldName + "\" > was not indexed with dimensional values"); > + } > + > + @Override > + public 
int getNumDimensions(String fieldName) { > + throw new IllegalArgumentException("field=\"" + fieldName + "\" > was not indexed with dimensional values"); > + } > + > + @Override > + public int getBytesPerDimension(String fieldName) { > + throw new IllegalArgumentException("field=\"" + fieldName + "\" > was not indexed with dimensional values"); > + } > }; > } > }; > > Modified: > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalWriter.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalWriter.java?rev=1723682&r1=1723681&r2=1723682&view=diff > ============================================================================== > --- > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalWriter.java > (original) > +++ > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/DimensionalWriter.java > Fri Jan 8 10:52:15 2016 > @@ -95,6 +95,26 @@ public abstract class DimensionalWriter > @Override > public void close() { > } > + > + @Override > + public byte[] getMinPackedValue(String fieldName) { > + throw new UnsupportedOperationException(); > + } > + > + @Override > + public byte[] getMaxPackedValue(String fieldName) { > + throw new UnsupportedOperationException(); > + } > + > + @Override > + public int getNumDimensions(String fieldName) { > + throw new UnsupportedOperationException(); > + } > + > + @Override > + public int getBytesPerDimension(String fieldName) { > + throw new UnsupportedOperationException(); > + } > }); > } > > > Modified: > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60DimensionalReader.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60DimensionalReader.java?rev=1723682&r1=1723681&r2=1723682&view=diff > ============================================================================== > --- > 
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60DimensionalReader.java > (original) > +++ > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60DimensionalReader.java > Fri Jan 8 10:52:15 2016 > @@ -84,24 +84,29 @@ public class Lucene60DimensionalReader e > } > } > > - @Override > - public void intersect(String field, IntersectVisitor visitor) throws > IOException { > - FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(field); > + private BKDReader getBKDReader(String fieldName) { > + FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(fieldName); > if (fieldInfo == null) { > - throw new IllegalArgumentException("field=\"" + field + "\" is > unrecognized"); > + throw new IllegalArgumentException("field=\"" + fieldName + "\" is > unrecognized"); > } > if (fieldInfo.getDimensionCount() == 0) { > - throw new IllegalArgumentException("field=\"" + field + "\" did not > index dimensional values"); > + throw new IllegalArgumentException("field=\"" + fieldName + "\" did > not index dimensional values"); > } > > - BKDReader reader = readers.get(fieldInfo.number); > - if (reader == null) { > + return readers.get(fieldInfo.number); > + } > + > + @Override > + public void intersect(String fieldName, IntersectVisitor visitor) throws > IOException { > + BKDReader bkdReader = getBKDReader(fieldName); > + > + if (bkdReader == null) { > // Schema ghost corner case! This field did index dimensional values > in the past, but > // now all docs having this dimensional field were deleted in this > segment: > return; > } > > - reader.intersect(visitor); > + bkdReader.intersect(visitor); > } > > @Override > @@ -134,5 +139,51 @@ public class Lucene60DimensionalReader e > // Free up heap: > readers.clear(); > } > + > + @Override > + public byte[] getMinPackedValue(String fieldName) { > + BKDReader bkdReader = getBKDReader(fieldName); > + if (bkdReader == null) { > + // Schema ghost corner case! 
This field did index dimensional values > in the past, but > + // now all docs having this dimensional field were deleted in this > segment: > + return null; > + } > + > + return bkdReader.getMinPackedValue(); > + } > + > + @Override > + public byte[] getMaxPackedValue(String fieldName) { > + BKDReader bkdReader = getBKDReader(fieldName); > + if (bkdReader == null) { > + // Schema ghost corner case! This field did index dimensional values > in the past, but > + // now all docs having this dimensional field were deleted in this > segment: > + return null; > + } > + > + return bkdReader.getMaxPackedValue(); > + } > + > + @Override > + public int getNumDimensions(String fieldName) { > + BKDReader bkdReader = getBKDReader(fieldName); > + if (bkdReader == null) { > + // Schema ghost corner case! This field did index dimensional values > in the past, but > + // now all docs having this dimensional field were deleted in this > segment: > + return 0; > + } > + return bkdReader.getNumDimensions(); > + } > + > + @Override > + public int getBytesPerDimension(String fieldName) { > + BKDReader bkdReader = getBKDReader(fieldName); > + if (bkdReader == null) { > + // Schema ghost corner case! 
This field did index dimensional values > in the past, but > + // now all docs having this dimensional field were deleted in this > segment: > + return 0; > + } > + return bkdReader.getBytesPerDimension(); > + } > } > > > Modified: > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValues.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValues.java?rev=1723682&r1=1723681&r2=1723682&view=diff > ============================================================================== > --- > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValues.java > (original) > +++ > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValues.java > Fri Jan 8 10:52:15 2016 > @@ -39,7 +39,7 @@ public abstract class DimensionalValues > /** Maximum number of dimensions */ > public static final int MAX_DIMENSIONS = BKDWriter.MAX_DIMS; > > - /** Defautl constructor */ > + /** Default constructor */ > protected DimensionalValues() { > } > > @@ -78,4 +78,16 @@ public abstract class DimensionalValues > * This method does not enforce live docs, so it's up to the caller > * to test whether each document is deleted, if necessary. 
*/ > public abstract void intersect(String fieldName, IntersectVisitor visitor) > throws IOException; > + > + /** Returns minimum value for each dimension, packed, or null if no points > were indexed */ > + public abstract byte[] getMinPackedValue(String fieldName) throws > IOException; > + > + /** Returns maximum value for each dimension, packed, or null if no points > were indexed */ > + public abstract byte[] getMaxPackedValue(String fieldName) throws > IOException; > + > + /** Returns how many dimensions were indexed */ > + public abstract int getNumDimensions(String fieldName) throws IOException; > + > + /** Returns the number of bytes per dimension */ > + public abstract int getBytesPerDimension(String fieldName) throws > IOException; > } > > Modified: > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValuesWriter.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValuesWriter.java?rev=1723682&r1=1723681&r2=1723682&view=diff > ============================================================================== > --- > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValuesWriter.java > (original) > +++ > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DimensionalValuesWriter.java > Fri Jan 8 10:52:15 2016 > @@ -63,8 +63,6 @@ class DimensionalValuesWriter { > > public void flush(SegmentWriteState state, DimensionalWriter writer) > throws IOException { > > - final int maxDoc = state.segmentInfo.maxDoc(); > - > writer.writeField(fieldInfo, > new DimensionalReader() { > @Override > @@ -91,6 +89,26 @@ class DimensionalValuesWriter { > @Override > public void close() { > } > + > + @Override > + public byte[] getMinPackedValue(String fieldName) { > + throw new UnsupportedOperationException(); > + } > + > + @Override > + public byte[] getMaxPackedValue(String fieldName) { > + throw new UnsupportedOperationException(); > + } > + > + @Override > + 
public int getNumDimensions(String fieldName) { > + throw new UnsupportedOperationException(); > + } > + > + @Override > + public int getBytesPerDimension(String fieldName) { > + throw new UnsupportedOperationException(); > + } > }); > } > } > > Modified: > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiDimensionalValues.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiDimensionalValues.java?rev=1723682&r1=1723681&r2=1723682&view=diff > ============================================================================== > --- > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiDimensionalValues.java > (original) > +++ > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiDimensionalValues.java > Fri Jan 8 10:52:15 2016 > @@ -21,6 +21,8 @@ import java.io.IOException; > import java.util.ArrayList; > import java.util.List; > > +import org.apache.lucene.util.StringHelper; > + > class MultiDimensionalValues extends DimensionalValues { > > private final List<DimensionalValues> subs; > @@ -95,4 +97,74 @@ class MultiDimensionalValues extends Dim > b.append(')'); > return b.toString(); > } > + > + @Override > + public byte[] getMinPackedValue(String fieldName) throws IOException { > + byte[] result = null; > + for(int i=0;i<subs.size();i++) { > + byte[] minPackedValue = subs.get(i).getMinPackedValue(fieldName); > + if (result == null) { > + if (minPackedValue != null) { > + result = minPackedValue.clone(); > + } > + } else { > + int numDims = subs.get(0).getNumDimensions(fieldName); > + int bytesPerDim = subs.get(0).getBytesPerDimension(fieldName); > + for(int dim=0;dim<numDims;dim++) { > + int offset = dim*bytesPerDim; > + if (StringHelper.compare(bytesPerDim, minPackedValue, offset, > result, offset) < 0) { > + System.arraycopy(minPackedValue, offset, result, offset, > bytesPerDim); > + } > + } > + } > + } > + > + return result; > + } > + > + @Override > + public 
byte[] getMaxPackedValue(String fieldName) throws IOException { > + byte[] result = null; > + for(int i=0;i<subs.size();i++) { > + byte[] maxPackedValue = subs.get(i).getMaxPackedValue(fieldName); > + if (result == null) { > + if (maxPackedValue != null) { > + result = maxPackedValue.clone(); > + } > + } else { > + int numDims = subs.get(0).getNumDimensions(fieldName); > + int bytesPerDim = subs.get(0).getBytesPerDimension(fieldName); > + for(int dim=0;dim<numDims;dim++) { > + int offset = dim*bytesPerDim; > + if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, > result, offset) > 0) { > + System.arraycopy(maxPackedValue, offset, result, offset, > bytesPerDim); > + } > + } > + } > + } > + > + return result; > + } > + > + @Override > + public int getNumDimensions(String fieldName) throws IOException { > + for(int i=0;i<subs.size();i++) { > + int result = subs.get(i).getNumDimensions(fieldName); > + if (result != 0) { > + return result; > + } > + } > + return 0; > + } > + > + @Override > + public int getBytesPerDimension(String fieldName) throws IOException { > + for(int i=0;i<subs.size();i++) { > + int result = subs.get(i).getBytesPerDimension(fieldName); > + if (result != 0) { > + return result; > + } > + } > + return 0; > + } > } > > Modified: > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java?rev=1723682&r1=1723681&r2=1723682&view=diff > ============================================================================== > --- > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java > (original) > +++ > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java > Fri Jan 8 10:52:15 2016 > @@ -329,6 +329,58 @@ public class ParallelLeafReader extends > } > dimValues.intersect(fieldName, visitor); > } > + > + @Override > + public byte[] 
getMinPackedValue(String fieldName) throws IOException { > + LeafReader reader = fieldToReader.get(fieldName); > + if (reader == null) { > + return null; > + } > + DimensionalValues dimValues = reader.getDimensionalValues(); > + if (dimValues == null) { > + return null; > + } > + return dimValues.getMinPackedValue(fieldName); > + } > + > + @Override > + public byte[] getMaxPackedValue(String fieldName) throws IOException { > + LeafReader reader = fieldToReader.get(fieldName); > + if (reader == null) { > + return null; > + } > + DimensionalValues dimValues = reader.getDimensionalValues(); > + if (dimValues == null) { > + return null; > + } > + return dimValues.getMaxPackedValue(fieldName); > + } > + > + @Override > + public int getNumDimensions(String fieldName) throws IOException { > + LeafReader reader = fieldToReader.get(fieldName); > + if (reader == null) { > + return 0; > + } > + DimensionalValues dimValues = reader.getDimensionalValues(); > + if (dimValues == null) { > + return 0; > + } > + return dimValues.getNumDimensions(fieldName); > + } > + > + @Override > + public int getBytesPerDimension(String fieldName) throws IOException { > + LeafReader reader = fieldToReader.get(fieldName); > + if (reader == null) { > + return 0; > + } > + DimensionalValues dimValues = reader.getDimensionalValues(); > + if (dimValues == null) { > + return 0; > + } > + return dimValues.getBytesPerDimension(fieldName); > + } > }; > } > > > Modified: > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java?rev=1723682&r1=1723681&r2=1723682&view=diff > ============================================================================== > --- > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java > (original) > +++ > 
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java > Fri Jan 8 10:52:15 2016 > @@ -152,6 +152,26 @@ public final class SlowCodecReaderWrappe > public long ramBytesUsed() { > return 0; > } > + > + @Override > + public byte[] getMinPackedValue(String fieldName) throws IOException { > + return values.getMinPackedValue(fieldName); > + } > + > + @Override > + public byte[] getMaxPackedValue(String fieldName) throws IOException { > + return values.getMaxPackedValue(fieldName); > + } > + > + @Override > + public int getNumDimensions(String fieldName) throws IOException { > + return values.getNumDimensions(fieldName); > + } > + > + @Override > + public int getBytesPerDimension(String fieldName) throws IOException { > + return values.getBytesPerDimension(fieldName); > + } > }; > } > > > Modified: > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java?rev=1723682&r1=1723681&r2=1723682&view=diff > ============================================================================== > --- > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java > (original) > +++ > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java > Fri Jan 8 10:52:15 2016 > @@ -43,6 +43,8 @@ public class BKDReader implements Accoun > final int bytesPerDim; > final IndexInput in; > final int maxPointsInLeafNode; > + final byte[] minPackedValue; > + final byte[] maxPackedValue; > protected final int packedBytesLength; > > /** Caller must pre-seek the provided {@link IndexInput} to the index > location that {@link BKDWriter#finish} returned */ > @@ -58,6 +60,11 @@ public class BKDReader implements Accoun > assert numLeaves > 0; > leafNodeOffset = numLeaves; > > + minPackedValue = new byte[packedBytesLength]; > + maxPackedValue = new byte[packedBytesLength]; > + 
in.readBytes(minPackedValue, 0, packedBytesLength); > + in.readBytes(maxPackedValue, 0, packedBytesLength); > + > splitPackedValues = new byte[(1+bytesPerDim)*numLeaves]; > > // TODO: don't write split packed values[0]! > @@ -116,8 +123,9 @@ public class BKDReader implements Accoun > this.in = in; > } > > - /** Called by consumers that have their own on-disk format for the index */ > - protected BKDReader(IndexInput in, int numDims, int maxPointsInLeafNode, > int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues) throws > IOException { > + /** Called by consumers that have their own on-disk format for the index > (e.g. SimpleText) */ > + protected BKDReader(IndexInput in, int numDims, int maxPointsInLeafNode, > int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues, > + byte[] minPackedValue, byte[] maxPackedValue) throws > IOException { > this.in = in; > this.numDims = numDims; > this.maxPointsInLeafNode = maxPointsInLeafNode; > @@ -126,6 +134,10 @@ public class BKDReader implements Accoun > this.leafNodeOffset = leafBlockFPs.length; > this.leafBlockFPs = leafBlockFPs; > this.splitPackedValues = splitPackedValues; > + this.minPackedValue = minPackedValue; > + this.maxPackedValue = maxPackedValue; > + assert minPackedValue.length == packedBytesLength; > + assert maxPackedValue.length == packedBytesLength; > } > > private static class VerifyVisitor implements IntersectVisitor { > @@ -405,4 +417,20 @@ public class BKDReader implements Accoun > return splitPackedValues.length + > leafBlockFPs.length * RamUsageEstimator.NUM_BYTES_LONG; > } > + > + public byte[] getMinPackedValue() { > + return minPackedValue.clone(); > + } > + > + public byte[] getMaxPackedValue() { > + return maxPackedValue.clone(); > + } > + > + public int getNumDimensions() { > + return numDims; > + } > + > + public int getBytesPerDimension() { > + return bytesPerDim; > + } > } > > Modified: > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java > URL: > 
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java?rev=1723682&r1=1723681&r2=1723682&view=diff > ============================================================================== > --- > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java > (original) > +++ > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java > Fri Jan 8 10:52:15 2016 > @@ -119,6 +119,12 @@ public class BKDWriter implements Closea > protected final int maxPointsInLeafNode; > private final int maxPointsSortInHeap; > > + /** Minimum per-dim values, packed */ > + protected final byte[] minPackedValue; > + > + /** Maximum per-dim values, packed */ > + protected final byte[] maxPackedValue; > + > private long pointCount; > > public BKDWriter(Directory tempDir, String tempFileNamePrefix, int > numDims, int bytesPerDim) throws IOException { > @@ -142,6 +148,9 @@ public class BKDWriter implements Closea > scratch2 = new byte[packedBytesLength]; > commonPrefixLengths = new int[numDims]; > > + minPackedValue = new byte[packedBytesLength]; > + maxPackedValue = new byte[packedBytesLength]; > + > // dimensional values (numDims * bytesPerDim) + ord (long) + docID (int) > bytesPerDoc = packedBytesLength + RamUsageEstimator.NUM_BYTES_LONG + > RamUsageEstimator.NUM_BYTES_INT; > > @@ -213,6 +222,22 @@ public class BKDWriter implements Closea > heapPointWriter.append(packedValue, pointCount, docID); > } > > + // TODO: we could specialize for the 1D case: > + if (pointCount == 0) { > + System.arraycopy(packedValue, 0, minPackedValue, 0, packedBytesLength); > + System.arraycopy(packedValue, 0, maxPackedValue, 0, packedBytesLength); > + } else { > + for(int dim=0;dim<numDims;dim++) { > + int offset = dim*bytesPerDim; > + if (StringHelper.compare(bytesPerDim, packedValue, offset, > minPackedValue, offset) < 0) { > + System.arraycopy(packedValue, offset, minPackedValue, offset, > bytesPerDim); > + } > + if 
(StringHelper.compare(bytesPerDim, packedValue, offset, > maxPackedValue, offset) > 0) { > + System.arraycopy(packedValue, offset, maxPackedValue, offset, > bytesPerDim); > + } > + } > + } > + > pointCount++; > } > > @@ -398,6 +423,11 @@ public class BKDWriter implements Closea > leafBlockDocIDs[leafCount] = reader.docIDBase + reader.docID; > System.arraycopy(reader.state.scratchPackedValue, 0, > leafBlockPackedValues[leafCount], 0, packedBytesLength); > > + if (valueCount == 0) { > + System.arraycopy(reader.state.scratchPackedValue, 0, minPackedValue, > 0, packedBytesLength); > + } > + System.arraycopy(reader.state.scratchPackedValue, 0, maxPackedValue, > 0, packedBytesLength); > + > assert numDims > 1 || valueInOrder(valueCount++, lastPackedValue, > reader.state.scratchPackedValue); > > if (leafCount == 0) { > @@ -836,6 +866,8 @@ public class BKDWriter implements Closea > > assert leafBlockFPs.length > 0; > out.writeVInt(leafBlockFPs.length); > + out.writeBytes(minPackedValue, 0, packedBytesLength); > + out.writeBytes(maxPackedValue, 0, packedBytesLength); > > // TODO: for 1D case, don't waste the first byte of each split value > (it's always 0) > > > Modified: > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDimensionalValues.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDimensionalValues.java?rev=1723682&r1=1723681&r2=1723682&view=diff > ============================================================================== > --- > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDimensionalValues.java > (original) > +++ > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDimensionalValues.java > Fri Jan 8 10:52:15 2016 > @@ -20,6 +20,7 @@ package org.apache.lucene.index; > import java.io.IOException; > import java.math.BigInteger; > import java.util.ArrayList; > +import java.util.Arrays; > import java.util.BitSet; > import java.util.List; > > @@ -46,6 
+47,7 @@ import org.apache.lucene.util.BytesRef; > import org.apache.lucene.util.IOUtils; > import org.apache.lucene.util.LuceneTestCase; > import org.apache.lucene.util.NumericUtils; > +import org.apache.lucene.util.StringHelper; > import org.apache.lucene.util.TestUtil; > > // TODO: factor out a BaseTestDimensionFormat > @@ -906,6 +908,28 @@ public class TestDimensionalValues exten > RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); > DirectoryReader r = null; > > + // Compute actual min/max values: > + byte[][] expectedMinValues = new byte[numDims][]; > + byte[][] expectedMaxValues = new byte[numDims][]; > + for(int ord=0;ord<docValues.length;ord++) { > + for(int dim=0;dim<numDims;dim++) { > + if (ord == 0) { > + expectedMinValues[dim] = new byte[numBytesPerDim]; > + System.arraycopy(docValues[ord][dim], 0, expectedMinValues[dim], > 0, numBytesPerDim); > + expectedMaxValues[dim] = new byte[numBytesPerDim]; > + System.arraycopy(docValues[ord][dim], 0, expectedMaxValues[dim], > 0, numBytesPerDim); > + } else { > + // TODO: it's cheating that we use StringHelper.compare for > "truth": what if it's buggy? 
> + if (StringHelper.compare(numBytesPerDim, docValues[ord][dim], 0, > expectedMinValues[dim], 0) < 0) { > + System.arraycopy(docValues[ord][dim], 0, expectedMinValues[dim], > 0, numBytesPerDim); > + } > + if (StringHelper.compare(numBytesPerDim, docValues[ord][dim], 0, > expectedMaxValues[dim], 0) > 0) { > + System.arraycopy(docValues[ord][dim], 0, expectedMaxValues[dim], > 0, numBytesPerDim); > + } > + } > + } > + } > + > // 20% of the time we add into a separate directory, then at some point > use > // addIndexes to bring the indexed dimensional values to the main > directory: > Directory saveDir; > @@ -1036,6 +1060,19 @@ public class TestDimensionalValues exten > NumericDocValues idValues = MultiDocValues.getNumericValues(r, "id"); > Bits liveDocs = MultiFields.getLiveDocs(r); > > + // Verify min/max values are correct: > + byte[] minValues = dimValues.getMinPackedValue("field"); > + byte[] maxValues = dimValues.getMaxPackedValue("field"); > + byte[] scratch = new byte[numBytesPerDim]; > + for(int dim=0;dim<numDims;dim++) { > + System.arraycopy(minValues, dim*numBytesPerDim, scratch, 0, > scratch.length); > + //System.out.println("dim=" + dim + " expectedMin=" + new > BytesRef(expectedMinValues[dim]) + " min=" + new BytesRef(scratch)); > + assertTrue(Arrays.equals(expectedMinValues[dim], scratch)); > + System.arraycopy(maxValues, dim*numBytesPerDim, scratch, 0, > scratch.length); > + //System.out.println("dim=" + dim + " expectedMax=" + new > BytesRef(expectedMaxValues[dim]) + " max=" + new BytesRef(scratch)); > + assertTrue(Arrays.equals(expectedMaxValues[dim], scratch)); > + } > + > int iters = atLeast(100); > for(int iter=0;iter<iters;iter++) { > if (VERBOSE) { > > Modified: > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java?rev=1723682&r1=1723681&r2=1723682&view=diff > 
============================================================================== > --- > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java > (original) > +++ > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java > Fri Jan 8 10:52:15 2016 > @@ -123,6 +123,10 @@ public class TestBKD extends LuceneTestC > } > int[][] docs = new int[numDocs][]; > byte[] scratch = new byte[4*numDims]; > + int[] minValue = new int[numDims]; > + int[] maxValue = new int[numDims]; > + Arrays.fill(minValue, Integer.MAX_VALUE); > + Arrays.fill(maxValue, Integer.MIN_VALUE); > for(int docID=0;docID<numDocs;docID++) { > int[] values = new int[numDims]; > if (VERBOSE) { > @@ -130,6 +134,12 @@ public class TestBKD extends LuceneTestC > } > for(int dim=0;dim<numDims;dim++) { > values[dim] = random().nextInt(); > + if (values[dim] < minValue[dim]) { > + minValue[dim] = values[dim]; > + } > + if (values[dim] > maxValue[dim]) { > + maxValue[dim] = values[dim]; > + } > NumericUtils.intToBytes(values[dim], scratch, dim); > if (VERBOSE) { > System.out.println(" " + dim + " -> " + values[dim]); > @@ -148,6 +158,13 @@ public class TestBKD extends LuceneTestC > in.seek(indexFP); > BKDReader r = new BKDReader(in); > > + byte[] minPackedValue = r.getMinPackedValue(); > + byte[] maxPackedValue = r.getMaxPackedValue(); > + for(int dim=0;dim<numDims;dim++) { > + assertEquals(minValue[dim], > NumericUtils.bytesToInt(minPackedValue, dim)); > + assertEquals(maxValue[dim], > NumericUtils.bytesToInt(maxPackedValue, dim)); > + } > + > int iters = atLeast(100); > for(int iter=0;iter<iters;iter++) { > if (VERBOSE) { > > Modified: > lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java?rev=1723682&r1=1723681&r2=1723682&view=diff > ============================================================================== > --- > 
lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java > (original) > +++ > lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/index/SortingLeafReader.java > Fri Jan 8 10:52:15 2016 > @@ -265,8 +265,8 @@ public class SortingLeafReader extends F > } > > @Override > - public void intersect(String field, IntersectVisitor visitor) throws > IOException { > - in.intersect(field, > + public void intersect(String fieldName, IntersectVisitor visitor) throws > IOException { > + in.intersect(fieldName, > new IntersectVisitor() { > @Override > public void visit(int docID) throws IOException { > @@ -284,6 +284,26 @@ public class SortingLeafReader extends F > } > }); > } > + > + @Override > + public byte[] getMinPackedValue(String fieldName) throws IOException { > + return in.getMinPackedValue(fieldName); > + } > + > + @Override > + public byte[] getMaxPackedValue(String fieldName) throws IOException { > + return in.getMaxPackedValue(fieldName); > + } > + > + @Override > + public int getNumDimensions(String fieldName) throws IOException { > + return in.getNumDimensions(fieldName); > + } > + > + @Override > + public int getBytesPerDimension(String fieldName) throws IOException { > + return in.getBytesPerDimension(fieldName); > + } > } > > private static class SortingSortedDocValues extends SortedDocValues { > > Modified: > lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDimensionalFormat.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDimensionalFormat.java?rev=1723682&r1=1723681&r2=1723682&view=diff > ============================================================================== > --- > lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDimensionalFormat.java > (original) > +++ > 
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDimensionalFormat.java > Fri Jan 8 10:52:15 2016 > @@ -98,6 +98,26 @@ public final class AssertingDimensionalF > public String toString() { > return getClass().getSimpleName() + "(" + in.toString() + ")"; > } > + > + @Override > + public byte[] getMinPackedValue(String fieldName) throws IOException { > + return in.getMinPackedValue(fieldName); > + } > + > + @Override > + public byte[] getMaxPackedValue(String fieldName) throws IOException { > + return in.getMaxPackedValue(fieldName); > + } > + > + @Override > + public int getNumDimensions(String fieldName) throws IOException { > + return in.getNumDimensions(fieldName); > + } > + > + @Override > + public int getBytesPerDimension(String fieldName) throws IOException { > + return in.getBytesPerDimension(fieldName); > + } > } > > static class AssertingDimensionalWriter extends DimensionalWriter { > > > > --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org