nknize commented on code in PR #1017:
URL: https://github.com/apache/lucene/pull/1017#discussion_r924571676


##########
lucene/core/src/java/org/apache/lucene/document/LatLonShape.java:
##########
@@ -148,6 +234,18 @@ public static Query newBoxQuery(
     return new LatLonShapeBoundingBoxQuery(field, queryRelation, rectangle);
   }
 
+  /** create a docvalue query to find all geo shapes that intersect a defined 
bounding box * */
+  public static Query newDocValuesBoxQuery(

Review Comment:
   +1 for `newSlowBoxQuery` 
   
   +1 for the separate idea, but I was planning to go that route in a 
follow-up incremental PR after the field settles.



##########
lucene/core/src/java/org/apache/lucene/document/ShapeDocValuesField.java:
##########
@@ -0,0 +1,896 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.ShapeField.DecodedTriangle.TYPE;
+import org.apache.lucene.document.ShapeField.QueryRelation;
+import org.apache.lucene.document.SpatialQuery.EncodedRectangle;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.IndexableFieldType;
+import org.apache.lucene.index.PointValues.Relation;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ByteBuffersDataOutput;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+
+/** A doc values field representation for {@link LatLonShape} and {@link 
XYShape} */
+public final class ShapeDocValuesField extends Field {
+  private final ShapeComparator shapeComparator;
+
+  private static final FieldType FIELD_TYPE = new FieldType();
+
+  static {
+    FIELD_TYPE.setDocValuesType(DocValuesType.BINARY);
+    FIELD_TYPE.setOmitNorms(true);
+    FIELD_TYPE.freeze();
+  }
+
+  /**
+   * Creates a {@ShapeDocValueField} instance from a shape tessellation
+   *
+   * @param name The Field Name (must not be null)
+   * @param tessellation The tessellation (must not be null)
+   */
+  ShapeDocValuesField(String name, List<ShapeField.DecodedTriangle> 
tessellation) {
+    super(name, FIELD_TYPE);
+    BytesRef b = computeBinaryValue(tessellation);
+    this.fieldsData = b;
+    try {
+      this.shapeComparator = new ShapeComparator(b);
+    } catch (IOException e) {
+      throw new IllegalArgumentException("unable to read binary shape doc 
value field. ", e);
+    }
+  }
+
+  /** Creates a {@code ShapeDocValue} field from a given serialized value */
+  ShapeDocValuesField(String name, BytesRef binaryValue) {
+    super(name, FIELD_TYPE);
+    this.fieldsData = binaryValue;
+    try {
+      this.shapeComparator = new ShapeComparator(binaryValue);
+    } catch (IOException e) {
+      throw new IllegalArgumentException("unable to read binary shape doc 
value field. ", e);
+    }
+  }
+
+  /** The name of the field */
+  @Override
+  public String name() {
+    return name;
+  }
+
+  /** Gets the {@code IndexableFieldType} for this ShapeDocValue field */
+  @Override
+  public IndexableFieldType fieldType() {
+    return FIELD_TYPE;
+  }
+
+  /** Currently there is no string representation for the ShapeDocValueField */
+  @Override
+  public String stringValue() {
+    return null;
+  }
+
+  /** TokenStreams are not yet supported */
+  @Override
+  public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
+    return null;
+  }
+
+  /** create a shape docvalue field from indexable fields */
+  public static ShapeDocValuesField createDocValueField(String fieldName, 
Field[] indexableFields) {
+    ArrayList<ShapeField.DecodedTriangle> tess = new 
ArrayList<>(indexableFields.length);
+    final byte[] scratch = new byte[7 * Integer.BYTES];
+    for (Field f : indexableFields) {
+      BytesRef br = f.binaryValue();
+      assert br.length == 7 * ShapeField.BYTES;
+      System.arraycopy(br.bytes, br.offset, scratch, 0, 7 * ShapeField.BYTES);
+      ShapeField.DecodedTriangle t = new ShapeField.DecodedTriangle();
+      ShapeField.decodeTriangle(scratch, t);
+      tess.add(t);
+    }
+    return new ShapeDocValuesField(fieldName, tess);
+  }
+
+  /** Returns the number of terms (tessellated triangles) for this shape */
+  public int numberOfTerms() {
+    return shapeComparator.numberOfTerms();
+  }
+
+  /** Creates a geometry query for shape docvalues */
+  public static Query newGeometryQuery(
+      final String field, final QueryRelation relation, Object... geometries) {
+    return null;
+    // TODO
+    //  return new ShapeDocValuesQuery(field, relation, geometries);
+  }
+
+  /** Compute the spatial relation of this shape and a bounding box (in 
encoded space) */
+  public Relation relate(final int minX, final int maxX, final int minY, final 
int maxY)
+      throws IOException {
+    return shapeComparator.relate(minX, maxX, minY, maxY);
+  }
+
+  /** returns the min x value for the shape's bounding box */
+  public int getMinX() {
+    return shapeComparator.getMinX();
+  }
+
+  /** returns the min y value for the shape's bounding box */
+  public int getMinY() {
+    return shapeComparator.getMinY();
+  }
+
+  /** returns the max x value for the shape's bounding box */
+  public int getMaxX() {
+    return shapeComparator.getMaxX();
+  }
+
+  /** returns the max y value for the shape's bounding box */
+  public int getMaxY() {
+    return shapeComparator.getMaxY();
+  }
+
+  /** Retrieves the x centroid location for the geometry(s) */
+  public int getCentroidX() {
+    return shapeComparator.getCentroidX();
+  }
+
+  /** Retrieves the y centroid location for the geometry(s) */
+  public int getCentroidY() {
+    return shapeComparator.getCentroidY();
+  }
+
+  /**
+   * Retrieves the highest dimensional type (POINT, LINE, TRIANGLE) for 
computing the geometry(s)
+   * centroid
+   */
+  public TYPE getHighestDimensionType() {
+    return shapeComparator.getHighestDimension();
+  }
+
+  private BytesRef computeBinaryValue(List<ShapeField.DecodedTriangle> 
tessellation) {
+    try {
+      // dfs order serialization
+      List<TreeNode> dfsSerialized = new ArrayList<>(tessellation.size());
+      buildTree(tessellation, dfsSerialized);
+      Writer w = new Writer(dfsSerialized);
+      return w.getBytesRef();
+    } catch (IOException e) {
+      throw new RuntimeException("Internal error building 
LatLonShapeDocValues. Got ", e);
+    }
+  }
+
+  /** main entry point to build the tessellation tree * */
+  public TreeNode buildTree(
+      List<ShapeField.DecodedTriangle> tessellation, List<TreeNode> 
dfsSerialized)
+      throws IOException {
+    if (tessellation.size() == 1) {
+      ShapeField.DecodedTriangle t = tessellation.get(0);
+      TreeNode node = new TreeNode(t);
+      if (t.type == TYPE.LINE) {
+        node.midX /= node.length;
+        node.midY /= node.length;
+      } else if (t.type == TYPE.TRIANGLE) {
+        node.midX /= node.signedArea;
+        node.midY /= node.signedArea;
+      }
+      node.highestType = t.type;
+      dfsSerialized.add(node);
+      return node;
+    }
+    TreeNode[] triangles = new TreeNode[tessellation.size()];
+    int i = 0;
+    int minY = Integer.MAX_VALUE;
+    int minX = Integer.MAX_VALUE;
+    int maxY = Integer.MIN_VALUE;
+    int maxX = Integer.MIN_VALUE;
+
+    // running stats for computing centroid
+    double totalSignedArea = 0;
+    double totalLength = 0;
+    double numXPnt = 0;
+    double numYPnt = 0;
+    double numXLin = 0;
+    double numYLin = 0;
+    double numXPly = 0;
+    double numYPly = 0;
+    TYPE highestType = TYPE.POINT;
+
+    for (ShapeField.DecodedTriangle t : tessellation) {
+      TreeNode node = new TreeNode(t);
+      triangles[i++] = node;
+      // compute the bbox values up front
+      minY = Math.min(minY, node.minY);
+      minX = Math.min(minX, node.minX);
+      maxY = Math.max(maxY, node.maxY);
+      maxX = Math.max(maxX, node.maxX);
+
+      // compute the running centroid stats
+      totalSignedArea += node.signedArea; // non-zero if any components are 
triangles
+      totalLength += node.length; // non-zero if any components are line 
segments
+      if (t.type == TYPE.POINT) {
+        numXPnt += node.midX;
+        numYPnt += node.midY;
+      } else if (t.type == TYPE.LINE) {
+        if (highestType == TYPE.POINT) {
+          highestType = TYPE.LINE;
+        }
+        numXLin += node.midX;
+        numYLin += node.midY;
+      } else {
+        if (highestType != TYPE.TRIANGLE) {
+          highestType = TYPE.TRIANGLE;
+        }
+        numXPly += node.midX;
+        numYPly += node.midY;
+      }
+    }
+    TreeNode root = createTree(triangles, 0, triangles.length - 1, false, 
null, dfsSerialized);
+
+    // pull up min values for the root node so the bbox is consistent
+    root.minY = minY;
+    root.minX = minX;
+
+    // set the highest dimensional type
+    root.highestType = highestType;
+
+    // compute centroid values for the root node so the centroid is consistent
+    if (highestType == TYPE.POINT) {
+      root.midX = numXPnt / i;
+      root.midY = numYPnt / i;
+    } else if (highestType == TYPE.LINE) {
+      // numerator is sum of segment midPoints times segment length
+      // divide by total length per
+      // https://www.ae.msstate.edu/vlsm/shape/centroid_of_a_line/straight.htm
+      root.midX = numXLin / totalLength;
+      root.midY = numYLin / totalLength;
+    } else {
+      // numerator is sum of triangle centroids times triangle signed area
+      // divide by total signed area per 
http://www.faqs.org/faqs/graphics/algorithms-faq/
+      root.midX = numXPly / totalSignedArea;
+      root.midY = numYPly / totalSignedArea;
+    }
+
+    return root;
+  }
+
+  /** creates the tree */
+  private TreeNode createTree(
+      TreeNode[] triangles,
+      int low,
+      int high,
+      boolean splitX,
+      TreeNode parent,
+      List<TreeNode> dfsSerialized) {
+    if (low > high) {
+      return null;
+    }
+    // add midpoint
+    int mid = (low + high) >>> 1;
+    if (low < high) {
+      Comparator<TreeNode> comparator =
+          splitX
+              ? Comparator.comparingInt((TreeNode left) -> left.minX)
+                  .thenComparingInt(left -> left.maxX)
+              : Comparator.comparingInt((TreeNode left) -> left.minY)
+                  .thenComparingInt(left -> left.maxY);
+      ArrayUtil.select(triangles, low, high + 1, mid, comparator);
+    }
+    TreeNode newNode = triangles[mid];
+    dfsSerialized.add(newNode);
+    // set parent
+    newNode.parent = parent;
+
+    // add children
+    newNode.left = createTree(triangles, low, mid - 1, !splitX, newNode, 
dfsSerialized);
+    newNode.right = createTree(triangles, mid + 1, high, !splitX, newNode, 
dfsSerialized);
+    // pull up values to this node
+    if (newNode.left != null) {
+      newNode.maxX = Math.max(newNode.maxX, newNode.left.maxX);
+      newNode.maxY = Math.max(newNode.maxY, newNode.left.maxY);
+    }
+    if (newNode.right != null) {
+      newNode.maxX = Math.max(newNode.maxX, newNode.right.maxX);
+      newNode.maxY = Math.max(newNode.maxY, newNode.right.maxY);
+    }
+
+    // adjust byteSize based on new parent bbox values
+    if (newNode.left != null) {
+      // bounding box size
+      newNode.left.byteSize += vLongSize((long) newNode.maxX - 
newNode.left.maxX);

Review Comment:
   Doing that required me to carve out fixed-width long bytes for the size of 
each component, since you can't know the variable size of a component until you 
write it to the array. Instead, I opted to compute the size while the tree is 
being built, so that the component size can also be variable and the doc value 
byte array remains as small as possible. 
   
   I originally started with something like that, but the code was coming out 
harder to follow, so I left this as a possible follow-up enhancement so as not 
to hold up progress.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to