Kontinuation commented on code in PR #1992: URL: https://github.com/apache/sedona/pull/1992#discussion_r2160831596
########## common/src/main/java/org/apache/sedona/common/S2Geography/S2Geography.java: ########## @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.common.S2Geography; + +import static org.apache.sedona.common.S2Geography.S2Geography.GeographyKind.POINT; + +import com.google.common.geometry.*; +import java.io.*; +import java.util.ArrayList; +import java.util.List; + +/** + * An abstract class represent S2Geography. Has 6 subtypes of geography: POINT, POLYLINE, POLYGON, + * GEOGRAPHY_COLLECTION, SHAPE_INDEX, ENCODED_SHAPE_INDEX. + */ +public class S2Geography { + protected final GeographyKind kind; + + protected S2Geography(GeographyKind kind) { + this.kind = kind; + } + + public enum GeographyKind { + UNINITIALIZED(0), + POINT(1), + POLYLINE(2), + POLYGON(3), + GEOGRAPHY_COLLECTION(4), + SHAPE_INDEX(5), + ENCODED_SHAPE_INDEX(6), + CELL_CENTER(7); + + private final int kind; + + GeographyKind(int kind) { + this.kind = kind; + } + + /** Returns the integer tag for this kind. */ + public int getKind() { + return kind; + } + /** + * Look up the enum by its integer tag. + * + * @throws IllegalArgumentException if no matching kind exists. 
+ */ + public static GeographyKind fromKind(int kind) { + for (GeographyKind k : values()) { + if (k.getKind() == kind) return k; + } + throw new IllegalArgumentException("Unknown GeographyKind: " + kind); + } + } + /** + * @return 0, 1, or 2 if all Shape()s that are returned will have the same dimension (i.e., they + * are all points, all lines, or all polygons). + */ + public int dimension() { + if (numShapes() == 0) return -1; + int dim = shape(0).dimension(); + for (int i = 1; i < numShapes(); ++i) { + if (dim != shape(i).dimension()) return -1; + } + return dim; + } + + /** + * @return The number of S2Shape objects needed to represent this Geography + */ + public int numShapes() { + return 0; + } + + /** + * Returns the given S2Shape (where 0 <= id < num_shapes()). The caller retains ownership of the + * S2Shape but the data pointed to by the object requires that the underlying Geography outlives + * the returned object. + * + * @param id (where 0 <= id < num_shapes()) + * @return the given S2Shape + */ + public S2Shape shape(int id) { + return null; + } + + /** + * Returns an S2Region that represents the object. The caller retains ownership of the S2Region + * but the data pointed to by the object requires that the underlying Geography outlives the + * returned object. + * + * @return S2Region + */ + public S2Region region() { + return null; + } Review Comment: We can declare `S2Geography` as an abstract class and leave these methods abstract instead of providing placeholder implementations. Subclasses are then required to implement them. ########## common/src/main/java/org/apache/sedona/common/S2Geography/PointShape.java: ########## @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.common.S2Geography; + +import com.google.common.geometry.*; +import com.google.common.geometry.S2Edge; +import com.google.common.geometry.S2Point; +import com.google.common.geometry.S2Shape; +import java.util.List; + +/** + * Java equivalent of the C++ S2PointVectorShape: each point is a degenerate edge (start==end), one + * chain per point. + */ +/** A degenerate S2Shape that represents exactly one point. 
*/ +public final class PointShape implements S2Shape { + private final List<S2Point> point; + + public PointShape(List<S2Point> point) { + this.point = point; + } + + public int num_points() { + return point.size(); + } + + @Override + public int numEdges() { + return num_points(); + } + + @Override + public void getEdge(int index, MutableEdge result) { + if (index != 0) { + throw new IndexOutOfBoundsException("PointShape has exactly one edge"); + } + result.set(point.get(index), point.get(index)); + } + + public S2Edge edge(int e) { + return new S2Edge(point.get(e), point.get(e)); + } + + @Override + public int dimension() { + return 0; + } + + @Override + public boolean hasInterior() { + return false; + } + + @Override + public boolean containsOrigin() { + return false; + } + + @Override + public ReferencePoint getReferencePoint() { + // hasInterior=false, contained()=false + return ReferencePoint.create(point.get(0), false); + } + + @Override + public int numChains() { + return 1; + } + + @Override + public int getChainStart(int chainId) { + if (chainId != 0) { + throw new IndexOutOfBoundsException("PointShape has exactly one chain"); + } + return 0; + } + + @Override + public int getChainLength(int chainId) { + if (chainId != 0) { + throw new IndexOutOfBoundsException("PointShape has exactly one chain"); + } + return 1; + } + + @Override + public void getChainEdge(int chainId, int offset, MutableEdge result) { + if (chainId != 0 || offset != 0) { + throw new IndexOutOfBoundsException("PointShape chainId and offset must both be 0"); + } + result.set(point.get(chainId), point.get(chainId)); + } + + @Override + public S2Point getChainVertex(int chainId, int offset) { + if (chainId != 0 || offset != 0) { + throw new IndexOutOfBoundsException("PointShape chainId and offset must both be 0"); + } + return point.get(chainId); + } Review Comment: `numChains` should return the number of points, and all the other `getChain*` methods should be adjusted accordingly. 
########## common/src/main/java/org/apache/sedona/common/S2Geography/PointShapeCoders.java: ########## @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.common.S2Geography; + +import com.google.common.geometry.S2Coder; +import com.google.common.geometry.S2Point; +import java.lang.reflect.Field; + +public class PointShapeCoders { Review Comment: The latest `org.datasyslab:s2-geometry-library` has `S2Point.Shape.FAST_CODER` and `COMPACT_CODER` as public. This utility class for extracting non-public static fields is no longer needed. `CountingPointVectorCoder` can be removed as well. ########## common/src/main/java/org/apache/sedona/common/S2Geography/PointGeography.java: ########## @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.common.S2Geography; + +import com.google.common.geometry.*; +import java.io.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +public class PointGeography extends S2Geography { + // Underlying list of points + private final List<S2Point> points = new ArrayList<>(); + + /** Constructs an empty PointGeography. */ + public PointGeography() { + super(GeographyKind.POINT); + } + + /** Constructs a single-point geography. */ + public PointGeography(S2Point point) { + this(); + points.add(point); + } + + /** Constructs from a list of points. */ + public PointGeography(List<S2Point> pts) { + this(); + points.addAll(pts); + } + + @Override + public int dimension() { + // Points are 0-dimensional (or -1 if empty) + return points.isEmpty() ? -1 : 0; + } + + @Override + public int numShapes() { + // Represent all points as a single composite shape + return points.isEmpty() ? 
0 : 1; + } + + @Override + public S2Shape shape(int id) { + if (numShapes() == 0) { + throw new IllegalStateException("No shapes in empty PointGeography"); + } + if (id != 0) { + throw new IllegalArgumentException("Shape id out of bounds: " + id); + } + return new PointShape(points); + } + + @Override + public S2Region region() { + if (points.isEmpty()) { + return S2Cap.empty(); + } else if (points.size() == 1) { + return new S2PointRegion(points.get(0)); + } else { + // Union of all point regions + Collection<S2Region> pointRegionCollection = new ArrayList<>(); + for (S2Point p : points) { + pointRegionCollection.add(new S2PointRegion(p)); + } + S2RegionUnion union = new S2RegionUnion(pointRegionCollection); + return union; + } + } + + @Override + public void getCellUnionBound(List<S2CellId> cellIds) { + if (points.size() < 10) { + // For small point sets, cover each point individually + for (S2Point p : points) { + cellIds.add(S2CellId.fromPoint(p)); + } + } else { + // Fallback to the default covering logic in S2Geography + super.getCellUnionBound(cellIds); + } + } + + /** Returns an immutable view of the points. 
*/ + public List<S2Point> getPoints() { + return Collections.unmodifiableList(points); + } + + // ------------------------------------------------------- + // EncodeTagged / DecodeTagged + // ------------------------------------------------------- + + @Override + public void encodeTagged(OutputStream os, EncodeOptions opts) throws IOException { + DataOutputStream out = new DataOutputStream(os); + + // CELL_CENTER path + if (points.size() == 1 && opts.getCodingHint() == EncodeOptions.CodingHint.COMPACT) { + S2CellId cid = S2CellId.fromPoint(points.get(0)); + if (cid.level() >= 23) { + out.writeByte(S2Geography.GeographyKind.CELL_CENTER.getKind()); + out.writeByte(0); + out.writeByte(1); + out.writeByte(0); + out.writeLong(cid.id()); + return; + } + } + + // EMPTY path + if (points.isEmpty()) { + EncodeTag tag = new EncodeTag(); + tag.setKind(GeographyKind.POINT); + tag.setFlags((byte) (tag.getFlags() | EncodeTag.FLAG_EMPTY)); + tag.setCoveringSize((byte) 0); + tag.encode(out); + return; + } + + // header POINT + List<S2CellId> cover = new ArrayList<>(); + EncodeTag tag = new EncodeTag(); + tag.setKind(GeographyKind.POINT); + if (opts.isIncludeCovering()) getCellUnionBound(cover); + tag.setCoveringSize((byte) cover.size()); + tag.encode(out); + for (var c2 : cover) out.writeLong(c2.id()); + + // payload + S2Point.Shape shp = S2Point.Shape.fromList(points); + if (opts.getCodingHint() == EncodeOptions.CodingHint.FAST) { + CountingPointVectorCoder.INSTANCE.encode(shp, out); + } else { + PointShapeCoders.COMPACT.encode(shp, out); + } Review Comment: The empty-points case and the other cases can fall back to the parent class's implementation (the C++ implementation uses `Geography::EncodeTagged(encoder, options)` to do this). We just need to implement `encode()` here. The parent class's `encodeTagged` implementation will call our `encode`. 
########## common/src/main/java/org/apache/sedona/common/S2Geography/PointShape.java: ########## @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.common.S2Geography; + +import com.google.common.geometry.*; +import com.google.common.geometry.S2Edge; +import com.google.common.geometry.S2Point; +import com.google.common.geometry.S2Shape; +import java.util.List; + +/** + * Java equivalent of the C++ S2PointVectorShape: each point is a degenerate edge (start==end), one + * chain per point. + */ +/** A degenerate S2Shape that represents exactly one point. */ Review Comment: `PointShape` does not represent exactly one point. It represents a list of points. Please refer to [`S2PointVectorShape`](https://github.com/google/s2geometry/blob/v0.12.0/src/s2/s2point_vector_shape.h#L32-L37) for details. ########## common/src/main/java/org/apache/sedona/common/S2Geography/S2Geography.java: ########## @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.common.S2Geography; + +import static org.apache.sedona.common.S2Geography.S2Geography.GeographyKind.POINT; + +import com.google.common.geometry.*; +import java.io.*; +import java.util.ArrayList; +import java.util.List; + +/** + * An abstract class represent S2Geography. Has 6 subtypes of geography: POINT, POLYLINE, POLYGON, + * GEOGRAPHY_COLLECTION, SHAPE_INDEX, ENCODED_SHAPE_INDEX. + */ +public class S2Geography { + protected final GeographyKind kind; + + protected S2Geography(GeographyKind kind) { + this.kind = kind; + } + + public enum GeographyKind { + UNINITIALIZED(0), + POINT(1), + POLYLINE(2), + POLYGON(3), + GEOGRAPHY_COLLECTION(4), + SHAPE_INDEX(5), + ENCODED_SHAPE_INDEX(6), + CELL_CENTER(7); + + private final int kind; + + GeographyKind(int kind) { + this.kind = kind; + } + + /** Returns the integer tag for this kind. */ + public int getKind() { + return kind; + } + /** + * Look up the enum by its integer tag. + * + * @throws IllegalArgumentException if no matching kind exists. + */ + public static GeographyKind fromKind(int kind) { + for (GeographyKind k : values()) { + if (k.getKind() == kind) return k; + } + throw new IllegalArgumentException("Unknown GeographyKind: " + kind); + } + } + /** + * @return 0, 1, or 2 if all Shape()s that are returned will have the same dimension (i.e., they + * are all points, all lines, or all polygons). 
+ */ + public int dimension() { + if (numShapes() == 0) return -1; + int dim = shape(0).dimension(); + for (int i = 1; i < numShapes(); ++i) { + if (dim != shape(i).dimension()) return -1; + } + return dim; + } + + /** + * @return The number of S2Shape objects needed to represent this Geography + */ + public int numShapes() { + return 0; + } + + /** + * Returns the given S2Shape (where 0 <= id < num_shapes()). The caller retains ownership of the + * S2Shape but the data pointed to by the object requires that the underlying Geography outlives + * the returned object. + * + * @param id (where 0 <= id < num_shapes()) + * @return the given S2Shape + */ + public S2Shape shape(int id) { + return null; + } + + /** + * Returns an S2Region that represents the object. The caller retains ownership of the S2Region + * but the data pointed to by the object requires that the underlying Geography outlives the + * returned object. + * + * @return S2Region + */ + public S2Region region() { + return null; + } + + /** + * Adds an unnormalized set of S2CellIDs to `cell_ids`. This is intended to be faster than using + * Region().GetCovering() directly and to return a small number of cells that can be used to + * compute a possible intersection quickly. + */ + public void getCellUnionBound(List<S2CellId> cellIds) { + // Build a shape index of all shapes in this geography + S2ShapeIndex index = new S2ShapeIndex(); + for (int i = 0; i < numShapes(); i++) { + index.add(shape(i)); + } + // Create a region from the index and delegate covering + S2ShapeIndexRegion region = new S2ShapeIndexRegion(index); + region.getCellUnionBound(cellIds); + } + + // ─── Encoding / decoding machinery ──────────────────────────────────────────── + /** + * Serialize this geography to an encoder. This does not include any encapsulating information + * (e.g., which geography type or flags). 
Encode this geography into a stream as: 1) a 4-byte + * EncodeTag header (see EncodeTag encode / decode) 2) coveringSize × 8-byte cell-ids 3) the raw + * shape payload (point/polyline/polygon) via the built-in coder + * + * @param options CodingHint.FAST / CodingHint.COMPACT + */ + public void encodeTagged(OutputStream outStream, EncodeOptions options) throws IOException { + DataOutputStream out = new DataOutputStream(outStream); + + // 1) build + write tag header + EncodeTag tag = new EncodeTag(); + tag.setKind(kind); + if (numShapes() == 0) { + tag.setFlags((byte) (tag.getFlags() | EncodeTag.FLAG_EMPTY)); + } + // compute covering if requested + List<S2CellId> cover = new ArrayList<>(); + if (options.isIncludeCovering()) { + getCellUnionBound(cover); + } + tag.setCoveringSize((byte) cover.size()); + tag.encode(out); + + // 2) write each cell-id + for (S2CellId cid : cover) { + out.writeLong(cid.id()); + } + + // 3) encode by GeographyKind + switch (kind) { + case POINT: + ((PointGeography) this).encodeTagged(out, options); + break; + + // TODO: handle POLYLINE, POLYGON, etc. + default: + throw new IllegalArgumentException("encodeTagged not implemented for kind=" + kind); + } Review Comment: We can directly call `this.encode(out)` here. Subclasses will implement the `encode` method to encode itself without tag. Explicit dispatching is not needed. ########## common/src/main/java/org/apache/sedona/common/S2Geography/S2Geography.java: ########## @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.common.S2Geography; + +import static org.apache.sedona.common.S2Geography.S2Geography.GeographyKind.POINT; + +import com.google.common.geometry.*; +import java.io.*; +import java.util.ArrayList; +import java.util.List; + +/** + * An abstract class represent S2Geography. Has 6 subtypes of geography: POINT, POLYLINE, POLYGON, + * GEOGRAPHY_COLLECTION, SHAPE_INDEX, ENCODED_SHAPE_INDEX. + */ +public class S2Geography { + protected final GeographyKind kind; + + protected S2Geography(GeographyKind kind) { + this.kind = kind; + } + + public enum GeographyKind { + UNINITIALIZED(0), + POINT(1), + POLYLINE(2), + POLYGON(3), + GEOGRAPHY_COLLECTION(4), + SHAPE_INDEX(5), + ENCODED_SHAPE_INDEX(6), + CELL_CENTER(7); + + private final int kind; + + GeographyKind(int kind) { + this.kind = kind; + } + + /** Returns the integer tag for this kind. */ + public int getKind() { + return kind; + } + /** + * Look up the enum by its integer tag. + * + * @throws IllegalArgumentException if no matching kind exists. + */ + public static GeographyKind fromKind(int kind) { + for (GeographyKind k : values()) { + if (k.getKind() == kind) return k; + } + throw new IllegalArgumentException("Unknown GeographyKind: " + kind); + } + } + /** + * @return 0, 1, or 2 if all Shape()s that are returned will have the same dimension (i.e., they + * are all points, all lines, or all polygons). 
+ */ + public int dimension() { + if (numShapes() == 0) return -1; + int dim = shape(0).dimension(); + for (int i = 1; i < numShapes(); ++i) { + if (dim != shape(i).dimension()) return -1; + } + return dim; + } + + /** + * @return The number of S2Shape objects needed to represent this Geography + */ + public int numShapes() { + return 0; + } + + /** + * Returns the given S2Shape (where 0 <= id < num_shapes()). The caller retains ownership of the + * S2Shape but the data pointed to by the object requires that the underlying Geography outlives + * the returned object. + * + * @param id (where 0 <= id < num_shapes()) + * @return the given S2Shape + */ + public S2Shape shape(int id) { + return null; + } + + /** + * Returns an S2Region that represents the object. The caller retains ownership of the S2Region + * but the data pointed to by the object requires that the underlying Geography outlives the + * returned object. + * + * @return S2Region + */ + public S2Region region() { + return null; + } + + /** + * Adds an unnormalized set of S2CellIDs to `cell_ids`. This is intended to be faster than using + * Region().GetCovering() directly and to return a small number of cells that can be used to + * compute a possible intersection quickly. + */ + public void getCellUnionBound(List<S2CellId> cellIds) { + // Build a shape index of all shapes in this geography + S2ShapeIndex index = new S2ShapeIndex(); + for (int i = 0; i < numShapes(); i++) { + index.add(shape(i)); + } + // Create a region from the index and delegate covering + S2ShapeIndexRegion region = new S2ShapeIndexRegion(index); + region.getCellUnionBound(cellIds); + } + + // ─── Encoding / decoding machinery ──────────────────────────────────────────── + /** + * Serialize this geography to an encoder. This does not include any encapsulating information + * (e.g., which geography type or flags). 
Encode this geography into a stream as: 1) a 4-byte + * EncodeTag header (see EncodeTag encode / decode) 2) coveringSize × 8-byte cell-ids 3) the raw + * shape payload (point/polyline/polygon) via the built-in coder + * + * @param options CodingHint.FAST / CodingHint.COMPACT + */ + public void encodeTagged(OutputStream outStream, EncodeOptions options) throws IOException { + DataOutputStream out = new DataOutputStream(outStream); + + // 1) build + write tag header + EncodeTag tag = new EncodeTag(); + tag.setKind(kind); + if (numShapes() == 0) { + tag.setFlags((byte) (tag.getFlags() | EncodeTag.FLAG_EMPTY)); + } + // compute covering if requested + List<S2CellId> cover = new ArrayList<>(); + if (options.isIncludeCovering()) { + getCellUnionBound(cover); + } + tag.setCoveringSize((byte) cover.size()); + tag.encode(out); + + // 2) write each cell-id + for (S2CellId cid : cover) { + out.writeLong(cid.id()); + } + + // 3) encode by GeographyKind + switch (kind) { + case POINT: + ((PointGeography) this).encodeTagged(out, options); + break; + + // TODO: handle POLYLINE, POLYGON, etc. + default: + throw new IllegalArgumentException("encodeTagged not implemented for kind=" + kind); + } + + out.flush(); + } + + /** + * Reads a tagged geography from the stream (header + covering + payload). Dispatches to the right + * subclass decoder. 
+ */ + public static S2Geography decodeTagged(DataInputStream is) throws IOException { + DataInputStream in = new DataInputStream(is); + + // 1) Read the full 4-byte header: + int kindVal = in.readUnsignedByte(); + byte flags = (byte) in.readUnsignedByte(); + int coverCount = in.readUnsignedByte(); + int reserved = in.readUnsignedByte(); + if (reserved != 0) { + throw new IOException("Reserved header byte must be 0, was " + reserved); + } + + GeographyKind kind = GeographyKind.fromKind(kindVal); + EncodeTag tag = new EncodeTag(); + tag.setKind(kind); + tag.setFlags(flags); + tag.setCoveringSize((byte) coverCount); + + // 2) If CELL_CENTER, read exactly one ID *as* payload, and return immediately: + if (kind == GeographyKind.CELL_CENTER) { + long id = in.readLong(); + return new PointGeography(new S2CellId(id).toPoint()); + } + + // 2) Skip any covering IDs + for (int i = 0; i < coverCount; i++) { + in.readLong(); + } + + // 3) Dispatch to the payload decoder + switch (kind) { + case POINT: + return PointGeography.decodeTagged(in, tag); + case POLYLINE: + return PolylineGeography.decodeTagged(in, tag); + case POLYGON: + return PolygonGeography.decodeTagged(in, tag); + default: + throw new IllegalArgumentException("Unsupported kind " + kind); + } + } +} Review Comment: We should declare `encode` and `decode` functions for encoding/decoding geography objects without tags. ########## common/src/main/java/org/apache/sedona/common/S2Geography/S2Geography.java: ########## @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.common.S2Geography; + +import static org.apache.sedona.common.S2Geography.S2Geography.GeographyKind.POINT; + +import com.google.common.geometry.*; +import java.io.*; +import java.util.ArrayList; +import java.util.List; + +/** + * An abstract class represent S2Geography. Has 6 subtypes of geography: POINT, POLYLINE, POLYGON, + * GEOGRAPHY_COLLECTION, SHAPE_INDEX, ENCODED_SHAPE_INDEX. + */ +public class S2Geography { + protected final GeographyKind kind; + + protected S2Geography(GeographyKind kind) { + this.kind = kind; + } + + public enum GeographyKind { + UNINITIALIZED(0), + POINT(1), + POLYLINE(2), + POLYGON(3), + GEOGRAPHY_COLLECTION(4), + SHAPE_INDEX(5), + ENCODED_SHAPE_INDEX(6), + CELL_CENTER(7); + + private final int kind; + + GeographyKind(int kind) { + this.kind = kind; + } + + /** Returns the integer tag for this kind. */ + public int getKind() { + return kind; + } + /** + * Look up the enum by its integer tag. + * + * @throws IllegalArgumentException if no matching kind exists. + */ + public static GeographyKind fromKind(int kind) { + for (GeographyKind k : values()) { + if (k.getKind() == kind) return k; + } + throw new IllegalArgumentException("Unknown GeographyKind: " + kind); + } + } + /** + * @return 0, 1, or 2 if all Shape()s that are returned will have the same dimension (i.e., they + * are all points, all lines, or all polygons). 
+ */ + public int dimension() { + if (numShapes() == 0) return -1; + int dim = shape(0).dimension(); + for (int i = 1; i < numShapes(); ++i) { + if (dim != shape(i).dimension()) return -1; + } + return dim; + } + + /** + * @return The number of S2Shape objects needed to represent this Geography + */ + public int numShapes() { + return 0; + } + + /** + * Returns the given S2Shape (where 0 <= id < num_shapes()). The caller retains ownership of the + * S2Shape but the data pointed to by the object requires that the underlying Geography outlives + * the returned object. + * + * @param id (where 0 <= id < num_shapes()) + * @return the given S2Shape + */ + public S2Shape shape(int id) { + return null; + } + + /** + * Returns an S2Region that represents the object. The caller retains ownership of the S2Region + * but the data pointed to by the object requires that the underlying Geography outlives the + * returned object. + * + * @return S2Region + */ + public S2Region region() { + return null; + } + + /** + * Adds an unnormalized set of S2CellIDs to `cell_ids`. This is intended to be faster than using + * Region().GetCovering() directly and to return a small number of cells that can be used to + * compute a possible intersection quickly. + */ + public void getCellUnionBound(List<S2CellId> cellIds) { + // Build a shape index of all shapes in this geography + S2ShapeIndex index = new S2ShapeIndex(); + for (int i = 0; i < numShapes(); i++) { + index.add(shape(i)); + } + // Create a region from the index and delegate covering + S2ShapeIndexRegion region = new S2ShapeIndexRegion(index); + region.getCellUnionBound(cellIds); + } + + // ─── Encoding / decoding machinery ──────────────────────────────────────────── + /** + * Serialize this geography to an encoder. This does not include any encapsulating information + * (e.g., which geography type or flags). 
Encode this geography into a stream as: 1) a 4-byte + * EncodeTag header (see EncodeTag encode / decode) 2) coveringSize × 8-byte cell-ids 3) the raw + * shape payload (point/polyline/polygon) via the built-in coder + * + * @param options CodingHint.FAST / CodingHint.COMPACT + */ + public void encodeTagged(OutputStream outStream, EncodeOptions options) throws IOException { + DataOutputStream out = new DataOutputStream(outStream); + + // 1) build + write tag header + EncodeTag tag = new EncodeTag(); + tag.setKind(kind); + if (numShapes() == 0) { + tag.setFlags((byte) (tag.getFlags() | EncodeTag.FLAG_EMPTY)); + } + // compute covering if requested + List<S2CellId> cover = new ArrayList<>(); + if (options.isIncludeCovering()) { + getCellUnionBound(cover); + } + tag.setCoveringSize((byte) cover.size()); Review Comment: We should skip encoding the covering when `cover.size() > 255`, since a larger count does not fit in the 8-bit `coveringSize` field (an unsigned byte holds 0–255). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
