This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new ae3b398af7 [SEDONA-671] Implement Spider spatial data generator (#1680)
ae3b398af7 is described below

commit ae3b398af74c400fe6c0e31b63f56d46f5742162
Author: Kristin Cowalcijk <[email protected]>
AuthorDate: Tue Nov 12 11:58:34 2024 +0800

    [SEDONA-671] Implement Spider spatial data generator (#1680)
    
    * Implement Spider spatial data generator
    
    * Add documentation and python tests
---
 .../apache/sedona/common/spider/BitGenerator.java  |  79 ++++
 .../sedona/common/spider/DiagonalGenerator.java    |  81 ++++
 .../sedona/common/spider/GaussianGenerator.java    |  36 ++
 .../org/apache/sedona/common/spider/Generator.java |  42 ++
 .../sedona/common/spider/GeneratorFactory.java     |  57 +++
 .../sedona/common/spider/ParcelGenerator.java      | 194 ++++++++++
 .../sedona/common/spider/PointBasedGenerator.java  | 234 ++++++++++++
 .../sedona/common/spider/SierpinskiGenerator.java  |  74 ++++
 .../sedona/common/spider/UniformGenerator.java     |  34 ++
 .../apache/sedona/common/spider/GeneratorTest.java | 425 +++++++++++++++++++++
 docs/api/sql/Spider.md                             | 271 +++++++++++++
 docs/image/spider/spider-bit.png                   | Bin 0 -> 3777 bytes
 docs/image/spider/spider-diagonal.png              | Bin 0 -> 4223 bytes
 docs/image/spider/spider-gaussian.png              | Bin 0 -> 31518 bytes
 docs/image/spider/spider-parcel.png                | Bin 0 -> 8647 bytes
 docs/image/spider/spider-quickstart.png            | Bin 0 -> 105979 bytes
 docs/image/spider/spider-sierpinski.png            | Bin 0 -> 8612 bytes
 docs/image/spider/spider-uniform-affine.png        | Bin 0 -> 8157 bytes
 docs/image/spider/spider-uniform.png               | Bin 0 -> 10217 bytes
 mkdocs.yml                                         |   1 +
 python/tests/sql/test_spider.py                    |  73 ++++
 ...org.apache.spark.sql.sources.DataSourceRegister |   1 +
 .../sql/datasources/spider/AffineTransform.scala   |  83 ++++
 .../sql/datasources/spider/SpiderDataSource.scala  |  67 ++++
 .../sql/datasources/spider/SpiderPartition.scala   |  31 ++
 .../datasources/spider/SpiderPartitionReader.scala |  53 +++
 .../sql/datasources/spider/SpiderScanBuilder.scala | 134 +++++++
 .../sql/datasources/spider/SpiderTable.scala       |  54 +++
 .../scala/org/apache/sedona/sql/SpiderTests.scala  | 316 +++++++++++++++
 29 files changed, 2340 insertions(+)

diff --git a/common/src/main/java/org/apache/sedona/common/spider/BitGenerator.java b/common/src/main/java/org/apache/sedona/common/spider/BitGenerator.java
new file mode 100644
index 0000000000..d42c81f14b
--- /dev/null
+++ b/common/src/main/java/org/apache/sedona/common/spider/BitGenerator.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.spider;
+
+import java.util.Map;
+import java.util.Random;
+import org.locationtech.jts.geom.Coordinate;
+
+/**
+ * Generates geometries that are distributed according to the Bit distribution.
+ *
+ * <p>Each coordinate is built as a binary fraction {@code 0.b1 b2 ... bn}, where {@code n} is the
+ * configured number of digits and every bit is set independently with the configured probability.
+ */
+public class BitGenerator extends PointBasedGenerator {
+
+  /** Configuration of a {@link BitGenerator}. */
+  public static class BitParameter {
+    /** Settings shared by all point-based generators (geometry type, box and polygon sizes) */
+    public final PointBasedParameter pointBasedParameter;
+
+    /** The probability of setting a bit */
+    public final double probability;
+
+    /** Number of digits in the generated data */
+    public final int digits;
+
+    /**
+     * @param pointBasedParameter settings shared by all point-based generators
+     * @param probability the probability of setting a bit, in the range [0, 1]
+     * @param digits the number of binary digits of each coordinate, must be positive
+     * @throws IllegalArgumentException if probability is NaN or outside [0, 1], or if digits is not
+     *     positive
+     */
+    public BitParameter(PointBasedParameter pointBasedParameter, double probability, int digits) {
+      // Written as a negated range check so that NaN is rejected as well
+      if (!(probability >= 0 && probability <= 1)) {
+        throw new IllegalArgumentException("Probability must be between 0 and 1");
+      }
+      if (digits <= 0) {
+        throw new IllegalArgumentException("Digits must be a positive integer");
+      }
+      this.pointBasedParameter = pointBasedParameter;
+      this.probability = probability;
+      this.digits = digits;
+    }
+
+    /**
+     * Builds the parameter from a string configuration. Recognized keys are {@code probability}
+     * (default 0.2) and {@code digits} (default 10), plus the keys read by {@link
+     * PointBasedParameter#create(Map)}.
+     */
+    public static BitParameter create(Map<String, String> conf) {
+      PointBasedParameter pointBasedParameter = PointBasedParameter.create(conf);
+      double probability = Double.parseDouble(conf.getOrDefault("probability", "0.2"));
+      int digits = Integer.parseInt(conf.getOrDefault("digits", "10"));
+      return new BitParameter(pointBasedParameter, probability, digits);
+    }
+  }
+
+  private final BitParameter parameter;
+
+  public BitGenerator(Random random, BitParameter bitParameter) {
+    super(random, bitParameter.pointBasedParameter);
+    this.parameter = bitParameter;
+  }
+
+  @Override
+  protected Coordinate generateCoordinate() {
+    double x = generateCoordinateValue();
+    double y = generateCoordinateValue();
+    return new Coordinate(x, y);
+  }
+
+  /** Generates one coordinate value by sampling each fractional bit independently. */
+  private double generateCoordinateValue() {
+    double n = 0.0;
+    // Weight of the current bit: 2^-1, 2^-2, ... A running double is used instead of
+    // (1 << i) because an int shift overflows once i exceeds 30, which corrupted the result
+    // for large digit counts. Powers of two are exact in double, so values for small digit
+    // counts are unchanged.
+    double weight = 0.5;
+    for (int i = 1; i <= parameter.digits; i++) {
+      n += bernoulli(parameter.probability) * weight;
+      weight *= 0.5;
+    }
+    return n;
+  }
+}
diff --git a/common/src/main/java/org/apache/sedona/common/spider/DiagonalGenerator.java b/common/src/main/java/org/apache/sedona/common/spider/DiagonalGenerator.java
new file mode 100644
index 0000000000..42eac79872
--- /dev/null
+++ b/common/src/main/java/org/apache/sedona/common/spider/DiagonalGenerator.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.spider;
+
+import java.util.Map;
+import java.util.Random;
+import org.locationtech.jts.geom.Coordinate;
+
+/**
+ * Generates geometries that are distributed according to the Diagonal distribution.
+ *
+ * <p>A configurable fraction of the generated coordinates lies exactly on the line {@code y = x};
+ * the remaining coordinates are displaced perpendicular to that line by a normally distributed
+ * offset.
+ */
+public class DiagonalGenerator extends PointBasedGenerator {
+
+  /** Configuration of a {@link DiagonalGenerator}. */
+  public static class DiagonalParameter {
+    /** Settings shared by all point-based generators (geometry type, box and polygon sizes) */
+    public final PointBasedParameter pointBasedParameter;
+
+    /** The fraction of records, in the range [0, 1], that are perfectly on the diagonal */
+    public final double percentage;
+
+    /** For points not exactly on the diagonal, the buffer in which they are dispersed */
+    public final double buffer;
+
+    /**
+     * @param pointBasedParameter settings shared by all point-based generators
+     * @param percentage the fraction of records exactly on the diagonal, in the range [0, 1]
+     * @param buffer the dispersion buffer for off-diagonal records, must be non-negative
+     * @throws IllegalArgumentException if percentage is outside [0, 1] or buffer is negative
+     */
+    public DiagonalParameter(
+        PointBasedParameter pointBasedParameter, double percentage, double buffer) {
+      // Validate here rather than in create() so that directly constructed parameters are
+      // checked too, as the other parameter classes of this package do.
+      if (percentage < 0 || percentage > 1) {
+        throw new IllegalArgumentException("Percentage must be between 0 and 1");
+      }
+      if (buffer < 0) {
+        throw new IllegalArgumentException("Buffer must be a non-negative number");
+      }
+      this.pointBasedParameter = pointBasedParameter;
+      this.percentage = percentage;
+      this.buffer = buffer;
+    }
+
+    /**
+     * Builds the parameter from a string configuration. Recognized keys are {@code percentage}
+     * (default 0.5) and {@code buffer} (default 0.5), plus the keys read by {@link
+     * PointBasedParameter#create(Map)}.
+     */
+    public static DiagonalParameter create(Map<String, String> conf) {
+      PointBasedParameter pointBasedParameter = PointBasedParameter.create(conf);
+      double percentage = Double.parseDouble(conf.getOrDefault("percentage", "0.5"));
+      double buffer = Double.parseDouble(conf.getOrDefault("buffer", "0.5"));
+      return new DiagonalParameter(pointBasedParameter, percentage, buffer);
+    }
+  }
+
+  private static final double INV_SQRT2 = 1 / Math.sqrt(2);
+
+  private final DiagonalParameter parameter;
+
+  public DiagonalGenerator(Random random, DiagonalParameter diagonalParameter) {
+    super(random, diagonalParameter.pointBasedParameter);
+    this.parameter = diagonalParameter;
+  }
+
+  @Override
+  protected Coordinate generateCoordinate() {
+    if (bernoulli(parameter.percentage) == 1) {
+      // Exactly on the diagonal
+      double position = uniform(0, 1);
+      return new Coordinate(position, position);
+    } else {
+      // Pick a point (c, c) on the diagonal, then move it by distance d along the
+      // perpendicular direction (1, -1) / sqrt(2).
+      double c = uniform(0, 1);
+      // NOTE(review): the divisor 5 presumably scales the standard deviation so that almost
+      // all points fall inside the buffer -- confirm against the Spider publication.
+      double d = normal(0, parameter.buffer / 5);
+      double displacement = d * INV_SQRT2;
+      double x = c + displacement;
+      double y = c - displacement;
+      return new Coordinate(x, y);
+    }
+  }
+}
diff --git a/common/src/main/java/org/apache/sedona/common/spider/GaussianGenerator.java b/common/src/main/java/org/apache/sedona/common/spider/GaussianGenerator.java
new file mode 100644
index 0000000000..59302cc8bb
--- /dev/null
+++ b/common/src/main/java/org/apache/sedona/common/spider/GaussianGenerator.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.spider;
+
+import java.util.Random;
+import org.locationtech.jts.geom.Coordinate;
+
+/**
+ * Generates geometries whose coordinates follow a Gaussian distribution.
+ *
+ * <p>The x and y values are sampled by two separate calls with mean 0.5 and standard deviation
+ * 0.1.
+ */
+public class GaussianGenerator extends PointBasedGenerator {
+  private static final double MEAN = 0.5;
+  private static final double STANDARD_DEVIATION = 0.1;
+
+  public GaussianGenerator(Random random, PointBasedParameter pointBasedParameter) {
+    super(random, pointBasedParameter);
+  }
+
+  @Override
+  protected Coordinate generateCoordinate() {
+    // Arguments are evaluated left to right, so x is sampled before y
+    return new Coordinate(normal(MEAN, STANDARD_DEVIATION), normal(MEAN, STANDARD_DEVIATION));
+  }
+}
diff --git a/common/src/main/java/org/apache/sedona/common/spider/Generator.java b/common/src/main/java/org/apache/sedona/common/spider/Generator.java
new file mode 100644
index 0000000000..16fed4d302
--- /dev/null
+++ b/common/src/main/java/org/apache/sedona/common/spider/Generator.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.spider;
+
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Random;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.geom.GeometryFactory;
+
+/**
+ * A generator is an iterator that generates random geometries. The actual implementation of the
+ * generator is defined in the subclasses. You can create an instance of the generator by calling
+ * the factory method {@link GeneratorFactory#create(String, Random, Map)} with the generator name,
+ * a random number generator, and the configuration.
+ *
+ * <p>The idea and algorithms of this generator come from this publication:
+ *
+ * <pre>
+ * Puloma Katiyar, Tin Vu, Sara Migliorini, Alberto Belussi, Ahmed Eldawy.
+ * "SpiderWeb: A Spatial Data Generator on the Web", ACM SIGSPATIAL 2020, Seattle, WA
+ * </pre>
+ */
+public interface Generator extends Iterator<Geometry> {
+  /** Geometry factory, created with the JTS defaults, available to all implementations */
+  GeometryFactory GEOMETRY_FACTORY = new GeometryFactory();
+}
diff --git a/common/src/main/java/org/apache/sedona/common/spider/GeneratorFactory.java b/common/src/main/java/org/apache/sedona/common/spider/GeneratorFactory.java
new file mode 100644
index 0000000000..36bce9a3dd
--- /dev/null
+++ b/common/src/main/java/org/apache/sedona/common/spider/GeneratorFactory.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.spider;
+
+import java.util.Map;
+import java.util.Random;
+import org.apache.sedona.common.spider.PointBasedGenerator.PointBasedParameter;
+
+/** Static factory that maps a generator name to a configured {@link Generator} instance. */
+public class GeneratorFactory {
+  private GeneratorFactory() {}
+
+  /**
+   * Instantiates the {@link Generator} registered under the given name.
+   *
+   * <p>Supported names are {@code uniform}, {@code gaussian}, {@code diagonal}, {@code bit},
+   * {@code sierpinski} and {@code parcel}.
+   *
+   * @param name the name of the generator
+   * @param random the random number generator the new generator draws from
+   * @param parameters the configuration, parsed by the parameter class of the chosen generator
+   * @return an instance of {@link Generator}
+   * @throws IllegalArgumentException if the name does not match any supported generator
+   */
+  public static Generator create(String name, Random random, Map<String, String> parameters) {
+    final Generator generator;
+    switch (name) {
+      case "uniform":
+        generator = new UniformGenerator(random, PointBasedParameter.create(parameters));
+        break;
+      case "gaussian":
+        generator = new GaussianGenerator(random, PointBasedParameter.create(parameters));
+        break;
+      case "diagonal":
+        generator =
+            new DiagonalGenerator(random, DiagonalGenerator.DiagonalParameter.create(parameters));
+        break;
+      case "bit":
+        generator = new BitGenerator(random, BitGenerator.BitParameter.create(parameters));
+        break;
+      case "sierpinski":
+        generator = new SierpinskiGenerator(random, PointBasedParameter.create(parameters));
+        break;
+      case "parcel":
+        generator =
+            new ParcelGenerator(random, ParcelGenerator.ParcelParameter.create(parameters));
+        break;
+      default:
+        throw new IllegalArgumentException("Unknown generator: " + name);
+    }
+    return generator;
+  }
+}
diff --git a/common/src/main/java/org/apache/sedona/common/spider/ParcelGenerator.java b/common/src/main/java/org/apache/sedona/common/spider/ParcelGenerator.java
new file mode 100644
index 0000000000..85bd1484e9
--- /dev/null
+++ b/common/src/main/java/org/apache/sedona/common/spider/ParcelGenerator.java
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.spider;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Random;
+import org.locationtech.jts.geom.Envelope;
+import org.locationtech.jts.geom.Geometry;
+
+/**
+ * Generate boxes that are non-overlapping and, before dithering is applied, fill up the unit
+ * square.
+ *
+ * <p>The unit square is split recursively along the longer side of each box until the requested
+ * number of boxes has been produced. Each returned box is then shrunk around its center according
+ * to the dither parameter.
+ */
+public class ParcelGenerator implements Generator {
+
+  /** Configuration of a {@link ParcelGenerator}. */
+  public static class ParcelParameter {
+    /** The number of boxes to generate */
+    public final long cardinality;
+
+    /** The amount of dithering as a ratio of the side length. Allowed range [0, 1] */
+    public final double dither;
+
+    /**
+     * The allowed range for splitting boxes. Allowed range [0.0, 0.5]. 0.0 means all values are
+     * allowed. 0.5 means always split in half.
+     */
+    public final double splitRange;
+
+    /**
+     * @param cardinality the number of boxes to generate, must be non-negative
+     * @param dither the amount of dithering as a ratio of the side length, in the range [0, 1]
+     * @param splitRange the allowed range for splitting boxes, in the range [0, 0.5]
+     * @throws IllegalArgumentException if any argument is outside its allowed range
+     */
+    public ParcelParameter(long cardinality, double dither, double splitRange) {
+      if (cardinality < 0) {
+        throw new IllegalArgumentException("cardinality must be non-negative");
+      }
+      if (dither < 0 || dither > 1) {
+        throw new IllegalArgumentException("dither must be in the range [0, 1]");
+      }
+      if (splitRange < 0 || splitRange > 0.5) {
+        throw new IllegalArgumentException("splitRange must be in the range [0, 0.5]");
+      }
+      this.cardinality = cardinality;
+      this.dither = dither;
+      this.splitRange = splitRange;
+    }
+
+    /**
+     * Builds the parameter from a string configuration. Recognized keys are {@code cardinality}
+     * (default 100), {@code dither} (default 0.5) and {@code splitRange} (default 0.5).
+     */
+    public static ParcelParameter create(Map<String, String> conf) {
+      long cardinality = Long.parseLong(conf.getOrDefault("cardinality", "100"));
+      double dither = Double.parseDouble(conf.getOrDefault("dither", "0.5"));
+      double splitRange = Double.parseDouble(conf.getOrDefault("splitRange", "0.5"));
+      return new ParcelParameter(cardinality, dither, splitRange);
+    }
+  }
+
+  private final Random random;
+
+  /** Number of boxes returned so far */
+  private long iRecord = 0;
+
+  private final long cardinality;
+  private final double splitRange;
+  private final double dither;
+
+  /** A box together with its depth in the split tree (the unit square is at level 0) */
+  private static class BoxWithLevel {
+    public final Envelope envelope;
+    public final int level;
+
+    public BoxWithLevel(Envelope envelope, int level) {
+      this.envelope = envelope;
+      this.level = level;
+    }
+  }
+
+  /**
+   * A stack of boxes to split. Each entry holds a box and its level; the top of the stack is the
+   * last element of the list.
+   */
+  private final List<BoxWithLevel> boxesToSplit;
+
+  /**
+   * The level of the deepest box to generate is &lceil; log<sub>2</sub>(n)&rceil; =
+   * &lfloor;log<sub>2</sub>(n-1)&rfloor; + 1
+   */
+  private final int maxDepth;
+
+  /**
+   * Number of boxes that will be generated at the deepest level (maxDepth). The remaining records
+   * will be generated at level maxDepth - 1
+   */
+  private final long numBoxesMaxDepth;
+
+  public ParcelGenerator(Random random, ParcelParameter parameter) {
+    this.random = random;
+
+    this.cardinality = parameter.cardinality;
+    this.splitRange = parameter.splitRange;
+    this.dither = parameter.dither;
+
+    // Start with the whole unit square at level 0
+    this.boxesToSplit = new ArrayList<>();
+    this.boxesToSplit.add(new BoxWithLevel(new Envelope(0, 1, 0, 1), 0));
+
+    // 64 - numberOfLeadingZeros(n - 1) is the bit length of n - 1, i.e. ceil(log2(n))
+    this.maxDepth = 64 - Long.numberOfLeadingZeros(cardinality - 1);
+    this.numBoxesMaxDepth = 2 * cardinality - (1L << maxDepth);
+  }
+
+  /**
+   * Generates the next box by taking the most recently deferred box off the stack and splitting it
+   * repeatedly until it reaches its final level. At every split, the second half is pushed on the
+   * stack for later and the first half is processed further. The final box is dithered before it
+   * is returned.
+   *
+   * @return the next box
+   * @throws NoSuchElementException if all boxes have already been generated
+   */
+  public Envelope generateBox() {
+    // Explicit checks instead of assert statements: this method is public and assertions are
+    // usually disabled at runtime.
+    if (iRecord >= cardinality || boxesToSplit.isEmpty()) {
+      throw new NoSuchElementException("No more parcels to generate");
+    }
+    BoxWithLevel boxWithLevel = boxesToSplit.remove(boxesToSplit.size() - 1);
+    int level = boxWithLevel.level;
+    Envelope box = boxWithLevel.envelope;
+
+    while (!isFinalLevel(level)) {
+      // Split the box into two, defer the second half and continue with the first half
+      Envelope[] splitBoxes = splitBox(box);
+      level = level + 1;
+      boxesToSplit.add(new BoxWithLevel(splitBoxes[1], level));
+      box = splitBoxes[0];
+    }
+
+    // Box is final. Return it
+    ditherBox(box);
+    iRecord += 1;
+    return box;
+  }
+
+  /**
+   * Tells whether a box at the given level must be returned as is. The first numBoxesMaxDepth
+   * records are returned at level maxDepth, all later records at level maxDepth - 1.
+   */
+  private boolean isFinalLevel(int level) {
+    return level == maxDepth || (level == maxDepth - 1 && iRecord >= numBoxesMaxDepth);
+  }
+
+  /**
+   * Split the given box into two according to the splitRange value. This function always splits the
+   * box along the longest side. Let's assume the longest side has a length l, the split will happen
+   * at l * uniform(splitRange, 1-splitRange).
+   */
+  private Envelope[] splitBox(Envelope box) {
+    boolean splitX = box.getWidth() > box.getHeight();
+    if (splitX) {
+      double splitPoint =
+          box.getMinX()
+              + box.getWidth() * (splitRange + random.nextDouble() * (1 - 2 * splitRange));
+      return new Envelope[] {
+        new Envelope(box.getMinX(), splitPoint, box.getMinY(), box.getMaxY()),
+        new Envelope(splitPoint, box.getMaxX(), box.getMinY(), box.getMaxY())
+      };
+    } else {
+      double splitPoint =
+          box.getMinY()
+              + box.getHeight() * (splitRange + random.nextDouble() * (1 - 2 * splitRange));
+      return new Envelope[] {
+        new Envelope(box.getMinX(), box.getMaxX(), box.getMinY(), splitPoint),
+        new Envelope(box.getMinX(), box.getMaxX(), splitPoint, box.getMaxY())
+      };
+    }
+  }
+
+  /**
+   * Change the size of the given box along all dimensions according to the dither parameter. The
+   * relative amount by which each side length shrinks is a uniformly random variable in [0,
+   * dither). This means that if the dither parameter is zero, the box will not be changed. The
+   * center of the box remains fixed while dithering. The box is modified in place.
+   */
+  private void ditherBox(Envelope box) {
+    double changeX = random.nextDouble() * dither * box.getWidth();
+    double changeY = random.nextDouble() * dither * box.getHeight();
+
+    box.init(
+        box.getMinX() + changeX / 2,
+        box.getMaxX() - changeX / 2,
+        box.getMinY() + changeY / 2,
+        box.getMaxY() - changeY / 2);
+  }
+
+  @Override
+  public boolean hasNext() {
+    return iRecord < cardinality;
+  }
+
+  @Override
+  public Geometry next() {
+    if (!hasNext()) {
+      throw new NoSuchElementException("No more parcels to generate");
+    }
+    return GEOMETRY_FACTORY.toGeometry(generateBox());
+  }
+}
diff --git a/common/src/main/java/org/apache/sedona/common/spider/PointBasedGenerator.java b/common/src/main/java/org/apache/sedona/common/spider/PointBasedGenerator.java
new file mode 100644
index 0000000000..0e941a345f
--- /dev/null
+++ b/common/src/main/java/org/apache/sedona/common/spider/PointBasedGenerator.java
@@ -0,0 +1,234 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.spider;
+
+import java.util.Arrays;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Random;
+import org.apache.sedona.common.enums.GeometryType;
+import org.locationtech.jts.geom.Coordinate;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.geom.Point;
+import org.locationtech.jts.geom.Polygon;
+
+/**
+ * Base class of generators that first draw a random coordinate and then emit a point, a box, or a
+ * polygon around that coordinate, depending on the configured geometry type. Subclasses define
+ * the distribution of the coordinate by implementing {@link #generateCoordinate()}.
+ *
+ * <p>Generators of this kind never run out of elements: {@link #hasNext()} always returns true,
+ * so callers have to bound the iteration themselves.
+ */
+abstract class PointBasedGenerator implements Generator {
+
+  /** Configuration shared by all point-based generators. */
+  public static class PointBasedParameter {
+    /** The type of geometry to generate: POINT, POLYGON or RECTANGLE */
+    public final GeometryType geometryType;
+
+    /** Size limits used when generating boxes */
+    public final RandomBoxParameter boxParameter;
+
+    /** Size and segment limits used when generating polygons */
+    public final RandomPolygonParameter polygonParameter;
+
+    public PointBasedParameter(
+        GeometryType geometryType,
+        RandomBoxParameter boxParameter,
+        RandomPolygonParameter polygonParameter) {
+      this.geometryType = geometryType;
+      this.boxParameter = boxParameter;
+      this.polygonParameter = polygonParameter;
+    }
+
+    /**
+     * Builds the parameter from a string configuration. The key {@code geometryType} (default
+     * {@code point}) is matched case-insensitively against {@code point}, {@code polygon} and
+     * {@code box}. The box and polygon settings are always parsed, whichever type is selected.
+     *
+     * @throws IllegalArgumentException if the geometry type is not supported
+     */
+    public static PointBasedParameter create(Map<String, String> conf) {
+      String geomType = conf.getOrDefault("geometryType", "point").toLowerCase(Locale.ROOT);
+      GeometryType geometryType;
+      switch (geomType) {
+        case "point":
+          geometryType = GeometryType.POINT;
+          break;
+        case "polygon":
+          geometryType = GeometryType.POLYGON;
+          break;
+        case "box":
+          geometryType = GeometryType.RECTANGLE;
+          break;
+        default:
+          throw new IllegalArgumentException("Unsupported geometry type: " + geomType);
+      }
+      RandomBoxParameter boxParameter = RandomBoxParameter.create(conf);
+      RandomPolygonParameter polygonParameter = RandomPolygonParameter.create(conf);
+      return new PointBasedParameter(geometryType, boxParameter, polygonParameter);
+    }
+  }
+
+  /** Upper bounds for the width and height of generated boxes. */
+  public static class RandomBoxParameter {
+    public final double maxWidth;
+    public final double maxHeight;
+
+    /**
+     * @throws IllegalArgumentException if maxWidth or maxHeight is negative
+     */
+    public RandomBoxParameter(double maxWidth, double maxHeight) {
+      if (maxWidth < 0) {
+        throw new IllegalArgumentException("maxWidth must be non-negative");
+      }
+      if (maxHeight < 0) {
+        throw new IllegalArgumentException("maxHeight must be non-negative");
+      }
+      this.maxWidth = maxWidth;
+      this.maxHeight = maxHeight;
+    }
+
+    /** Reads the keys {@code maxWidth} and {@code maxHeight} (both default to 0.01). */
+    public static RandomBoxParameter create(Map<String, String> conf) {
+      double maxWidth = Double.parseDouble(conf.getOrDefault("maxWidth", "0.01"));
+      double maxHeight = Double.parseDouble(conf.getOrDefault("maxHeight", "0.01"));
+      return new RandomBoxParameter(maxWidth, maxHeight);
+    }
+  }
+
+  /** Bounds for the size and the number of segments of generated polygons. */
+  public static class RandomPolygonParameter {
+    /** Upper bound of the polygon diameter; vertices are at most maxSize / 2 from the center */
+    public final double maxSize;
+
+    public final int minSegments;
+    public final int maxSegments;
+
+    /**
+     * @throws IllegalArgumentException if maxSize is negative, minSegments is less than 3, or
+     *     maxSegments is less than minSegments
+     */
+    public RandomPolygonParameter(double maxSize, int minSegments, int maxSegments) {
+      if (maxSize < 0) {
+        throw new IllegalArgumentException("maxSize must be non-negative");
+      }
+      if (minSegments < 3) {
+        throw new IllegalArgumentException("minSegments must be at least 3");
+      }
+      if (maxSegments < minSegments) {
+        throw new IllegalArgumentException("maxSegments must be at least minSegments");
+      }
+      this.maxSize = maxSize;
+      this.minSegments = minSegments;
+      this.maxSegments = maxSegments;
+    }
+
+    /**
+     * Reads the keys {@code maxSize} (default 0.01), {@code minSegments} (default 3) and {@code
+     * maxSegments} (default 3).
+     */
+    public static RandomPolygonParameter create(Map<String, String> conf) {
+      double maxSize = Double.parseDouble(conf.getOrDefault("maxSize", "0.01"));
+      int minSegments = Integer.parseInt(conf.getOrDefault("minSegments", "3"));
+      int maxSegments = Integer.parseInt(conf.getOrDefault("maxSegments", "3"));
+      return new RandomPolygonParameter(maxSize, minSegments, maxSegments);
+    }
+  }
+
+  private final Random random;
+  private final GeometryType geometryType;
+  private final RandomBoxParameter boxParameter;
+  private final RandomPolygonParameter polygonParameter;
+
+  public PointBasedGenerator(Random random, PointBasedParameter pointBasedParameter) {
+    this.random = random;
+    this.geometryType = pointBasedParameter.geometryType;
+    this.boxParameter = pointBasedParameter.boxParameter;
+    this.polygonParameter = pointBasedParameter.polygonParameter;
+  }
+
+  /** Generate a random value {0, 1} from a bernoulli distribution with parameter p */
+  protected int bernoulli(double p) {
+    return random.nextDouble() < p ? 1 : 0;
+  }
+
+  /** Generate a random value in the range [a, b) from a uniform distribution */
+  protected double uniform(double a, double b) {
+    return (b - a) * random.nextDouble() + a;
+  }
+
+  /**
+   * Generate a random number in the range (-inf, +inf) from a normal distribution, using the
+   * Box-Muller transform on two uniform samples.
+   */
+  protected double normal(double mu, double sigma) {
+    // nextDouble() may return exactly 0.0, and log(0) is -infinity, which would produce an
+    // infinite or NaN result. Redraw in that (extremely rare) case; all other sequences are
+    // unaffected.
+    double u1 = random.nextDouble();
+    while (u1 == 0.0) {
+      u1 = random.nextDouble();
+    }
+    double u2 = random.nextDouble();
+    return mu + sigma * Math.sqrt(-2 * Math.log(u1)) * Math.sin(2 * Math.PI * u2);
+  }
+
+  /** Generate a random integer in the range [1, n] */
+  protected int dice(int n) {
+    return dice(1, n);
+  }
+
+  /** Generate a random integer in the range [min, max], both ends inclusive */
+  protected int dice(int min, int max) {
+    return random.nextInt(max - min + 1) + min;
+  }
+
+  /** Generates the coordinate around which the next geometry is built. */
+  protected abstract Coordinate generateCoordinate();
+
+  /** Always returns true: a point-based generator is never exhausted. */
+  @Override
+  public boolean hasNext() {
+    return true;
+  }
+
+  @Override
+  public Geometry next() {
+    switch (geometryType) {
+      case POINT:
+        return generatePoint();
+      case POLYGON:
+        return generatePolygon();
+      case RECTANGLE:
+        return generateBox();
+      default:
+        throw new UnsupportedOperationException("Unsupported geometry type: " + geometryType);
+    }
+  }
+
+  /** Generates a point located at the generated coordinate. */
+  protected Point generatePoint() {
+    Coordinate coordinate = generateCoordinate();
+    return GEOMETRY_FACTORY.createPoint(coordinate);
+  }
+
+  /**
+   * Generates an axis-aligned rectangle centered at the generated coordinate. The width and height
+   * are drawn uniformly from [0, maxWidth) and [0, maxHeight).
+   */
+  protected Polygon generateBox() {
+    Coordinate coordinate = generateCoordinate();
+    double width = uniform(0, boxParameter.maxWidth);
+    double height = uniform(0, boxParameter.maxHeight);
+    double lowerLeftX = coordinate.x - width / 2;
+    double lowerLeftY = coordinate.y - height / 2;
+    return GEOMETRY_FACTORY.createPolygon(
+        new Coordinate[] {
+          new Coordinate(lowerLeftX, lowerLeftY),
+          new Coordinate(lowerLeftX + width, lowerLeftY),
+          new Coordinate(lowerLeftX + width, lowerLeftY + height),
+          new Coordinate(lowerLeftX, lowerLeftY + height),
+          new Coordinate(lowerLeftX, lowerLeftY)
+        });
+  }
+
+  /** Generates a random polygon around the generated coordinate. */
+  protected Polygon generatePolygon() {
+    Coordinate coordinate = generateCoordinate();
+
+    // Generate a polygon around the point as follows
+    // Picture a clock with one hand making a complete rotation of 2 PI
+    // The hand makes n stops to create n points on the polygon
+    // At each stop, we choose a point on the hand at random to create one point
+    // We connect all points to create the n line segments
+    // This way, we can generate an arbitrary polygon (convex or concave) that does not
+    // intersect itself.
+    int numSegments = dice(polygonParameter.minSegments, polygonParameter.maxSegments);
+
+    // The sorted angle stops of the hand
+    double[] angles = new double[numSegments];
+    for (int k = 0; k < numSegments; k++) {
+      angles[k] = uniform(0, Math.PI * 2);
+    }
+    Arrays.sort(angles);
+
+    // One extra slot for the closing point of the ring
+    Coordinate[] coordinates = new Coordinate[numSegments + 1];
+    for (int k = 0; k < numSegments; k++) {
+      double angle = angles[k];
+      double distance = uniform(0, polygonParameter.maxSize / 2);
+      double x = coordinate.x + distance * Math.cos(angle);
+      double y = coordinate.y + distance * Math.sin(angle);
+      coordinates[k] = new Coordinate(x, y);
+    }
+
+    // To ensure that the polygon is closed, we set the last point to be a copy of the first
+    coordinates[numSegments] = new Coordinate(coordinates[0]);
+    return GEOMETRY_FACTORY.createPolygon(coordinates);
+  }
+}
diff --git a/common/src/main/java/org/apache/sedona/common/spider/SierpinskiGenerator.java b/common/src/main/java/org/apache/sedona/common/spider/SierpinskiGenerator.java
new file mode 100644
index 0000000000..ccdb31f8cb
--- /dev/null
+++ b/common/src/main/java/org/apache/sedona/common/spider/SierpinskiGenerator.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.spider;
+
+import java.util.Random;
+import org.locationtech.jts.geom.Coordinate;
+
+/**
+ * Generates geometries whose reference points follow the Sierpinski triangle distribution.
+ *
+ * <p>The first three points are the triangle's vertices. Every later point is the midpoint
+ * between the previously generated point and a randomly chosen vertex.
+ */
+public class SierpinskiGenerator extends PointBasedGenerator {
+  // Vertices of an equilateral triangle with unit side length
+  private final Coordinate point1 = new Coordinate(0.0, 0.0);
+  private final Coordinate point2 = new Coordinate(1.0, 0.0);
+  private final Coordinate point3 = new Coordinate(0.5, Math.sqrt(3) / 2);
+
+  // Most recently generated point; the next midpoint is taken from here
+  private Coordinate prevPoint;
+
+  // Number of coordinates generated so far
+  private long iRecord = 0;
+
+  public SierpinskiGenerator(Random random, PointBasedParameter pointBasedParameter) {
+    super(random, pointBasedParameter);
+  }
+
+  @Override
+  protected Coordinate generateCoordinate() {
+    Coordinate current = initialVertex(iRecord);
+    if (current == null) {
+      current = middlePoint(prevPoint, chooseTargetVertex());
+    }
+
+    iRecord++;
+    prevPoint = current;
+    return current;
+  }
+
+  /** Returns the triangle vertex emitted for records 0, 1 and 2, or null for later records. */
+  private Coordinate initialVertex(long index) {
+    if (index == 0) {
+      return point1;
+    }
+    if (index == 1) {
+      return point2;
+    }
+    if (index == 2) {
+      return point3;
+    }
+    return null;
+  }
+
+  /**
+   * Rolls the die and maps the outcome to a vertex: 1-2 pick point1, 3-4 pick point2, anything
+   * higher picks point3.
+   */
+  private Coordinate chooseTargetVertex() {
+    // NOTE(review): assuming dice(5) yields 1..5, point3 is chosen half as often as the other
+    // two vertices - confirm this weighting is intended.
+    int roll = dice(5);
+    if (roll > 4) {
+      return point3;
+    }
+    return roll > 2 ? point2 : point1;
+  }
+
+  /** Returns a new coordinate halfway between the two given coordinates. */
+  private Coordinate middlePoint(Coordinate p1, Coordinate p2) {
+    return new Coordinate((p1.x + p2.x) / 2, (p1.y + p2.y) / 2);
+  }
+}
diff --git 
a/common/src/main/java/org/apache/sedona/common/spider/UniformGenerator.java 
b/common/src/main/java/org/apache/sedona/common/spider/UniformGenerator.java
new file mode 100644
index 0000000000..93be63bccf
--- /dev/null
+++ b/common/src/main/java/org/apache/sedona/common/spider/UniformGenerator.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.spider;
+
+import java.util.Random;
+import org.locationtech.jts.geom.Coordinate;
+
+/** Generates geometries that are distributed uniformly */
+public class UniformGenerator extends PointBasedGenerator {
+  public UniformGenerator(Random random, PointBasedParameter 
pointBasedParameter) {
+    super(random, pointBasedParameter);
+  }
+
+  @Override
+  protected Coordinate generateCoordinate() {
+    return new Coordinate(uniform(0, 1), uniform(0, 1));
+  }
+}
diff --git 
a/common/src/test/java/org/apache/sedona/common/spider/GeneratorTest.java 
b/common/src/test/java/org/apache/sedona/common/spider/GeneratorTest.java
new file mode 100644
index 0000000000..663d92cb01
--- /dev/null
+++ b/common/src/test/java/org/apache/sedona/common/spider/GeneratorTest.java
@@ -0,0 +1,425 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.spider;
+
+import static org.junit.Assert.*;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import org.junit.Test;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.geom.Point;
+import org.locationtech.jts.geom.Polygon;
+
+public class GeneratorTest {
+  private static final double DELTA = 1e-10;
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testInvalidGenerator() {
+    GeneratorFactory.create("invalid", new Random(), new HashMap<>());
+  }
+
+  @Test
+  public void testPointGeneration() {
+    // Every point-based distribution must emit valid points when geometryType=point
+    Map<String, String> options = new HashMap<>();
+    options.put("geometryType", "point");
+    Random rng = new Random(42);
+
+    String[] distributions = {"uniform", "gaussian", "bit", "diagonal", "sierpinski"};
+    for (String name : distributions) {
+      Generator source = GeneratorFactory.create(name, rng, options);
+
+      int remaining = 10;
+      while (remaining-- > 0) {
+        Geometry generated = source.next();
+        assertTrue("Point generation failed for " + name, generated instanceof Point);
+        Point pt = (Point) generated;
+        assertTrue("Invalid point generated by " + name, pt.isValid());
+
+        // The accepted range is deliberately wider than the unit square: coordinates only
+        // have to stay within [-0.5, 1.5]
+        double x = pt.getX();
+        double y = pt.getY();
+        assertTrue("X coordinate out of bounds for " + name, x >= -0.5 && x <= 1.5);
+        assertTrue("Y coordinate out of bounds for " + name, y >= -0.5 && y <= 1.5);
+      }
+    }
+  }
+
+  @Test
+  public void testPolygonGeneration() {
+    // Polygons with 4 to 6 segments and maxSize=0.1, for every point-based distribution
+    Map<String, String> options = new HashMap<>();
+    options.put("geometryType", "polygon");
+    options.put("maxSize", "0.1");
+    options.put("minSegments", "4");
+    options.put("maxSegments", "6");
+    Random rng = new Random(42);
+
+    String[] distributions = {"uniform", "gaussian", "bit", "diagonal", "sierpinski"};
+    for (String name : distributions) {
+      Generator source = GeneratorFactory.create(name, rng, options);
+
+      int remaining = 10;
+      while (remaining-- > 0) {
+        Geometry generated = source.next();
+        assertTrue("Polygon generation failed for " + name, generated instanceof Polygon);
+        Polygon polygon = (Polygon) generated;
+
+        // The ring repeats its first vertex, so n segments give n + 1 coordinates
+        int coordinateCount = polygon.getCoordinates().length;
+        assertTrue("Polygon has too few points for " + name, coordinateCount >= 5);
+        assertTrue("Polygon has too many points for " + name, coordinateCount <= 7);
+
+        // The envelope must stay inside the relaxed [-0.5, 1.5] window on both axes
+        boolean withinX =
+            polygon.getEnvelopeInternal().getMinX() >= -0.5
+                && polygon.getEnvelopeInternal().getMaxX() <= 1.5;
+        boolean withinY =
+            polygon.getEnvelopeInternal().getMinY() >= -0.5
+                && polygon.getEnvelopeInternal().getMaxY() <= 1.5;
+        assertTrue("Polygon out of bounds for " + name, withinX && withinY);
+
+        // 0.01 is the area of a maxSize x maxSize square
+        assertTrue("Polygon area exceeds maximum for " + name, polygon.getArea() <= 0.01);
+      }
+    }
+  }
+
+  @Test
+  public void testBoxGeneration() {
+    // Boxes limited to 0.1 x 0.1, for every point-based distribution
+    Map<String, String> options = new HashMap<>();
+    options.put("geometryType", "box");
+    options.put("maxWidth", "0.1");
+    options.put("maxHeight", "0.1");
+    Random rng = new Random(42);
+
+    String[] distributions = {"uniform", "gaussian", "bit", "diagonal", "sierpinski"};
+    for (String name : distributions) {
+      Generator source = GeneratorFactory.create(name, rng, options);
+
+      int remaining = 10;
+      while (remaining-- > 0) {
+        Geometry generated = source.next();
+        assertTrue("Box generation failed for " + name, generated instanceof Polygon);
+        Polygon box = (Polygon) generated;
+        assertTrue("Invalid box generated by " + name, box.isValid());
+        assertTrue("Non-rectangular box generated by " + name, box.isRectangle());
+
+        // Four corners plus the repeated closing corner
+        int coordinateCount = box.getCoordinates().length;
+        assertEquals("Incorrect number of points in box for " + name, 5, coordinateCount);
+
+        // The envelope must stay inside the relaxed [-0.5, 1.5] window on both axes
+        boolean withinX =
+            box.getEnvelopeInternal().getMinX() >= -0.5
+                && box.getEnvelopeInternal().getMaxX() <= 1.5;
+        boolean withinY =
+            box.getEnvelopeInternal().getMinY() >= -0.5
+                && box.getEnvelopeInternal().getMaxY() <= 1.5;
+        assertTrue("Box out of bounds for " + name, withinX && withinY);
+
+        // Neither side may exceed the configured maximum
+        double boxWidth = box.getEnvelopeInternal().getWidth();
+        double boxHeight = box.getEnvelopeInternal().getHeight();
+        assertTrue("Box width exceeds maximum for " + name, boxWidth <= 0.1);
+        assertTrue("Box height exceeds maximum for " + name, boxHeight <= 0.1);
+      }
+    }
+  }
+
+  @Test
+  public void testUniformGenerator() {
+    Map<String, String> params = new HashMap<>();
+    params.put("geometryType", "point");
+    Random random = new Random(42); // Fixed seed for reproducibility
+
+    Generator generator = GeneratorFactory.create("uniform", random, params);
+    assertTrue(generator.hasNext());
+
+    // Generate and verify points are within [0,1] range
+    for (int i = 0; i < 100; i++) {
+      Point point = (Point) generator.next();
+      assertTrue(point.getX() >= 0 && point.getX() <= 1);
+      assertTrue(point.getY() >= 0 && point.getY() <= 1);
+    }
+  }
+
+  @Test
+  public void testGaussianGenerator() {
+    Map<String, String> options = new HashMap<>();
+    options.put("geometryType", "point");
+    Random rng = new Random(42);
+
+    Generator gaussian = GeneratorFactory.create("gaussian", rng, options);
+
+    // Accumulate the coordinates of 1000 samples to estimate the mean position
+    final int sampleSize = 1000;
+    double totalX = 0;
+    double totalY = 0;
+    for (int drawn = 0; drawn < sampleSize; drawn++) {
+      Point pt = (Point) gaussian.next();
+      totalX += pt.getX();
+      totalY += pt.getY();
+    }
+
+    // The sample mean should sit within 0.1 of (0.5, 0.5)
+    double meanX = totalX / sampleSize;
+    double meanY = totalY / sampleSize;
+    assertEquals(0.5, meanX, 0.1);
+    assertEquals(0.5, meanY, 0.1);
+  }
+
+  @Test
+  public void testSierpinskiGeneratorBasic() {
+    Map<String, String> params = new HashMap<>();
+    params.put("geometryType", "point");
+    Random random = new Random(42);
+
+    Generator generator = GeneratorFactory.create("sierpinski", random, 
params);
+
+    // First three points should be the triangle vertices
+    Point p1 = (Point) generator.next();
+    Point p2 = (Point) generator.next();
+    Point p3 = (Point) generator.next();
+
+    // Verify first point is at (0,0)
+    assertEquals(0.0, p1.getX(), DELTA);
+    assertEquals(0.0, p1.getY(), DELTA);
+
+    // Verify second point is at (1,0)
+    assertEquals(1.0, p2.getX(), DELTA);
+    assertEquals(0.0, p2.getY(), DELTA);
+
+    // Verify third point is at (0.5, sqrt(3)/2)
+    assertEquals(0.5, p3.getX(), DELTA);
+    assertEquals(Math.sqrt(3) / 2, p3.getY(), DELTA);
+  }
+
+  @Test
+  public void testSierpinskiGenerator() {
+    Map<String, String> params = new HashMap<>();
+    params.put("geometryType", "point");
+    Random random = new Random(42);
+
+    Generator generator = GeneratorFactory.create("sierpinski", random, 
params);
+
+    // Generate 1000 points and verify they're within the triangle bounds
+    for (int i = 0; i < 1000; i++) {
+      Point point = (Point) generator.next();
+
+      // All points should be within the bounds of the initial triangle
+      assertTrue(point.getX() >= 0 && point.getX() <= 1);
+      assertTrue(point.getY() >= 0 && point.getY() <= Math.sqrt(3) / 2);
+    }
+  }
+
+  @Test
+  public void testBitGenerator() {
+    Map<String, String> params = new HashMap<>();
+    params.put("geometryType", "point");
+    params.put("probability", "0.5");
+    params.put("digits", "5");
+    Random random = new Random(42);
+
+    Generator generator = GeneratorFactory.create("bit", random, params);
+
+    // Generate points and verify they're within [0,1] range
+    for (int i = 0; i < 100; i++) {
+      Point point = (Point) generator.next();
+      assertTrue(point.getX() >= 0 && point.getX() <= 1);
+      assertTrue(point.getY() >= 0 && point.getY() <= 1);
+    }
+  }
+
+  @Test
+  public void testDiagonalGenerator() {
+    Map<String, String> params = new HashMap<>();
+    params.put("geometryType", "point");
+    params.put("percentage", "1.0"); // Force all points to be on diagonal
+    params.put("buffer", "0.1");
+    Random random = new Random(42);
+
+    Generator generator = GeneratorFactory.create("diagonal", random, params);
+
+    // With percentage=1.0, all points should be exactly on the diagonal
+    for (int i = 0; i < 100; i++) {
+      Point point = (Point) generator.next();
+      assertEquals(point.getX(), point.getY(), DELTA);
+      assertTrue(point.getX() >= 0 && point.getX() <= 1);
+    }
+  }
+
+  @Test
+  public void testDiagonalGeneratorWithBuffer() {
+    Map<String, String> options = new HashMap<>();
+    options.put("geometryType", "point");
+    options.put("percentage", "0.5"); // Roughly half of the points land on the diagonal
+    options.put("buffer", "0.1");
+    Random rng = new Random(42);
+
+    Generator diagonal = GeneratorFactory.create("diagonal", rng, options);
+
+    // Count the samples that sit on the diagonal (x == y up to DELTA)
+    int hits = 0;
+    int remaining = 100;
+    while (remaining-- > 0) {
+      Point pt = (Point) diagonal.next();
+      double x = pt.getX();
+      double offDiagonal = Math.abs(x - pt.getY());
+      if (offDiagonal < DELTA) {
+        hits++;
+      }
+      assertTrue(x >= 0 && x <= 1);
+    }
+
+    // With percentage=0.5 about 50 of the 100 samples are expected, give or take 10
+    assertEquals(50, hits, 10);
+  }
+
+  @Test
+  public void testParcelGeneratorBasic() {
+    // Without dither, the parcel generator must emit exactly `cardinality` valid rectangles
+    // inside the unit square, and no two of them may overlap.
+    Map<String, String> params = new HashMap<>();
+    params.put("geometryType", "box");
+    params.put("dither", "0.0");
+    params.put("splitRange", "0.5");
+    Random random = new Random(42);
+
+    // Covers the empty case, a single box, a non-power-of-two count and larger counts
+    int[] cardinalities = {0, 1, 3, 10, 100};
+
+    for (int cardinality : cardinalities) {
+      params.put("cardinality", Integer.toString(cardinality));
+      Generator generator = GeneratorFactory.create("parcel", random, params);
+
+      // Drain the generator so the number of emitted boxes can be checked
+      List<Geometry> boxes = new ArrayList<>();
+      while (generator.hasNext()) {
+        boxes.add(generator.next());
+      }
+
+      // Verify cardinality
+      assertEquals(cardinality, boxes.size());
+
+      // Verify all boxes are valid rectangles within the unit square
+      for (Geometry box : boxes) {
+        assertTrue(box.isValid());
+        assertTrue(box.isRectangle());
+        assertTrue(box.getEnvelopeInternal().getMinX() >= 0);
+        assertTrue(box.getEnvelopeInternal().getMaxX() <= 1);
+        assertTrue(box.getEnvelopeInternal().getMinY() >= 0);
+        assertTrue(box.getEnvelopeInternal().getMaxY() <= 1);
+      }
+
+      // Verify that no pair of boxes overlaps (each unordered pair is checked once)
+      for (int i = 0; i < boxes.size(); i++) {
+        for (int j = i + 1; j < boxes.size(); j++) {
+          assertFalse(boxes.get(i).overlaps(boxes.get(j)));
+        }
+      }
+    }
+  }
+
+  @Test
+  public void testParcelGeneratorDither() {
+    Map<String, String> options = new HashMap<>();
+    options.put("geometryType", "box");
+    options.put("cardinality", "10");
+    options.put("dither", "0.5"); // Strong dithering
+    options.put("splitRange", "0.5");
+    Random rng = new Random(42);
+
+    Generator parcel = GeneratorFactory.create("parcel", rng, options);
+
+    // Sum the area of every generated box
+    double coveredArea = 0;
+    while (parcel.hasNext()) {
+      coveredArea += parcel.next().getArea();
+    }
+
+    // The boxes are expected to no longer fill the whole unit square once dithered
+    boolean shrunk = coveredArea < 1.0;
+    assertTrue("Total area with dither should be less than 1.0", shrunk);
+  }
+
+  @Test
+  public void testParcelGeneratorSplitRange() {
+    // splitRange=0.0 is the minimum (more varied box sizes); splitRange=0.5 is the maximum
+    // (more uniform box sizes). Both runs use their own Random seeded with 42.
+    List<Double> variedAreas = parcelAreasForSplitRange("0.0");
+    List<Double> uniformAreas = parcelAreasForSplitRange("0.5");
+
+    double variedVariance = calculateVariance(variedAreas);
+    double uniformVariance = calculateVariance(uniformAreas);
+
+    // The spread of box areas must be larger when splits are unconstrained
+    assertTrue(
+        "Variance with splitRange=0.0 should be higher", variedVariance > uniformVariance);
+  }
+
+  /** Generates 20 undithered parcel boxes with the given splitRange and returns their areas. */
+  private List<Double> parcelAreasForSplitRange(String splitRange) {
+    Map<String, String> options = new HashMap<>();
+    options.put("geometryType", "box");
+    options.put("cardinality", "20");
+    options.put("dither", "0.0");
+    options.put("splitRange", splitRange);
+    Random rng = new Random(42);
+
+    Generator parcel = GeneratorFactory.create("parcel", rng, options);
+    List<Double> areas = new ArrayList<>();
+    while (parcel.hasNext()) {
+      areas.add(parcel.next().getArea());
+    }
+    return areas;
+  }
+
+  @Test
+  public void testParcelGeneratorLargeCardinality() {
+    Map<String, String> options = new HashMap<>();
+    options.put("geometryType", "box");
+    options.put("cardinality", "1000");
+    options.put("dither", "0.0");
+    options.put("splitRange", "0.5");
+    Random rng = new Random(42);
+
+    Generator parcel = GeneratorFactory.create("parcel", rng, options);
+
+    // Drain the generator, counting boxes and summing their areas
+    int produced = 0;
+    double coveredArea = 0;
+    while (parcel.hasNext()) {
+      coveredArea += parcel.next().getArea();
+      produced++;
+    }
+
+    assertEquals(1000, produced);
+    // With no dither the boxes must cover the unit square (area 1.0)
+    assertEquals(1.0, coveredArea, DELTA);
+  }
+
+  // Helper method to calculate variance
+  private double calculateVariance(List<Double> values) {
+    double mean = 
values.stream().mapToDouble(Double::doubleValue).average().orElse(0.0);
+    return values.stream().mapToDouble(v -> Math.pow(v - mean, 
2)).average().orElse(0.0);
+  }
+}
diff --git a/docs/api/sql/Spider.md b/docs/api/sql/Spider.md
new file mode 100644
index 0000000000..0b313ff6fb
--- /dev/null
+++ b/docs/api/sql/Spider.md
@@ -0,0 +1,271 @@
+Sedona offers a spatial data generator called Spider. It is a data source that 
generates random spatial data based on the user-specified parameters.
+
+## Quick Start
+
+Once you have your [`SedonaContext` object created](../Overview#quick-start), 
you can create a DataFrame with the `spider` data source.
+
+```python
+df_random_points = sedona.read.format("spider").load(n=1000, 
distribution='uniform')
+df_random_boxes = sedona.read.format("spider").load(n=1000, 
distribution='gaussian', geometryType='box', maxWidth=0.05, maxHeight=0.05)
+df_random_polygons = sedona.read.format("spider").load(n=1000, 
distribution='bit', geometryType='polygon', minSegment=3, maxSegment=5, 
maxSize=0.1)
+```
+
+Now we have three DataFrames with random spatial data. We can show the first 
three rows of the `df_random_points` DataFrame to verify the data is generated 
correctly.
+
+```python
+df_random_points.show(3, False)
+```
+
+Output:
+
+```
++---+---------------------------------------------+
+|id |geometry                                     |
++---+---------------------------------------------+
+|1  |POINT (0.8781393502074886 0.5925787985028703)|
+|2  |POINT (0.3159498147172185 0.1907316577342276)|
+|3  |POINT (0.2618294441170143 0.3623164670133922)|
++---+---------------------------------------------+
+only showing top 3 rows
+```
+
+The generated DataFrame has two columns: `id` and `geometry`. The `id` column 
is the unique identifier of each record, and the `geometry` column is the 
randomly generated spatial data.
+
+We can plot all 3 DataFrames using the following code.
+
+```python
+import matplotlib.pyplot as plt
+import geopandas as gpd
+
+# Convert DataFrames to GeoDataFrames
+gdf_random_points = gpd.GeoDataFrame(df_random_points.toPandas(), 
geometry='geometry')
+gdf_random_boxes = gpd.GeoDataFrame(df_random_boxes.toPandas(), 
geometry='geometry')
+gdf_random_polygons = gpd.GeoDataFrame(df_random_polygons.toPandas(), 
geometry='geometry')
+
+# Create a figure and a set of subplots
+fig, axes = plt.subplots(1, 3, figsize=(15, 5))
+
+# Plot each GeoDataFrame on a different subplot
+gdf_random_points.plot(ax=axes[0], color='blue', markersize=5)
+axes[0].set_title('Random Points')
+
+gdf_random_boxes.boundary.plot(ax=axes[1], color='red')
+axes[1].set_title('Random Boxes')
+
+gdf_random_polygons.boundary.plot(ax=axes[2], color='green')
+axes[2].set_title('Random Polygons')
+
+# Adjust the layout
+plt.tight_layout()
+
+# Show the plot
+plt.show()
+```
+
+Output:
+
+![Random Spatial Data](../../image/spider/spider-quickstart.png)
+
+You can browse the [SpiderWeb](https://spider.cs.ucr.edu/) website to play 
with the parameters and see how they affect the generated data. Once you are 
satisfied with the parameters, you can use them in your Spider DataFrame 
creation code. The following sections will explain the parameters in detail.
+
+## Common Parameters
+
+The following parameters are common to all distributions.
+
+| Parameter | Description | Default Value |
+| --------- | ----------- | ------------- |
+| n         | Number of records to generate | 100 |
+| distribution | Distribution type. See [Distributions](#distributions) for 
details. | `uniform` |
+| numPartitions | Number of partitions to generate | The default parallelism 
of your Spark Context |
+| seed | Random seed | Current timestamp in milliseconds |
+
+!!! warning
+    The same `seed` parameter may produce different results with different 
Java versions or Sedona versions.
+
+## Distributions
+
+Spider supports generating random points, boxes and polygons under various 
distributions. You can explore the capabilities of Spider by visiting the 
[SpiderWeb](https://spider.cs.ucr.edu/) website. You can specify the 
distribution type using the `distribution` parameter. The parameters for each 
distribution are listed below.
+
+### Uniform Distribution
+
+The uniform distribution generates random geometries in the unit square `[0, 
1] x [0, 1]`. This distribution can be selected by setting the `distribution` 
parameter to `uniform`.
+
+| Parameter | Description | Default Value |
+| --------- | ----------- | ------------- |
+| geometryType | Geometry type, either `point`, `box` or `polygon` | `point` |
+| maxWidth | Maximum width of the generated boxes | 0.01 |
+| maxHeight | Maximum height of the generated boxes | 0.01 |
+| minSegment | Minimum number of segments of the generated polygons | 3 |
+| maxSegment | Maximum number of segments of the generated polygons | 3 |
+| maxSize | Maximum size of the generated polygons | 0.01 |
+
+Example:
+
+```python
+import geopandas as gpd
+df = sedona.read.format("spider").load(n=300, distribution='uniform', 
geometryType='box', maxWidth=0.05, maxHeight=0.05)
+gpd.GeoDataFrame(df.toPandas(), geometry='geometry').boundary.plot()
+```
+
+![Uniform Distribution](../../image/spider/spider-uniform.png)
+
+### Gaussian Distribution
+
+The Gaussian distribution generates random geometries in a Gaussian 
distribution with mean `[0.5, 0.5]` and standard deviation `[0.1, 0.1]`. This 
distribution can be selected by setting the `distribution` parameter to 
`gaussian`.
+
+| Parameter | Description | Default Value |
+| --------- | ----------- | ------------- |
+| geometryType | Geometry type, either `point`, `box` or `polygon` | `point` |
+| maxWidth | Maximum width of the generated boxes | 0.01 |
+| maxHeight | Maximum height of the generated boxes | 0.01 |
+| minSegment | Minimum number of segments of the generated polygons | 3 |
+| maxSegment | Maximum number of segments of the generated polygons | 3 |
+| maxSize | Maximum size of the generated polygons | 0.01 |
+
+Example:
+
+```python
+import geopandas as gpd
+df = sedona.read.format("spider").load(n=300, distribution='gaussian', 
geometryType='polygon', maxSize=0.05)
+gpd.GeoDataFrame(df.toPandas(), geometry='geometry').boundary.plot()
+```
+
+![Gaussian Distribution](../../image/spider/spider-gaussian.png)
+
+### Bit Distribution
+
+The bit distribution generates coordinates bit by bit: each coordinate is a 
binary fraction of `digits` bits, and each bit is set with the given 
`probability`. This distribution can be selected by setting the `distribution` 
parameter to `bit`.
+
+| Parameter | Description | Default Value |
+| --------- | ----------- | ------------- |
+| geometryType | Geometry type, either `point`, `box` or `polygon` | `point` |
+| probability | Probability of setting a bit | 0.2 |
+| digits | Number of digits in the generated data | 10 |
+| maxWidth | Maximum width of the generated boxes | 0.01 |
+| maxHeight | Maximum height of the generated boxes | 0.01 |
+| minSegment | Minimum number of segments of the generated polygons | 3 |
+| maxSegment | Maximum number of segments of the generated polygons | 3 |
+| maxSize | Maximum size of the generated polygons | 0.01 |
+
+Example:
+
+```python
+import geopandas as gpd
+df = sedona.read.format("spider").load(n=300, distribution='bit', 
geometryType='point', probability=0.2, digits=10)
+gpd.GeoDataFrame(df.toPandas(), geometry='geometry').plot(markersize=1)
+```
+
+![Bit Distribution](../../image/spider/spider-bit.png)
+
+### Diagonal Distribution
+
+The diagonal distribution generates random geometries on the diagonal line `y 
= x` with some dispersion for geometries that are not exactly on the diagonal. 
This distribution can be selected by setting the `distribution` parameter to 
`diagonal`.
+
+| Parameter | Description | Default Value |
+| --------- | ----------- | ------------- |
+| geometryType | Geometry type, either `point`, `box` or `polygon` | `point` |
+| percentage | The percentage of records that are perfectly on the diagonal | 
0.5 |
+| buffer | For points not exactly on the diagonal, the buffer in which they 
are dispersed | 0.5 |
+| maxWidth | Maximum width of the generated boxes | 0.01 |
+| maxHeight | Maximum height of the generated boxes | 0.01 |
+| minSegment | Minimum number of segments of the generated polygons | 3 |
+| maxSegment | Maximum number of segments of the generated polygons | 3 |
+| maxSize | Maximum size of the generated polygons | 0.01 |
+
+Example:
+
+```python
+import geopandas as gpd
+df = sedona.read.format("spider").load(n=300, distribution='diagonal', 
geometryType='point', percentage=0.5, buffer=0.5)
+gpd.GeoDataFrame(df.toPandas(), geometry='geometry').plot(markersize=1)
+```
+
+![Diagonal Distribution](../../image/spider/spider-diagonal.png)
+
+### Sierpinski Distribution
+
+The Sierpinski distribution generates random geometries distributed on a 
Sierpinski triangle. This distribution can be selected by setting the 
`distribution` parameter to `sierpinski`.
+
+| Parameter | Description | Default Value |
+| --------- | ----------- | ------------- |
+| geometryType | Geometry type, either `point`, `box` or `polygon` | `point` |
+| maxWidth | Maximum width of the generated boxes | 0.01 |
+| maxHeight | Maximum height of the generated boxes | 0.01 |
+| minSegment | Minimum number of segments of the generated polygons | 3 |
+| maxSegment | Maximum number of segments of the generated polygons | 3 |
+| maxSize | Maximum size of the generated polygons | 0.01 |
+
+Example:
+
+```python
+import geopandas as gpd
+df = sedona.read.format("spider").load(n=2000, distribution='sierpinski', 
geometryType='point')
+gpd.GeoDataFrame(df.toPandas(), geometry='geometry').plot(markersize=1)
+```
+
+![Sierpinski Distribution](../../image/spider/spider-sierpinski.png)
+
+### Parcel Distribution
+
+This generator produces boxes that resemble parcel areas. It works by 
recursively splitting the input domain (unit square) along the longest 
dimension and then randomly dithering each generated box to add some 
randomness. This generator can only generate boxes. This distribution can be 
selected by setting the `distribution` parameter to `parcel`.
+
+| Parameter | Description | Default Value |
+| --------- | ----------- | ------------- |
+| dither | The amount of dithering as a ratio of the side length. Allowed 
range [0, 1] | 0.5 |
+| splitRange | The allowed range for splitting boxes. Allowed range is [0.0, 0.5]. 
0.0 means all values are allowed. 0.5 means always split in half. | 0.5 |
+
+Example:
+
+```python
+import geopandas as gpd
+df = sedona.read.format("spider").load(n=300, distribution='parcel', 
dither=0.5, splitRange=0.5)
+gpd.GeoDataFrame(df.toPandas(), geometry='geometry').boundary.plot()
+```
+
+![Parcel Distribution](../../image/spider/spider-parcel.png)
+
+!!!note
+    The number of partitions generated by the `parcel` distribution is always 
a power of 4. This guarantees the quality of the generated data. If the 
specified `numPartitions` is not a power of 4, it will be automatically 
adjusted to the largest power of 4 that is less than or equal to the specified value.
+
+## Affine Transformation
+
+The random spatial data generated by Spider are mostly in the unit square `[0, 
1] x [0, 1]`. If you need to generate random spatial data in a different 
region, you can specify affine transformation parameters to scale and translate 
the data to the target region.
+
+The following code demonstrates how to generate random spatial data in a 
different region using affine transformation.
+
+The affine transformation parameters are:
+
+| Parameter | Description | Default Value |
+| --------- | ----------- | ------------- |
+| translateX | Translate the data horizontally | 0 |
+| translateY | Translate the data vertically | 0 |
+| scaleX | Scale the data horizontally | 1 |
+| scaleY | Scale the data vertically | 1 |
+| skewX | Skew the data horizontally | 0 |
+| skewY | Skew the data vertically | 0 |
+
+The affine transformation is applied to the generated data as follows:
+
+```
+x' = translateX + scaleX * x + skewX * y
+y' = translateY + skewY * x + scaleY * y
+```
+
+Example:
+
+```python
+import geopandas as gpd
+df_random_points = sedona.read.format("spider").load(n=1000, 
distribution='uniform', translateX=0.5, translateY=0.5, scaleX=2, scaleY=2)
+gpd.GeoDataFrame(df_random_points.toPandas(), 
geometry='geometry').plot(markersize=1)
+```
+
+The data is now in the region `[0.5, 2.5] x [0.5, 2.5]`.
+
+![Affine Transformation](../../image/spider/spider-uniform-affine.png)
+
+## References
+
+- Puloma Katiyar, Tin Vu, Sara Migliorini, Alberto Belussi, Ahmed Eldawy. 
"SpiderWeb: A Spatial Data Generator on the Web", ACM SIGSPATIAL 2020, Seattle, 
WA
+- Beast Spatial Data Generator: 
https://bitbucket.org/bdlabucr/beast/src/master/doc/spatial-data-generator.md
+- SpiderWeb: A Spatial Data Generator on the Web: https://spider.cs.ucr.edu/
+- SpiderWeb YouTube Video: https://www.youtube.com/watch?v=h0xCG6Swdqw
diff --git a/docs/image/spider/spider-bit.png b/docs/image/spider/spider-bit.png
new file mode 100644
index 0000000000..e0ea299010
Binary files /dev/null and b/docs/image/spider/spider-bit.png differ
diff --git a/docs/image/spider/spider-diagonal.png 
b/docs/image/spider/spider-diagonal.png
new file mode 100644
index 0000000000..719f10fc93
Binary files /dev/null and b/docs/image/spider/spider-diagonal.png differ
diff --git a/docs/image/spider/spider-gaussian.png 
b/docs/image/spider/spider-gaussian.png
new file mode 100644
index 0000000000..7ff0a09bf1
Binary files /dev/null and b/docs/image/spider/spider-gaussian.png differ
diff --git a/docs/image/spider/spider-parcel.png 
b/docs/image/spider/spider-parcel.png
new file mode 100644
index 0000000000..a1e9da87d7
Binary files /dev/null and b/docs/image/spider/spider-parcel.png differ
diff --git a/docs/image/spider/spider-quickstart.png 
b/docs/image/spider/spider-quickstart.png
new file mode 100644
index 0000000000..3d6f0ca274
Binary files /dev/null and b/docs/image/spider/spider-quickstart.png differ
diff --git a/docs/image/spider/spider-sierpinski.png 
b/docs/image/spider/spider-sierpinski.png
new file mode 100644
index 0000000000..75606e76a4
Binary files /dev/null and b/docs/image/spider/spider-sierpinski.png differ
diff --git a/docs/image/spider/spider-uniform-affine.png 
b/docs/image/spider/spider-uniform-affine.png
new file mode 100644
index 0000000000..5f0a5fcb47
Binary files /dev/null and b/docs/image/spider/spider-uniform-affine.png differ
diff --git a/docs/image/spider/spider-uniform.png 
b/docs/image/spider/spider-uniform.png
new file mode 100644
index 0000000000..491ecd40fd
Binary files /dev/null and b/docs/image/spider/spider-uniform.png differ
diff --git a/mkdocs.yml b/mkdocs.yml
index 31928560b2..d19c5ba6e4 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -86,6 +86,7 @@ nav:
                   - DataFrame Style functions: api/sql/DataFrameAPI.md
                   - Query optimization: api/sql/Optimizer.md
                   - Nearest-Neighbour searching: 
api/sql/NearestNeighbourSearching.md
+                  - "Spider:Spatial Data Generator": api/sql/Spider.md
                   - Reading Legacy Parquet Files: 
api/sql/Reading-legacy-parquet.md
                   - Visualization:
                       - SedonaPyDeck: api/sql/Visualization_SedonaPyDeck.md
diff --git a/python/tests/sql/test_spider.py b/python/tests/sql/test_spider.py
new file mode 100644
index 0000000000..21018ac0c2
--- /dev/null
+++ b/python/tests/sql/test_spider.py
@@ -0,0 +1,73 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+
+import pytest
+from tests.test_base import TestBase
+
+
+class TestSpider(TestBase):
+    """Tests for reading generated data through the "spider" data source."""
+
+    def test_spider_uniform(self):
+        """Read 1000 uniformly distributed boxes and check count, bounds and sizes.
+
+        The reader options set maxWidth=0.03 and maxHeight=0.02 together with a
+        translation of 0.5 and a scale of 2 on both axes.
+        """
+        df = self.spark.read.format("spider").load(
+            n=1000,
+            distribution="uniform",
+            geometryType="box",
+            maxWidth=0.03,
+            maxHeight=0.02,
+            translateX=0.5,
+            translateY=0.5,
+            scaleX=2,
+            scaleY=2,
+        )
+        assert df.count() == 1000
+
+        # Convert geometries to coordinates for bound checking
+        # NOTE: collect()[0] keeps only the first row, so the bound assertions
+        # below inspect a single geometry rather than the whole dataset.
+        bounds_df = df.selectExpr(
+            "ST_XMin(geometry) as min_x",
+            "ST_XMax(geometry) as max_x",
+            "ST_YMin(geometry) as min_y",
+            "ST_YMax(geometry) as max_y",
+        ).collect()[0]
+
+        # Check geometry bounds (should be within [0.5, 2.5] * [0.5, 2.5] with 
some tolerance)
+        assert bounds_df.min_x >= 0.4
+        assert bounds_df.max_x <= 2.6
+        assert bounds_df.min_y >= 0.4
+        assert bounds_df.max_y <= 2.6
+
+        # Check box dimensions
+        # (the 0.06 / 0.04 limits equal maxWidth / maxHeight times the scale of 2)
+        dimensions_df = df.selectExpr(
+            "ST_XMax(geometry) - ST_XMin(geometry) as width",
+            "ST_YMax(geometry) - ST_YMin(geometry) as height",
+        ).collect()
+
+        for row in dimensions_df:
+            assert row.width <= 0.06, f"Box width {row.width} exceeds maximum 
0.06"
+            assert row.height <= 0.04, f"Box height {row.height} exceeds 
maximum 0.04"
+
+        # Check mean width and height
+        # (expected means 0.03 / 0.02 are presumably half of maxWidth / maxHeight
+        # times the scale of 2 -- verify against the generator implementation)
+        mean_width = (
+            df.selectExpr("AVG(ST_XMax(geometry) - ST_XMin(geometry)) as 
mean_width")
+            .collect()[0]
+            .mean_width
+        )
+        mean_height = (
+            df.selectExpr("AVG(ST_YMax(geometry) - ST_YMin(geometry)) as 
mean_height")
+            .collect()[0]
+            .mean_height
+        )
+        assert abs(mean_width - 0.03) < 0.005
+        assert abs(mean_height - 0.02) < 0.005
diff --git 
a/spark/common/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
 
b/spark/common/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
index a71664cc3c..b664d1db50 100644
--- 
a/spark/common/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
+++ 
b/spark/common/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
@@ -1,2 +1,3 @@
 org.apache.spark.sql.sedona_sql.io.raster.RasterFileFormat
 org.apache.spark.sql.sedona_sql.io.geojson.GeoJSONFileFormat
+org.apache.sedona.sql.datasources.spider.SpiderDataSource
diff --git 
a/spark/common/src/main/scala/org/apache/sedona/sql/datasources/spider/AffineTransform.scala
 
b/spark/common/src/main/scala/org/apache/sedona/sql/datasources/spider/AffineTransform.scala
new file mode 100644
index 0000000000..483ccbff36
--- /dev/null
+++ 
b/spark/common/src/main/scala/org/apache/sedona/sql/datasources/spider/AffineTransform.scala
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.sql.datasources.spider
+
+import org.locationtech.jts.geom.util.AffineTransformation
+import org.locationtech.jts.geom.Envelope
+
+/**
+ * A 2D affine transformation described by its six free matrix coefficients.
+ *
+ * The coefficients form the following matrix:
+ * {{{
+ * | scaleX  skewX   translateX |
+ * | skewY   scaleY  translateY |
+ * | 0       0       1          |
+ * }}}
+ */
+case class AffineTransform(
+    translateX: Double,
+    translateY: Double,
+    scaleX: Double,
+    scaleY: Double,
+    skewX: Double,
+    skewY: Double) {
+
+  /**
+   * Converts this transform to a JTS [[AffineTransformation]].
+   *
+   * @return
+   *   `None` when this transform is the identity, otherwise the equivalent JTS transformation
+   */
+  def toJTS: Option[AffineTransformation] =
+    if (isIdentity) None
+    else {
+      // Coefficients are passed row by row: m00, m01, m02, m10, m11, m12
+      Some(new AffineTransformation(scaleX, skewX, translateX, skewY, scaleY, translateY))
+    }
+
+  /** True when the coefficients describe the identity matrix. */
+  private def isIdentity: Boolean = {
+    val noTranslation = translateX == 0 && translateY == 0
+    val unitScale = scaleX == 1 && scaleY == 1
+    val noSkew = skewX == 0 && skewY == 0
+    noTranslation && unitScale && noSkew
+  }
+
+  /**
+   * Composes this transform with the transform A that maps the unit box [0, 1] x [0, 1] onto
+   * `box`. A is applied first and this transform afterwards, so the result is `this * A`.
+   *
+   * @param box
+   *   the target box of the unit square under A
+   * @return
+   *   the combined transform `this * A`
+   */
+  def transform(box: Envelope): AffineTransform = {
+    // A scales by the box extent and translates to the box's lower-left corner
+    val boxWidth = box.getWidth
+    val boxHeight = box.getHeight
+    val originX = box.getMinX
+    val originY = box.getMinY
+
+    // Linear part of this * A: every column of this matrix is scaled by A's diagonal entry
+    val composedScaleX = this.scaleX * boxWidth
+    val composedSkewX = this.skewX * boxHeight
+    val composedSkewY = this.skewY * boxWidth
+    val composedScaleY = this.scaleY * boxHeight
+
+    // Translation part of this * A: A's translation mapped through this, plus this translation
+    val composedTranslateX = this.scaleX * originX + this.skewX * originY + this.translateX
+    val composedTranslateY = this.skewY * originX + this.scaleY * originY + this.translateY
+
+    AffineTransform(
+      composedTranslateX,
+      composedTranslateY,
+      composedScaleX,
+      composedScaleY,
+      composedSkewX,
+      composedSkewY)
+  }
+}
diff --git 
a/spark/common/src/main/scala/org/apache/sedona/sql/datasources/spider/SpiderDataSource.scala
 
b/spark/common/src/main/scala/org/apache/sedona/sql/datasources/spider/SpiderDataSource.scala
new file mode 100644
index 0000000000..4cbf9efa10
--- /dev/null
+++ 
b/spark/common/src/main/scala/org/apache/sedona/sql/datasources/spider/SpiderDataSource.scala
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.sql.datasources.spider
+
+import org.apache.spark.sql.connector.catalog.{Table, TableProvider}
+import org.apache.spark.sql.connector.expressions.Transform
+import org.apache.spark.sql.sources.DataSourceRegister
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
+import org.apache.spark.sql.SparkSession
+
+import java.util
+
+/**
+ * DataSource for generating random geometric data. The idea and algorithms of this data source
+ * come from the following publication: Puloma Katiyar, Tin Vu, Sara Migliorini, Alberto
+ * Belussi, Ahmed Eldawy. "SpiderWeb: A Spatial Data Generator on the Web", ACM SIGSPATIAL 2020,
+ * Seattle, WA
+ *
+ * Options read by this class (keys are case-insensitive): `cardinality` or `n` (number of rows,
+ * default 100), `numPartitions`, `seed`, `distribution` (default "uniform") and the affine
+ * transform coefficients `translateX`, `translateY`, `scaleX`, `scaleY`, `skewX`, `skewY`. The
+ * full option map is also handed to [[SpiderTable]] unchanged.
+ */
+class SpiderDataSource extends TableProvider with DataSourceRegister {
+
+  /** Name used in `spark.read.format(...)` to select this data source. */
+  override def shortName(): String = "spider"
+
+  /** The schema is fixed and does not depend on the options. */
+  override def inferSchema(options: CaseInsensitiveStringMap): StructType = {
+    SpiderTable.SCHEMA
+  }
+
+  /**
+   * Builds the [[SpiderTable]] described by the reader options.
+   *
+   * @param schema
+   *   ignored; the table always uses [[SpiderTable.SCHEMA]]
+   * @param partitioning
+   *   ignored
+   * @param properties
+   *   the reader options
+   */
+  override def getTable(
+      schema: StructType,
+      partitioning: Array[Transform],
+      properties: util.Map[String, String]): Table = {
+    val opts = new CaseInsensitiveStringMap(properties)
+    // A missing, zero or negative numPartitions falls back to the default parallelism. Zero
+    // must not be passed through: it would plan no partitions at all and silently yield an
+    // empty dataset regardless of the requested number of rows.
+    val requestedPartitions = opts.getInt("numPartitions", -1)
+    val numPartitions =
+      if (requestedPartitions > 0) requestedPartitions
+      else SparkSession.active.sparkContext.defaultParallelism
+    new SpiderTable(
+      // "cardinality" takes precedence over its short alias "n"
+      numRows = opts.getOrDefault("cardinality", opts.getOrDefault("n", "100")).toLong,
+      numPartitions = numPartitions,
+      // Without an explicit seed the output differs from run to run
+      seed = opts.getLong("seed", System.currentTimeMillis()),
+      distribution = opts.getOrDefault("distribution", "uniform"),
+      transform = AffineTransform(
+        translateX = opts.getDouble("translateX", 0),
+        translateY = opts.getDouble("translateY", 0),
+        scaleX = opts.getDouble("scaleX", 1),
+        scaleY = opts.getDouble("scaleY", 1),
+        skewX = opts.getDouble("skewX", 0),
+        skewY = opts.getDouble("skewY", 0)),
+      opts)
+  }
+}
diff --git 
a/spark/common/src/main/scala/org/apache/sedona/sql/datasources/spider/SpiderPartition.scala
 
b/spark/common/src/main/scala/org/apache/sedona/sql/datasources/spider/SpiderPartition.scala
new file mode 100644
index 0000000000..3ba9ddbeb8
--- /dev/null
+++ 
b/spark/common/src/main/scala/org/apache/sedona/sql/datasources/spider/SpiderPartition.scala
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.sql.datasources.spider
+
+import org.apache.spark.sql.connector.read.InputPartition
+
+/**
+ * Describes one input partition of a spider scan; consumed by SpiderPartitionReader.
+ *
+ * @param index
+ *   position of this partition among the planned partitions
+ * @param startIndex
+ *   offset added to the reader's running row count to form the `id` column
+ * @param numRows
+ *   number of rows the reader produces for this partition
+ * @param seed
+ *   seed of the `java.util.Random` created by the reader
+ * @param distribution
+ *   distribution name passed to `GeneratorFactory.create`
+ * @param transform
+ *   affine transform applied to every generated geometry
+ * @param opts
+ *   generator options passed to `GeneratorFactory.create`
+ */
+case class SpiderPartition(
+    index: Int,
+    startIndex: Long,
+    numRows: Long,
+    seed: Long,
+    distribution: String,
+    transform: AffineTransform,
+    opts: java.util.Map[String, String])
+    extends InputPartition
diff --git 
a/spark/common/src/main/scala/org/apache/sedona/sql/datasources/spider/SpiderPartitionReader.scala
 
b/spark/common/src/main/scala/org/apache/sedona/sql/datasources/spider/SpiderPartitionReader.scala
new file mode 100644
index 0000000000..e03a237cc0
--- /dev/null
+++ 
b/spark/common/src/main/scala/org/apache/sedona/sql/datasources/spider/SpiderPartitionReader.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.sql.datasources.spider
+
+import org.apache.sedona.common.spider.Generator
+import org.apache.sedona.common.spider.GeneratorFactory
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.connector.read.PartitionReader
+import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT
+import org.locationtech.jts.geom.Geometry
+import org.locationtech.jts.geom.util.AffineTransformation
+
+/**
+ * Reads the rows of a single [[SpiderPartition]]. Each row consists of an id and a generated
+ * geometry to which the partition's affine transform has been applied.
+ */
+class SpiderPartitionReader(partition: SpiderPartition) extends PartitionReader[InternalRow] {
+  // The generator draws from this random source, so it must be created first
+  private val rng: java.util.Random = new java.util.Random(partition.seed)
+  // None when the partition's transform is the identity, so no work is done per geometry
+  private val jtsTransform: Option[AffineTransformation] = partition.transform.toJTS
+  private val geometrySource: Generator =
+    GeneratorFactory.create(partition.distribution, rng, partition.opts)
+  // Number of rows produced so far
+  private var produced: Long = 0
+  private var current: Geometry = _
+
+  /** Generates the next geometry if this partition has rows left. */
+  override def next(): Boolean = {
+    val hasMore = produced < partition.numRows
+    if (hasMore) {
+      val raw = geometrySource.next()
+      current = jtsTransform match {
+        case Some(t) => t.transform(raw)
+        case None => raw
+      }
+      produced += 1
+    }
+    hasMore
+  }
+
+  /** Returns the row for the geometry produced by the latest successful `next()` call. */
+  override def get(): InternalRow = {
+    // `produced` was already incremented by next(), so ids start at startIndex + 1
+    val id = partition.startIndex + produced
+    InternalRow(id, GeometryUDT.serialize(current))
+  }
+
+  override def close(): Unit = {}
+}
diff --git 
a/spark/common/src/main/scala/org/apache/sedona/sql/datasources/spider/SpiderScanBuilder.scala
 
b/spark/common/src/main/scala/org/apache/sedona/sql/datasources/spider/SpiderScanBuilder.scala
new file mode 100644
index 0000000000..e09c22300a
--- /dev/null
+++ 
b/spark/common/src/main/scala/org/apache/sedona/sql/datasources/spider/SpiderScanBuilder.scala
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.sql.datasources.spider
+
+import org.apache.sedona.common.spider.GeneratorFactory
+import org.apache.sedona.common.spider.ParcelGenerator
+import org.apache.spark.sql.connector.read.Batch
+import org.apache.spark.sql.connector.read.InputPartition
+import org.apache.spark.sql.connector.read.PartitionReaderFactory
+import org.apache.spark.sql.connector.read.Scan
+import org.apache.spark.sql.connector.read.ScanBuilder
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
+
+import java.util
+import java.util.Random
+
+/**
+ * Builds the scan of a spider table: plans the input partitions and creates the readers.
+ *
+ * @param distribution
+ *   distribution name; "parcel" is planned differently from all other distributions
+ * @param numPartitions
+ *   requested number of partitions (an upper bound, see planInputPartitions)
+ * @param numRows
+ *   total number of rows to generate
+ * @param opts
+ *   generator options forwarded to every partition
+ * @param seed
+ *   base seed from which the per-partition seeds are derived
+ * @param transform
+ *   affine transform applied to the generated geometries
+ */
+private class SpiderScanBuilder(
+    distribution: String,
+    numPartitions: Int,
+    numRows: Long,
+    opts: CaseInsensitiveStringMap,
+    seed: Long,
+    transform: AffineTransform)
+    extends ScanBuilder {
+  override def build(): Scan = new Scan {
+    override def readSchema(): StructType = SpiderTable.SCHEMA
+
+    override def toBatch: Batch = new Batch {
+      override def planInputPartitions(): Array[InputPartition] = {
+        if (distribution != "parcel") {
+          // Never plan more partitions than there are rows
+          val finalNumPartitions = Math.min(numPartitions, numRows).toInt
+          val partitionInfo = computePartitionRanges(finalNumPartitions, 
numRows)
+          val optsMap = opts.asCaseSensitiveMap()
+          partitionInfo.zipWithIndex.map { case ((startIndex, partitionRows), 
iPartition) =>
+            // Every partition gets its own seed (seed + partition index) and the same transform
+            SpiderPartition(
+              iPartition,
+              startIndex,
+              partitionRows,
+              seed + iPartition,
+              distribution,
+              transform,
+              optsMap)
+          }.toArray
+        } else {
+          // Parcel distribution requires special partition generation to 
ensure that the records are
+          // non overlapping.
+          val maxPartitionBoxes = Math.min(numPartitions, numRows)
+          if (maxPartitionBoxes == 0) {
+            return Array.empty[InputPartition]
+          }
+
+          // The actual number of partitions is a power of 4 that is less than 
or equal to the number of
+          // requested partitions. This is for distributing the records evenly 
across the partitions.
+          // NOTE(review): this relies on the floating-point ratio log(x) / log(4) not rounding
+          // below the exact integer when x is a power of 4 -- confirm, or compute with integers.
+          val finalNumPartitions =
+            Math.pow(4, Math.floor(Math.log(maxPartitionBoxes) / 
Math.log(4))).toInt
+
+          // Generate the partitions using the parcel generator but set 
dithering to zero since dithering
+          // should only be applied on the final records and not the partitions
+          val conf = new util.HashMap[String, String]()
+          conf.putAll(opts.asCaseSensitiveMap())
+          conf.put("dither", "0")
+          conf.put("cardinality", finalNumPartitions.toString)
+          // seed - 1 differs from every per-partition seed (seed + iPartition, iPartition >= 0)
+          val random = new Random(seed - 1)
+          val parcelGenerator =
+            GeneratorFactory.create("parcel", random, 
conf).asInstanceOf[ParcelGenerator]
+
+          // Generate the partitions
+          val partitionInfo = computePartitionRanges(finalNumPartitions, 
numRows)
+          partitionInfo.zipWithIndex.map { case ((startIndex, partitionRows), 
iPartition) =>
+            // Each partition generates its records inside its own box: the partition transform
+            // maps the unit square onto the box and then applies the user-supplied transform
+            val partitionBox = parcelGenerator.generateBox()
+            val partitionTransform = transform.transform(partitionBox)
+            val partitionOpts = new util.HashMap[String, String]()
+            partitionOpts.putAll(opts.asCaseSensitiveMap())
+            // The per-partition generator is told how many records this partition holds
+            partitionOpts.put("cardinality", partitionRows.toString)
+            SpiderPartition(
+              iPartition,
+              startIndex,
+              partitionRows,
+              seed + iPartition,
+              distribution,
+              partitionTransform,
+              partitionOpts)
+          }.toArray
+        }
+      }
+
+      /**
+       * Computes the start index and number of rows for each partition
+       *
+       * Each partition takes the remaining rows divided (integer division) by the number of
+       * partitions still to be filled, so any remainder ends up in the later partitions.
+       *
+       * @param numPartitions
+       *   The number of partitions to create
+       * @param totalRows
+       *   The total number of rows to distribute
+       * @return
+       *   Sequence of (startIndex, numRows) for each partition
+       */
+      private def computePartitionRanges(
+          numPartitions: Int,
+          totalRows: Long): Seq[(Long, Long)] = {
+        var recordsRemaining = totalRows
+        (0 until numPartitions).map { iPartition =>
+          // Rows already handed out form the zero-based start index of this partition
+          val startIndex = totalRows - recordsRemaining
+          val partitionRows = recordsRemaining / (numPartitions - iPartition)
+          recordsRemaining -= partitionRows
+          (startIndex, partitionRows)
+        }
+      }
+
+      override def createReaderFactory(): PartitionReaderFactory = {
+        (partition: InputPartition) =>
+          {
+            new SpiderPartitionReader(partition.asInstanceOf[SpiderPartition])
+          }
+      }
+    }
+  }
+}
diff --git 
a/spark/common/src/main/scala/org/apache/sedona/sql/datasources/spider/SpiderTable.scala
 
b/spark/common/src/main/scala/org/apache/sedona/sql/datasources/spider/SpiderTable.scala
new file mode 100644
index 0000000000..81cb5bffec
--- /dev/null
+++ 
b/spark/common/src/main/scala/org/apache/sedona/sql/datasources/spider/SpiderTable.scala
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.sql.datasources.spider
+
+import org.apache.spark.sql.connector.catalog.SupportsRead
+import org.apache.spark.sql.connector.catalog.Table
+import org.apache.spark.sql.connector.catalog.TableCapability
+import org.apache.spark.sql.connector.read.ScanBuilder
+import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
+
+/**
+ * Read-only table of generated geometries backing the "spider" data source.
+ *
+ * All constructor arguments are handed to SpiderScanBuilder when a scan is requested.
+ */
+class SpiderTable(
+    numRows: Long,
+    numPartitions: Int,
+    seed: Long,
+    distribution: String,
+    transform: AffineTransform,
+    opts: CaseInsensitiveStringMap)
+    extends Table
+    with SupportsRead {
+
+  override def name(): String = "spider"
+
+  override def schema(): StructType = SpiderTable.SCHEMA
+
+  // Only batch reads are supported
+  override def capabilities(): java.util.Set[TableCapability] =
+    java.util.EnumSet.of(TableCapability.BATCH_READ)
+
+  // The `options` argument is not used; the scan is configured from the constructor's `opts`
+  override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder 
= {
+    new SpiderScanBuilder(distribution, numPartitions, numRows, opts, seed, 
transform)
+  }
+}
+
+object SpiderTable {
+  // Fixed schema of every spider table: a long `id` and a `geometry` column
+  val SCHEMA: StructType = StructType(
+    Seq(StructField("id", LongType), StructField("geometry", GeometryUDT)))
+}
diff --git 
a/spark/common/src/test/scala/org/apache/sedona/sql/SpiderTests.scala 
b/spark/common/src/test/scala/org/apache/sedona/sql/SpiderTests.scala
new file mode 100644
index 0000000000..3a3a72178d
--- /dev/null
+++ b/spark/common/src/test/scala/org/apache/sedona/sql/SpiderTests.scala
@@ -0,0 +1,316 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.sql
+
+import org.locationtech.jts.geom.Geometry
+import org.locationtech.jts.geom.Polygon
+import org.locationtech.jts.geom.Envelope
+import org.locationtech.jts.geom.Point
+import org.locationtech.jts.operation.overlayng.OverlayNGRobust
+import org.scalatest.BeforeAndAfterAll
+
+import java.util
+
+class SpiderTests extends TestBaseScala with BeforeAndAfterAll {
+  describe("Spider data generator tests") {
+    it("generate data with default parameters") {
+      val spiderDf = sparkSession.read.format("spider").load()
+      assert(spiderDf.count() == 100)
+      spiderDf.collect().foreach { row =>
+        assert(row.getAs[Long]("id") >= 0)
+        assert(row.getAs[Geometry]("geometry").isInstanceOf[Point])
+      }
+    }
+
+    it("generate spider data with specified records and partitions") {
+      val spiderDf = sparkSession.read
+        .format("spider")
+        .option("N", "1000")
+        .option("numPartitions", "10")
+        .option("distribution", "uniform")
+        .load()
+      assert(spiderDf.count() == 1000)
+      assert(spiderDf.rdd.getNumPartitions == 10)
+      // ids should not be duplicated
+      assert(spiderDf.select("id").distinct().count() == 1000)
+    }
+
+    it("generate data with 0 records") {
+      var spiderDf = sparkSession.read
+        .format("spider")
+        .option("N", "0")
+        .option("numPartitions", "10")
+        .option("distribution", "uniform")
+        .load()
+      assert(spiderDf.count() == 0)
+      assert(spiderDf.rdd.getNumPartitions == 0)
+
+      spiderDf = sparkSession.read
+        .format("spider")
+        .option("N", "0")
+        .option("numPartitions", "10")
+        .option("distribution", "parcel")
+        .load()
+      assert(spiderDf.count() == 0)
+      assert(spiderDf.rdd.getNumPartitions == 0)
+    }
+
+    it("generate data with small number of records") {
+      var spiderDf = sparkSession.read
+        .format("spider")
+        .option("N", "10")
+        .option("numPartitions", "10")
+        .option("distribution", "uniform")
+        .load()
+      assert(spiderDf.count() == 10)
+      assert(spiderDf.rdd.getNumPartitions == 10)
+
+      spiderDf = sparkSession.read
+        .format("spider")
+        .option("N", "4")
+        .option("numPartitions", "10")
+        .option("distribution", "parcel")
+        .load()
+      assert(spiderDf.count() == 4)
+      assert(spiderDf.rdd.getNumPartitions == 4)
+    }
+
+    it("generate data with specified seed") {
+      val spiderDf1 = sparkSession.read
+        .format("spider")
+        .option("N", "100")
+        .option("numPartitions", "10")
+        .option("distribution", "uniform")
+        .option("seed", "12345")
+        .load()
+      val spiderDf2 = sparkSession.read
+        .format("spider")
+        .option("N", "100")
+        .option("numPartitions", "10")
+        .option("distribution", "uniform")
+        .option("seed", "12345")
+        .load()
+      assert(spiderDf1.count() == 100)
+      assert(spiderDf2.count() == 100)
+      val spiderDf1Data = spiderDf1.collect()
+      val spiderDf2Data = spiderDf2.collect()
+      assert(spiderDf1Data.length == spiderDf2Data.length)
+      assert(spiderDf1Data.toSet == spiderDf2Data.toSet)
+
+      val spiderDf3 = sparkSession.read
+        .format("spider")
+        .option("N", "100")
+        .option("numPartitions", "10")
+        .option("distribution", "parcel")
+        .option("seed", "23456")
+        .load()
+      assert(spiderDf3.count() == 100)
+      val spiderDf3Data = spiderDf3.collect()
+      assert(spiderDf1Data.toSet != spiderDf3Data.toSet)
+    }
+
+    it("generate data with specified geometry type") {
+      val spiderDf = sparkSession.read
+        .format("spider")
+        .option("N", "100")
+        .option("geometryType", "polygon")
+        .option("minSegments", "5")
+        .option("maxSegments", "10")
+        .load()
+      assert(spiderDf.count() == 100)
+      spiderDf.collect().foreach { row =>
+        assert(row.getAs[Geometry]("geometry").isInstanceOf[Polygon])
+        val geom = row.getAs[Geometry]("geometry")
+        assert(geom.getNumPoints >= 6 && geom.getNumPoints <= 11)
+      }
+
+      val spiderDf2 = sparkSession.read
+        .format("spider")
+        .option("N", "100")
+        .option("geometryType", "box")
+        .load()
+      assert(spiderDf2.count() == 100)
+      spiderDf2.collect().foreach { row =>
+        assert(row.getAs[Geometry]("geometry").isInstanceOf[Polygon])
+        val geom = row.getAs[Geometry]("geometry")
+        assert(geom.getNumPoints == 5)
+        assert(geom.isRectangle)
+      }
+    }
+
+    it("generate data with specified transform") {
+      val spiderDf = sparkSession.read
+        .format("spider")
+        .option("N", "100")
+        .option("numPartitions", "10")
+        .option("distribution", "uniform")
+        .option("translateX", "10")
+        .option("translateY", "20")
+        .option("scaleX", "2")
+        .option("scaleY", "2")
+        .option("skewX", "0")
+        .option("skewY", "0")
+        .load()
+      assert(spiderDf.count() == 100)
+      spiderDf.collect().foreach { row =>
+        val geom = row.getAs[Geometry]("geometry")
+        // check if the geometry is transformed correctly
+        // the transformed geometry should be a rectangle within region [10, 
12] x [20, 22]
+        val centroid = geom.getCentroid
+        assert(centroid.getX >= 10 && centroid.getX <= 12)
+        assert(centroid.getY >= 20 && centroid.getY <= 22)
+      }
+    }
+
+    it("generate data with diagonal distribution") {
+      val spiderDf = sparkSession.read
+        .format("spider")
+        .option("N", "1000")
+        .option("numPartitions", "4")
+        .option("percentage", "0.7")
+        .option("distribution", "diagonal")
+        .load()
+      assert(spiderDf.rdd.getNumPartitions == 4)
+      val records = spiderDf.collect()
+      assert(records.length == 1000)
+      // verify the distribution of the data
+      val numPoints = records.count { row =>
+        val geom = row.getAs[Geometry]("geometry").getCentroid
+        Math.abs(geom.getX - geom.getY) < 0.000001
+      }
+      assert(numPoints >= 650 && numPoints <= 750)
+    }
+
+    it("generate parcel data") {
+      val spiderDf = sparkSession.read
+        .format("spider")
+        .option("N", "1000")
+        .option("numPartitions", "4")
+        .option("dither", "0.1")
+        .option("distribution", "parcel")
+        .load()
+      assert(spiderDf.count() == 1000)
+      // ids should not be duplicated
+      assert(spiderDf.select("id").distinct().count() == 1000)
+      // verify the distribution of the data
+      val geometries: Array[Geometry] = spiderDf.collect().map { row =>
+        row.getAs[Geometry]("geometry")
+      }
+      var totalArea = 0.0
+      val bounds = new Envelope(0, 1, 0, 1)
+      geometries.foreach { geom =>
+        assert(geom.isInstanceOf[Polygon])
+        assert(geom.getNumPoints == 5)
+        assert(geom.isRectangle)
+        totalArea += geom.getArea
+        assert(bounds.covers(geom.getEnvelopeInternal))
+      }
+      assert(totalArea >= 0.7 && totalArea <= 1.0)
+      val unionArea = OverlayNGRobust.union(util.Arrays.asList(geometries: 
_*)).getArea
+      assert(Math.abs(totalArea - unionArea) < 0.001)
+    }
+
+    it("generate parcel data with specified number of partitions") {
+      // Each pair is (value passed as the "numPartitions" option, partition count
+      // the resulting DataFrame is expected to have). Every expected count in
+      // this table is the largest power of four not exceeding the requested value.
+      val expectations = Seq(
+        "1" -> 1,
+        "3" -> 1,
+        "4" -> 4,
+        "15" -> 4,
+        "16" -> 16,
+        "63" -> 16,
+        "64" -> 64)
+      expectations.foreach { case (requested, expectedPartitions) =>
+        val parcelDf = sparkSession.read
+          .format("spider")
+          .option("N", "100")
+          .option("numPartitions", requested)
+          .option("distribution", "parcel")
+          .load()
+        assert(parcelDf.rdd.getNumPartitions == expectedPartitions)
+      }
+    }
+
+    it("generate parcel data with transformation") {
+      // Same parcel generation as the untransformed case, but with a scale of 2
+      // on both axes, a translation of (10, 20) and no skew.
+      val spiderDf = sparkSession.read
+        .format("spider")
+        .option("N", "1000")
+        .option("numPartitions", "16")
+        .option("dither", "0.1")
+        .option("translateX", "10")
+        .option("translateY", "20")
+        .option("scaleX", "2")
+        .option("scaleY", "2")
+        .option("skewX", "0")
+        .option("skewY", "0")
+        .option("distribution", "parcel")
+        .load()
+      assert(spiderDf.count() == 1000)
+      // verify the distribution of the data
+      val geometries: Array[Geometry] = spiderDf.collect().map { row =>
+        row.getAs[Geometry]("geometry")
+      }
+      var totalArea = 0.0
+      // The unit square scaled by 2 and shifted by (10, 20): [10, 12] x [20, 22].
+      val bounds = new Envelope(10, 12, 20, 22)
+      geometries.foreach { geom =>
+        assert(geom.isInstanceOf[Polygon])
+        // A rectangle's closed ring has 4 corners plus the repeated start point.
+        assert(geom.getNumPoints == 5)
+        assert(geom.isRectangle)
+        totalArea += geom.getArea
+        assert(bounds.covers(geom.getEnvelopeInternal))
+      }
+      // The transformed bounds have area 2 * 2 = 4, which caps the total parcel area.
+      assert(totalArea >= 3.0 && totalArea <= 4.0)
+      // If the summed area matches the area of the union, the parcels do not
+      // overlap one another (beyond the numeric tolerance below).
+      val unionArea = OverlayNGRobust.union(util.Arrays.asList(geometries: _*)).getArea
+      assert(Math.abs(totalArea - unionArea) < 0.001)
+    }
+  }
+}

Reply via email to