This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-spatialbench.git

commit ccb03489c2bb620ff3715610da95612cdcc97386
Author: Pranav Toggi <[email protected]>
AuthorDate: Thu Jun 26 14:14:11 2025 -0700

    add parcel distribution
---
 tpchgen-arrow/src/building.rs |   2 +-
 tpchgen/src/generators.rs     |  26 ++++-
 tpchgen/src/lib.rs            |   3 +-
 tpchgen/src/spider.rs         | 259 +++++++++++++++++++++++++++++++++---------
 tpchgen/src/spider_presets.rs |  53 +++++++++
 5 files changed, 281 insertions(+), 62 deletions(-)

diff --git a/tpchgen-arrow/src/building.rs b/tpchgen-arrow/src/building.rs
index 34a6ccb..05821c6 100644
--- a/tpchgen-arrow/src/building.rs
+++ b/tpchgen-arrow/src/building.rs
@@ -59,7 +59,7 @@ impl Iterator for BuildingArrow {
 
         let buildingkey = Int64Array::from_iter_values(rows.iter().map(|r| r.b_buildingkey));
         let name = string_view_array_from_display_iter(rows.iter().map(|r| &r.b_name));
-        let polygon_wkt = StringViewArray::from_iter_values(rows.iter().map(|r| r.b_polygonwkt));
+        let polygon_wkt = StringViewArray::from_iter_values(rows.iter().map(|r| r.b_polygonwkt.clone()));
 
         let batch = RecordBatch::try_new(
             Arc::clone(self.schema()),
diff --git a/tpchgen/src/generators.rs b/tpchgen/src/generators.rs
index 7d27dd8..a87c39e 100644
--- a/tpchgen/src/generators.rs
+++ b/tpchgen/src/generators.rs
@@ -14,7 +14,8 @@ use rand::rngs::StdRng;
 use rand::{Rng, SeedableRng};
 use crate::dates::{GenerateUtils, TPCHDate};
 use crate::random::{RandomBoundedInt, RandomString, RandomStringSequence, RandomText};
-use crate::spider::SpiderGenerator;
+use crate::spider::{spider_seed_for_index, SpiderGenerator};
+use crate::spider_presets::SpiderPresets;
 
 /// Generator for Nation table data
 #[derive(Debug, Clone)]
@@ -2042,7 +2043,7 @@ pub struct TripGenerator {
 
 impl TripGenerator {
     /// Base scale for trip generation
-    const SCALE_BASE: i32 = 1_500_000;
+    const SCALE_BASE: i32 = 500_000;
 
     // Constants for trip generation
     const FARE_MIN_PER_MILE: i32 = 150; // $1.50 per mile
@@ -2061,7 +2062,7 @@ impl TripGenerator {
             Distributions::static_default(),
             TextPool::get_or_init_default(),
             crate::kde::default_distance_kde(),
-            SpiderGenerator::default(),
+            SpiderPresets::for_trip_pickups(),
         )
     }
 
@@ -2327,7 +2328,7 @@ pub struct Building<'a> {
     /// Name of the building
     pub b_name: StringSequenceInstance<'a>,
     /// WKT representation of the building's polygon
-    pub b_polygonwkt: &'a str,
+    pub b_polygonwkt: String,
 }
 
 impl Display for Building<'_> {
@@ -2349,6 +2350,7 @@ pub struct BuildingGenerator<'a> {
     part_count: i32,
     distributions: &'a Distributions,
     text_pool: &'a TextPool,
+    spatial_gen: SpiderGenerator,
 }
 
 impl<'a> BuildingGenerator<'a> {
@@ -2369,6 +2371,7 @@ impl<'a> BuildingGenerator<'a> {
             part_count,
             Distributions::static_default(),
             TextPool::get_or_init_default(),
+            SpiderPresets::for_building_polygons(),
         )
     }
 
@@ -2379,6 +2382,7 @@ impl<'a> BuildingGenerator<'a> {
         part_count: i32,
         distributions: &'b Distributions,
         text_pool: &'b TextPool,
+        spatial_gen: SpiderGenerator,
     ) -> BuildingGenerator<'b> {
         BuildingGenerator {
             scale_factor,
@@ -2386,6 +2390,7 @@ impl<'a> BuildingGenerator<'a> {
             part_count,
             distributions,
             text_pool,
+            spatial_gen,
         }
     }
 
@@ -2406,6 +2411,7 @@ impl<'a> BuildingGenerator<'a> {
                 self.part_count,
             ),
             Self::calculate_row_count(self.scale_factor, self.part, self.part_count),
+            self.spatial_gen.clone(),
         )
     }
 }
@@ -2424,6 +2430,7 @@ impl<'a> IntoIterator for &'a BuildingGenerator<'a> {
 pub struct BuildingGeneratorIterator<'a> {
     name_random: RandomStringSequence<'a>,
     wkt_random: RandomText<'a>,
+    spatial_gen: SpiderGenerator,
 
     start_index: i64,
     row_count: i64,
@@ -2436,6 +2443,7 @@ impl<'a> BuildingGeneratorIterator<'a> {
         text_pool: &'a TextPool,
         start_index: i64,
         row_count: i64,
+        spatial_gen: SpiderGenerator,
     ) -> Self {
         let mut name_random = RandomStringSequence::new(
             709314158,
@@ -2457,6 +2465,8 @@ impl<'a> BuildingGeneratorIterator<'a> {
             wkt_random,
             start_index,
             row_count,
+            spatial_gen,
+
             index: 0,
         }
     }
@@ -2465,10 +2475,14 @@ impl<'a> BuildingGeneratorIterator<'a> {
     fn make_building(&mut self, building_key: i64) -> Building<'a> {
         let name = self.name_random.next_value();
 
+        let seed = spider_seed_for_index(building_key as u64, 1234);
+        let mut rng = StdRng::seed_from_u64(seed);
+        let wkt = self.spatial_gen.generate_parcel(&mut rng);
+
         Building {
             b_buildingkey: building_key,
             b_name: name,
-            b_polygonwkt: self.wkt_random.next_value(),
+            b_polygonwkt: wkt,
         }
     }
 }
@@ -2623,7 +2637,7 @@ mod tests {
         let trips: Vec<_> = generator.iter().collect();
 
         // Should have 0.01 * 1,000,000 = 10,000 trips
-        assert_eq!(trips.len(), 15000);
+        assert_eq!(trips.len(), 5000);
 
         // Check first trip
         let first = &trips[0];
diff --git a/tpchgen/src/lib.rs b/tpchgen/src/lib.rs
index 4fee43c..098941c 100644
--- a/tpchgen/src/lib.rs
+++ b/tpchgen/src/lib.rs
@@ -60,4 +60,5 @@ pub mod queries;
 pub mod random;
 pub mod text;
 pub mod kde;
-mod spider;
+pub mod spider;
+pub mod spider_presets;
diff --git a/tpchgen/src/spider.rs b/tpchgen/src/spider.rs
index d1bf86b..7d1042b 100644
--- a/tpchgen/src/spider.rs
+++ b/tpchgen/src/spider.rs
@@ -2,19 +2,58 @@ use rand::{Rng, SeedableRng};
 use rand::rngs::StdRng;
 
 #[derive(Debug, Clone, Copy)]
-pub enum SpiderDistribution {
+pub enum DistributionType {
     Uniform,
+    Normal,
+    Diagonal,
+    Sierpinski,
+    Bit,
+    Parcel,
+}
+
+#[derive(Debug, Clone, Copy)]
+pub enum GeomType {
+    Polygon,
+    Box,
+    Point,
+}
+
+#[derive(Debug, Clone)]
+pub struct BoxWithDepth {
+    pub depth: i32,
+    pub x: f64,
+    pub y: f64,
+    pub w: f64,
+    pub h: f64,
+}
+
+#[derive(Debug, Clone)]
+pub enum DistributionParams {
+    None,
     Normal { mu: f64, sigma: f64 },
     Diagonal { percentage: f64, buffer: f64 },
     Bit { probability: f64, digits: u32 },
-    Sierpinski,
+    Parcel { srange: f64, dither: f64 },
 }
 
 #[derive(Debug, Clone)]
 pub struct SpiderConfig {
-    pub dist: SpiderDistribution,
-    pub global_seed: u64,
-    pub affine: Option<[f64; 6]>,
+    pub dist_type: DistributionType,
+    pub geom_type: GeomType,
+    pub dim: i32,
+    pub seed: u32,
+    pub affine: Option<[f64; 6]>, // Affine transformation matrix
+
+    // Box-specific fields
+    pub width: f64,
+    pub height: f64,
+
+    // Polygon-specific fields
+    pub maxseg: i32,
+    pub polysize: f64,
+
+    // Distribution-specific params
+    pub params: DistributionParams,
 }
 
 #[derive(Clone, Debug)]
@@ -28,54 +67,164 @@ impl SpiderGenerator {
     }
 
     pub fn generate_point(&self, index: u64) -> (f64, f64) {
-        let seed = spider_seed_for_index(index, self.config.global_seed);
+        let seed = spider_seed_for_index(index, self.config.seed as u64);
         let mut rng = StdRng::seed_from_u64(seed);
 
-        match self.config.dist {
-            SpiderDistribution::Uniform => (rng.gen(), rng.gen()),
+        match self.config.dist_type {
+            DistributionType::Uniform => self.generate_uniform(&mut rng),
+            DistributionType::Normal => self.generate_normal(&mut rng),
+            DistributionType::Diagonal => self.generate_diagonal(&mut rng),
+            DistributionType::Bit => self.generate_bit(&mut rng),
+            DistributionType::Sierpinski => self.generate_sierpinski(&mut rng),
+            _ => (rng.gen(), rng.gen())
+        }
+
+    }
+
+    fn generate_uniform(&self, rng: &mut StdRng) -> (f64, f64) {
+        (rand_unit(rng), rand_unit(rng))
+    }
+
+    fn generate_normal(&self, rng: &mut StdRng) -> (f64, f64) {
+        if let DistributionParams::Normal { mu, sigma } = self.config.params {
+            let x = rand_normal(rng, mu, sigma).clamp(0.0, 1.0);
+            let y = rand_normal(rng, mu, sigma).clamp(0.0, 1.0);
+            (x, y)
+        } else {
+            // Default values or error handling
+            (rng.gen(), rng.gen())
+        }
+    }
 
-            SpiderDistribution::Normal { mu, sigma } => {
-                let x = rand_normal(&mut rng, mu, sigma).clamp(0.0, 1.0);
-                let y = rand_normal(&mut rng, mu, sigma).clamp(0.0, 1.0);
+    fn generate_diagonal(&self, rng: &mut StdRng) -> (f64, f64) {
+        if let DistributionParams::Diagonal { percentage, buffer } = self.config.params {
+            if rng.gen::<f64>() < percentage {
+                let v = rng.gen();
+                (v, v)
+            } else {
+                let c: f64 = rng.gen();
+                let d: f64 = rand_normal(rng, 0.0, buffer / 5.0);
+                let x: f64 = (c + d / f64::sqrt(2.0)).clamp(0.0, 1.0);
+                let y: f64 = (c - d / f64::sqrt(2.0)).clamp(0.0, 1.0);
                 (x, y)
             }
+        } else {
+            // Default values or error handling
+            (rng.gen(), rng.gen())
+        }
+    }
 
-            SpiderDistribution::Diagonal { percentage, buffer } => {
-                if rng.gen::<f64>() < percentage {
-                    let v = rng.gen();
-                    (v, v)
-                } else {
-                    let c: f64 = rng.gen();
-                    let d: f64 = rand_normal(&mut rng, 0.0, buffer / 5.0);
-                    let x: f64 = (c + d / f64::sqrt(2.0)).clamp(0.0, 1.0);
-                    let y: f64 = (c - d / f64::sqrt(2.0)).clamp(0.0, 1.0);
-                    (x, y)
-                }
-            }
+    fn generate_bit(&self, rng: &mut StdRng) -> (f64, f64) {
+        if let DistributionParams::Bit { probability, digits } = self.config.params {
+            let x = spider_bit(rng, probability, digits);
+            let y = spider_bit(rng, probability, digits);
+            (x, y)
+        } else {
+            // Default values or error handling
+            (rng.gen(), rng.gen())
+        }
+    }
 
-            SpiderDistribution::Bit { probability, digits } => {
-                let x = spider_bit(&mut rng, probability, digits);
-                let y = spider_bit(&mut rng, probability, digits);
-                (x, y)
+    fn generate_sierpinski(&self, rng: &mut StdRng) -> (f64, f64) {
+        let (mut x, mut y) = (0.0, 0.0);
+        let a = (0.0, 0.0);
+        let b = (1.0, 0.0);
+        let c = (0.5, (3.0f64).sqrt() / 2.0);
+        for _ in 0..10 {
+            match rng.gen_range(0..3) {
+                0 => { x = (x + a.0) / 2.0; y = (y + a.1) / 2.0; }
+                1 => { x = (x + b.0) / 2.0; y = (y + b.1) / 2.0; }
+                _ => { x = (x + c.0) / 2.0; y = (y + c.1) / 2.0; }
             }
+        }
+        (x, y)
+    }
+
+    pub fn generate_parcel(&self, rng: &mut StdRng) -> String {
+        if let DistributionParams::Parcel { srange, dither } = self.config.params {
+            let mut box_stack = vec![BoxWithDepth {
+                depth: 0,
+                x: 0.0,
+                y: 0.0,
+                w: 1.0,
+                h: 1.0,
+            }];
+
+            // Pick a depth based on dim (log2) or fixed depth
+            let depth_limit = 6; // You can make this configurable if needed
 
-            SpiderDistribution::Sierpinski => {
-                let (mut x, mut y) = (0.0, 0.0);
-                let a = (0.0, 0.0);
-                let b = (1.0, 0.0);
-                let c = (0.5, (3.0f64).sqrt() / 2.0);
-                for _ in 0..10 {
-                    match rng.gen_range(0..3) {
-                        0 => { x = (x + a.0) / 2.0; y = (y + a.1) / 2.0; }
-                        1 => { x = (x + b.0) / 2.0; y = (y + b.1) / 2.0; }
-                        _ => { x = (x + c.0) / 2.0; y = (y + c.1) / 2.0; }
-                    }
+            for _ in 0..depth_limit {
+                let b = box_stack.pop().unwrap();
+                let (b1, b2) = if b.w > b.h {
+                    let split = b.w * (srange + rand_unit(rng) * (1.0 - 2.0 * srange));
+                    (
+                        BoxWithDepth { depth: b.depth + 1, x: b.x, y: b.y, w: split, h: b.h },
+                        BoxWithDepth { depth: b.depth + 1, x: b.x + split, y: b.y, w: b.w - split, h: b.h },
+                    )
+                } else {
+                    let split = b.h * (srange + rand_unit(rng) * (1.0 - 2.0 * srange));
+                    (
+                        BoxWithDepth { depth: b.depth + 1, x: b.x, y: b.y, w: b.w, h: split },
+                        BoxWithDepth { depth: b.depth + 1, x: b.x, y: b.y + split, w: b.w, h: b.h - split },
+                    )
+                };
+
+                // Randomly pick one of the two
+                if rng.gen_bool(0.5) {
+                    box_stack.push(b1);
+                } else {
+                    box_stack.push(b2);
                 }
-                (x, y)
             }
+
+            let mut b = box_stack.pop().unwrap();
+
+            // Apply dither
+            let dx = b.w * dither * (rand_unit(rng) - 0.5);
+            let dy = b.h * dither * (rand_unit(rng) - 0.5);
+            b.x += dx / 2.0;
+            b.y += dy / 2.0;
+            b.w -= dx;
+            b.h -= dy;
+
+            // Pick random point inside the box
+            let _x = b.x + rand_unit(rng) * b.w;
+            let _y = b.y + rand_unit(rng) * b.h;
+
+            self.box_to_wkt(&b)
+        } else {
+            self.box_to_wkt(&BoxWithDepth {
+                depth: 0,
+                x: 0.0,
+                y: 0.0,
+                w: 1.0,
+                h: 1.0,
+            })
         }
     }
 
+    fn box_to_wkt(&self, b: &BoxWithDepth) -> String {
+        let corners = [
+            (b.x, b.y),
+            (b.x + b.w, b.y),
+            (b.x + b.w, b.y + b.h),
+            (b.x, b.y + b.h),
+            (b.x, b.y),
+        ];
+
+        let affine = self.config.affine.unwrap_or([1.0, 0.0, 0.0, 0.0, 1.0, 0.0]);
+
+        let coords: Vec<String> = corners
+            .iter()
+            .map(|&(x, y)| {
+                let (tx, ty) = apply_affine(x, y, &affine);
+                format!("{:.6} {:.6}", tx, ty)
+            })
+            .collect();
+
+        format!("POLYGON (({}))", coords.join(", "))
+    }
+
     pub fn generate_pickup_point(&self, trip_id: u64) -> (f64, f64) {
         let (x, y) = self.generate_point(trip_id);
         if let Some(aff) = &self.config.affine {
@@ -86,6 +235,10 @@ impl SpiderGenerator {
     }
 }
 
+pub fn rand_unit(rng: &mut StdRng) -> f64 {
+    rng.gen::<f64>() // random number in [0.0, 1.0)
+}
+
 // Affine transform
 fn apply_affine(x: f64, y: f64, m: &[f64; 6]) -> (f64, f64) {
     let x_out = m[0] * x + m[1] * y + m[2];
@@ -114,18 +267,16 @@ fn spider_bit(rng: &mut StdRng, prob: f64, digits: u32) -> f64 {
         .sum()
 }
 
-// In tpchgen/src/spider.rs
-
-impl Default for SpiderGenerator {
-    fn default() -> Self {
-        let config = SpiderConfig {
-            dist: SpiderDistribution::Uniform,
-            global_seed: 42,
-            affine: Some([
-                58.368269, 0.0, -125.244606, // scale X to 58.37°, offset to -125.24°
-                0.0, 25.175375, 24.006328,    // scale Y to 25.18°, offset to 24.00°
-            ]),
-        };
-        SpiderGenerator::new(config)
-    }
-}
\ No newline at end of file
+// impl Default for SpiderGenerator {
+//     fn default() -> Self {
+//         let config = SpiderConfig {
+//             dist: SpiderDistribution::Uniform,
+//             global_seed: 42,
+//             affine: Some([
+//                 58.368269, 0.0, -125.244606, // scale X to 58.37°, offset to -125.24°
+//                 0.0, 25.175375, 24.006328,    // scale Y to 25.18°, offset to 24.00°
+//             ]),
+//         };
+//         SpiderGenerator::new(config)
+//     }
+// }
\ No newline at end of file
diff --git a/tpchgen/src/spider_presets.rs b/tpchgen/src/spider_presets.rs
new file mode 100644
index 0000000..e6827b0
--- /dev/null
+++ b/tpchgen/src/spider_presets.rs
@@ -0,0 +1,53 @@
+use crate::spider::{SpiderGenerator, SpiderConfig, DistributionType, DistributionParams, GeomType};
+
+pub struct SpiderPresets;
+
+impl SpiderPresets {
+    pub fn for_trip_pickups() -> SpiderGenerator {
+        let config = SpiderConfig {
+            dist_type: DistributionType::Uniform,
+            geom_type: GeomType::Point,
+            dim: 2,
+            seed: 42,
+            affine: Some([
+                58.368269, 0.0, -125.244606, // scale X to 58.37°, offset to -125.24°
+                0.0, 25.175375, 24.006328,   // scale Y to 25.18°, offset to 24.00°
+            ]),
+
+            // geometry = box
+            width: 0.0,
+            height: 0.0,
+
+            // geometry = polygon
+            maxseg: 0,
+            polysize: 0.0,
+
+            params: DistributionParams::None,
+        };
+        SpiderGenerator::new(config)
+    }
+
+    pub fn for_building_polygons() -> SpiderGenerator {
+        let config = SpiderConfig {
+            dist_type: DistributionType::Parcel,
+            geom_type: GeomType::Box,
+            dim: 2,
+            seed: 12345,
+            affine: Some([
+                58.368269, 0.0, -125.244606, // scale X to 58.37°, offset to -125.24°
+                0.0, 25.175375, 24.006328,   // scale Y to 25.18°, offset to 24.00°
+            ]),
+
+            // geometry = box
+            width: 0.0,
+            height: 0.0,
+
+            // geometry = polygon
+            maxseg: 0,
+            polysize: 0.0,
+
+            params: DistributionParams::Parcel { srange: 0.1, dither: 2.0 },
+        };
+        SpiderGenerator::new(config)
+    }
+}
\ No newline at end of file

Reply via email to