This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-spatialbench.git
The following commit(s) were added to refs/heads/main by this push:
new c9b7cdc add queries + queries CLI (#21)
c9b7cdc is described below
commit c9b7cdc06c5cec80a66d494ddfe0fd3ce9c6d9aa
Author: James Willis <[email protected]>
AuthorDate: Fri Sep 19 13:43:27 2025 -0700
add queries + queries CLI (#21)
* add queries
* update readme
* Update print_queries.py
Co-authored-by: Copilot <[email protected]>
* fix q3 sql
---------
Co-authored-by: Copilot <[email protected]>
---
README.md | 26 +++
geopandas.py | 624 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
print_queries.py | 459 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 1109 insertions(+)
diff --git a/README.md b/README.md
index fc0330b..c844daa 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,32 @@ Goals:
- Help users compare engines and frameworks across different data scales.
- Support open standards and foster collaboration in the spatial computing
community.
+## Queries
+SpatialBench includes a set of 12 SQL queries. Because spatial SQL syntax varies widely across systems, we provide a CLI
+to print all 12 queries in the dialect of your choice. Currently supported dialects are:
+
+- Databricks Spatial SQL
+- DuckDB
+- Geopandas (distinct case)
+- SedonaDB
+- SedonaSpark
+
+
+We tried to vary the queries only as much as necessary to accommodate dialect
differences.
+
+Geopandas is obviously a distinct case, as it is not SQL-based; however, due to its popularity, we felt it was important
+to include it. Pandas/Geopandas users often hand-optimize their queries in ways that SQL engines handle automatically.
+We felt hand-tuning the queries was unfair for this exercise, and tried to do
as little of that as possible while still
+writing "idiomatic" pandas code. We would be interested in hearing feedback on
this approach as well as seeing a "fully
+hand-optimized" version of the queries.
+
+We welcome contributions and civil discussions on how to improve the queries
and their implementations.
+
+You can print the queries in your dialect of choice using the following
command:
+```bash
+./print_queries.py <dialect>
+```
+
## Data Model
SpatialBench defines a spatial star schema with the following tables:
diff --git a/geopandas.py b/geopandas.py
new file mode 100644
index 0000000..ac149f0
--- /dev/null
+++ b/geopandas.py
@@ -0,0 +1,624 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from typing import cast
+
+import geopandas as gpd
+import numpy as np
+import pandas as pd
+
+from pandas import DataFrame
+from shapely import wkb
+from shapely.geometry import LineString, MultiPoint, Point, Polygon
+
+
def q1(data_paths: dict[str, str]) -> DataFrame:  # type: ignore[override]
    """Q1 (GeoPandas): Trips starting within 50km of Sedona city center.

    Decodes the pickup WKB geometry, measures planar distance (degrees) to
    the Sedona center point, keeps rows within 0.45 degrees (~50km), and
    returns them ordered by distance, then trip key.
    """
    wanted_cols = ["t_tripkey", "t_pickuploc", "t_pickuptime"]
    trips = pd.read_parquet(data_paths["trip"])[wanted_cols]
    trips["pickup_geom"] = gpd.GeoSeries.from_wkb(
        trips["t_pickuploc"], crs="EPSG:4326"
    )
    gdf = gpd.GeoDataFrame(trips, geometry="pickup_geom", crs="EPSG:4326")
    gdf["pickup_lon"] = gdf.geometry.x
    gdf["pickup_lat"] = gdf.geometry.y

    sedona_center = Point(-111.7610, 34.8697)
    gdf["distance_to_center"] = gdf.geometry.distance(sedona_center)

    # 0.45 degrees ~ 50km radius; drop rows with no computable distance.
    within_radius = gdf["distance_to_center"].notna() & (
        gdf["distance_to_center"] <= 0.45
    )
    ordered = gdf[within_radius].sort_values(
        ["distance_to_center", "t_tripkey"], ascending=[True, True]
    )
    output_cols = [
        "t_tripkey",
        "pickup_lon",
        "pickup_lat",
        "t_pickuptime",
        "distance_to_center",
    ]
    return ordered[output_cols].reset_index(drop=True)  # type: ignore[no-any-return]
+
+
def q2(data_paths: dict[str, str]) -> DataFrame:  # type: ignore[override]
    """Q2 (GeoPandas): Count trips starting within the Coconino County zone.

    Looks up the first zone row named 'Coconino County' and counts trips
    whose pickup point intersects that polygon. Returns a single-row
    DataFrame with column trip_count_in_coconino_county.
    """
    trips = pd.read_parquet(data_paths["trip"])
    zones = pd.read_parquet(data_paths["zone"])

    matches = zones[zones["z_name"] == "Coconino County"].head(1)
    if matches.empty:
        # No such zone: the count is trivially zero.
        return pd.DataFrame({"trip_count_in_coconino_county": [0]})

    county_poly = wkb.loads(matches.iloc[0]["z_boundary"])
    trips["pickup_geom"] = gpd.GeoSeries.from_wkb(
        trips["t_pickuploc"], crs="EPSG:4326"
    )
    # Wrap in a GeoSeries so intersects() is the vectorized spatial predicate.
    pickups = gpd.GeoSeries(trips["pickup_geom"], crs="EPSG:4326")
    inside_count = int(pickups.intersects(county_poly).sum())
    return pd.DataFrame({"trip_count_in_coconino_county": [inside_count]})
+
+
def q3(data_paths: dict[str, str]) -> DataFrame:  # type: ignore[override]
    """Q3 (GeoPandas): Monthly trip stats within 15km (10km box + 5km buffer) of Sedona center.

    Filters trips whose pickup location is within 0.045 degrees (~5km) of the
    10km bounding box polygon (approximating ST_DWithin(pickup_point, polygon,
    0.045)). Then aggregates monthly:
      * total_trips = COUNT(t_tripkey)
      * avg_distance = AVG(t_distance)
      * avg_duration = AVG(t_dropofftime - t_pickuptime) in seconds
      * avg_fare = AVG(t_fare)
    Ordered by pickup_month ASC.
    Returns columns: pickup_month, total_trips, avg_distance, avg_duration, avg_fare
    """
    trip_df = pd.read_parquet(data_paths["trip"])

    trip_df["pickup_geom"] = gpd.GeoSeries.from_wkb(
        trip_df["t_pickuploc"], crs="EPSG:4326"
    )
    trips_gdf = gpd.GeoDataFrame(trip_df, geometry="pickup_geom", crs="EPSG:4326")

    # 10km bounding box around the Sedona city center (degrees, EPSG:4326).
    base_poly = Polygon(
        [
            (-111.9060, 34.7347),
            (-111.6160, 34.7347),
            (-111.6160, 35.0047),
            (-111.9060, 35.0047),
            (-111.9060, 34.7347),
        ]
    )

    distances = trips_gdf["pickup_geom"].distance(base_poly)
    mask = distances <= 0.045  # additional ~5km buffer in degrees
    # Bug fix: take an explicit copy before adding derived columns. The
    # original assigned new columns to a .loc[] slice, which raises pandas'
    # SettingWithCopyWarning and can silently fail to write the columns.
    filtered = trips_gdf.loc[mask].copy()

    filtered["_duration_seconds"] = (
        filtered["t_dropofftime"] - filtered["t_pickuptime"]
    ).dt.total_seconds()

    # Truncate to month start, mirroring DATE_TRUNC('month', ...).
    filtered["pickup_month"] = (
        filtered["t_pickuptime"].dt.to_period("M").dt.to_timestamp()
    )

    agg = (
        filtered.groupby("pickup_month", as_index=False)
        .agg(
            total_trips=("t_tripkey", "count"),
            avg_distance=("t_distance", "mean"),
            avg_duration=("_duration_seconds", "mean"),
            avg_fare=("t_fare", "mean"),
        )
        .sort_values("pickup_month")
        .reset_index(drop=True)
    )
    return cast(DataFrame, agg)
+
+
def q4(data_paths: dict[str, str]) -> DataFrame:  # type: ignore[override]
    """Q4 (GeoPandas): Zone distribution of the top 1000 trips by tip amount.

    Ranks trips by t_tip DESC (ties by t_tripkey ASC), keeps the top 1000,
    spatially joins their pickup points to zone polygons (point within
    polygon), and counts trips per zone.
    Returns columns: z_zonekey, z_name, trip_count, ordered by trip_count
    DESC, z_zonekey ASC.
    """
    trips = pd.read_parquet(data_paths["trip"])
    if "t_tip" not in trips.columns:
        # Without tip data there is nothing to rank; return an empty frame.
        return pd.DataFrame(columns=["z_zonekey", "z_name", "trip_count"])

    ranked = trips.sort_values(["t_tip", "t_tripkey"], ascending=[False, True])
    top_trips = ranked.head(1000)
    top_trips["pickup_geom"] = gpd.GeoSeries.from_wkb(
        top_trips["t_pickuploc"], crs="EPSG:4326"
    )
    top_gdf = gpd.GeoDataFrame(top_trips, geometry="pickup_geom", crs="EPSG:4326")

    zones = pd.read_parquet(data_paths["zone"])[
        ["z_zonekey", "z_name", "z_boundary"]
    ]
    zones["zone_geom"] = gpd.GeoSeries.from_wkb(
        zones["z_boundary"], crs="EPSG:4326"
    )
    zones_gdf = gpd.GeoDataFrame(zones, geometry="zone_geom", crs="EPSG:4326")[
        ["z_zonekey", "z_name", "zone_geom"]
    ]

    joined = gpd.sjoin(top_gdf, zones_gdf, how="inner", predicate="within")
    counts = joined.groupby(["z_zonekey", "z_name"], as_index=False).size()
    result = (
        counts.rename(columns={"size": "trip_count"})
        .sort_values(["trip_count", "z_zonekey"], ascending=[False, True])
        .reset_index(drop=True)
    )
    return result  # type: ignore[no-any-return]
+
+
def q5(data_paths: dict[str, str]) -> DataFrame:  # type: ignore[override]
    """Q5 (GeoPandas): Monthly travel patterns for repeat customers (convex hull of dropoff points).

    For each (customer, month) group with more than 5 trips, computes the
    area of the convex hull of that month's dropoff points. Geometries stay
    in EPSG:4326, so the area is planar, in square degrees.
    Returns columns: c_custkey, customer_name, pickup_month,
    monthly_travel_hull_area, ordered by trip_count DESC, c_custkey ASC.
    """
    trip_df = pd.read_parquet(data_paths["trip"])
    cust_df = pd.read_parquet(data_paths["customer"])
    # Decode dropoff WKB into shapely geometries.
    trip_df["dropoff_geom"] = gpd.GeoSeries.from_wkb(
        trip_df["t_dropoffloc"], crs="EPSG:4326"
    )
    joined = trip_df.merge(
        cust_df[["c_custkey", "c_name"]],
        left_on="t_custkey",
        right_on="c_custkey",
        how="inner",
    )
    # Truncate pickup timestamps to the first of the month
    # (mirrors the SQL's DATE_TRUNC('month', ...)).
    joined["pickup_month"] = (
        joined["t_pickuptime"].dt.to_period("M").dt.to_timestamp()
    )
    grouped = (
        joined.groupby(["c_custkey", "c_name", "pickup_month"], as_index=False)
        .agg(
            trip_count=("t_tripkey", "count"),
            # Collect each group's dropoff geometries into a plain Python list.
            dropoff_points=("dropoff_geom", lambda x: list(x)),
        )
        # HAVING COUNT(*) > 5: keep only repeat customers for meaningful hulls.
        .loc[lambda d: d["trip_count"] > 5]
    )
    # List of points -> MultiPoint -> convex hull -> planar area.
    grouped["monthly_travel_hull_area"] = gpd.GeoSeries(
        grouped["dropoff_points"].map(MultiPoint), crs="EPSG:4326"
    ).convex_hull.area

    result = (
        grouped.sort_values(["trip_count", "c_custkey"], ascending=[False, True])[
            ["c_custkey", "c_name", "pickup_month", "monthly_travel_hull_area"]
        ]
        .rename(columns={"c_name": "customer_name"})
        .reset_index(drop=True)
    )
    return result
+
+
def q6(data_paths: dict[str, str]) -> DataFrame:  # type: ignore[override]
    """Q6 (GeoPandas): Zone statistics for trips within 50km bounding box around Sedona.

    Mirrors original SQL intent:
      * Filter zones fully contained in the provided bounding box polygon.
      * Count trips whose pickup point lies within each zone (inner semantics:
        zones with 0 pickups excluded).
      * Compute:
          total_pickups = COUNT(t_tripkey)
          avg_distance  = AVG(t_totalamount) (matches original aliasing; falls
                          back to t_distance if needed)
          avg_duration  = AVG(t_dropofftime - t_pickuptime) in seconds
      * Order by total_pickups DESC, z_zonekey ASC.
    Returns DataFrame with columns: z_zonekey, z_name, total_pickups,
    avg_distance, avg_duration
    """
    trip_df = pd.read_parquet(data_paths["trip"])
    zone_df = pd.read_parquet(data_paths["zone"])

    trip_df["pickup_geom"] = gpd.GeoSeries.from_wkb(
        trip_df["t_pickuploc"], crs="EPSG:4326"
    )
    pickup_points = gpd.GeoDataFrame(
        trip_df, geometry="pickup_geom", crs="EPSG:4326"
    )

    zone_df["zone_geom"] = gpd.GeoSeries.from_wkb(
        zone_df["z_boundary"], crs="EPSG:4326"
    )
    zones_gdf = gpd.GeoDataFrame(zone_df, geometry="zone_geom", crs="EPSG:4326")[
        ["z_zonekey", "z_name", "zone_geom"]
    ]

    # 50km bounding box around Sedona (degrees, EPSG:4326).
    bbox_poly = Polygon(
        [
            (-112.2110, 34.4197),
            (-111.3110, 34.4197),
            (-111.3110, 35.3197),
            (-112.2110, 35.3197),
            (-112.2110, 34.4197),
        ]
    )

    # Bug fix: the SQL uses ST_Contains(bbox, zone) — the zone must be
    # *fully contained* in the bounding box — but this previously used
    # .intersects(), which also kept zones only partially inside the box.
    # zone.within(bbox) is equivalent to bbox.contains(zone).
    candidate_zones = zones_gdf[
        zones_gdf["zone_geom"].notna()
        & zones_gdf["zone_geom"].within(bbox_poly)
    ]

    # Preserve the SQL's (odd) aliasing: avg_distance averages t_totalamount
    # when present, falling back to t_distance, else NA.
    distance_col = (
        "t_totalamount"
        if "t_totalamount" in trip_df.columns
        else ("t_distance" if "t_distance" in trip_df.columns else None)
    )

    result = (
        gpd.sjoin(pickup_points, candidate_zones, how="inner", predicate="within")
        .assign(
            _duration_seconds=lambda d: (
                d["t_dropofftime"] - d["t_pickuptime"]
            ).dt.total_seconds(),
            _distance_metric=lambda d: d[distance_col] if distance_col else pd.NA,
        )
        .groupby(["z_zonekey", "z_name"], as_index=False)
        .agg(
            total_pickups=("t_tripkey", "count"),
            avg_distance=("_distance_metric", "mean"),
            avg_duration=("_duration_seconds", "mean"),
        )
        .sort_values(["total_pickups", "z_zonekey"], ascending=[False, True])
        .reset_index(drop=True)
    )
    return result  # type: ignore[no-any-return]
+
+
def q7(data_paths: dict[str, str]) -> DataFrame:  # type: ignore[override]
    """Q7 (GeoPandas): Detect potential route detours by comparing reported vs geometric distances.

    Mirrors the SQL's trip_lengths CTE:
      * reported_distance_m = t_distance (coerced to float)
      * line_distance_m = length of the straight pickup->dropoff line,
        converted from degrees via the 1 m = 0.000009 degree approximation
      * detour_ratio = reported_distance_m / line_distance_m (NaN if
        line_distance_m == 0, mirroring NULLIF)
      * Ordered by detour_ratio DESC, reported_distance_m DESC,
        t_tripkey ASC, with NaNs last
    """
    trip_df = pd.read_parquet(data_paths["trip"])
    trip_df["pickup_geom"] = gpd.GeoSeries.from_wkb(
        trip_df["t_pickuploc"], crs="EPSG:4326"
    )
    trip_df["dropoff_geom"] = gpd.GeoSeries.from_wkb(
        trip_df["t_dropoffloc"], crs="EPSG:4326"
    )
    trip_df["reported_distance_m"] = trip_df["t_distance"].astype(float)
    pickup_vals = trip_df["pickup_geom"].to_numpy()
    dropoff_vals = trip_df["dropoff_geom"].to_numpy()
    # Straight-line length per trip; np.fromiter consumes the generator
    # without materializing an intermediate Python list. Missing endpoints
    # yield NaN.
    line_lengths = np.fromiter(
        (
            LineString([pg, dg]).length / 0.000009  # 1 meter = 0.000009 degree
            if (pg is not None and dg is not None)
            else np.nan
            for pg, dg in zip(pickup_vals, dropoff_vals, strict=False)
        ),
        dtype=float,
        count=len(trip_df),
    )
    trip_df["line_distance_m"] = line_lengths
    # Vectorized safe division: rows where the line length is 0 keep the
    # NaN initial value (equivalent to SQL's NULLIF(line_distance_m, 0)).
    trip_df["detour_ratio"] = np.divide(
        trip_df["reported_distance_m"].to_numpy(dtype=float, copy=False),
        line_lengths,
        out=np.full_like(
            trip_df["reported_distance_m"].to_numpy(dtype=float, copy=False), np.nan
        ),
        where=(line_lengths != 0.0),
    )
    result = (
        trip_df[
            [
                "t_tripkey",
                "reported_distance_m",
                "line_distance_m",
                "detour_ratio",
            ]
        ]
        .sort_values(
            ["detour_ratio", "reported_distance_m", "t_tripkey"],
            ascending=[False, False, True],
            na_position="last",
        )
        .reset_index(drop=True)
    )
    return result
+
+
def q8(data_paths: dict[str, str]) -> DataFrame:  # type: ignore[override]
    """Q8 (GeoPandas): Count pickups within ~500m (0.0045 deg) of each building.

    Spatially joins buildings to pickup points with the 'dwithin' predicate
    and counts matches per building. Returns columns: b_buildingkey, b_name,
    nearby_pickup_count, ordered by count DESC, key ASC.
    """
    trips = pd.read_parquet(data_paths["trip"])
    trips["pickup_geom"] = gpd.GeoSeries.from_wkb(
        trips["t_pickuploc"], crs="EPSG:4326"
    )
    pickups = gpd.GeoDataFrame(trips, geometry="pickup_geom", crs="EPSG:4326")

    buildings = pd.read_parquet(data_paths["building"])
    buildings["boundary_geom"] = gpd.GeoSeries.from_wkb(
        buildings["b_boundary"], crs="EPSG:4326"
    )
    buildings_gdf = gpd.GeoDataFrame(
        buildings, geometry="boundary_geom", crs="EPSG:4326"
    )

    radius_deg = 0.0045  # degrees (~500m)
    joined = buildings_gdf.sjoin(pickups, predicate="dwithin", distance=radius_deg)
    counts = joined.groupby(["b_buildingkey", "b_name"], as_index=False).size()
    result = (
        counts.rename(columns={"size": "nearby_pickup_count"})
        .sort_values(
            ["nearby_pickup_count", "b_buildingkey"], ascending=[False, True]
        )
        .reset_index(drop=True)
    )
    return result  # type: ignore[no-any-return]
+
+
def q9(data_paths: dict[str, str]) -> DataFrame:  # type: ignore[override]
    """Q9 (GeoPandas): Building conflation via IoU (intersection over union) detection.

    Uses a spatial self-join (predicate='intersects') to find intersecting
    building boundary polygons. Robust to differing GeoPandas suffix behaviors
    by detecting column names and falling back to index_right.
    Output columns: building_1, building_2, area1, area2, overlap_area, iou
    ordered by iou DESC, building_1 ASC, building_2 ASC.
    """
    buildings_df = pd.read_parquet(data_paths["building"])
    buildings_df["boundary_geom"] = gpd.GeoSeries.from_wkb(
        buildings_df["b_boundary"], crs="EPSG:4326"
    )
    bdf = gpd.GeoDataFrame(buildings_df, geometry="boundary_geom", crs="EPSG:4326")[
        ["b_buildingkey", "boundary_geom"]
    ].rename(columns={"b_buildingkey": "building_key"})

    # Self-join: every intersecting pair appears in both orders, plus
    # self-pairs; deduplicated below by the building_1 < building_2 filter.
    pairs = gpd.sjoin(bdf, bdf, how="inner", predicate="intersects")

    # GeoPandas versions differ in how they suffix duplicated column names
    # after a self-join, so probe the likely names rather than hard-coding.
    left_key_candidates = ["building_key_left", "building_key_1", "building_key"]
    right_key_candidates = ["building_key_right", "building_key_2"]
    left_key_col = next(c for c in left_key_candidates if c in pairs.columns)
    right_key_col = next(
        (c for c in right_key_candidates if c in pairs.columns), None
    )
    if right_key_col is None:
        # Fallback: resolve the right-side key through the index_right column.
        pairs["_building_key_right_temp"] = bdf.loc[
            pairs["index_right"], "building_key"
        ].to_numpy()
        right_key_col = "_building_key_right_temp"

    pairs = pairs.rename(
        columns={left_key_col: "building_1", right_key_col: "building_2"}
    ).rename_geometry("boundary_geom_1")
    # Recover the right-side geometry: sjoin keeps only the left geometry.
    pairs["boundary_geom_2"] = bdf.loc[
        pairs["index_right"], "boundary_geom"
    ].to_numpy()

    # Filter to only building_1 < building_2 (exclude self-pairs); this also
    # mirrors the SQL's b1.id < b2.id join condition.
    pairs = pairs[pairs["building_1"] < pairs["building_2"]]

    # Compute metrics
    boundary_geom_1_gs = gpd.GeoSeries(pairs["boundary_geom_1"], crs=pairs.crs)
    boundary_geom_2_gs = gpd.GeoSeries(pairs["boundary_geom_2"], crs=pairs.crs)
    pairs["area1"] = boundary_geom_1_gs.area
    pairs["area2"] = boundary_geom_2_gs.area
    intersection = boundary_geom_1_gs.intersection(boundary_geom_2_gs)
    pairs["overlap_area"] = intersection.area
    overlap = pairs["overlap_area"].to_numpy(dtype=float, copy=False)
    area1 = pairs["area1"].to_numpy(dtype=float, copy=False)
    area2 = pairs["area2"].to_numpy(dtype=float, copy=False)
    union = area1 + area2 - overlap
    # IoU with zero-union guarded to 0; zero-union-with-overlap pairs are
    # forced to 1.0, matching the SQL CASE expression.
    iou = np.divide(overlap, union, out=np.zeros_like(overlap), where=union != 0.0)
    mask_union_zero = (union == 0.0) & (overlap > 0.0)
    if mask_union_zero.any():
        iou[mask_union_zero] = 1.0
    pairs["iou"] = iou
    result = (
        pairs[["building_1", "building_2", "area1", "area2", "overlap_area", "iou"]]
        .sort_values(
            ["iou", "building_1", "building_2"], ascending=[False, True, True]
        )
        .reset_index(drop=True)
    )
    return cast(DataFrame, result)
+
+
def q10(data_paths: dict[str, str]) -> DataFrame:  # type: ignore[override]
    """Q10 (GeoPandas): Zone stats for trips starting within each zone.

    Produces columns: z_zonekey, pickup_zone (z_name), avg_duration (seconds),
    avg_distance, num_trips.
    Ordered by avg_duration DESC (NULLS last), z_zonekey ASC.
    Zones with zero trips retained (avg_* = NaN, num_trips = 0).
    """
    trip_df = pd.read_parquet(data_paths["trip"])
    zone_df = pd.read_parquet(data_paths["zone"])

    trip_df["pickup_geom"] = gpd.GeoSeries.from_wkb(
        trip_df["t_pickuploc"], crs="EPSG:4326"
    )
    pickup_points = gpd.GeoDataFrame(
        trip_df, geometry="pickup_geom", crs="EPSG:4326"
    )

    zone_df["zone_geom"] = gpd.GeoSeries.from_wkb(
        zone_df["z_boundary"], crs="EPSG:4326"
    )
    zones_gdf = gpd.GeoDataFrame(zone_df, geometry="zone_geom", crs="EPSG:4326")

    aggregations = {
        "duration_seconds": "mean",
        "t_distance": "mean",
        "t_tripkey": "count",
    }
    result = (
        # how="right" keeps every zone, mirroring the SQL's LEFT JOIN from zone.
        gpd.sjoin(pickup_points, zones_gdf, how="right", predicate="within")
        .assign(
            duration_seconds=lambda d: (
                d["t_dropofftime"] - d["t_pickuptime"]
            ).dt.total_seconds()
        )
        # dropna=False keeps zone groups even when the join matched no trips.
        .groupby(["z_zonekey", "z_name"], dropna=False)
        .agg(aggregations)
        .rename(
            columns={
                "duration_seconds": "avg_duration",
                "t_distance": "avg_distance",
                "t_tripkey": "num_trips",
            }
        )
        .reset_index()
        # Coerce the trip count to a plain int (0 for empty zones).
        .assign(num_trips=lambda d: d["num_trips"].fillna(0).astype(int))
        .sort_values(
            by=["avg_duration", "z_zonekey"],
            ascending=[False, True],
            na_position="last",
        )
        .rename(columns={"z_name": "pickup_zone"})
        .reset_index(drop=True)
    )
    return result  # type: ignore[no-any-return]
+
+
def q11(data_paths: dict[str, str]) -> DataFrame:  # type: ignore[override]
    """Q11 (GeoPandas): Count trips whose pickup and dropoff are in different zones.

    Tags each trip's pickup point and dropoff point with the key of the zone
    containing it, then counts trips where both zones are known and differ.
    Returns a single-row DataFrame with column: cross_zone_trip_count
    """
    trip_df = pd.read_parquet(data_paths["trip"])
    zone_df = pd.read_parquet(data_paths["zone"])

    def _join_points_to_zones(loc_col: str, geom_col: str, zone_key: str):
        # Build a point GeoDataFrame from the trip WKB column and left-join
        # it to zone polygons (point within polygon); points in no zone keep
        # a missing zone key.
        pts = trip_df[["t_tripkey", loc_col]].copy()
        pts[geom_col] = gpd.GeoSeries.from_wkb(pts[loc_col], crs="EPSG:4326")
        pts_gdf = gpd.GeoDataFrame(pts, geometry=geom_col, crs="EPSG:4326")

        zones = (
            zone_df[["z_zonekey", "z_boundary"]]
            .rename(columns={"z_zonekey": zone_key})
            .copy()
        )
        zones["zone_geom"] = gpd.GeoSeries.from_wkb(
            zones["z_boundary"], crs="EPSG:4326"
        )
        zones_gdf = gpd.GeoDataFrame(zones, geometry="zone_geom", crs="EPSG:4326")

        return gpd.sjoin(pts_gdf, zones_gdf, how="left", predicate="within")

    pickup_join = _join_points_to_zones(
        "t_pickuploc", "pickup_geom", "pickup_zonekey"
    )
    dropoff_join = _join_points_to_zones(
        "t_dropoffloc", "dropoff_geom", "dropoff_zonekey"
    )

    merged = pickup_join[["t_tripkey", "pickup_zonekey"]].merge(
        dropoff_join[["t_tripkey", "dropoff_zonekey"]], on="t_tripkey", how="inner"
    )

    crosses_zones = (
        merged["pickup_zonekey"].notna()
        & merged["dropoff_zonekey"].notna()
        & (merged["pickup_zonekey"] != merged["dropoff_zonekey"])
    )
    return pd.DataFrame({"cross_zone_trip_count": [int(crosses_zones.sum())]})
+
+
def q12(data_paths: dict[str, str]) -> DataFrame:  # type: ignore[override]
    """Q12 (GeoPandas): Find 5 nearest buildings to each trip pickup location (NLJ, memory-efficient).

    Uses a Python loop (nested loop join) to avoid materializing the full
    cross join. For each pickup, computes distances to all buildings and
    selects the 5 closest (ties broken by building key ASC).
    Output columns: t_tripkey, t_pickuploc, b_buildingkey, building_name,
    distance_to_building
    """
    trips_df = pd.read_parquet(data_paths["trip"])
    buildings_df = pd.read_parquet(data_paths["building"])

    trips_df["pickup_geom"] = gpd.GeoSeries.from_wkb(
        trips_df["t_pickuploc"], crs="EPSG:4326"
    )
    buildings_df["boundary_geom"] = gpd.GeoSeries.from_wkb(
        buildings_df["b_boundary"], crs="EPSG:4326"
    )
    trips_gdf = gpd.GeoDataFrame(trips_df, geometry="pickup_geom", crs="EPSG:4326")
    buildings_gdf = gpd.GeoDataFrame(
        buildings_df, geometry="boundary_geom", crs="EPSG:4326"
    )

    pickup_geoms = trips_gdf["pickup_geom"].to_list()
    building_geoms = buildings_gdf["boundary_geom"].to_list()
    building_keys = buildings_gdf["b_buildingkey"].to_numpy()
    building_names = buildings_gdf["b_name"].to_numpy()
    # Perf fix: hoist per-trip scalar lookups out of the loop. The original
    # called trips_gdf.iloc[i][...] twice per result row (up to 10x per
    # pickup); each .iloc materializes an entire row as a Series.
    trip_keys = trips_gdf["t_tripkey"].to_numpy()
    pickup_locs = trips_gdf["t_pickuploc"].to_numpy()

    results = []
    # Since geopandas doesn't support KNN join, we had to choose either a
    # cross join + filter or a NLJ. The cross join would be more
    # pandas-esque, but would require too much memory. The NLJ is arguably
    # methodologically unfair (a hand optimization) but the only way to
    # actually get the query to run.
    for i, pt in enumerate(pickup_geoms):
        dists = [pt.distance(geom) for geom in building_geoms]
        # Sort by distance, then building key (lexsort's last key is primary).
        nearest_idx = np.lexsort((building_keys, dists))[:5]
        for idx in nearest_idx:
            results.append(
                {
                    "t_tripkey": trip_keys[i],
                    "t_pickuploc": pickup_locs[i],
                    "b_buildingkey": building_keys[idx],
                    "building_name": building_names[idx],
                    "distance_to_building": dists[idx],
                }
            )
    return (
        pd.DataFrame(results)
        .sort_values(
            ["distance_to_building", "b_buildingkey"], ascending=[True, True]
        )
        .reset_index(drop=True)
    )
diff --git a/print_queries.py b/print_queries.py
new file mode 100755
index 0000000..3636b2c
--- /dev/null
+++ b/print_queries.py
@@ -0,0 +1,459 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import inspect
+import re
+import sys
+
+
class SpatialBenchBenchmark:
    """A benchmark for the performance of analytical spatial queries on a spatial dataset.

    These queries are written in the Sedona/Spark SQL dialect. Because spatial functions are not as standardized as
    other analytical functions, many engines need specific implementations of a couple of these queries where dialects
    vary slightly.

    To deal with these differences, other engine-specific implementations of this benchmark subclass this class and
    override only the queries that need to be changed.

    """

    def queries(self) -> dict[str, str]:
        """Collect the SQL text of every query method on this (sub)class.

        Gathers all methods of the class whose names match 'q' followed by a
        number and that take no arguments, calls each one, and returns the
        resulting SQL strings keyed by query name, sorted numerically
        (q1, q2, ..., q12).

        Returns:
            Dict[str, str]: A dictionary mapping query names to their SQL text.

        Raises:
            ValueError: If a q<number> method declares any parameters.
        """

        queries = {}
        for name, method in inspect.getmembers(
            self.__class__, predicate=inspect.isfunction
        ):
            if re.fullmatch(r"q\d+", name):
                sig = inspect.signature(method)
                if len(sig.parameters) == 0:
                    queries[name] = method()
                else:
                    raise ValueError("Query methods must not take any arguments")

        # Sort queries numerically by extracting the number from the query name
        sorted_queries = dict(sorted(queries.items(), key=lambda x: int(x[0][1:])))

        return sorted_queries

    def dialect(self) -> str:
        """Return the dialect of the benchmark."""
        return "SedonaSpark"

    @staticmethod
    def q1() -> str:
        return """
-- Q1: Find trips starting within 50km of Sedona city center, ordered by distance
SELECT
  t.t_tripkey, ST_X(ST_GeomFromWKB(t.t_pickuploc)) AS pickup_lon, ST_Y(ST_GeomFromWKB(t.t_pickuploc)) AS pickup_lat, t.t_pickuptime,
  ST_Distance(ST_GeomFromWKB(t.t_pickuploc), ST_GeomFromText('POINT (-111.7610 34.8697)')) AS distance_to_center
FROM trip t
WHERE ST_DWithin(ST_GeomFromWKB(t.t_pickuploc), ST_GeomFromText('POINT (-111.7610 34.8697)'), 0.45) -- 50km radius around Sedona center
ORDER BY distance_to_center ASC, t.t_tripkey ASC
    """

    @staticmethod
    def q2() -> str:
        return """
-- Q2: Count trips starting within Coconino County (Arizona) zone
SELECT COUNT(*) AS trip_count_in_coconino_county
FROM trip t
WHERE ST_Intersects(ST_GeomFromWKB(t.t_pickuploc), (SELECT ST_GeomFromWKB(z.z_boundary) FROM zone z WHERE z.z_name = 'Coconino County' LIMIT 1))
    """

    @staticmethod
    def q3() -> str:
        return """
-- Q3: Monthly trip statistics within 15km radius of Sedona city center (10km base + 5km buffer)
SELECT
  DATE_TRUNC('month', t.t_pickuptime) AS pickup_month, COUNT(t.t_tripkey) AS total_trips,
  AVG(t.t_distance) AS avg_distance, AVG(t.t_dropofftime - t.t_pickuptime) AS avg_duration,
  AVG(t.t_fare) AS avg_fare
FROM trip t
WHERE ST_DWithin(
    ST_GeomFromWKB(t.t_pickuploc),
    ST_GeomFromText('POLYGON((-111.9060 34.7347, -111.6160 34.7347, -111.6160 35.0047, -111.9060 35.0047, -111.9060 34.7347))'), -- 10km bounding box around Sedona
    0.045 -- Additional 5km buffer
  )
GROUP BY pickup_month
ORDER BY pickup_month
"""

    @staticmethod
    def q4() -> str:
        return """
-- Q4: Zone distribution of top 1000 trips by tip amount
SELECT z.z_zonekey, z.z_name, COUNT(*) AS trip_count
FROM
  zone z
  JOIN (
    SELECT t.t_pickuploc
    FROM trip t
    ORDER BY t.t_tip DESC, t.t_tripkey ASC
    LIMIT 1000 -- Replace 1000 with x (how many top tips you want)
  ) top_trips ON ST_Within(ST_GeomFromWKB(top_trips.t_pickuploc), ST_GeomFromWKB(z.z_boundary))
GROUP BY z.z_zonekey, z.z_name
ORDER BY trip_count DESC, z.z_zonekey ASC
    """

    @staticmethod
    def q5() -> str:
        return """
-- Q5: Monthly travel patterns for repeat customers (convex hull of dropoff locations)
SELECT
  c.c_custkey, c.c_name AS customer_name,
  DATE_TRUNC('month', t.t_pickuptime) AS pickup_month,
  ST_Area(ST_ConvexHull(ST_Collect(ARRAY_AGG(ST_GeomFromWKB(t.t_dropoffloc))))) AS monthly_travel_hull_area
FROM trip t JOIN customer c ON t.t_custkey = c.c_custkey
GROUP BY c.c_custkey, c.c_name, pickup_month
HAVING COUNT(*) > 5 -- Only include repeat customers for meaningful hulls
ORDER BY COUNT(*) DESC, c.c_custkey ASC
    """

    @staticmethod
    def q6() -> str:
        return """
-- Q6: Zone statistics for trips within 50km radius of Sedona city center
SELECT
  z.z_zonekey, z.z_name,
  COUNT(t.t_tripkey) AS total_pickups, AVG(t.t_totalamount) AS avg_distance,
  AVG(t.t_dropofftime - t.t_pickuptime) AS avg_duration
FROM trip t, zone z
WHERE ST_Contains(ST_GeomFromText('POLYGON((-112.2110 34.4197, -111.3110 34.4197, -111.3110 35.3197, -112.2110 35.3197, -112.2110 34.4197))'), ST_GeomFromWKB(z.z_boundary)) -- 50km bounding box around Sedona
  AND ST_Within(ST_GeomFromWKB(t.t_pickuploc), ST_GeomFromWKB(z.z_boundary))
GROUP BY z.z_zonekey, z.z_name
ORDER BY total_pickups DESC, z.z_zonekey ASC
    """

    @staticmethod
    def q7() -> str:
        return """
-- Q7: Detect potential route detours by comparing reported vs. geometric distances
WITH trip_lengths AS (
    SELECT
        t.t_tripkey,
        t.t_distance AS reported_distance_m,
        ST_Length(
            ST_MakeLine(
                ST_GeomFromWKB(t.t_pickuploc),
                ST_GeomFromWKB(t.t_dropoffloc)
            )
        ) / 0.000009 AS line_distance_m -- 1 meter = 0.000009 degree
    FROM trip t
)
SELECT
    t.t_tripkey,
    t.reported_distance_m,
    t.line_distance_m,
    t.reported_distance_m / NULLIF(t.line_distance_m, 0) AS detour_ratio
FROM trip_lengths t
ORDER BY detour_ratio DESC NULLS LAST, reported_distance_m DESC, t_tripkey ASC
    """

    @staticmethod
    def q8() -> str:
        return """
-- Q8: Count nearby pickups for each building within 500m radius
SELECT b.b_buildingkey, b.b_name, COUNT(*) AS nearby_pickup_count
FROM trip t JOIN building b ON ST_DWithin(ST_GeomFromWKB(t.t_pickuploc), ST_GeomFromWKB(b.b_boundary), 0.0045) -- ~500m
GROUP BY b.b_buildingkey, b.b_name
ORDER BY nearby_pickup_count DESC, b.b_buildingkey ASC
    """

    @staticmethod
    def q9() -> str:
        return """
-- Q9: Building Conflation (duplicate/overlap detection via IoU), deterministic order
WITH b1 AS (
    SELECT b_buildingkey AS id, ST_GeomFromWKB(b_boundary) AS geom
    FROM building
),
    b2 AS (
        SELECT b_buildingkey AS id, ST_GeomFromWKB(b_boundary) AS geom
        FROM building
    ),
    pairs AS (
        SELECT
            b1.id AS building_1,
            b2.id AS building_2,
            ST_Area(b1.geom) AS area1,
            ST_Area(b2.geom) AS area2,
            ST_Area(ST_Intersection(b1.geom, b2.geom)) AS overlap_area
        FROM b1
        JOIN b2
            ON b1.id < b2.id
            AND ST_Intersects(b1.geom, b2.geom)
    )
SELECT
    building_1,
    building_2,
    area1,
    area2,
    overlap_area,
    CASE
        WHEN overlap_area = 0 THEN 0.0
        WHEN (area1 + area2 - overlap_area) = 0 THEN 1.0
        ELSE overlap_area / (area1 + area2 - overlap_area)
    END AS iou
FROM pairs
ORDER BY iou DESC, building_1 ASC, building_2 ASC
    """

    @staticmethod
    def q10() -> str:
        return """
-- Q10: Zone statistics for trips starting within each zone
SELECT
  z.z_zonekey, z.z_name AS pickup_zone, AVG(t.t_dropofftime - t.t_pickuptime) AS avg_duration,
  AVG(t.t_distance) AS avg_distance, COUNT(t.t_tripkey) AS num_trips
FROM zone z LEFT JOIN trip t ON ST_Within(ST_GeomFromWKB(t.t_pickuploc), ST_GeomFromWKB(z.z_boundary))
GROUP BY z.z_zonekey, z.z_name
ORDER BY avg_duration DESC NULLS LAST, z.z_zonekey ASC
    """

    @staticmethod
    def q11() -> str:
        return """
-- Q11: Count trips that cross between different zones
SELECT COUNT(*) AS cross_zone_trip_count
FROM
  trip t
  JOIN zone pickup_zone ON ST_Within(ST_GeomFromWKB(t.t_pickuploc), ST_GeomFromWKB(pickup_zone.z_boundary))
  JOIN zone dropoff_zone ON ST_Within(ST_GeomFromWKB(t.t_dropoffloc), ST_GeomFromWKB(dropoff_zone.z_boundary))
WHERE pickup_zone.z_zonekey != dropoff_zone.z_zonekey
    """

    @staticmethod
    def q12() -> str:
        # There is some odd bug with missing columns in EMR. Using CTEs to work around it.
        return """
-- Q12: Find 5 nearest buildings to each trip pickup location using KNN join
WITH trip_with_geom AS (
    SELECT t_tripkey, t_pickuploc, ST_GeomFromWKB(t_pickuploc) as pickup_geom
    FROM trip
),
    building_with_geom AS (
        SELECT b_buildingkey, b_name, b_boundary, ST_GeomFromWKB(b_boundary) as boundary_geom
        FROM building
    )
SELECT
    t.t_tripkey,
    t.t_pickuploc,
    b.b_buildingkey,
    b.b_name AS building_name,
    ST_Distance(t.pickup_geom, b.boundary_geom) AS distance_to_building
FROM trip_with_geom t JOIN building_with_geom b
    ON ST_KNN(t.pickup_geom, b.boundary_geom, 5, FALSE)
ORDER BY distance_to_building ASC, b.b_buildingkey ASC
    """
+
+
class DatabricksSpatialBenchBenchmark(SpatialBenchBenchmark):
    """A Databricks-specific implementation of the SpatialBench benchmark.

    This class is used to run the SpatialBench benchmark using Databricks'
    spatial functions. It varies only as needed from the base class.

    """

    def dialect(self) -> str:
        """Return the dialect of the benchmark."""
        return "Databricks"

    @staticmethod
    def q5() -> str:
        return """
-- Q5 (Databricks): NO ST_Collect function, using ST_Union_Agg instead. This is more expensive, but should give the same results.
SELECT
  c.c_custkey, c.c_name AS customer_name,
  DATE_TRUNC('month', t.t_pickuptime) AS pickup_month,
  ST_Area(ST_ConvexHull(ST_Union_Agg(ST_GeomFromWKB(t.t_dropoffloc)))) AS monthly_travel_hull_area
FROM trip t JOIN customer c ON t.t_custkey = c.c_custkey
GROUP BY c.c_custkey, c.c_name, pickup_month
HAVING COUNT(*) > 5 -- Only include repeat customers for meaningful hulls
ORDER BY COUNT(*) DESC, c.c_custkey ASC
    """

    @staticmethod
    def q7() -> str:
        # Bug fix: this override was previously (mis)named q8 even though it
        # contains the *Q7* detour query. That left Q7 inherited with the
        # varargs ST_MakeLine call Databricks does not support, and clobbered
        # the real Q8 (nearby-pickup count) with detour SQL. Renamed to q7
        # so the base-class q8 is inherited unchanged.
        return """
-- Q7 (Databricks): ST_MakeLine takes an array of points rather than varargs
WITH trip_lengths AS (
    SELECT
        t.t_tripkey,
        t.t_distance AS reported_distance_m,
        ST_Length(
            ST_MakeLine(
                Array(
                    ST_GeomFromWKB(t.t_pickuploc),
                    ST_GeomFromWKB(t.t_dropoffloc)
                )
            )
        ) / 0.000009 AS line_distance_m -- 1 meter = 0.000009 degree
    FROM trip t
)
SELECT
    t.t_tripkey,
    t.reported_distance_m,
    t.line_distance_m,
    t.reported_distance_m / NULLIF(t.line_distance_m, 0) AS detour_ratio
FROM trip_lengths t
ORDER BY detour_ratio DESC NULLS LAST, reported_distance_m DESC, t_tripkey ASC
    """

    @staticmethod
    def q12() -> str:
        return """
-- Q12 (Databricks): No KNN join, using cross join + ROW_NUMBER() window function instead.
-- Note: Databricks doesn't have cross join lateral support.
SELECT
    t_tripkey,
    t_pickuploc,
    b_buildingkey,
    building_name,
    distance_to_building
FROM (
    SELECT
        t.t_tripkey,
        t.t_pickuploc,
        b.b_buildingkey,
        b.b_name AS building_name,
        ST_Distance(ST_GeomFromWKB(t.t_pickuploc), ST_GeomFromWKB(b.b_boundary)) AS distance_to_building,
        ROW_NUMBER() OVER (
            PARTITION BY t.t_tripkey
            ORDER BY ST_Distance(ST_GeomFromWKB(t.t_pickuploc), ST_GeomFromWKB(b.b_boundary)) ASC
        ) AS rn
    FROM trip t
    JOIN building b
    ) AS ranked_buildings
WHERE rn <= 5
ORDER BY distance_to_building ASC, b_buildingkey ASC
    """
+
+
class DuckDBSpatialBenchBenchmark(SpatialBenchBenchmark):
    """SpatialBench benchmark variant targeting DuckDB's spatial extension.

    Only the queries that cannot run verbatim on DuckDB are overridden here;
    everything else is inherited unchanged from the base class.
    """

    def dialect(self) -> str:
        """Name of the SQL dialect this benchmark emits."""
        return "DuckDB"

    @staticmethod
    def q12() -> str:
        """Q12 rewritten for DuckDB, which has no KNN join operator.

        A ``CROSS JOIN LATERAL`` subquery picks the five nearest buildings
        for each trip pickup point instead.
        """
        sql = """
-- Q12 (DuckDB): No KNN join, using cross join lateral instead.
SELECT
  t.t_tripkey,
  t.t_pickuploc,
  nb.b_buildingkey,
  nb.building_name,
  nb.distance_to_building
FROM trip t
  CROSS JOIN LATERAL (
    SELECT
      b.b_buildingkey,
      b.b_name AS building_name,
      ST_Distance(ST_GeomFromWKB(t.t_pickuploc), ST_GeomFromWKB(b.b_boundary)) AS distance_to_building
    FROM building b
    ORDER BY distance_to_building
    LIMIT 5
) AS nb
ORDER BY nb.distance_to_building, nb.b_buildingkey
    """
        return sql
+
+
class SedonaDBSpatialBenchBenchmark(SpatialBenchBenchmark):
    """SpatialBench benchmark variant targeting SedonaDB's spatial functions.

    Inherits the base queries and overrides only those whose syntax differs
    in SedonaDB.
    """

    def dialect(self) -> str:
        """Name of the SQL dialect this benchmark emits."""
        return "SedonaDB"

    @staticmethod
    def q5() -> str:
        """Q5 rewritten for SedonaDB's aggregate form of ``ST_Collect``.

        SedonaDB treats ``ST_Collect`` as an aggregate (no ``ARRAY_AGG``
        needed), so the hull is computed in a CTE and filtered afterwards.
        """
        sql = """
-- Q5 (SedonaDB): In SedonaDB ST_Collect is an aggregate function so no need to use ARRAY_AGG first.
-- ST_Collect does not accept an array as input so we cannot use the query with ARRAY_AGG.
WITH per AS (
  SELECT
    c.c_custkey,
    c.c_name AS customer_name,
    DATE_TRUNC('month', t.t_pickuptime) AS pickup_month,
    COUNT(t.t_tripkey) AS n_trips,
    ST_Area(ST_ConvexHull(
      ST_Collect(ST_GeomFromWKB(t.t_dropoffloc))
    )) AS monthly_travel_hull_area
  FROM trip t
  JOIN customer c ON t.t_custkey = c.c_custkey
  GROUP BY c.c_custkey, c.c_name, pickup_month
)
SELECT *
FROM per
WHERE n_trips > 5
ORDER BY n_trips DESC, c_custkey ASC;
    """
        return sql
+
+
def main():
    """CLI entry point: print every benchmark query in the requested dialect.

    Reads the dialect name from ``sys.argv[1]``, validates it against the
    known dialects, and prints each query to stdout. Geopandas is handled
    as a special non-SQL case.
    """
    query_classes = {
        "SedonaSpark": SpatialBenchBenchmark,
        "Databricks": DatabricksSpatialBenchBenchmark,
        "DuckDB": DuckDBSpatialBenchBenchmark,
        "SedonaDB": SedonaDBSpatialBenchBenchmark,
        "Geopandas": None  # Special case, we will catch this below
    }
    available = ', '.join(query_classes.keys())

    # No dialect supplied on the command line: show usage and bail out.
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <dialect>")
        print(f"Available dialects: {available}")
        sys.exit(1)

    chosen = sys.argv[1]

    # Unknown dialect: list the valid options and exit with an error code.
    # (Checking membership first is safe — "Geopandas" is a key, so the
    # special case below is still reached for it.)
    if chosen not in query_classes:
        print(f"Unknown dialect: {chosen}")
        print(f"Available dialects: {available}")
        sys.exit(1)

    # Geopandas has no SQL dialect; point the user at the companion script.
    if chosen == "Geopandas":
        print("Geopandas does not support SQL queries directly. Please use the provided Python script geopandas.py.")
        sys.exit(0)

    for sql in query_classes[chosen]().queries().values():
        print(sql)
+
+
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
\ No newline at end of file