This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 301086f1 feat(rust/sedona-pointcloud): Add LAS support (#628)
301086f1 is described below
commit 301086f17f77c7b1fc7cebae7918c344a517fc91
Author: Balthasar Teuscher <[email protected]>
AuthorDate: Thu Feb 19 00:05:57 2026 +0100
feat(rust/sedona-pointcloud): Add LAS support (#628)
---
rust/sedona-pointcloud/src/{laz => las}/builder.rs | 18 +--
rust/sedona-pointcloud/src/{laz => las}/format.rs | 126 ++++++++++-----
.../sedona-pointcloud/src/{laz => las}/metadata.rs | 97 ++++++++----
rust/sedona-pointcloud/src/{laz => las}/mod.rs | 0
rust/sedona-pointcloud/src/{laz => las}/opener.rs | 70 +++++++--
rust/sedona-pointcloud/src/{laz => las}/options.rs | 27 +++-
rust/sedona-pointcloud/src/{laz => las}/reader.rs | 86 +++++-----
rust/sedona-pointcloud/src/{laz => las}/schema.rs | 2 +-
rust/sedona-pointcloud/src/{laz => las}/source.rs | 40 +++--
.../src/{laz => las}/statistics.rs | 173 +++++++++++----------
rust/sedona-pointcloud/src/lib.rs | 2 +-
rust/sedona-pointcloud/src/options.rs | 2 +-
.../tests/data/{extra.laz => extra.las} | Bin 7018 -> 6345 bytes
rust/sedona-pointcloud/tests/data/extra.laz | Bin 7018 -> 7018 bytes
rust/sedona-pointcloud/tests/data/generate.py | 83 +++++-----
rust/sedona-pointcloud/tests/data/large.las | Bin 0 -> 3000375 bytes
rust/sedona-pointcloud/tests/data/large.laz | Bin 8567 -> 1487 bytes
rust/sedona/src/context.rs | 8 +-
18 files changed, 467 insertions(+), 267 deletions(-)
diff --git a/rust/sedona-pointcloud/src/laz/builder.rs
b/rust/sedona-pointcloud/src/las/builder.rs
similarity index 97%
rename from rust/sedona-pointcloud/src/laz/builder.rs
rename to rust/sedona-pointcloud/src/las/builder.rs
index 0b39a40a..8e2e8a85 100644
--- a/rust/sedona-pointcloud/src/laz/builder.rs
+++ b/rust/sedona-pointcloud/src/las/builder.rs
@@ -36,7 +36,7 @@ use geoarrow_schema::Dimension;
use las::{Header, Point};
use crate::{
- laz::{metadata::ExtraAttribute, options::LasExtraBytes,
schema::try_schema_from_header},
+ las::{metadata::ExtraAttribute, options::LasExtraBytes,
schema::try_schema_from_header},
options::GeometryEncoding,
};
@@ -516,7 +516,7 @@ mod tests {
use object_store::{local::LocalFileSystem, path::Path, ObjectStore};
use crate::{
- laz::{options::LasExtraBytes, reader::LazFileReaderFactory},
+ las::{options::LasExtraBytes, reader::LasFileReaderFactory},
options::PointcloudOptions,
};
@@ -541,15 +541,15 @@ mod tests {
let location = Path::from_filesystem_path(tmp_path).unwrap();
let object = store.head(&location).await.unwrap();
- let laz_file_reader = LazFileReaderFactory::new(Arc::new(store),
None)
+ let file_reader = LasFileReaderFactory::new(Arc::new(store), None)
.create_reader(
PartitionedFile::new(location, object.size),
PointcloudOptions::default(),
)
.unwrap();
- let metadata = laz_file_reader.get_metadata().await.unwrap();
+ let metadata = file_reader.get_metadata().await.unwrap();
- let batch = laz_file_reader
+ let batch = file_reader
.get_batch(&metadata.chunk_table[0])
.await
.unwrap();
@@ -570,20 +570,20 @@ mod tests {
// file with extra attributes generated with `tests/data/generate.py`
let extra_path = "tests/data/extra.laz";
- // read batch with `LazFileReader`
+ // read batch with `LasFileReader`
let store = LocalFileSystem::new();
let location = Path::from_filesystem_path(extra_path).unwrap();
let object = store.head(&location).await.unwrap();
- let laz_file_reader = LazFileReaderFactory::new(Arc::new(store), None)
+ let file_reader = LasFileReaderFactory::new(Arc::new(store), None)
.create_reader(
PartitionedFile::new(location, object.size),
PointcloudOptions::default().with_las_extra_bytes(LasExtraBytes::Typed),
)
.unwrap();
- let metadata = laz_file_reader.get_metadata().await.unwrap();
+ let metadata = file_reader.get_metadata().await.unwrap();
- let batch = laz_file_reader
+ let batch = file_reader
.get_batch(&metadata.chunk_table[0])
.await
.unwrap();
diff --git a/rust/sedona-pointcloud/src/laz/format.rs
b/rust/sedona-pointcloud/src/las/format.rs
similarity index 73%
rename from rust/sedona-pointcloud/src/laz/format.rs
rename to rust/sedona-pointcloud/src/las/format.rs
index f187a675..48e07054 100644
--- a/rust/sedona-pointcloud/src/laz/format.rs
+++ b/rust/sedona-pointcloud/src/las/format.rs
@@ -34,34 +34,51 @@ use futures::{StreamExt, TryStreamExt};
use object_store::{ObjectMeta, ObjectStore};
use crate::{
- laz::{metadata::LazMetadataReader, reader::LazFileReaderFactory,
source::LazSource},
+ las::{metadata::LasMetadataReader, reader::LasFileReaderFactory,
source::LasSource},
options::PointcloudOptions,
};
-const DEFAULT_LAZ_EXTENSION: &str = ".laz";
+#[derive(Debug, Clone, Copy)]
+pub enum Extension {
+ Las,
+ Laz,
+}
-/// Factory struct used to create [LazFormat]
-#[derive(Default)]
-pub struct LazFormatFactory {
- // inner options for LAZ
+impl Extension {
+ pub fn as_str(&self) -> &str {
+ match self {
+ Extension::Las => "las",
+ Extension::Laz => "laz",
+ }
+ }
+}
+
+/// Factory struct used to create [LasFormat]
+pub struct LasFormatFactory {
+ // inner options for LAS/LAZ
pub options: Option<PointcloudOptions>,
+ extension: Extension,
}
-impl LazFormatFactory {
- /// Creates an instance of [LazFormatFactory]
- pub fn new() -> Self {
- Self { options: None }
+impl LasFormatFactory {
+ /// Creates an instance of [LasFormatFactory]
+ pub fn new(extension: Extension) -> Self {
+ Self {
+ options: None,
+ extension,
+ }
}
- /// Creates an instance of [LazFormatFactory] with customized default
options
- pub fn new_with(options: PointcloudOptions) -> Self {
+ /// Creates an instance of [LasFormatFactory] with customized default
options
+ pub fn new_with(options: PointcloudOptions, extension: Extension) -> Self {
Self {
options: Some(options),
+ extension,
}
}
}
-impl FileFormatFactory for LazFormatFactory {
+impl FileFormatFactory for LasFormatFactory {
fn create(
&self,
state: &dyn Session,
@@ -80,11 +97,13 @@ impl FileFormatFactory for LazFormatFactory {
options.set(k, v)?;
}
- Ok(Arc::new(LazFormat::default().with_options(options)))
+ Ok(Arc::new(
+ LasFormat::new(self.extension).with_options(options),
+ ))
}
fn default(&self) -> Arc<dyn FileFormat> {
- Arc::new(LazFormat::default())
+ Arc::new(LasFormat::new(self.extension))
}
fn as_any(&self) -> &dyn Any {
@@ -92,28 +111,36 @@ impl FileFormatFactory for LazFormatFactory {
}
}
-impl GetExt for LazFormatFactory {
+impl GetExt for LasFormatFactory {
fn get_ext(&self) -> String {
- // Removes the dot, i.e. ".laz" -> "laz"
- DEFAULT_LAZ_EXTENSION[1..].to_string()
+ self.extension.as_str().to_string()
}
}
-impl fmt::Debug for LazFormatFactory {
+impl fmt::Debug for LasFormatFactory {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- f.debug_struct("LazFormatFactory")
- .field("LazFormatFactory", &self.options)
+ f.debug_struct("LasFormatFactory")
+ .field("options", &self.options)
+ .field("extension", &self.extension)
.finish()
}
}
-/// The LAZ `FileFormat` implementation
-#[derive(Debug, Default)]
-pub struct LazFormat {
+/// The LAS/LAZ `FileFormat` implementation
+#[derive(Debug)]
+pub struct LasFormat {
pub options: PointcloudOptions,
+ extension: Extension,
}
-impl LazFormat {
+impl LasFormat {
+ pub fn new(extension: Extension) -> Self {
+ Self {
+ options: Default::default(),
+ extension,
+ }
+ }
+
pub fn with_options(mut self, options: PointcloudOptions) -> Self {
self.options = options;
self
@@ -121,13 +148,13 @@ impl LazFormat {
}
#[async_trait::async_trait]
-impl FileFormat for LazFormat {
+impl FileFormat for LasFormat {
fn as_any(&self) -> &dyn Any {
self
}
fn get_ext(&self) -> String {
- LazFormatFactory::new().get_ext()
+ LasFormatFactory::new(self.extension).get_ext()
}
fn get_ext_with_compression(
@@ -159,7 +186,7 @@ impl FileFormat for LazFormat {
.map(|object_meta| async {
let loc_path = object_meta.location.clone();
- let schema = LazMetadataReader::new(store, object_meta)
+ let schema = LasMetadataReader::new(store, object_meta)
.with_file_metadata_cache(Some(Arc::clone(&file_metadata_cache)))
.with_options(self.options.clone())
.fetch_schema()
@@ -193,7 +220,7 @@ impl FileFormat for LazFormat {
object: &ObjectMeta,
) -> Result<Statistics, DataFusionError> {
let file_metadata_cache =
state.runtime_env().cache_manager.get_file_metadata_cache();
- LazMetadataReader::new(store, object)
+ LasMetadataReader::new(store, object)
.with_options(self.options.clone())
.with_file_metadata_cache(Some(Arc::clone(&file_metadata_cache)))
.fetch_statistics(&table_schema)
@@ -208,17 +235,17 @@ impl FileFormat for LazFormat {
let mut source = conf
.file_source()
.as_any()
- .downcast_ref::<LazSource>()
+ .downcast_ref::<LasSource>()
.cloned()
- .ok_or_else(|| DataFusionError::External("Expected
LazSource".into()))?;
+ .ok_or_else(|| DataFusionError::External("Expected
LasSource".into()))?;
source = source.with_options(self.options.clone());
let metadata_cache =
state.runtime_env().cache_manager.get_file_metadata_cache();
let store = state
.runtime_env()
.object_store(conf.object_store_url.clone())?;
- let laz_reader_factory = Arc::new(LazFileReaderFactory::new(store,
Some(metadata_cache)));
- let source = source.with_reader_factory(laz_reader_factory);
+ let reader_factory = Arc::new(LasFileReaderFactory::new(store,
Some(metadata_cache)));
+ let source = source.with_reader_factory(reader_factory);
let conf = FileScanConfigBuilder::from(conf)
.with_source(Arc::new(source))
@@ -228,7 +255,7 @@ impl FileFormat for LazFormat {
}
fn file_source(&self) -> Arc<dyn FileSource> {
- Arc::new(LazSource::default().with_options(self.options.clone()))
+
Arc::new(LasSource::new(self.extension).with_options(self.options.clone()))
}
}
@@ -240,29 +267,48 @@ mod test {
use datafusion_datasource::file_format::FileFormatFactory;
use las::{point::Format, Builder, Writer};
- use crate::laz::format::{LazFormat, LazFormatFactory};
+ use crate::las::format::{Extension, LasFormat, LasFormatFactory};
fn setup_context() -> SessionContext {
- let file_format = Arc::new(LazFormatFactory::new());
-
let mut state = SessionStateBuilder::new().build();
+
+ let file_format = Arc::new(LasFormatFactory::new(Extension::Las));
+ state.register_file_format(file_format, true).unwrap();
+
+ let file_format = Arc::new(LasFormatFactory::new(Extension::Laz));
state.register_file_format(file_format, true).unwrap();
SessionContext::new_with_state(state).enable_url_table()
}
#[tokio::test]
- async fn laz_format_factory() {
+ async fn format_factory() {
let ctx = SessionContext::new();
- let format_factory = Arc::new(LazFormatFactory::new());
+ let format_factory = Arc::new(LasFormatFactory::new(Extension::Las));
let dyn_format = format_factory
.create(&ctx.state(), &HashMap::new())
.unwrap();
- assert!(dyn_format.as_any().downcast_ref::<LazFormat>().is_some());
+ assert!(dyn_format.as_any().downcast_ref::<LasFormat>().is_some());
+
+ let ctx = SessionContext::new();
+ let format_factory = Arc::new(LasFormatFactory::new(Extension::Laz));
+ let dyn_format = format_factory
+ .create(&ctx.state(), &HashMap::new())
+ .unwrap();
+ assert!(dyn_format.as_any().downcast_ref::<LasFormat>().is_some());
}
#[tokio::test]
async fn projection() {
+ let ctx = setup_context();
+
+ let df = ctx
+ .sql("SELECT x, y, z FROM 'tests/data/extra.las'")
+ .await
+ .unwrap();
+
+ assert_eq!(df.schema().fields().len(), 3);
+
let ctx = setup_context();
let df = ctx
.sql("SELECT x, y, z FROM 'tests/data/extra.laz'")
diff --git a/rust/sedona-pointcloud/src/laz/metadata.rs
b/rust/sedona-pointcloud/src/las/metadata.rs
similarity index 86%
rename from rust/sedona-pointcloud/src/laz/metadata.rs
rename to rust/sedona-pointcloud/src/las/metadata.rs
index c70b7434..ab9b40ac 100644
--- a/rust/sedona-pointcloud/src/laz/metadata.rs
+++ b/rust/sedona-pointcloud/src/las/metadata.rs
@@ -37,14 +37,14 @@ use laz::laszip::ChunkTable;
use object_store::{ObjectMeta, ObjectStore};
use crate::{
- laz::{
+ las::{
schema::try_schema_from_header,
- statistics::{chunk_statistics, LazStatistics},
+ statistics::{chunk_statistics, LasStatistics},
},
options::PointcloudOptions,
};
-/// Laz chunk metadata
+/// LAS/LAZ chunk metadata
#[derive(Debug, Clone)]
pub struct ChunkMeta {
pub num_points: u64,
@@ -52,16 +52,16 @@ pub struct ChunkMeta {
pub byte_range: Range<u64>,
}
-/// Laz metadata
+/// LAS/LAZ metadata
#[derive(Debug, Clone)]
-pub struct LazMetadata {
+pub struct LasMetadata {
pub header: Arc<Header>,
pub extra_attributes: Arc<Vec<ExtraAttribute>>,
pub chunk_table: Vec<ChunkMeta>,
- pub statistics: Option<LazStatistics>,
+ pub statistics: Option<LasStatistics>,
}
-impl FileMetadata for LazMetadata {
+impl FileMetadata for LasMetadata {
fn as_any(&self) -> &dyn Any {
self
}
@@ -87,15 +87,15 @@ impl FileMetadata for LazMetadata {
}
}
-/// Reader for laz file metadata in object storage.
-pub struct LazMetadataReader<'a> {
+/// Reader for LAS/LAZ file metadata in object storage.
+pub struct LasMetadataReader<'a> {
store: &'a dyn ObjectStore,
object_meta: &'a ObjectMeta,
file_metadata_cache: Option<Arc<dyn FileMetadataCache>>,
options: PointcloudOptions,
}
-impl<'a> LazMetadataReader<'a> {
+impl<'a> LasMetadataReader<'a> {
pub fn new(store: &'a dyn ObjectStore, object_meta: &'a ObjectMeta) ->
Self {
Self {
store,
@@ -127,8 +127,8 @@ impl<'a> LazMetadataReader<'a> {
.map_err(DataFusionError::External)
}
- /// Fetch laz metadata from the remote object store
- pub async fn fetch_metadata(&self) -> Result<Arc<LazMetadata>,
DataFusionError> {
+ /// Fetch LAS/LAZ metadata from the remote object store
+ pub async fn fetch_metadata(&self) -> Result<Arc<LasMetadata>,
DataFusionError> {
let Self {
store,
object_meta,
@@ -142,8 +142,8 @@ impl<'a> LazMetadataReader<'a> {
.and_then(|file_metadata| {
file_metadata
.as_any()
- .downcast_ref::<LazMetadata>()
- .map(|laz_file_metadata|
Arc::new(laz_file_metadata.to_owned()))
+ .downcast_ref::<LasMetadata>()
+ .map(|las_file_metadata|
Arc::new(las_file_metadata.to_owned()))
})
{
return Ok(las_file_metadata);
@@ -151,7 +151,11 @@ impl<'a> LazMetadataReader<'a> {
let header = self.fetch_header().await?;
let extra_attributes = extra_bytes_attributes(&header)?;
- let chunk_table = chunk_table(*store, object_meta, &header).await?;
+ let chunk_table = if header.laz_vlr().is_ok() {
+ laz_chunk_table(*store, object_meta, &header).await?
+ } else {
+ las_chunk_table(&header).await?
+ };
let statistics = if options.collect_statistics {
Some(
chunk_statistics(
@@ -167,7 +171,7 @@ impl<'a> LazMetadataReader<'a> {
None
};
- let metadata = Arc::new(LazMetadata {
+ let metadata = Arc::new(LasMetadata {
header: Arc::new(header),
extra_attributes: Arc::new(extra_attributes),
chunk_table,
@@ -181,7 +185,7 @@ impl<'a> LazMetadataReader<'a> {
Ok(metadata)
}
- /// Read and parse the schema of the laz file
+ /// Read and parse the schema of the LAS/LAZ file
pub async fn fetch_schema(&mut self) -> Result<Schema, DataFusionError> {
let metadata = self.fetch_metadata().await?;
@@ -194,7 +198,7 @@ impl<'a> LazMetadataReader<'a> {
Ok(schema)
}
- /// Fetch the metadata from the laz file via [`Self::fetch_metadata`] and
extracts
+ /// Fetch the metadata from the LAS/LAZ file via [`Self::fetch_metadata`]
and extracts
/// the statistics in the metadata
pub async fn fetch_statistics(
&self,
@@ -237,7 +241,7 @@ impl<'a> LazMetadataReader<'a> {
}
}
-pub(crate) async fn fetch_header(
+async fn fetch_header(
store: &(impl ObjectStore + ?Sized),
object_meta: &ObjectMeta,
) -> Result<Header, Box<dyn Error + Send + Sync>> {
@@ -300,7 +304,8 @@ pub struct ExtraAttribute {
pub offset: Option<f64>,
}
-pub(crate) fn extra_bytes_attributes(
+/// Extract [ExtraAttribute]s from [Header]
+fn extra_bytes_attributes(
header: &Header,
) -> Result<Vec<ExtraAttribute>, Box<dyn Error + Send + Sync>> {
let mut attributes = Vec::new();
@@ -363,19 +368,16 @@ pub(crate) fn extra_bytes_attributes(
Ok(attributes)
}
-pub(crate) async fn chunk_table(
+async fn laz_chunk_table(
store: &(impl ObjectStore + ?Sized),
object_meta: &ObjectMeta,
header: &Header,
) -> Result<Vec<ChunkMeta>, Box<dyn Error + Send + Sync>> {
+ let laz_vlr = header.laz_vlr()?;
+
let num_points = header.number_of_points();
let mut point_offset = 0;
-
- let vlr_len = header.vlrs().iter().map(|v| v.len(false)).sum::<usize>();
- let header_size = header.version().header_size() as usize +
header.padding().len();
- let mut byte_offset = (header_size + vlr_len + header.vlr_padding().len())
as u64;
-
- let laz_vlr = header.laz_vlr()?;
+ let mut byte_offset = offset_to_point_data(header);
let ranges = [
byte_offset..byte_offset + 8,
@@ -438,6 +440,41 @@ pub(crate) async fn chunk_table(
Ok(chunks)
}
+async fn las_chunk_table(header: &Header) -> Result<Vec<ChunkMeta>, Box<dyn
Error + Send + Sync>> {
+ const CHUNK_SIZE: u64 = 50000;
+
+ let num_points = header.number_of_points();
+ let mut point_offset = 0;
+ let mut byte_offset = offset_to_point_data(header);
+ let record_size = header.point_format().len() as u64;
+
+ let num_chunks = num_points.div_ceil(CHUNK_SIZE);
+ let mut chunks = Vec::with_capacity(num_chunks as usize);
+
+ for _ in 0..num_chunks {
+ let point_count = CHUNK_SIZE.min(num_points - point_offset);
+ let byte_count = point_count * record_size;
+
+ let chunk = ChunkMeta {
+ num_points: point_count,
+ point_offset,
+ byte_range: byte_offset..byte_offset + byte_count,
+ };
+
+ chunks.push(chunk);
+ point_offset += point_count;
+ byte_offset += byte_count;
+ }
+
+ Ok(chunks)
+}
+
+fn offset_to_point_data(header: &Header) -> u64 {
+ let vlr_len = header.vlrs().iter().map(|v| v.len(false)).sum::<usize>();
+ let header_size = header.version().header_size() as usize +
header.padding().len();
+ (header_size + vlr_len + header.vlr_padding().len()) as u64
+}
+
#[cfg(test)]
mod tests {
use std::fs::File;
@@ -445,7 +482,7 @@ mod tests {
use las::{point::Format, Builder, Reader, Writer};
use object_store::{local::LocalFileSystem, path::Path, ObjectStore};
- use crate::laz::metadata::LazMetadataReader;
+ use crate::las::metadata::LasMetadataReader;
#[tokio::test]
async fn header_basic_e2e() {
@@ -462,11 +499,11 @@ mod tests {
let mut writer = Writer::new(tmp_file, header).unwrap();
writer.close().unwrap();
- // read with `LazMetadataReader`
+ // read with `LasMetadataReader`
let store = LocalFileSystem::new();
let location = Path::from_filesystem_path(&tmp_path).unwrap();
let object_meta = store.head(&location).await.unwrap();
- let metadata_reader = LazMetadataReader::new(&store, &object_meta);
+ let metadata_reader = LasMetadataReader::new(&store, &object_meta);
// read with las `Reader`
let reader = Reader::from_path(&tmp_path).unwrap();
diff --git a/rust/sedona-pointcloud/src/laz/mod.rs
b/rust/sedona-pointcloud/src/las/mod.rs
similarity index 100%
rename from rust/sedona-pointcloud/src/laz/mod.rs
rename to rust/sedona-pointcloud/src/las/mod.rs
diff --git a/rust/sedona-pointcloud/src/laz/opener.rs
b/rust/sedona-pointcloud/src/las/opener.rs
similarity index 79%
rename from rust/sedona-pointcloud/src/laz/opener.rs
rename to rust/sedona-pointcloud/src/las/opener.rs
index aa78691c..249e5392 100644
--- a/rust/sedona-pointcloud/src/laz/opener.rs
+++ b/rust/sedona-pointcloud/src/las/opener.rs
@@ -30,28 +30,28 @@ use sedona_expr::spatial_filter::SpatialFilter;
use sedona_geometry::bounding_box::BoundingBox;
use crate::{
- laz::{
- reader::{LazFileReader, LazFileReaderFactory},
+ las::{
+ reader::{LasFileReader, LasFileReaderFactory},
schema::try_schema_from_header,
},
options::PointcloudOptions,
};
-pub struct LazOpener {
+pub struct LasOpener {
/// Column indexes in `table_schema` needed by the query
pub projection: Arc<[usize]>,
/// Optional limit on the number of rows to read
pub limit: Option<usize>,
pub predicate: Option<Arc<dyn PhysicalExpr>>,
- /// Factory for instantiating laz reader
- pub laz_file_reader_factory: Arc<LazFileReaderFactory>,
+ /// Factory for instantiating LAS/LAZ reader
+ pub file_reader_factory: Arc<LasFileReaderFactory>,
/// Table options
pub options: PointcloudOptions,
/// Target batch size
pub(crate) batch_size: usize,
}
-impl FileOpener for LazOpener {
+impl FileOpener for LasOpener {
fn open(&self, file: PartitionedFile) -> Result<FileOpenFuture,
DataFusionError> {
let projection = self.projection.clone();
let limit = self.limit;
@@ -59,16 +59,16 @@ impl FileOpener for LazOpener {
let predicate = self.predicate.clone();
- let laz_reader: Box<LazFileReader> = self
- .laz_file_reader_factory
+ let file_reader: Box<LasFileReader> = self
+ .file_reader_factory
.create_reader(file.clone(), self.options.clone())?;
Ok(Box::pin(async move {
- let metadata = laz_reader.get_metadata().await?;
+ let metadata = file_reader.get_metadata().await?;
let schema = Arc::new(try_schema_from_header(
&metadata.header,
- laz_reader.options.geometry_encoding,
- laz_reader.options.las.extra_bytes,
+ file_reader.options.geometry_encoding,
+ file_reader.options.las.extra_bytes,
)?);
let pruning_predicate = predicate.and_then(|physical_expr| {
@@ -146,7 +146,7 @@ impl FileOpener for LazOpener {
}
// fetch batch
- let record_batch = laz_reader.get_batch(chunk_meta).await?;
+ let record_batch =
file_reader.get_batch(chunk_meta).await?;
let num_rows = record_batch.num_rows();
row_count += num_rows;
@@ -179,6 +179,52 @@ impl FileOpener for LazOpener {
mod tests {
use sedona::context::SedonaContext;
+ #[tokio::test]
+ async fn las_statistics_pruning() {
+ // file with two clusters, one at 0.5 one at 1.0
+ let path = "tests/data/large.las";
+
+ let ctx = SedonaContext::new_local_interactive().await.unwrap();
+
+ // ensure no faulty chunk pruning
+ ctx.sql("SET pointcloud.geometry_encoding = 'plain'")
+ .await
+ .unwrap();
+ ctx.sql("SET pointcloud.collect_statistics = 'true'")
+ .await
+ .unwrap();
+
+ let count = ctx
+ .sql(&format!("SELECT * FROM \"{path}\" WHERE x < 0.7"))
+ .await
+ .unwrap()
+ .count()
+ .await
+ .unwrap();
+ assert_eq!(count, 50000);
+
+ let count = ctx
+ .sql(&format!("SELECT * FROM \"{path}\" WHERE y < 0.7"))
+ .await
+ .unwrap()
+ .count()
+ .await
+ .unwrap();
+ assert_eq!(count, 50000);
+
+ ctx.sql("SET pointcloud.geometry_encoding = 'wkb'")
+ .await
+ .unwrap();
+ let count = ctx
+ .sql(&format!("SELECT * FROM \"{path}\" WHERE
ST_Intersects(geometry, ST_GeomFromText('POLYGON ((0 0, 0.7 0, 0.7 0.7, 0 0.7,
0 0))'))"))
+ .await
+ .unwrap()
+ .count()
+ .await
+ .unwrap();
+ assert_eq!(count, 50000);
+ }
+
#[tokio::test]
async fn laz_statistics_pruning() {
// file with two clusters, one at 0.5 one at 1.0
diff --git a/rust/sedona-pointcloud/src/laz/options.rs
b/rust/sedona-pointcloud/src/las/options.rs
similarity index 84%
rename from rust/sedona-pointcloud/src/laz/options.rs
rename to rust/sedona-pointcloud/src/las/options.rs
index 124b9104..de02628f 100644
--- a/rust/sedona-pointcloud/src/laz/options.rs
+++ b/rust/sedona-pointcloud/src/las/options.rs
@@ -97,13 +97,19 @@ mod test {
prelude::{SessionConfig, SessionContext},
};
- use crate::{laz::format::LazFormatFactory, options::PointcloudOptions};
+ use crate::{
+ las::format::{Extension, LasFormatFactory},
+ options::PointcloudOptions,
+ };
fn setup_context() -> SessionContext {
- let file_format = Arc::new(LazFormatFactory::new());
-
let config =
SessionConfig::new().with_option_extension(PointcloudOptions::default());
let mut state = SessionStateBuilder::new().with_config(config).build();
+
+ let file_format = Arc::new(LasFormatFactory::new(Extension::Las));
+ state.register_file_format(file_format, true).unwrap();
+
+ let file_format = Arc::new(LasFormatFactory::new(Extension::Laz));
state.register_file_format(file_format, true).unwrap();
SessionContext::new_with_state(state).enable_url_table()
@@ -114,6 +120,13 @@ mod test {
let ctx = setup_context();
// default options
+ let df = ctx
+ .sql("SELECT x, y, z FROM 'tests/data/extra.las'")
+ .await
+ .unwrap();
+
+ assert_eq!(df.schema().fields().len(), 3);
+
let df = ctx
.sql("SELECT x, y, z FROM 'tests/data/extra.laz'")
.await
@@ -128,6 +141,14 @@ mod test {
ctx.sql("SET pointcloud.las.extra_bytes = 'blob'")
.await
.unwrap();
+
+ let df = ctx
+ .sql("SELECT geometry, extra_bytes FROM 'tests/data/extra.las'")
+ .await
+ .unwrap();
+
+ assert_eq!(df.schema().fields().len(), 2);
+
let df = ctx
.sql("SELECT geometry, extra_bytes FROM 'tests/data/extra.laz'")
.await
diff --git a/rust/sedona-pointcloud/src/laz/reader.rs
b/rust/sedona-pointcloud/src/las/reader.rs
similarity index 76%
rename from rust/sedona-pointcloud/src/laz/reader.rs
rename to rust/sedona-pointcloud/src/las/reader.rs
index 895bc0fd..64939a19 100644
--- a/rust/sedona-pointcloud/src/laz/reader.rs
+++ b/rust/sedona-pointcloud/src/las/reader.rs
@@ -15,7 +15,11 @@
// specific language governing permissions and limitations
// under the License.
-use std::{io::Cursor, ops::Range, sync::Arc};
+use std::{
+ io::{Cursor, Read},
+ ops::Range,
+ sync::Arc,
+};
use arrow_array::RecordBatch;
use bytes::Bytes;
@@ -33,22 +37,22 @@ use laz::{
use object_store::ObjectStore;
use crate::{
- laz::{
+ las::{
builder::RowBuilder,
- metadata::{ChunkMeta, LazMetadata, LazMetadataReader},
+ metadata::{ChunkMeta, LasMetadata, LasMetadataReader},
},
options::PointcloudOptions,
};
-/// Laz file reader factory
+/// LAS/LAZ file reader factory
#[derive(Debug)]
-pub struct LazFileReaderFactory {
+pub struct LasFileReaderFactory {
store: Arc<dyn ObjectStore>,
metadata_cache: Option<Arc<dyn FileMetadataCache>>,
}
-impl LazFileReaderFactory {
- /// Create a new `LazFileReaderFactory`.
+impl LasFileReaderFactory {
+ /// Create a new `LasFileReaderFactory`.
pub fn new(
store: Arc<dyn ObjectStore>,
metadata_cache: Option<Arc<dyn FileMetadataCache>>,
@@ -63,8 +67,8 @@ impl LazFileReaderFactory {
&self,
partitioned_file: PartitionedFile,
options: PointcloudOptions,
- ) -> Result<Box<LazFileReader>, DataFusionError> {
- Ok(Box::new(LazFileReader {
+ ) -> Result<Box<LasFileReader>, DataFusionError> {
+ Ok(Box::new(LasFileReader {
partitioned_file,
store: self.store.clone(),
metadata_cache: self.metadata_cache.clone(),
@@ -73,21 +77,21 @@ impl LazFileReaderFactory {
}
}
-/// Reader for a laz file in object storage.
-pub struct LazFileReader {
+/// Reader for a LAS/LAZ file in object storage.
+pub struct LasFileReader {
partitioned_file: PartitionedFile,
store: Arc<dyn ObjectStore>,
metadata_cache: Option<Arc<dyn FileMetadataCache>>,
pub options: PointcloudOptions,
}
-impl LazFileReader {
- pub fn get_metadata<'a>(&'a self) -> BoxFuture<'a,
Result<Arc<LazMetadata>, DataFusionError>> {
+impl LasFileReader {
+ pub fn get_metadata<'a>(&'a self) -> BoxFuture<'a,
Result<Arc<LasMetadata>, DataFusionError>> {
let object_meta = self.partitioned_file.object_meta.clone();
let metadata_cache = self.metadata_cache.clone();
async move {
- LazMetadataReader::new(&self.store, &object_meta)
+ LasMetadataReader::new(&self.store, &object_meta)
.with_file_metadata_cache(metadata_cache)
.with_options(self.options.clone())
.fetch_metadata()
@@ -103,10 +107,6 @@ impl LazFileReader {
// fetch bytes
let bytes = self.get_bytes(chunk_meta.byte_range.clone()).await?;
- // laz decompressor
- let mut decompressor = record_decompressor(&header, bytes)
- .map_err(|e| DataFusionError::External(Box::new(e)))?;
-
// record batch builder
let num_points = chunk_meta.num_points as usize;
let mut builder = RowBuilder::new(num_points, header.clone())
@@ -116,22 +116,28 @@ impl LazFileReader {
self.options.las.extra_bytes,
);
- // transform
- let format = header.point_format();
- let transforms = header.transforms();
-
- let out = vec![0; format.len() as usize];
- let mut buffer = Cursor::new(out);
-
- for _ in 0..chunk_meta.num_points {
- buffer.set_position(0);
- decompressor.decompress_next(buffer.get_mut())?;
-
- let point = RawPoint::read_from(&mut buffer, format)
- .map(|raw_point| Point::new(raw_point, transforms))
+ // parse points
+ if header.laz_vlr().is_ok() {
+ // laz decompressor
+ let mut decompressor = record_decompressor(&header, bytes)
.map_err(|e| DataFusionError::External(Box::new(e)))?;
- builder.append(point);
+ let out = vec![0; header.point_format().len() as usize];
+ let mut buffer = Cursor::new(out);
+
+ for _ in 0..chunk_meta.num_points {
+ buffer.set_position(0);
+ decompressor.decompress_next(buffer.get_mut())?;
+ let point = read_point(&mut buffer, &header)?;
+ builder.append(point);
+ }
+ } else {
+ let mut buffer = Cursor::new(bytes);
+
+ for _ in 0..chunk_meta.num_points {
+ let point = read_point(&mut buffer, &header)?;
+ builder.append(point);
+ }
}
let struct_array = builder.finish()?;
@@ -181,6 +187,12 @@ pub fn record_decompressor(
Ok(decompressor)
}
+pub(crate) fn read_point<R: Read>(buffer: R, header: &Header) -> Result<Point,
DataFusionError> {
+ RawPoint::read_from(buffer, header.point_format())
+ .map(|raw_point| Point::new(raw_point, header.transforms()))
+ .map_err(|e| DataFusionError::External(Box::new(e)))
+}
+
#[cfg(test)]
mod tests {
use std::{fs::File, sync::Arc};
@@ -189,7 +201,7 @@ mod tests {
use las::{point::Format, Builder, Writer};
use object_store::{local::LocalFileSystem, path::Path, ObjectStore};
- use crate::laz::reader::LazFileReaderFactory;
+ use crate::las::reader::LasFileReaderFactory;
#[tokio::test]
async fn reader_basic_e2e() {
@@ -206,20 +218,20 @@ mod tests {
writer.write_point(Default::default()).unwrap();
writer.close().unwrap();
- // read batch with `LazFileReader`
+ // read batch with `LasFileReader`
let store = LocalFileSystem::new();
let location = Path::from_filesystem_path(tmp_path).unwrap();
let object = store.head(&location).await.unwrap();
- let laz_file_reader = LazFileReaderFactory::new(Arc::new(store), None)
+ let file_reader = LasFileReaderFactory::new(Arc::new(store), None)
.create_reader(
PartitionedFile::new(location, object.size),
Default::default(),
)
.unwrap();
- let metadata = laz_file_reader.get_metadata().await.unwrap();
+ let metadata = file_reader.get_metadata().await.unwrap();
- let batch = laz_file_reader
+ let batch = file_reader
.get_batch(&metadata.chunk_table[0])
.await
.unwrap();
diff --git a/rust/sedona-pointcloud/src/laz/schema.rs
b/rust/sedona-pointcloud/src/las/schema.rs
similarity index 98%
rename from rust/sedona-pointcloud/src/laz/schema.rs
rename to rust/sedona-pointcloud/src/las/schema.rs
index bdcca623..c3e68394 100644
--- a/rust/sedona-pointcloud/src/laz/schema.rs
+++ b/rust/sedona-pointcloud/src/las/schema.rs
@@ -22,7 +22,7 @@ use geoarrow_schema::{CoordType, Crs, Dimension, Metadata,
PointType, WkbType};
use las::Header;
use las_crs::{get_epsg_from_geotiff_crs, get_epsg_from_wkt_crs_bytes};
-use crate::{laz::options::LasExtraBytes, options::GeometryEncoding};
+use crate::{las::options::LasExtraBytes, options::GeometryEncoding};
// Arrow schema for LAS points
pub fn try_schema_from_header(
diff --git a/rust/sedona-pointcloud/src/laz/source.rs
b/rust/sedona-pointcloud/src/las/source.rs
similarity index 82%
rename from rust/sedona-pointcloud/src/laz/source.rs
rename to rust/sedona-pointcloud/src/las/source.rs
index 79ea2df8..004d726c 100644
--- a/rust/sedona-pointcloud/src/laz/source.rs
+++ b/rust/sedona-pointcloud/src/las/source.rs
@@ -29,39 +29,53 @@ use datafusion_physical_plan::{
use object_store::ObjectStore;
use crate::{
- laz::{opener::LazOpener, reader::LazFileReaderFactory},
+ las::{format::Extension, opener::LasOpener, reader::LasFileReaderFactory},
options::PointcloudOptions,
};
-#[derive(Clone, Default, Debug)]
-pub struct LazSource {
+#[derive(Clone, Debug)]
+pub struct LasSource {
/// Optional metrics
metrics: ExecutionPlanMetricsSet,
/// The schema of the file.
pub(crate) table_schema: Option<TableSchema>,
/// Optional predicate for row filtering during parquet scan
pub(crate) predicate: Option<Arc<dyn PhysicalExpr>>,
- /// Laz file reader factory
- pub(crate) reader_factory: Option<Arc<LazFileReaderFactory>>,
+ /// LAS/LAZ file reader factory
+ pub(crate) reader_factory: Option<Arc<LasFileReaderFactory>>,
/// Batch size configuration
pub(crate) batch_size: Option<usize>,
pub(crate) projected_statistics: Option<Statistics>,
pub(crate) options: PointcloudOptions,
+ pub(crate) extension: Extension,
}
-impl LazSource {
+impl LasSource {
+ pub fn new(extension: Extension) -> Self {
+ Self {
+ metrics: Default::default(),
+ table_schema: Default::default(),
+ predicate: Default::default(),
+ reader_factory: Default::default(),
+ batch_size: Default::default(),
+ projected_statistics: Default::default(),
+ options: Default::default(),
+ extension,
+ }
+ }
+
pub fn with_options(mut self, options: PointcloudOptions) -> Self {
self.options = options;
self
}
- pub fn with_reader_factory(mut self, reader_factory: Arc<LazFileReaderFactory>) -> Self {
+ pub fn with_reader_factory(mut self, reader_factory: Arc<LasFileReaderFactory>) -> Self {
self.reader_factory = Some(reader_factory);
self
}
}
-impl FileSource for LazSource {
+impl FileSource for LasSource {
fn create_file_opener(
&self,
object_store: Arc<dyn ObjectStore>,
@@ -72,17 +86,17 @@ impl FileSource for LazSource {
.file_column_projection_indices()
.unwrap_or_else(|| (0..base_config.projected_file_schema().fields().len()).collect());
- let laz_file_reader_factory = self
+ let file_reader_factory = self
.reader_factory
.clone()
- .unwrap_or_else(|| Arc::new(LazFileReaderFactory::new(object_store, None)));
+ .unwrap_or_else(|| Arc::new(LasFileReaderFactory::new(object_store, None)));
- Arc::new(LazOpener {
+ Arc::new(LasOpener {
projection: Arc::from(projection),
batch_size: self.batch_size.expect("Must be set"),
limit: base_config.limit,
predicate: self.predicate.clone(),
- laz_file_reader_factory,
+ file_reader_factory,
options: self.options.clone(),
})
}
@@ -132,7 +146,7 @@ impl FileSource for LazSource {
}
fn file_type(&self) -> &str {
- "laz"
+ self.extension.as_str()
}
fn try_pushdown_filters(
diff --git a/rust/sedona-pointcloud/src/laz/statistics.rs b/rust/sedona-pointcloud/src/las/statistics.rs
similarity index 78%
rename from rust/sedona-pointcloud/src/laz/statistics.rs
rename to rust/sedona-pointcloud/src/las/statistics.rs
index 75e38a3b..87f11abd 100644
--- a/rust/sedona-pointcloud/src/laz/statistics.rs
+++ b/rust/sedona-pointcloud/src/las/statistics.rs
@@ -27,22 +27,25 @@ use arrow_ipc::{reader::FileReader, writer::FileWriter};
use arrow_schema::{DataType, Field, Schema};
use datafusion_common::{arrow::compute::concat_batches, Column, DataFusionError, ScalarValue};
use datafusion_pruning::PruningStatistics;
-use las::{raw::Point as RawPoint, Header, Point};
+use las::{Header, Point};
use object_store::{path::Path, ObjectMeta, ObjectStore, PutPayload};
use sedona_geometry::bounding_box::BoundingBox;
-use crate::laz::{metadata::ChunkMeta, reader::record_decompressor};
+use crate::las::{
+ metadata::ChunkMeta,
+ reader::{read_point, record_decompressor},
+};
-/// Spatial statistics (extent) of LAZ chunks for pruning.
+/// Spatial statistics (extent) of LAS/LAZ chunks for pruning.
///
/// It wraps a `RecordBatch` with x, y, z min and max values and row count per chunk.
#[derive(Clone, Debug)]
-pub struct LazStatistics {
+pub struct LasStatistics {
pub values: RecordBatch,
}
-impl LazStatistics {
+impl LasStatistics {
/// Get the [BoundingBox] of a chunk by index.
pub fn get_bbox(&self, index: usize) -> Option<BoundingBox> {
if index >= self.values.num_rows() {
@@ -93,7 +96,7 @@ impl LazStatistics {
}
}
-impl PruningStatistics for LazStatistics {
+impl PruningStatistics for LasStatistics {
fn min_values(&self, column: &Column) -> Option<ArrayRef> {
match column.name.as_str() {
"x" => self.values.column_by_name("x_min").cloned(),
@@ -165,7 +168,7 @@ impl LasStatisticsBuilder {
self.row_counts.append_value(row_count);
}
- pub fn finish(mut self) -> LazStatistics {
+ pub fn finish(mut self) -> LasStatistics {
let schema = Schema::new([
Arc::new(Field::new("x_min", DataType::Float64, false)),
Arc::new(Field::new("x_max", DataType::Float64, false)),
@@ -190,11 +193,11 @@ impl LasStatisticsBuilder {
)
.unwrap();
- LazStatistics { values: batch }
+ LasStatistics { values: batch }
}
}
-/// Extract the [LazStatistics] from a LAZ file in an object store.
+/// Extract the [LasStatistics] from a LAS/LAZ file in an object store.
///
/// This will scan the entire file. To reuse the statistics, they can
/// optionally be persisted, which creates a sidecar file with a `.stats`
@@ -205,7 +208,7 @@ pub async fn chunk_statistics(
chunk_table: &[ChunkMeta],
header: &Header,
persist: bool,
-) -> Result<LazStatistics, DataFusionError> {
+) -> Result<LasStatistics, DataFusionError> {
let stats_path = Path::parse(format!("{}.stats", object_meta.location.as_ref()))?;
match store.head(&stats_path).await {
@@ -225,7 +228,7 @@ pub async fn chunk_statistics(
assert_eq!(values.num_rows(), chunk_table.len());
- Ok(LazStatistics { values })
+ Ok(LasStatistics { values })
}
Err(object_store::Error::NotFound { path: _, source: _ }) => {
// extract statistics
@@ -261,15 +264,6 @@ async fn extract_chunk_stats(
chunk_meta: &ChunkMeta,
header: &Header,
) -> Result<[f64; 6], DataFusionError> {
- // fetch chunk bytes
- let bytes = store
- .get_range(&object_meta.location, chunk_meta.byte_range.clone())
- .await?;
-
- // setup laz decompressor
- let mut decompressor =
- record_decompressor(header, bytes).map_err(|e| DataFusionError::External(Box::new(e)))?;
-
// statistics
let mut stats = [
f64::INFINITY,
@@ -280,18 +274,8 @@ async fn extract_chunk_stats(
f64::NEG_INFINITY,
];
- let out = vec![0; header.point_format().len() as usize];
- let mut buffer = Cursor::new(out);
-
- for _ in 0..chunk_meta.num_points {
- buffer.set_position(0);
- decompressor.decompress_next(buffer.get_mut())?;
-
- let point = RawPoint::read_from(&mut buffer, header.point_format())
- .map(|raw_point| Point::new(raw_point, header.transforms()))
- .map_err(|e| DataFusionError::External(Box::new(e)))?;
-
- stats = [
+ let extend = |stats: &mut [f64; 6], point: Point| {
+ *stats = [
stats[0].min(point.x),
stats[1].max(point.x),
stats[2].min(point.y),
@@ -299,6 +283,34 @@ async fn extract_chunk_stats(
stats[4].min(point.z),
stats[5].max(point.z),
];
+ };
+
+ // fetch chunk bytes
+ let bytes = store
+ .get_range(&object_meta.location, chunk_meta.byte_range.clone())
+ .await?;
+
+ if header.laz_vlr().is_ok() {
+ // setup laz decompressor
+ let mut decompressor = record_decompressor(header, bytes)
+ .map_err(|e| DataFusionError::External(Box::new(e)))?;
+
+ let out = vec![0; header.point_format().len() as usize];
+ let mut buffer = Cursor::new(out);
+
+ for _ in 0..chunk_meta.num_points {
+ buffer.set_position(0);
+ decompressor.decompress_next(buffer.get_mut())?;
+ let point = read_point(&mut buffer, header)?;
+ extend(&mut stats, point);
+ }
+ } else {
+ let mut buffer = Cursor::new(bytes);
+
+ for _ in 0..chunk_meta.num_points {
+ let point = read_point(&mut buffer, header)?;
+ extend(&mut stats, point);
+ }
}
Ok(stats)
@@ -315,55 +327,56 @@ mod tests {
use object_store::{local::LocalFileSystem, path::Path, ObjectStore};
use sedona_geometry::bounding_box::BoundingBox;
- use crate::{laz::metadata::LazMetadataReader, options::PointcloudOptions};
+ use crate::{las::metadata::LasMetadataReader, options::PointcloudOptions};
#[tokio::test]
async fn chunk_statistics() {
- let path = "tests/data/large.laz";
-
- // read with `LazMetadataReader`
- let store = LocalFileSystem::new();
- let location = Path::from_filesystem_path(path).unwrap();
- let object_meta = store.head(&location).await.unwrap();
-
- let metadata_reader = LazMetadataReader::new(&store, &object_meta);
- let metadata = metadata_reader.fetch_metadata().await.unwrap();
- assert!(metadata.statistics.is_none());
-
- let options = PointcloudOptions {
- collect_statistics: true,
- ..Default::default()
- };
- let metadata_reader = LazMetadataReader::new(&store, &object_meta).with_options(options);
- let metadata = metadata_reader.fetch_metadata().await.unwrap();
- let statistics = metadata.statistics.as_ref().unwrap();
- assert_eq!(statistics.num_containers(), 2);
- assert_eq!(
- statistics
- .row_counts(&Column::from_name(""))
- .unwrap()
- .as_primitive::<UInt64Type>()
- .value(0),
- 50000
- );
- assert_eq!(
- statistics.get_bbox(0),
- Some(BoundingBox::xyzm(
- (0.5, 0.5),
- (0.5, 0.5),
- Some((0.5, 0.5).into()),
- None
- ))
- );
- assert_eq!(
- statistics.get_bbox(1),
- Some(BoundingBox::xyzm(
- (1.0, 1.0),
- (1.0, 1.0),
- Some((1.0, 1.0).into()),
- None
- ))
- );
+ for path in ["tests/data/large.las", "tests/data/large.laz"] {
+ // read with `LasMetadataReader`
+ let store = LocalFileSystem::new();
+ let location = Path::from_filesystem_path(path).unwrap();
+ let object_meta = store.head(&location).await.unwrap();
+
+ let metadata_reader = LasMetadataReader::new(&store, &object_meta);
+ let metadata = metadata_reader.fetch_metadata().await.unwrap();
+ assert!(metadata.statistics.is_none());
+
+ let options = PointcloudOptions {
+ collect_statistics: true,
+ ..Default::default()
+ };
+ let metadata_reader =
+ LasMetadataReader::new(&store, &object_meta).with_options(options);
+ let metadata = metadata_reader.fetch_metadata().await.unwrap();
+ let statistics = metadata.statistics.as_ref().unwrap();
+ assert_eq!(statistics.num_containers(), 2);
+ assert_eq!(
+ statistics
+ .row_counts(&Column::from_name(""))
+ .unwrap()
+ .as_primitive::<UInt64Type>()
+ .value(0),
+ 50000
+ );
+ assert_eq!(
+ statistics.get_bbox(0),
+ Some(BoundingBox::xyzm(
+ (0.5, 0.5),
+ (0.5, 0.5),
+ Some((0.5, 0.5).into()),
+ None
+ ))
+ );
+ assert_eq!(
+ statistics.get_bbox(1),
+ Some(BoundingBox::xyzm(
+ (1.0, 1.0),
+ (1.0, 1.0),
+ Some((1.0, 1.0).into()),
+ None
+ ))
+ );
+ }
}
#[tokio::test]
@@ -388,7 +401,7 @@ mod tests {
writer.write_point(point).unwrap();
writer.close().unwrap();
- // read with `LazMetadataReader`
+ // read with `LasMetadataReader`
let store = LocalFileSystem::new();
let location = Path::from_filesystem_path(&tmp_path).unwrap();
let object_meta = store.head(&location).await.unwrap();
@@ -398,7 +411,7 @@ mod tests {
persist_statistics: true,
..Default::default()
};
- let metadata_reader = LazMetadataReader::new(&store, &object_meta).with_options(options);
+ let metadata_reader = LasMetadataReader::new(&store, &object_meta).with_options(options);
let metadata = metadata_reader.fetch_metadata().await.unwrap();
assert!(tmp_path.with_extension("laz.stats").exists());
diff --git a/rust/sedona-pointcloud/src/lib.rs b/rust/sedona-pointcloud/src/lib.rs
index 5c9acc71..7a75e041 100644
--- a/rust/sedona-pointcloud/src/lib.rs
+++ b/rust/sedona-pointcloud/src/lib.rs
@@ -15,5 +15,5 @@
// specific language governing permissions and limitations
// under the License.
-pub mod laz;
+pub mod las;
pub mod options;
diff --git a/rust/sedona-pointcloud/src/options.rs b/rust/sedona-pointcloud/src/options.rs
index c8d2fd90..51e5067b 100644
--- a/rust/sedona-pointcloud/src/options.rs
+++ b/rust/sedona-pointcloud/src/options.rs
@@ -23,7 +23,7 @@ use datafusion_common::{
extensions_options,
};
-use crate::laz::options::{LasExtraBytes, LasOptions};
+use crate::las::options::{LasExtraBytes, LasOptions};
/// Geometry representation
#[derive(Clone, Copy, Default, PartialEq, Eq, Debug)]
diff --git a/rust/sedona-pointcloud/tests/data/extra.laz b/rust/sedona-pointcloud/tests/data/extra.las
similarity index 86%
copy from rust/sedona-pointcloud/tests/data/extra.laz
copy to rust/sedona-pointcloud/tests/data/extra.las
index 016c6c29..42b63528 100644
Binary files a/rust/sedona-pointcloud/tests/data/extra.laz and b/rust/sedona-pointcloud/tests/data/extra.las differ
diff --git a/rust/sedona-pointcloud/tests/data/extra.laz b/rust/sedona-pointcloud/tests/data/extra.laz
index 016c6c29..9b6d45cb 100644
Binary files a/rust/sedona-pointcloud/tests/data/extra.laz and b/rust/sedona-pointcloud/tests/data/extra.laz differ
diff --git a/rust/sedona-pointcloud/tests/data/generate.py b/rust/sedona-pointcloud/tests/data/generate.py
index 0d4cc522..1b9f367a 100644
--- a/rust/sedona-pointcloud/tests/data/generate.py
+++ b/rust/sedona-pointcloud/tests/data/generate.py
@@ -29,24 +29,19 @@ import numpy as np
DATA_DIR = Path(__file__).resolve().parent
-
-
LAS_VERSIONS = [f"1.{p}" for p in range(5)] # 1.0 - 1.4
POINT_FORMAT = list(range(11)) # 0 - 10 (>= 6 for LAS 1.4+)
-# Pragmatic choice
-version = LAS_VERSIONS[4]
-point_format = POINT_FORMAT[6]
-# Header
-header = laspy.LasHeader(point_format=point_format, version=version)
+# -----------------------------------------------------------------------------
+# Extra attribute test file with a single point (extra.las/extra.laz)
+# -----------------------------------------------------------------------------
+# header
+header = laspy.LasHeader(point_format=POINT_FORMAT[6], version=LAS_VERSIONS[4])
header.offsets = np.array([1.0, 1.0, 1.0])
header.scales = np.array([0.1, 0.1, 0.1])
-
-# -----------------------------------------------------------------------------
-# Extra attribute test file with a single point (extra.laz)
-# -----------------------------------------------------------------------------
+# extra attributes
DATA_TYPES = [
"uint8",
"int8",
@@ -59,8 +54,6 @@ DATA_TYPES = [
"float32",
"float64",
]
-
-# Extra attributes
for dt in DATA_TYPES:
name = f"{dt}_plain"
header.add_extra_dim(laspy.point.format.ExtraBytesParams(name, dt, "", None, None))
@@ -75,41 +68,55 @@ for dt in DATA_TYPES:
laspy.point.format.ExtraBytesParams(name, dt, "", None, None, [42])
)
-# Write laz with one point
-with laspy.open(
- DATA_DIR.joinpath("extra.laz"), mode="w", header=header, do_compress=True
-) as writer:
- point_record = laspy.ScaleAwarePointRecord.zeros(point_count=1, header=header)
- point_record.x = [0.5]
- point_record.y = [0.5]
- point_record.z = [0.5]
+point_record = laspy.ScaleAwarePointRecord.zeros(point_count=1, header=header)
+point_record.x = [0.5]
+point_record.y = [0.5]
+point_record.z = [0.5]
- for dt in DATA_TYPES:
- name = f"{dt}_plain"
- point_record[name] = [21]
+for dt in DATA_TYPES:
+ name = f"{dt}_plain"
+ point_record[name] = [21]
+
+ name = f"{dt}_scaled"
+ point_record[name] = [21]
- name = f"{dt}_scaled"
- point_record[name] = [21]
+ name = f"{dt}_nodata"
+ point_record[name] = [42]
- name = f"{dt}_nodata"
- point_record[name] = [42]
+# write las with one point
+with laspy.open(DATA_DIR.joinpath("extra.las"), mode="w", header=header) as writer:
+ writer.write_points(point_record)
+# write laz with one point
+with laspy.open(
+ DATA_DIR.joinpath("extra.laz"), mode="w", header=header, do_compress=True
+) as writer:
writer.write_points(point_record)
# -----------------------------------------------------------------------------
-# Large test file to evaluate pruning (large.laz)
+# Large test file to evaluate pruning (large.las/large.laz)
# -----------------------------------------------------------------------------
-with laspy.open(
- DATA_DIR.joinpath("large.laz"), mode="w", header=header, do_compress=True
-) as writer:
- N = 100000
+# header
+header = laspy.LasHeader(point_format=POINT_FORMAT[6], version=LAS_VERSIONS[4])
+header.offsets = np.array([1.0, 1.0, 1.0])
+header.scales = np.array([0.1, 0.1, 0.1])
- point_record = laspy.ScaleAwarePointRecord.zeros(point_count=N, header=header)
+# points
+N = 100000
+point_record = laspy.ScaleAwarePointRecord.zeros(point_count=N, header=header)
- # create two distinct chunks
- point_record.x = [0.5] * int(N / 2) + [1] * int(N / 2)
- point_record.y = [0.5] * int(N / 2) + [1] * int(N / 2)
- point_record.z = [0.5] * int(N / 2) + [1] * int(N / 2)
+# create two distinct chunks
+point_record.x = [0.5] * int(N / 2) + [1] * int(N / 2)
+point_record.y = [0.5] * int(N / 2) + [1] * int(N / 2)
+point_record.z = [0.5] * int(N / 2) + [1] * int(N / 2)
+
+# write las file
+with laspy.open(DATA_DIR.joinpath("large.las"), mode="w", header=header) as writer:
+ writer.write_points(point_record)
+# write laz file
+with laspy.open(
+ DATA_DIR.joinpath("large.laz"), mode="w", header=header, do_compress=True
+) as writer:
writer.write_points(point_record)
diff --git a/rust/sedona-pointcloud/tests/data/large.las b/rust/sedona-pointcloud/tests/data/large.las
new file mode 100644
index 00000000..e51f717f
Binary files /dev/null and b/rust/sedona-pointcloud/tests/data/large.las differ
diff --git a/rust/sedona-pointcloud/tests/data/large.laz b/rust/sedona-pointcloud/tests/data/large.laz
index e4cb06db..2dc07bb5 100644
Binary files a/rust/sedona-pointcloud/tests/data/large.laz and b/rust/sedona-pointcloud/tests/data/large.laz differ
diff --git a/rust/sedona/src/context.rs b/rust/sedona/src/context.rs
index fa659f82..0de88261 100644
--- a/rust/sedona/src/context.rs
+++ b/rust/sedona/src/context.rs
@@ -58,7 +58,10 @@ use sedona_geoparquet::{
};
#[cfg(feature = "pointcloud")]
use sedona_pointcloud::{
- laz::{format::LazFormatFactory, options::LasExtraBytes},
+ las::{
+ format::{Extension, LasFormatFactory},
+ options::LasExtraBytes,
+ },
options::{GeometryEncoding, PointcloudOptions},
};
@@ -124,7 +127,8 @@ impl SedonaContext {
state.register_file_format(Arc::new(GeoParquetFormatFactory::new()), true)?;
#[cfg(feature = "pointcloud")]
{
- state.register_file_format(Arc::new(LazFormatFactory::new()), false)?;
+ state.register_file_format(Arc::new(LasFormatFactory::new(Extension::Laz)), false)?;
+ state.register_file_format(Arc::new(LasFormatFactory::new(Extension::Las)), false)?;
}
// Enable dynamic file query (i.e., select * from 'filename')