This is an automated email from the ASF dual-hosted git repository.

tison pushed a commit to branch codec
in repository https://gitbox.apache.org/repos/asf/datasketches-rust.git

commit b5c41679b04cb5092d140552adf9adfad6b8fa7e
Author: tison <[email protected]>
AuthorDate: Fri Feb 13 23:51:13 2026 +0800

    refactor: expose codec and add centralized Family
    
    Signed-off-by: tison <[email protected]>
---
 datasketches/src/{codec.rs => codec/decode.rs} | 138 +++++--------------------
 datasketches/src/codec/encode.rs               | 113 ++++++++++++++++++++
 datasketches/src/codec/family.rs               |  64 ++++++++++++
 datasketches/src/codec/mod.rs                  |  27 +++++
 datasketches/src/lib.rs                        |   2 +-
 5 files changed, 229 insertions(+), 115 deletions(-)

diff --git a/datasketches/src/codec.rs b/datasketches/src/codec/decode.rs
similarity index 54%
rename from datasketches/src/codec.rs
rename to datasketches/src/codec/decode.rs
index 4df7b22..52b005e 100644
--- a/datasketches/src/codec.rs
+++ b/datasketches/src/codec/decode.rs
@@ -1,240 +1,150 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#![allow(dead_code)]
-
 use std::io;
-use std::io::Cursor;
-use std::io::Read;
-
-pub(crate) struct SketchBytes {
-    bytes: Vec<u8>,
-}
-
-impl SketchBytes {
-    pub fn with_capacity(capacity: usize) -> Self {
-        Self {
-            bytes: Vec::with_capacity(capacity),
-        }
-    }
-
-    pub fn into_bytes(self) -> Vec<u8> {
-        self.bytes
-    }
-
-    pub fn write(&mut self, buf: &[u8]) {
-        self.bytes.extend_from_slice(buf);
-    }
-
-    pub fn write_u8(&mut self, n: u8) {
-        self.bytes.push(n);
-    }
-
-    pub fn write_i8(&mut self, n: i8) {
-        self.bytes.push(n as u8);
-    }
-
-    pub fn write_u16_le(&mut self, n: u16) {
-        self.write(&n.to_le_bytes());
-    }
-
-    pub fn write_u16_be(&mut self, n: u16) {
-        self.write(&n.to_be_bytes());
-    }
-
-    pub fn write_i16_le(&mut self, n: i16) {
-        self.write(&n.to_le_bytes());
-    }
-
-    pub fn write_i16_be(&mut self, n: i16) {
-        self.write(&n.to_be_bytes());
-    }
-
-    pub fn write_u32_le(&mut self, n: u32) {
-        self.write(&n.to_le_bytes());
-    }
-
-    pub fn write_u32_be(&mut self, n: u32) {
-        self.write(&n.to_be_bytes());
-    }
-
-    pub fn write_i32_le(&mut self, n: i32) {
-        self.write(&n.to_le_bytes());
-    }
-
-    pub fn write_i32_be(&mut self, n: i32) {
-        self.write(&n.to_be_bytes());
-    }
-
-    pub fn write_u64_le(&mut self, n: u64) {
-        self.write(&n.to_le_bytes());
-    }
-
-    pub fn write_u64_be(&mut self, n: u64) {
-        self.write(&n.to_be_bytes());
-    }
-
-    pub fn write_i64_le(&mut self, n: i64) {
-        self.write(&n.to_le_bytes());
-    }
-
-    pub fn write_i64_be(&mut self, n: i64) {
-        self.write(&n.to_be_bytes());
-    }
-
-    pub fn write_f32_le(&mut self, n: f32) {
-        self.write(&n.to_le_bytes());
-    }
-
-    pub fn write_f32_be(&mut self, n: f32) {
-        self.write(&n.to_be_bytes());
-    }
-
-    pub fn write_f64_le(&mut self, n: f64) {
-        self.write(&n.to_le_bytes());
-    }
-
-    pub fn write_f64_be(&mut self, n: f64) {
-        self.write(&n.to_be_bytes());
-    }
-}
+use std::io::{Cursor, Read};
 
-pub(crate) struct SketchSlice<'a> {
+/// A wrapper around a byte slice that provides methods for reading various types of data from it.
+pub struct SketchSlice<'a> {
     slice: Cursor<&'a [u8]>,
 }
 
 impl SketchSlice<'_> {
+    /// Creates a new `SketchSlice` from the given byte slice.
     pub fn new(slice: &[u8]) -> SketchSlice<'_> {
         SketchSlice {
             slice: Cursor::new(slice),
         }
     }
 
+    /// Advances the position of the slice by `n` bytes.
     pub fn advance(&mut self, n: u64) {
         let pos = self.slice.position();
         self.slice.set_position(pos + n);
     }
 
+    /// Reads exactly `buf.len()` bytes from the slice into `buf`.
     pub fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
         self.slice.read_exact(buf)
     }
 
+    /// Reads a single byte from the slice and returns it as a `u8`.
     pub fn read_u8(&mut self) -> io::Result<u8> {
         let mut buf = [0u8; 1];
         self.read_exact(&mut buf)?;
         Ok(buf[0])
     }
 
+    /// Reads a single byte from the slice and returns it as an `i8`.
     pub fn read_i8(&mut self) -> io::Result<i8> {
         let mut buf = [0u8; 1];
         self.read_exact(&mut buf)?;
         Ok(buf[0] as i8)
     }
 
+    /// Reads a 16-bit unsigned integer from the slice in little-endian byte order.
     pub fn read_u16_le(&mut self) -> io::Result<u16> {
         let mut buf = [0u8; 2];
         self.read_exact(&mut buf)?;
         Ok(u16::from_le_bytes(buf))
     }
 
+    /// Reads a 16-bit unsigned integer from the slice in big-endian byte order.
     pub fn read_u16_be(&mut self) -> io::Result<u16> {
         let mut buf = [0u8; 2];
         self.read_exact(&mut buf)?;
         Ok(u16::from_be_bytes(buf))
     }
 
+    /// Reads a 16-bit signed integer from the slice in little-endian byte order.
     pub fn read_i16_le(&mut self) -> io::Result<i16> {
         let mut buf = [0u8; 2];
         self.read_exact(&mut buf)?;
         Ok(i16::from_le_bytes(buf))
     }
 
+    /// Reads a 16-bit signed integer from the slice in big-endian byte order.
     pub fn read_i16_be(&mut self) -> io::Result<i16> {
         let mut buf = [0u8; 2];
         self.read_exact(&mut buf)?;
         Ok(i16::from_be_bytes(buf))
     }
 
+    /// Reads a 32-bit unsigned integer from the slice in little-endian byte order.
     pub fn read_u32_le(&mut self) -> io::Result<u32> {
         let mut buf = [0u8; 4];
         self.read_exact(&mut buf)?;
         Ok(u32::from_le_bytes(buf))
     }
 
+    /// Reads a 32-bit unsigned integer from the slice in big-endian byte order.
     pub fn read_u32_be(&mut self) -> io::Result<u32> {
         let mut buf = [0u8; 4];
         self.read_exact(&mut buf)?;
         Ok(u32::from_be_bytes(buf))
     }
 
+    /// Reads a 32-bit signed integer from the slice in little-endian byte order.
     pub fn read_i32_le(&mut self) -> io::Result<i32> {
         let mut buf = [0u8; 4];
         self.read_exact(&mut buf)?;
         Ok(i32::from_le_bytes(buf))
     }
 
+    /// Reads a 32-bit signed integer from the slice in big-endian byte order.
     pub fn read_i32_be(&mut self) -> io::Result<i32> {
         let mut buf = [0u8; 4];
         self.read_exact(&mut buf)?;
         Ok(i32::from_be_bytes(buf))
     }
 
+    /// Reads a 64-bit unsigned integer from the slice in little-endian byte order.
     pub fn read_u64_le(&mut self) -> io::Result<u64> {
         let mut buf = [0u8; 8];
         self.read_exact(&mut buf)?;
         Ok(u64::from_le_bytes(buf))
     }
 
+    /// Reads a 64-bit unsigned integer from the slice in big-endian byte order.
     pub fn read_u64_be(&mut self) -> io::Result<u64> {
         let mut buf = [0u8; 8];
         self.read_exact(&mut buf)?;
         Ok(u64::from_be_bytes(buf))
     }
 
+    /// Reads a 64-bit signed integer from the slice in little-endian byte order.
     pub fn read_i64_le(&mut self) -> io::Result<i64> {
         let mut buf = [0u8; 8];
         self.read_exact(&mut buf)?;
         Ok(i64::from_le_bytes(buf))
     }
 
+    /// Reads a 64-bit signed integer from the slice in big-endian byte order.
     pub fn read_i64_be(&mut self) -> io::Result<i64> {
         let mut buf = [0u8; 8];
         self.read_exact(&mut buf)?;
         Ok(i64::from_be_bytes(buf))
     }
 
+    /// Reads a 32-bit floating-point number from the slice in little-endian byte order.
     pub fn read_f32_le(&mut self) -> io::Result<f32> {
         let mut buf = [0u8; 4];
         self.read_exact(&mut buf)?;
         Ok(f32::from_le_bytes(buf))
     }
 
+    /// Reads a 32-bit floating-point number from the slice in big-endian byte order.
     pub fn read_f32_be(&mut self) -> io::Result<f32> {
         let mut buf = [0u8; 4];
         self.read_exact(&mut buf)?;
         Ok(f32::from_be_bytes(buf))
     }
 
+    /// Reads a 64-bit floating-point number from the slice in little-endian byte order.
     pub fn read_f64_le(&mut self) -> io::Result<f64> {
         let mut buf = [0u8; 8];
         self.read_exact(&mut buf)?;
         Ok(f64::from_le_bytes(buf))
     }
 
+    /// Reads a 64-bit floating-point number from the slice in big-endian byte order.
     pub fn read_f64_be(&mut self) -> io::Result<f64> {
         let mut buf = [0u8; 8];
         self.read_exact(&mut buf)?;
diff --git a/datasketches/src/codec/encode.rs b/datasketches/src/codec/encode.rs
new file mode 100644
index 0000000..0f1423f
--- /dev/null
+++ b/datasketches/src/codec/encode.rs
@@ -0,0 +1,113 @@
+/// A simple wrapper around a `Vec<u8>` that provides methods for writing various types of data.
+pub struct SketchBytes {
+    bytes: Vec<u8>,
+}
+
+impl SketchBytes {
+    /// Constructs an empty `SketchBytes` with at least the specified capacity.
+    pub fn with_capacity(capacity: usize) -> Self {
+        Self {
+            bytes: Vec::with_capacity(capacity),
+        }
+    }
+
+    /// Consumes the `SketchBytes` and returns the underlying `Vec<u8>`.
+    pub fn into_bytes(self) -> Vec<u8> {
+        self.bytes
+    }
+
+    /// Writes the given byte slice to the `SketchBytes`.
+    pub fn write(&mut self, buf: &[u8]) {
+        self.bytes.extend_from_slice(buf);
+    }
+
+    /// Writes a single byte to the `SketchBytes`.
+    pub fn write_u8(&mut self, n: u8) {
+        self.bytes.push(n);
+    }
+
+    /// Writes a single byte to the `SketchBytes`.
+    pub fn write_i8(&mut self, n: i8) {
+        self.bytes.push(n as u8);
+    }
+
+    /// Writes a 16-bit unsigned integer to the `SketchBytes` in little-endian byte order.
+    pub fn write_u16_le(&mut self, n: u16) {
+        self.write(&n.to_le_bytes());
+    }
+
+    /// Writes a 16-bit unsigned integer to the `SketchBytes` in big-endian byte order.
+    pub fn write_u16_be(&mut self, n: u16) {
+        self.write(&n.to_be_bytes());
+    }
+
+    /// Writes a 16-bit signed integer to the `SketchBytes` in little-endian byte order.
+    pub fn write_i16_le(&mut self, n: i16) {
+        self.write(&n.to_le_bytes());
+    }
+
+    /// Writes a 16-bit signed integer to the `SketchBytes` in big-endian byte order.
+    pub fn write_i16_be(&mut self, n: i16) {
+        self.write(&n.to_be_bytes());
+    }
+
+    /// Writes a 32-bit unsigned integer to the `SketchBytes` in little-endian byte order.
+    pub fn write_u32_le(&mut self, n: u32) {
+        self.write(&n.to_le_bytes());
+    }
+
+    /// Writes a 32-bit unsigned integer to the `SketchBytes` in big-endian byte order.
+    pub fn write_u32_be(&mut self, n: u32) {
+        self.write(&n.to_be_bytes());
+    }
+
+    /// Writes a 32-bit signed integer to the `SketchBytes` in little-endian byte order.
+    pub fn write_i32_le(&mut self, n: i32) {
+        self.write(&n.to_le_bytes());
+    }
+
+    /// Writes a 32-bit signed integer to the `SketchBytes` in big-endian byte order.
+    pub fn write_i32_be(&mut self, n: i32) {
+        self.write(&n.to_be_bytes());
+    }
+
+    /// Writes a 64-bit unsigned integer to the `SketchBytes` in little-endian byte order.
+    pub fn write_u64_le(&mut self, n: u64) {
+        self.write(&n.to_le_bytes());
+    }
+
+    /// Writes a 64-bit unsigned integer to the `SketchBytes` in big-endian byte order.
+    pub fn write_u64_be(&mut self, n: u64) {
+        self.write(&n.to_be_bytes());
+    }
+
+    /// Writes a 64-bit signed integer to the `SketchBytes` in little-endian byte order.
+    pub fn write_i64_le(&mut self, n: i64) {
+        self.write(&n.to_le_bytes());
+    }
+
+    /// Writes a 64-bit signed integer to the `SketchBytes` in big-endian byte order.
+    pub fn write_i64_be(&mut self, n: i64) {
+        self.write(&n.to_be_bytes());
+    }
+
+    /// Writes a 32-bit floating-point number to the `SketchBytes` in little-endian byte order.
+    pub fn write_f32_le(&mut self, n: f32) {
+        self.write(&n.to_le_bytes());
+    }
+
+    /// Writes a 32-bit floating-point number to the `SketchBytes` in big-endian byte order.
+    pub fn write_f32_be(&mut self, n: f32) {
+        self.write(&n.to_be_bytes());
+    }
+
+    /// Writes a 64-bit floating-point number to the `SketchBytes` in little-endian byte order.
+    pub fn write_f64_le(&mut self, n: f64) {
+        self.write(&n.to_le_bytes());
+    }
+
+    /// Writes a 64-bit floating-point number to the `SketchBytes` in big-endian byte order.
+    pub fn write_f64_be(&mut self, n: f64) {
+        self.write(&n.to_be_bytes());
+    }
+}
diff --git a/datasketches/src/codec/family.rs b/datasketches/src/codec/family.rs
new file mode 100644
index 0000000..ab741a9
--- /dev/null
+++ b/datasketches/src/codec/family.rs
@@ -0,0 +1,64 @@
+/// Defines the various families of sketch and set operation classes.
+///
+/// A family defines a set of classes that share fundamental algorithms and behaviors. The classes
+/// within a family may still differ by how they are stored and accessed.
+pub struct Family {
+    /// The byte ID for this family.
+    pub id: u8,
+    /// The name for this family.
+    pub name: &'static str,
+    /// The minimum preamble size for this family in longs (8-byte integers).
+    pub min_pre_longs: u8,
+    /// The maximum preamble size for this family in longs (8-byte integers).
+    pub max_pre_longs: u8,
+}
+
+impl Family {
+    /// The HLL family of sketches.
+    pub const HLL: Family = Family {
+        id: 7,
+        name: "HLL",
+        min_pre_longs: 1,
+        max_pre_longs: 1,
+    };
+
+    /// The Frequency family of sketches.
+    pub const FREQUENCY: Family = Family {
+        id: 10,
+        name: "FREQUENCY",
+        min_pre_longs: 1,
+        max_pre_longs: 4,
+    };
+
+    /// Compressed Probabilistic Counting (CPC) Sketch.
+    pub const CPC: Family = Family {
+        id: 16,
+        name: "CPC",
+        min_pre_longs: 1,
+        max_pre_longs: 5,
+    };
+
+    /// CountMin Sketch
+    pub const COUNTMIN: Family = Family {
+        id: 17,
+        name: "COUNTMIN",
+        min_pre_longs: 2,
+        max_pre_longs: 2,
+    };
+
+    /// T-Digest for estimating quantiles and ranks.
+    pub const TDIGEST: Family = Family {
+        id: 20,
+        name: "TDIGEST",
+        min_pre_longs: 1,
+        max_pre_longs: 2,
+    };
+
+    /// Bloom Filter.
+    pub const BLOOMFILTER: Family = Family {
+        id: 24,
+        name: "BLOOMFILTER",
+        min_pre_longs: 3,
+        max_pre_longs: 4,
+    };
+}
diff --git a/datasketches/src/codec/mod.rs b/datasketches/src/codec/mod.rs
new file mode 100644
index 0000000..947d228
--- /dev/null
+++ b/datasketches/src/codec/mod.rs
@@ -0,0 +1,27 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Codec utilities for datasketches crate.
+
+// public common codec utilities for datasketches crate
+mod encode;
+mod decode;
+pub use self::encode::SketchBytes;
+pub use self::decode::SketchSlice;
+
+// private to datasketches crate
+pub(crate) mod family;
diff --git a/datasketches/src/lib.rs b/datasketches/src/lib.rs
index 17701ab..02dc692 100644
--- a/datasketches/src/lib.rs
+++ b/datasketches/src/lib.rs
@@ -31,6 +31,7 @@
 compile_error!("datasketches does not support big-endian targets");
 
 pub mod bloom;
+pub mod codec;
 pub mod common;
 pub mod countmin;
 pub mod cpc;
@@ -40,5 +41,4 @@ pub mod hll;
 pub mod tdigest;
 pub mod theta;
 
-mod codec;
 mod hash;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to