jecsand838 commented on code in PR #8242:
URL: https://github.com/apache/arrow-rs/pull/8242#discussion_r2311162003


##########
arrow-avro/src/schema.rs:
##########
@@ -373,39 +378,87 @@ impl AvroSchema {
 }
 
 /// Supported fingerprint algorithms for Avro schema identification.
-/// Currently only `Rabin` is supported, `SHA256` and `MD5` support will come in a future update
+/// For use with Confluent Schema Registry IDs, set to None.
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
 pub enum FingerprintAlgorithm {
     /// 64‑bit CRC‑64‑AVRO Rabin fingerprint.
     #[default]
     Rabin,
+    /// Represents a fingerprint not based on a hash algorithm, (e.g., a 32-bit Schema Registry ID.)
+    None,
+    #[cfg(feature = "md5")]
+    /// 128-bit MD5 message digest.
+    MD5,
+    #[cfg(feature = "sha256")]
+    /// 256-bit SHA-256 digest.
+    SHA256,
+}
+
+/// Allow easy extraction of the algorithm used to create a fingerprint.
+impl From<&Fingerprint> for FingerprintAlgorithm {
+    fn from(fp: &Fingerprint) -> Self {
+        match fp {
+            Fingerprint::Rabin(_) => FingerprintAlgorithm::Rabin,
+            Fingerprint::Id(_) => FingerprintAlgorithm::None,
+            #[cfg(feature = "md5")]
+            Fingerprint::MD5(_) => FingerprintAlgorithm::MD5,
+            #[cfg(feature = "sha256")]
+            Fingerprint::SHA256(_) => FingerprintAlgorithm::SHA256,
+        }
+    }
 }
 
 /// A schema fingerprint in one of the supported formats.
 ///
 /// This is used as the key inside `SchemaStore` `HashMap`. Each `SchemaStore`
 /// instance always stores only one variant, matching its configured
 /// `FingerprintAlgorithm`, but the enum makes the API uniform.
-/// Currently only `Rabin` is supported
 ///
 /// <https://avro.apache.org/docs/1.11.1/specification/#schema-fingerprints>
+/// <https://docs.confluent.io/platform/current/schema-registry/fundamentals/serdes-develop/index.html#wire-format>
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
 pub enum Fingerprint {
     /// A 64-bit Rabin fingerprint.
     Rabin(u64),
+    /// A 32-bit Schema Registry ID.
+    Id(u32),
+    #[cfg(feature = "md5")]
+    /// A 128-bit MD5 fingerprint.
+    MD5([u8; 16]),
+    #[cfg(feature = "sha256")]
+    /// A 256-bit SHA-256 fingerprint.
+    SHA256([u8; 32]),
 }
 
-/// Allow easy extraction of the algorithm used to create a fingerprint.
-impl From<&Fingerprint> for FingerprintAlgorithm {
-    fn from(fp: &Fingerprint) -> Self {
-        match fp {
-            Fingerprint::Rabin(_) => FingerprintAlgorithm::Rabin,
-        }
-    }
-}
-
-/// Generates a fingerprint for the given `Schema` using the specified `FingerprintAlgorithm`.
-pub(crate) fn generate_fingerprint(
+/// Generates a fingerprint for the given `Schema` using the specified [`FingerprintAlgorithm`].
+///
+/// The fingerprint is computed over the schema's Parsed Canonical Form
+/// as defined by the Avro specification. Depending on `hash_type`, this
+/// will return one of the supported [`Fingerprint`] variants:
+/// - [`Fingerprint::Rabin`] for [`FingerprintAlgorithm::Rabin`]
+/// - [`Fingerprint::MD5`] for [`FingerprintAlgorithm::MD5`]
+/// - [`Fingerprint::SHA256`] for [`FingerprintAlgorithm::SHA256`]
+///
+/// Note: [`FingerprintAlgorithm::None`] cannot be used to generate a fingerprint
+/// and will result in an error. If you intend to use a Schema Registry ID-based
+/// wire format, load or set the [`Fingerprint::Id`] directly via [`load_fingerprint_id`]
+/// or [`SchemaStore::set`].
+///
+/// See also: <https://avro.apache.org/docs/1.11.1/specification/#schema-fingerprints>
+///
+/// # Errors
+/// Returns an error if generating the canonical form of the schema fails,
+/// or if `hash_type` is [`FingerprintAlgorithm::None`].
+///
+/// # Examples
+/// ```no_run
+/// use arrow_avro::schema::{AvroSchema, FingerprintAlgorithm, generate_fingerprint};
+///
+/// let avro = AvroSchema::new("\"string\"".to_string());
+/// let schema = avro.schema().unwrap();
+/// let fp = generate_fingerprint(&schema, FingerprintAlgorithm::Rabin).unwrap();
+/// ```
+pub fn generate_fingerprint(

Review Comment:
   @mbrobbel Just wanted to jump in on this one quickly. This makes 100% 
sense; however, I'd argue for making `generate_fingerprint` a method of 
`AvroSchema`. 
   
   We have a general plan to make most of the enums in the `schema.rs` file 
`pub(crate)` again prior to the public release of `arrow-avro`, while still 
exposing `AvroSchema` publicly. I'm curious what your thoughts are on this 
direction, though.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to