jecsand838 commented on code in PR #8006:
URL: https://github.com/apache/arrow-rs/pull/8006#discussion_r2244100996


##########
arrow-avro/src/reader/mod.rs:
##########
@@ -272,17 +450,70 @@ impl ReaderBuilder {
         self
     }
 
-    /// Sets the Avro schema.
+    /// Sets the Avro reader schema.
     ///
     /// If a schema is not provided, the schema will be read from the Avro 
file header.
-    pub fn with_schema(mut self, schema: AvroSchema<'static>) -> Self {
-        self.schema = Some(schema);
+    pub fn with_reader_schema(mut self, reader_schema: AvroSchema<'static>) -> 
Self {
+        self.reader_schema = Some(reader_schema);
         self
     }
 
+    /// Sets the `SchemaStore` used for resolving writer schemas.
+    ///
+    /// This is necessary when decoding single-object encoded data that 
identifies
+    /// schemas by a fingerprint. The store allows the decoder to look up the
+    /// full writer schema from a fingerprint embedded in the data.
+    ///
+    /// Defaults to `None`.
+    pub fn with_writer_schema_store(mut self, store: SchemaStore<'static>) -> 
Self {
+        self.writer_schema_store = Some(store);
+        self
+    }
+
+    /// Sets the initial schema fingerprint for decoding single-object encoded 
data.
+    ///
+    /// This is useful when the data stream does not begin with a schema 
definition
+    /// or fingerprint, allowing the decoder to start with a known schema from 
the
+    /// `SchemaStore`.
+    ///
+    /// Defaults to `None`.
+    pub fn with_active_fingerprint(mut self, fp: Fingerprint) -> Self {
+        self.active_fingerprint = Some(fp);
+        self
+    }
+
+    /// Set the maximum number of decoders to cache.
+    ///
+    /// When dealing with Avro files that contain multiple schemas, we may 
need to switch
+    /// between different decoders. This cache avoids rebuilding them from 
scratch every time.
+    ///
+    /// Defaults to `20`.
+    pub fn with_max_decoder_cache_size(mut self, n: usize) -> Self {
+        self.decoder_cache_size = n;
+        self
+    }
+
+    fn validate(&self) -> Result<(), ArrowError> {
+        match (
+            self.writer_schema_store.as_ref(),
+            self.reader_schema.as_ref(),
+            self.active_fingerprint.as_ref(),
+        ) {
+            (Some(_), None, _) => Err(ArrowError::ParseError(
+                "Reader schema must be set when writer schema store is 
provided".into(),
+            )),
+            (None, _, Some(_)) => Err(ArrowError::ParseError(
+                "Active fingerprint requires a writer schema store".into(),
+            )),
+            _ => Ok(()),
+        }
+    }
+
     /// Create a [`Reader`] from this builder and a `BufRead`
     pub fn build<R: BufRead>(self, mut reader: R) -> Result<Reader<R>, 
ArrowError> {
-        let (header, decoder) = self.build_impl(&mut reader)?;
+        self.validate()?;
+        let header = read_header(&mut reader)?;
+        let decoder = self.make_decoder(Some(&header))?;
         Ok(Reader {
             reader,
             header,

Review Comment:
   Actually, I should have double-checked this before blindly accepting, but 
due to the benefits of keeping the `Header` loosely coupled with the `Decoder`, 
it's probably best to leave this alone for now, imo. 
   
   I can follow up on this one though.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to