This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 6948929f8a feat(json): Add temporal formatting options when write to 
JSON (#8482)
6948929f8a is described below

commit 6948929f8a82a33b7fac2b8b6160a03ad4fce4dc
Author: Lin Yihai <[email protected]>
AuthorDate: Sat Oct 4 04:27:07 2025 +0800

    feat(json): Add temporal formatting options when write to JSON (#8482)
    
    # Which issue does this PR close?
    
    
    - Closes https://github.com/apache/arrow-rs/issues/8398
    
    # Rationale for this change
    
    
    There is no method to override the temporal format options when writing
    to JSON.
    
    
    # What changes are included in this PR?
    
    **Offers a series of temporal format options to override how temporal fields are formatted**
    
    # Are these changes tested?
    
    **I added a test for the `with_timestamp_format` option to the existing test.
    The other options work the same way.**
    
    # Are there any user-facing changes?
    
    New APIs
---
 arrow-json/src/writer/encoder.rs | 75 ++++++++++++++++++++++++++++++++-
 arrow-json/src/writer/mod.rs     | 91 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 164 insertions(+), 2 deletions(-)

diff --git a/arrow-json/src/writer/encoder.rs b/arrow-json/src/writer/encoder.rs
index 719e16e350..c960da3e07 100644
--- a/arrow-json/src/writer/encoder.rs
+++ b/arrow-json/src/writer/encoder.rs
@@ -37,6 +37,16 @@ pub struct EncoderOptions {
     struct_mode: StructMode,
     /// An optional hook for customizing encoding behavior.
     encoder_factory: Option<Arc<dyn EncoderFactory>>,
+    /// Optional date format for date arrays
+    date_format: Option<String>,
+    /// Optional datetime format for datetime arrays
+    datetime_format: Option<String>,
+    /// Optional timestamp format for timestamp arrays
+    timestamp_format: Option<String>,
+    /// Optional timestamp format for timestamp with timezone arrays
+    timestamp_tz_format: Option<String>,
+    /// Optional time format for time arrays
+    time_format: Option<String>,
 }
 
 impl EncoderOptions {
@@ -72,6 +82,61 @@ impl EncoderOptions {
     pub fn encoder_factory(&self) -> Option<&Arc<dyn EncoderFactory>> {
         self.encoder_factory.as_ref()
     }
+
+    /// Set the JSON file's date format
+    pub fn with_date_format(mut self, format: String) -> Self {
+        self.date_format = Some(format);
+        self
+    }
+
+    /// Get the JSON file's date format if set, defaults to RFC3339
+    pub fn date_format(&self) -> Option<&str> {
+        self.date_format.as_deref()
+    }
+
+    /// Set the JSON file's datetime format
+    pub fn with_datetime_format(mut self, format: String) -> Self {
+        self.datetime_format = Some(format);
+        self
+    }
+
+    /// Get the JSON file's datetime format if set, defaults to RFC3339
+    pub fn datetime_format(&self) -> Option<&str> {
+        self.datetime_format.as_deref()
+    }
+
+    /// Set the JSON file's time format
+    pub fn with_time_format(mut self, format: String) -> Self {
+        self.time_format = Some(format);
+        self
+    }
+
+    /// Get the JSON file's time format if set, defaults to RFC3339
+    pub fn time_format(&self) -> Option<&str> {
+        self.time_format.as_deref()
+    }
+
+    /// Set the JSON file's timestamp format
+    pub fn with_timestamp_format(mut self, format: String) -> Self {
+        self.timestamp_format = Some(format);
+        self
+    }
+
+    /// Get the JSON file's timestamp format if set, defaults to RFC3339
+    pub fn timestamp_format(&self) -> Option<&str> {
+        self.timestamp_format.as_deref()
+    }
+
+    /// Set the JSON file's timestamp tz format
+    pub fn with_timestamp_tz_format(mut self, tz_format: String) -> Self {
+        self.timestamp_tz_format = Some(tz_format);
+        self
+    }
+
+    /// Get the JSON file's timestamp tz format if set, defaults to RFC3339
+    pub fn timestamp_tz_format(&self) -> Option<&str> {
+        self.timestamp_tz_format.as_deref()
+    }
 }
 
 /// A trait to create custom encoders for specific data types.
@@ -350,8 +415,14 @@ pub fn make_encoder<'a>(
                 // characters that would need to be escaped within a JSON 
string, e.g. `'"'`.
                 // If support for user-provided format specifications is 
added, this assumption
                 // may need to be revisited
-                let options = FormatOptions::new().with_display_error(true);
-                let formatter = ArrayFormatter::try_new(array, &options)?;
+                let fops = FormatOptions::new().with_display_error(true)
+                .with_date_format(options.date_format.as_deref())
+                .with_datetime_format(options.datetime_format.as_deref())
+                .with_timestamp_format(options.timestamp_format.as_deref())
+                
.with_timestamp_tz_format(options.timestamp_tz_format.as_deref())
+                .with_time_format(options.time_format.as_deref());
+
+                let formatter = ArrayFormatter::try_new(array, &fops)?;
                 let formatter = JsonArrayFormatter::new(formatter);
                 NullableEncoder::new(Box::new(formatter) as Box<dyn Encoder + 
'a>, nulls)
             }
diff --git a/arrow-json/src/writer/mod.rs b/arrow-json/src/writer/mod.rs
index c14182b7b1..ae3b2ee78a 100644
--- a/arrow-json/src/writer/mod.rs
+++ b/arrow-json/src/writer/mod.rs
@@ -279,6 +279,36 @@ impl WriterBuilder {
         self
     }
 
+    /// Set the JSON file's date format
+    pub fn with_date_format(mut self, format: String) -> Self {
+        self.0 = self.0.with_date_format(format);
+        self
+    }
+
+    /// Set the JSON file's datetime format
+    pub fn with_datetime_format(mut self, format: String) -> Self {
+        self.0 = self.0.with_datetime_format(format);
+        self
+    }
+
+    /// Set the JSON file's time format
+    pub fn with_time_format(mut self, format: String) -> Self {
+        self.0 = self.0.with_time_format(format);
+        self
+    }
+
+    /// Set the JSON file's timestamp format
+    pub fn with_timestamp_format(mut self, format: String) -> Self {
+        self.0 = self.0.with_timestamp_format(format);
+        self
+    }
+
+    /// Set the JSON file's timestamp tz format
+    pub fn with_timestamp_tz_format(mut self, tz_format: String) -> Self {
+        self.0 = self.0.with_timestamp_tz_format(tz_format);
+        self
+    }
+
     /// Create a new `Writer` with specified `JsonFormat` and builder options.
     pub fn build<W, F>(self, writer: W) -> Writer<W, F>
     where
@@ -724,6 +754,21 @@ mod tests {
             &buf,
             
r#"{"micros":"2018-11-13T17:11:10.011375","millis":"2018-11-13T17:11:10.011","name":"a","nanos":"2018-11-13T17:11:10.011375885","secs":"2018-11-13T17:11:10"}
 {"name":"b"}
+"#,
+        );
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = WriterBuilder::new()
+                .with_timestamp_format("%m-%d-%Y".to_string())
+                .build::<_, LineDelimited>(&mut buf);
+            writer.write_batches(&[&batch]).unwrap();
+        }
+
+        assert_json_eq(
+            &buf,
+            
r#"{"nanos":"11-13-2018","micros":"11-13-2018","millis":"11-13-2018","secs":"11-13-2018","name":"a"}
+{"name":"b"}
 "#,
         );
     }
@@ -785,6 +830,21 @@ mod tests {
             &buf,
             
r#"{"micros":"2018-11-13T17:11:10.011375Z","millis":"2018-11-13T17:11:10.011Z","name":"a","nanos":"2018-11-13T17:11:10.011375885Z","secs":"2018-11-13T17:11:10Z"}
 {"name":"b"}
+"#,
+        );
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = WriterBuilder::new()
+                .with_timestamp_tz_format("%m-%d-%Y %Z".to_string())
+                .build::<_, LineDelimited>(&mut buf);
+            writer.write_batches(&[&batch]).unwrap();
+        }
+
+        assert_json_eq(
+            &buf,
+            r#"{"nanos":"11-13-2018 +00:00","micros":"11-13-2018 
+00:00","millis":"11-13-2018 +00:00","secs":"11-13-2018 +00:00","name":"a"}
+{"name":"b"}
 "#,
         );
     }
@@ -832,6 +892,22 @@ mod tests {
             &buf,
             
r#"{"date32":"2018-11-13","date64":"2018-11-13T17:11:10.011","name":"a"}
 {"name":"b"}
+"#,
+        );
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = WriterBuilder::new()
+                .with_date_format("%m-%d-%Y".to_string())
+                .with_datetime_format("%m-%d-%Y %Mmin %Ssec 
%Hhour".to_string())
+                .build::<_, LineDelimited>(&mut buf);
+            writer.write_batches(&[&batch]).unwrap();
+        }
+
+        assert_json_eq(
+            &buf,
+            r#"{"date32":"11-13-2018","date64":"11-13-2018 11min 10sec 
17hour","name":"a"}
+{"name":"b"}
 "#,
         );
     }
@@ -875,6 +951,21 @@ mod tests {
             &buf,
             
r#"{"time32sec":"00:02:00","time32msec":"00:00:00.120","time64usec":"00:00:00.000120","time64nsec":"00:00:00.000000120","name":"a"}
 {"name":"b"}
+"#,
+        );
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = WriterBuilder::new()
+                .with_time_format("%H-%M-%S %f".to_string())
+                .build::<_, LineDelimited>(&mut buf);
+            writer.write_batches(&[&batch]).unwrap();
+        }
+
+        assert_json_eq(
+            &buf,
+            r#"{"time32sec":"00-02-00 000000000","time32msec":"00-00-00 
120000000","time64usec":"00-00-00 000120000","time64nsec":"00-00-00 
000000120","name":"a"}
+{"name":"b"}
 "#,
         );
     }

Reply via email to