This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 6948929f8a feat(json): Add temporal formatting options when write to
JSON (#8482)
6948929f8a is described below
commit 6948929f8a82a33b7fac2b8b6160a03ad4fce4dc
Author: Lin Yihai <[email protected]>
AuthorDate: Sat Oct 4 04:27:07 2025 +0800
feat(json): Add temporal formatting options when write to JSON (#8482)
# Which issue does this PR close?
- Closes https://github.com/apache/arrow-rs/issues/8398
# Rationale for this change
There is currently no way to override the temporal format options when
writing to JSON.
# What changes are included in this PR?
**Adds a series of temporal format options that override the default formatting of temporal fields**
# Are these changes tested?
**I added a test of the `with_timestamp_format` option to the existing test.
The other options work the same way.**
# Are there any user-facing changes?
New APIs
---
arrow-json/src/writer/encoder.rs | 75 ++++++++++++++++++++++++++++++++-
arrow-json/src/writer/mod.rs | 91 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 164 insertions(+), 2 deletions(-)
diff --git a/arrow-json/src/writer/encoder.rs b/arrow-json/src/writer/encoder.rs
index 719e16e350..c960da3e07 100644
--- a/arrow-json/src/writer/encoder.rs
+++ b/arrow-json/src/writer/encoder.rs
@@ -37,6 +37,16 @@ pub struct EncoderOptions {
struct_mode: StructMode,
/// An optional hook for customizing encoding behavior.
encoder_factory: Option<Arc<dyn EncoderFactory>>,
+ /// Optional date format for date arrays
+ date_format: Option<String>,
+ /// Optional datetime format for datetime arrays
+ datetime_format: Option<String>,
+ /// Optional timestamp format for timestamp arrays
+ timestamp_format: Option<String>,
+ /// Optional timestamp format for timestamp with timezone arrays
+ timestamp_tz_format: Option<String>,
+ /// Optional time format for time arrays
+ time_format: Option<String>,
}
impl EncoderOptions {
@@ -72,6 +82,61 @@ impl EncoderOptions {
pub fn encoder_factory(&self) -> Option<&Arc<dyn EncoderFactory>> {
self.encoder_factory.as_ref()
}
+
+ /// Set the JSON file's date format
+ pub fn with_date_format(mut self, format: String) -> Self {
+ self.date_format = Some(format);
+ self
+ }
+
+ /// Get the JSON file's date format if set, defaults to RFC3339
+ pub fn date_format(&self) -> Option<&str> {
+ self.date_format.as_deref()
+ }
+
+ /// Set the JSON file's datetime format
+ pub fn with_datetime_format(mut self, format: String) -> Self {
+ self.datetime_format = Some(format);
+ self
+ }
+
+ /// Get the JSON file's datetime format if set, defaults to RFC3339
+ pub fn datetime_format(&self) -> Option<&str> {
+ self.datetime_format.as_deref()
+ }
+
+ /// Set the JSON file's time format
+ pub fn with_time_format(mut self, format: String) -> Self {
+ self.time_format = Some(format);
+ self
+ }
+
+ /// Get the JSON file's time format if set, defaults to RFC3339
+ pub fn time_format(&self) -> Option<&str> {
+ self.time_format.as_deref()
+ }
+
+ /// Set the JSON file's timestamp format
+ pub fn with_timestamp_format(mut self, format: String) -> Self {
+ self.timestamp_format = Some(format);
+ self
+ }
+
+ /// Get the JSON file's timestamp format if set, defaults to RFC3339
+ pub fn timestamp_format(&self) -> Option<&str> {
+ self.timestamp_format.as_deref()
+ }
+
+ /// Set the JSON file's timestamp tz format
+ pub fn with_timestamp_tz_format(mut self, tz_format: String) -> Self {
+ self.timestamp_tz_format = Some(tz_format);
+ self
+ }
+
+ /// Get the JSON file's timestamp tz format if set, defaults to RFC3339
+ pub fn timestamp_tz_format(&self) -> Option<&str> {
+ self.timestamp_tz_format.as_deref()
+ }
}
/// A trait to create custom encoders for specific data types.
@@ -350,8 +415,14 @@ pub fn make_encoder<'a>(
// characters that would need to be escaped within a JSON string, e.g. `'"'`.
// If support for user-provided format specifications is added, this assumption
// may need to be revisited
- let options = FormatOptions::new().with_display_error(true);
- let formatter = ArrayFormatter::try_new(array, &options)?;
+ let fops = FormatOptions::new().with_display_error(true)
+ .with_date_format(options.date_format.as_deref())
+ .with_datetime_format(options.datetime_format.as_deref())
+ .with_timestamp_format(options.timestamp_format.as_deref())
+ .with_timestamp_tz_format(options.timestamp_tz_format.as_deref())
+ .with_time_format(options.time_format.as_deref());
+
+ let formatter = ArrayFormatter::try_new(array, &fops)?;
let formatter = JsonArrayFormatter::new(formatter);
NullableEncoder::new(Box::new(formatter) as Box<dyn Encoder + 'a>, nulls)
}
diff --git a/arrow-json/src/writer/mod.rs b/arrow-json/src/writer/mod.rs
index c14182b7b1..ae3b2ee78a 100644
--- a/arrow-json/src/writer/mod.rs
+++ b/arrow-json/src/writer/mod.rs
@@ -279,6 +279,36 @@ impl WriterBuilder {
self
}
+ /// Set the JSON file's date format
+ pub fn with_date_format(mut self, format: String) -> Self {
+ self.0 = self.0.with_date_format(format);
+ self
+ }
+
+ /// Set the JSON file's datetime format
+ pub fn with_datetime_format(mut self, format: String) -> Self {
+ self.0 = self.0.with_datetime_format(format);
+ self
+ }
+
+ /// Set the JSON file's time format
+ pub fn with_time_format(mut self, format: String) -> Self {
+ self.0 = self.0.with_time_format(format);
+ self
+ }
+
+ /// Set the JSON file's timestamp format
+ pub fn with_timestamp_format(mut self, format: String) -> Self {
+ self.0 = self.0.with_timestamp_format(format);
+ self
+ }
+
+ /// Set the JSON file's timestamp tz format
+ pub fn with_timestamp_tz_format(mut self, tz_format: String) -> Self {
+ self.0 = self.0.with_timestamp_tz_format(tz_format);
+ self
+ }
+
/// Create a new `Writer` with specified `JsonFormat` and builder options.
pub fn build<W, F>(self, writer: W) -> Writer<W, F>
where
@@ -724,6 +754,21 @@ mod tests {
&buf,
r#"{"micros":"2018-11-13T17:11:10.011375","millis":"2018-11-13T17:11:10.011","name":"a","nanos":"2018-11-13T17:11:10.011375885","secs":"2018-11-13T17:11:10"}
{"name":"b"}
+"#,
+ );
+
+ let mut buf = Vec::new();
+ {
+ let mut writer = WriterBuilder::new()
+ .with_timestamp_format("%m-%d-%Y".to_string())
+ .build::<_, LineDelimited>(&mut buf);
+ writer.write_batches(&[&batch]).unwrap();
+ }
+
+ assert_json_eq(
+ &buf,
+ r#"{"nanos":"11-13-2018","micros":"11-13-2018","millis":"11-13-2018","secs":"11-13-2018","name":"a"}
+{"name":"b"}
"#,
);
}
@@ -785,6 +830,21 @@ mod tests {
&buf,
r#"{"micros":"2018-11-13T17:11:10.011375Z","millis":"2018-11-13T17:11:10.011Z","name":"a","nanos":"2018-11-13T17:11:10.011375885Z","secs":"2018-11-13T17:11:10Z"}
{"name":"b"}
+"#,
+ );
+
+ let mut buf = Vec::new();
+ {
+ let mut writer = WriterBuilder::new()
+ .with_timestamp_tz_format("%m-%d-%Y %Z".to_string())
+ .build::<_, LineDelimited>(&mut buf);
+ writer.write_batches(&[&batch]).unwrap();
+ }
+
+ assert_json_eq(
+ &buf,
+ r#"{"nanos":"11-13-2018 +00:00","micros":"11-13-2018 +00:00","millis":"11-13-2018 +00:00","secs":"11-13-2018 +00:00","name":"a"}
+{"name":"b"}
"#,
);
}
@@ -832,6 +892,22 @@ mod tests {
&buf,
r#"{"date32":"2018-11-13","date64":"2018-11-13T17:11:10.011","name":"a"}
{"name":"b"}
+"#,
+ );
+
+ let mut buf = Vec::new();
+ {
+ let mut writer = WriterBuilder::new()
+ .with_date_format("%m-%d-%Y".to_string())
+ .with_datetime_format("%m-%d-%Y %Mmin %Ssec %Hhour".to_string())
+ .build::<_, LineDelimited>(&mut buf);
+ writer.write_batches(&[&batch]).unwrap();
+ }
+
+ assert_json_eq(
+ &buf,
+ r#"{"date32":"11-13-2018","date64":"11-13-2018 11min 10sec 17hour","name":"a"}
+{"name":"b"}
"#,
);
}
@@ -875,6 +951,21 @@ mod tests {
&buf,
r#"{"time32sec":"00:02:00","time32msec":"00:00:00.120","time64usec":"00:00:00.000120","time64nsec":"00:00:00.000000120","name":"a"}
{"name":"b"}
+"#,
+ );
+
+ let mut buf = Vec::new();
+ {
+ let mut writer = WriterBuilder::new()
+ .with_time_format("%H-%M-%S %f".to_string())
+ .build::<_, LineDelimited>(&mut buf);
+ writer.write_batches(&[&batch]).unwrap();
+ }
+
+ assert_json_eq(
+ &buf,
+ r#"{"time32sec":"00-02-00 000000000","time32msec":"00-00-00 120000000","time64usec":"00-00-00 000120000","time64nsec":"00-00-00 000000120","name":"a"}
+{"name":"b"}
"#,
);
}