alamb commented on code in PR #18025:
URL: https://github.com/apache/datafusion/pull/18025#discussion_r2500550853
##########
datafusion/functions/src/macros.rs:
##########
@@ -59,6 +59,24 @@ macro_rules! export_functions {
}
};
+ // function that requires config and takes a vector argument
+ (single $FUNC:ident, $DOC:expr, @config $arg:ident,) => {
+ #[doc = $DOC]
+ pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) ->
datafusion_expr::Expr {
+ use datafusion_common::config::ConfigOptions;
+ super::$FUNC(&ConfigOptions::default()).call($arg)
Review Comment:
Can someone remind me why we are going to all the trouble of threading
the ConfigOptions down into the functions when the code that creates the
functions just passes in the default ConfigOptions?
Shouldn't the current ConfigOptions be passed in instead? If so, perhaps we
can do this as a follow-on PR.
##########
datafusion/functions/src/datetime/common.rs:
##########
@@ -42,6 +47,508 @@ pub(crate) fn string_to_timestamp_nanos_shim(s: &str) ->
Result<i64> {
string_to_timestamp_nanos(s).map_err(|e| e.into())
}
+#[derive(Clone, Copy, Debug)]
+enum ConfiguredZone {
+ Named(Tz),
+ Offset(FixedOffset),
+}
+
+#[derive(Clone)]
+pub(crate) struct ConfiguredTimeZone {
+ repr: Arc<str>,
+ zone: ConfiguredZone,
+}
+
+impl ConfiguredTimeZone {
+ pub(crate) fn utc() -> Self {
+ Self {
+ repr: Arc::from("+00:00"),
+ zone: ConfiguredZone::Offset(FixedOffset::east_opt(0).unwrap()),
+ }
+ }
+
+ pub(crate) fn parse(tz: &str) -> Result<Option<Self>> {
+ let tz = tz.trim();
+ if tz.is_empty() {
+ return Ok(None);
+ }
+
+ if let Ok(named) = Tz::from_str(tz) {
+ return Ok(Some(Self {
+ repr: Arc::from(tz),
+ zone: ConfiguredZone::Named(named),
+ }));
+ }
+
+ if let Some(offset) = parse_fixed_offset(tz) {
+ return Ok(Some(Self {
+ repr: Arc::from(tz),
+ zone: ConfiguredZone::Offset(offset),
+ }));
+ }
+
+ Err(exec_datafusion_err!(
+ "Invalid execution timezone '{tz}'. Please provide an IANA
timezone name (e.g. 'America/New_York') or an offset in the form '+HH:MM'."
+ ))
+ }
+
+ pub(crate) fn from_config(config: &ConfigOptions) -> Self {
+ match Self::parse(config.execution.time_zone.as_deref().unwrap_or(""))
{
+ Ok(Some(tz)) => tz,
+ _ => Self::utc(),
+ }
+ }
+
+ fn timestamp_from_naive(&self, naive: &NaiveDateTime) -> Result<i64> {
+ match self.zone {
+ ConfiguredZone::Named(tz) => {
+ local_datetime_to_timestamp(tz.from_local_datetime(naive),
&self.repr)
+ }
+ ConfiguredZone::Offset(offset) => {
+ local_datetime_to_timestamp(offset.from_local_datetime(naive),
&self.repr)
+ }
+ }
+ }
+
+ fn datetime_from_formatted(&self, s: &str, format: &str) ->
Result<DateTime<Utc>> {
+ let datetime = match self.zone {
+ ConfiguredZone::Named(tz) => {
+ string_to_datetime_formatted(&tz, s,
format)?.with_timezone(&Utc)
+ }
+ ConfiguredZone::Offset(offset) => {
+ string_to_datetime_formatted(&offset, s,
format)?.with_timezone(&Utc)
+ }
+ };
+ Ok(datetime)
+ }
+}
+
+impl fmt::Debug for ConfiguredTimeZone {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_struct("ConfiguredTimeZone")
+ .field("repr", &self.repr)
+ .finish()
+ }
+}
+
+impl PartialEq for ConfiguredTimeZone {
+ fn eq(&self, other: &Self) -> bool {
+ self.repr == other.repr
+ }
+}
+
+impl Eq for ConfiguredTimeZone {}
+
+impl Hash for ConfiguredTimeZone {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ self.repr.hash(state);
+ }
+}
+
+fn parse_fixed_offset(tz: &str) -> Option<FixedOffset> {
+ let tz = tz.trim();
+ if tz.eq_ignore_ascii_case("utc") || tz.eq_ignore_ascii_case("z") {
+ return FixedOffset::east_opt(0);
+ }
+
+ let (sign, rest) = if let Some(rest) = tz.strip_prefix('+') {
Review Comment:
Parsing timestamps is a delicate and subtle task with many strange corner
cases. I don't understand why this code is parsing timestamps itself rather
than delegating to `chrono`.
For example, perhaps one of:
- https://docs.rs/chrono/latest/chrono/format/struct.Parsed.html#method.to_datetime
- https://docs.rs/chrono/latest/chrono/format/struct.Parsed.html#method.to_fixed_offset
- https://docs.rs/chrono/latest/chrono/format/struct.Parsed.html#method.to_datetime_with_timezone
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]