paleolimbot commented on code in PR #23169:
URL: https://github.com/apache/datafusion/pull/23169#discussion_r3507403960
##########
datafusion/physical-expr/src/expressions/cast.rs:
##########
@@ -150,19 +182,50 @@ impl CastExpr {
&self.cast_options
}
+ /// Whether source metadata is preserved through the cast.
+ pub fn preserve_source_metadata(&self) -> bool {
+ self.preserve_source_metadata
+ }
+
fn resolved_target_field(&self, input_schema: &Schema) -> Result<FieldRef>
{
- if is_default_target_field(&self.target_field) {
- self.expr.return_field(input_schema).map(|field| {
+ // When using exact target field mode, return the target field directly
+ // without consulting the source expression (which may reference columns
+ // beyond the schema, e.g., virtual row-index columns appended at scan time).
+ if !self.preserve_source_metadata &&
!is_default_target_field(&self.target_field)
+ {
+ return Ok(Arc::clone(&self.target_field));
+ }
+
+ self.expr.return_field(input_schema).map(|source_field| {
+ if is_default_target_field(&self.target_field) {
+ // Type-only cast: derive from source field but strip extension metadata
+ let mut metadata = source_field.metadata().clone();
+ metadata.remove(EXTENSION_TYPE_NAME_KEY);
+ metadata.remove(EXTENSION_TYPE_METADATA_KEY);
+
Arc::new(
- field
+ source_field
.as_ref()
.clone()
- .with_data_type(self.cast_type().clone()),
+ .with_data_type(self.cast_type().clone())
+ .with_metadata(metadata),
)
- })
- } else {
- Ok(Arc::clone(&self.target_field))
- }
+ } else {
+ // Explicit target field with source metadata preservation:
+ // - Start with source's non-extension metadata
+ // - Then add all target metadata (including extension type metadata)
+ let mut metadata = source_field.metadata().clone();
+ metadata.remove(EXTENSION_TYPE_NAME_KEY);
+ metadata.remove(EXTENSION_TYPE_METADATA_KEY);
+
+ // Target metadata takes precedence (including extension type metadata)
+ for (k, v) in self.target_field.metadata() {
Review Comment:
I think I fixed this (and added a test)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]