yukkit commented on code in PR #8143:
URL: https://github.com/apache/arrow-datafusion/pull/8143#discussion_r1391953675
##########
datafusion/core/src/execution/context/mod.rs:
##########
@@ -661,6 +667,28 @@ impl SessionContext {
}
}
+ async fn create_type(&self, cmd: CreateType) -> Result<DataFrame> {
Review Comment:
This is the core processing logic for creating a SQL UDT (user-defined type).
##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -37,7 +37,7 @@ use std::sync::Arc;
/// trait to allow expr to typable with respect to a schema
pub trait ExprSchemable {
/// given a schema, return the type of the expr
- fn get_type<S: ExprSchema>(&self, schema: &S) -> Result<DataType>;
+ fn get_type<S: ExprSchema>(&self, schema: &S) -> Result<LogicalType>;
Review Comment:
Replace all arrow `DataType`s used in the logical planning stage with
`LogicalType` (handling of literal `ScalarValue`s is TBD).
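For readers following the change, a minimal sketch of the idea, assuming names like `LogicalType`, `NamedLogicalType`, and `physical_type()` as this PR introduces them (this is an illustration, not the released DataFusion API):

```rust
// Sketch only: built-in variants mirror arrow types, while `Extension` carries
// a user-defined type that still resolves to a physical arrow `DataType` for
// execution.
use std::sync::Arc;

use arrow::datatypes::DataType;

/// Hypothetical user-defined type: a name plus the physical type it is stored as.
#[derive(Debug, Clone, PartialEq)]
pub struct NamedLogicalType {
    pub name: String,
    pub physical: DataType,
}

/// Hypothetical planner-level type used instead of arrow's `DataType`.
#[derive(Debug, Clone, PartialEq)]
pub enum LogicalType {
    Boolean,
    Int64,
    Utf8,
    // ... remaining built-in variants elided ...
    Extension(Arc<NamedLogicalType>),
}

impl LogicalType {
    /// Map the logical type to the arrow `DataType` used at execution time.
    pub fn physical_type(&self) -> DataType {
        match self {
            LogicalType::Boolean => DataType::Boolean,
            LogicalType::Int64 => DataType::Int64,
            LogicalType::Utf8 => DataType::Utf8,
            LogicalType::Extension(t) => t.physical.clone(),
        }
    }
}
```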
##########
datafusion/expr/src/field_util.rs:
##########
@@ -76,22 +74,72 @@ impl GetFieldAccessSchema {
(other, _) => plan_err!("The expression to get an indexed
field is only valid for `List`, `Struct`, or `Map` types, got {other}"),
}
}
- Self::ListIndex{ key_dt } => {
- match (data_type, key_dt) {
- (DataType::List(lt), DataType::Int64) =>
Ok(Field::new("list", lt.data_type().clone(), true)),
- (DataType::List(_), _) => plan_err!(
- "Only ints are valid as an indexed field in a list"
- ),
- (other, _) => plan_err!("The expression to get an indexed
field is only valid for `List` or `Struct` types, got {other}"),
+ Self::ListIndex => {
+ match data_type {
+ DataType::List(lt) => Ok(Field::new("list",
lt.data_type().clone(), true)),
+ other => plan_err!("The expression to get an indexed field
is only valid for `List` or `Struct` types, got {other}"),
+ }
+ }
+ Self::ListRange => {
+ match data_type {
+ DataType::List(_) => Ok(Field::new("list",
data_type.clone(), true)),
+ other => plan_err!("The expression to get an indexed field
is only valid for `List` or `Struct` types, got {other}"),
}
}
- Self::ListRange{ start_dt, stop_dt } => {
- match (data_type, start_dt, stop_dt) {
- (DataType::List(_), DataType::Int64, DataType::Int64) =>
Ok(Field::new("list", data_type.clone(), true)),
- (DataType::List(_), _, _) => plan_err!(
- "Only ints are valid as an indexed field in a list"
+ }
+ }
+ /// Returns the schema [`DFField`] from a [`LogicalType::List`] or
+ /// [`LogicalType::Struct`] indexed by this structure
+ ///
+ /// # Error
+ /// Errors if
+ /// * the `data_type` is not a Struct or a List,
+ /// * the `data_type` of the name/index/start-stop do not match a
supported index type
+ pub fn get_accessed_df_field(&self, data_type: &LogicalType) ->
Result<DFField> {
Review Comment:
TODO: find a more elegant implementation that avoids duplicating this code.
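Not part of this change yet, but one possible shape for the TODO: make the lookup generic over a small trait that both the arrow `DataType` and the new `LogicalType` could implement, so the match arms exist only once. The trait name `IndexableType` and its methods are hypothetical:

```rust
use arrow::datatypes::DataType;

/// Hypothetical abstraction over "a type that can be indexed into"; both the
/// physical `DataType` and the PR's `LogicalType` could implement it.
pub trait IndexableType: Sized {
    /// The element type if `self` is a list type, otherwise `None`.
    fn list_element(&self) -> Option<Self>;
    /// Human-readable name used in error messages.
    fn type_name(&self) -> String;
}

impl IndexableType for DataType {
    fn list_element(&self) -> Option<Self> {
        match self {
            DataType::List(field) => Some(field.data_type().clone()),
            _ => None,
        }
    }

    fn type_name(&self) -> String {
        format!("{self}")
    }
}

/// Single copy of the "index into a list" rule, shared by the `DataType` and
/// `LogicalType` accessors instead of duplicating the match arms.
pub fn accessed_list_element<T: IndexableType>(data_type: &T) -> Result<T, String> {
    data_type
        .list_element()
        .ok_or_else(|| format!("cannot index into type {}", data_type.type_name()))
}
```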
##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -142,31 +180,39 @@ impl ExprSchemable for Expr {
| Expr::IsUnknown(_)
| Expr::IsNotTrue(_)
| Expr::IsNotFalse(_)
- | Expr::IsNotUnknown(_) => Ok(DataType::Boolean),
+ | Expr::IsNotUnknown(_) => Ok(LogicalType::Boolean),
Expr::ScalarSubquery(subquery) => {
Ok(subquery.subquery.schema().field(0).data_type().clone())
}
Expr::BinaryExpr(BinaryExpr {
ref left,
ref right,
ref op,
- }) => get_result_type(&left.get_type(schema)?, op, &right.get_type(schema)?),
- Expr::Like { .. } | Expr::SimilarTo { .. } => Ok(DataType::Boolean),
+ }) => {
+ // TODO not convert to physical DataType
+ let physical_type = get_result_type(
Review Comment:
TODO: Modify the function `get_result_type` to accept LogicalType as
parameters.
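For illustration, a rough sketch of the shape this TODO points at, bridging through the existing physical coercion until a logical-native version exists. All names here (`LogicalType`, `physical_type`, `physical_result_type`) are simplified stand-ins, not the final API:

```rust
use arrow::datatypes::DataType;

/// Stand-in for the PR's hypothetical planner-level type, reduced to two variants.
#[derive(Clone, PartialEq)]
pub enum LogicalType {
    Int64,
    Utf8,
}

/// Stand-in for DataFusion's binary operator enum.
pub enum Operator {
    Plus,
}

impl LogicalType {
    pub fn physical_type(&self) -> DataType {
        match self {
            LogicalType::Int64 => DataType::Int64,
            LogicalType::Utf8 => DataType::Utf8,
        }
    }

    /// Simplified reverse mapping; the real PR would need a lossless scheme.
    pub fn from_physical(dt: &DataType) -> Self {
        match dt {
            DataType::Utf8 => LogicalType::Utf8,
            _ => LogicalType::Int64,
        }
    }
}

/// Placeholder for the existing arrow-based coercion used today.
fn physical_result_type(
    lhs: &DataType,
    _op: &Operator,
    rhs: &DataType,
) -> Result<DataType, String> {
    if lhs == rhs {
        Ok(lhs.clone())
    } else {
        Err("incompatible types".to_string())
    }
}

/// The shape the TODO asks for: binary-expression result types computed over
/// logical types, bridging through the physical implementation for now.
pub fn get_result_type(
    lhs: &LogicalType,
    op: &Operator,
    rhs: &LogicalType,
) -> Result<LogicalType, String> {
    let physical = physical_result_type(&lhs.physical_type(), op, &rhs.physical_type())?;
    Ok(LogicalType::from_physical(&physical))
}
```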
##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -332,8 +382,8 @@ impl ExprSchemable for Expr {
// TODO(kszucs): most of the operations do not validate the type correctness
// like all of the binary expressions below. Perhaps Expr should track the
// type of the expression?
-
- if can_cast_types(&this_type, cast_to_type) {
+ // TODO The basis for whether cast can be executed should be the logical type
+ if can_cast_types(&this_type.physical_type(), &cast_to_type.physical_type()) {
Review Comment:
TODO: Modify the function `can_cast_types` to accept LogicalType as
parameters.
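As a sketch of where `can_cast_types` could end up, here is a logical-type aware cast check; the policy shown for extension types (only casts that go through their declared storage type) is just an illustrative assumption, not what the PR decides:

```rust
use std::sync::Arc;

use arrow::compute::can_cast_types;
use arrow::datatypes::DataType;

/// Hypothetical user-defined type with its declared storage type.
#[derive(Clone, PartialEq)]
pub struct NamedLogicalType {
    pub name: String,
    pub storage: DataType,
}

/// Simplified stand-in for the PR's logical type.
#[derive(Clone, PartialEq)]
pub enum LogicalType {
    /// Built-in types, wrapped directly for brevity.
    Physical(DataType),
    /// User-defined type.
    Extension(Arc<NamedLogicalType>),
}

impl LogicalType {
    fn physical_type(&self) -> DataType {
        match self {
            LogicalType::Physical(dt) => dt.clone(),
            LogicalType::Extension(t) => t.storage.clone(),
        }
    }
}

/// Cast check expressed over logical types instead of physical arrow types.
pub fn can_cast_logical_types(from: &LogicalType, to: &LogicalType) -> bool {
    match (from, to) {
        // One possible policy for user-defined types: only allow casts that
        // round-trip through the declared storage type.
        (LogicalType::Extension(f), other) => can_cast_types(&f.storage, &other.physical_type()),
        (other, LogicalType::Extension(t)) => can_cast_types(&other.physical_type(), &t.storage),
        // Built-in types defer to arrow's existing rules.
        (f, t) => can_cast_types(&f.physical_type(), &t.physical_type()),
    }
}
```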
##########
datafusion/expr/src/logical_plan/builder.rs:
##########
@@ -1223,17 +1222,21 @@ pub fn union(left_plan: LogicalPlan, right_plan: LogicalPlan) -> Result<LogicalP
)
.map(|(left_field, right_field)| {
let nullable = left_field.is_nullable() || right_field.is_nullable();
- let data_type =
- comparison_coercion(left_field.data_type(), right_field.data_type())
- .ok_or_else(|| {
- plan_datafusion_err!(
+ // FIXME support logical data types, not convert to DataType
+ let data_type = comparison_coercion(
Review Comment:
Modify the function `comparison_coercion` to accept LogicalType as parameters.
##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -87,14 +91,27 @@ impl ExprSchemable for Expr {
.iter()
.map(|e| e.get_type(schema))
.collect::<Result<Vec<_>>>()?;
- Ok((fun.return_type)(&data_types)?.as_ref().clone())
+
+ // TODO not convert to DataType
+ let data_types = data_types
+ .into_iter()
+ .map(|e| e.physical_type())
+ .collect::<Vec<_>>();
+
+ Ok((fun.return_type)(&data_types)?.as_ref().clone().into())
Review Comment:
TODO: function signatures for `udf`/`udaf` should use `TypeSignature` instead of the
arrow `DataType`.
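A small sketch of where that TODO points: return-type functions for `udf`/`udaf` written against logical types rather than arrow `DataType`. `LogicalType`, `ReturnTypeFunction`, and `ScalarUdfSketch` below are illustrative stand-ins, not the real `ScalarUDF` API:

```rust
use std::sync::Arc;

/// Hypothetical planner-level type (see the `LogicalType` sketch above).
#[derive(Debug, Clone)]
pub enum LogicalType {
    Utf8,
    /// User-defined type referenced by name.
    Extension(String),
}

/// Return-type function expressed over logical types instead of arrow `DataType`s.
pub type ReturnTypeFunction =
    Arc<dyn Fn(&[LogicalType]) -> Result<Arc<LogicalType>, String> + Send + Sync>;

/// Toy stand-in for `ScalarUDF`, with its signature written against logical types.
pub struct ScalarUdfSketch {
    pub name: String,
    pub arg_types: Vec<LogicalType>,
    pub return_type: ReturnTypeFunction,
}

fn main() {
    // Example: a UDF that accepts the user-defined "ip_address" type and
    // returns Utf8, without mentioning any physical arrow type.
    let return_type: ReturnTypeFunction = Arc::new(|_args| Ok(Arc::new(LogicalType::Utf8)));
    let udf = ScalarUdfSketch {
        name: "ip_to_string".to_string(),
        arg_types: vec![LogicalType::Extension("ip_address".to_string())],
        return_type,
    };

    let ret = (udf.return_type)(&udf.arg_types);
    println!("{} -> {:?}", udf.name, ret);
}
```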
##########
datafusion/expr/src/utils.rs:
##########
@@ -886,39 +887,34 @@ pub(crate) fn find_column_indexes_referenced_by_expr(
/// can this data type be used in hash join equal conditions??
/// data types here come from function 'equal_rows', if more data types are supported
/// in equal_rows(hash join), add those data types here to generate join logical plan.
-pub fn can_hash(data_type: &DataType) -> bool {
+pub fn can_hash(data_type: &LogicalType) -> bool {
Review Comment:
This function `can_hash` might be abstracted as a property of logical data
types.
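A sketch of that suggestion with a hypothetical, reduced `LogicalType`: hashability becomes a method on the logical type, so user-defined types can declare whether they are valid hash-join keys instead of extending a `DataType` match:

```rust
/// Hypothetical stand-in for the PR's planner-level type, reduced to a few
/// variants for illustration.
#[derive(Debug, Clone)]
pub enum LogicalType {
    Int64,
    Utf8,
    /// User-defined type that declares whether it may be used as a
    /// hash-join equality key.
    Extension { name: String, hashable: bool },
}

impl LogicalType {
    /// Replacement for the free function `can_hash(&DataType)`: each logical
    /// type knows whether it can appear in hash join equal conditions.
    pub fn is_hashable(&self) -> bool {
        match self {
            LogicalType::Int64 | LogicalType::Utf8 => true,
            LogicalType::Extension { hashable, .. } => *hashable,
        }
    }
}
```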
##########
datafusion/core/src/execution/context/mod.rs:
##########
@@ -1846,6 +1895,22 @@ impl SessionState {
}
}
+impl TypeManager for SessionState {
Review Comment:
`SessionState` implements `TypeManager` for registering and retrieving extended
data types.
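A minimal, self-contained sketch of what such a `TypeManager` could look like; the trait shape, `OwnedTypeSignature`, and the `&mut self` receivers are guesses for illustration and may differ from the PR's actual trait and from how `SessionState` stores the registry:

```rust
use std::collections::HashMap;
use std::sync::Arc;

/// Identifier for a registered type, e.g. `ip_address` or `decimal(10, 2)`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct OwnedTypeSignature {
    pub name: String,
    pub params: Vec<String>,
}

/// Stand-in for the PR's user-defined logical type.
#[derive(Debug)]
pub struct NamedLogicalType {
    pub name: String,
}

/// Registry interface for user-defined types.
pub trait TypeManager {
    /// Register a user-defined type under its signature.
    fn register_data_type(
        &mut self,
        signature: OwnedTypeSignature,
        data_type: Arc<NamedLogicalType>,
    ) -> Result<(), String>;

    /// Look up a previously registered type, if any.
    fn data_type(&self, signature: &OwnedTypeSignature) -> Option<Arc<NamedLogicalType>>;
}

/// Toy "session state" holding the registry; the real `SessionState` would keep
/// this map alongside catalogs, UDFs, config, etc.
#[derive(Default)]
pub struct SessionStateSketch {
    types: HashMap<OwnedTypeSignature, Arc<NamedLogicalType>>,
}

impl TypeManager for SessionStateSketch {
    fn register_data_type(
        &mut self,
        signature: OwnedTypeSignature,
        data_type: Arc<NamedLogicalType>,
    ) -> Result<(), String> {
        self.types.insert(signature, data_type);
        Ok(())
    }

    fn data_type(&self, signature: &OwnedTypeSignature) -> Option<Arc<NamedLogicalType>> {
        self.types.get(signature).cloned()
    }
}
```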
##########
datafusion/sql/src/planner.rs:
##########
@@ -295,15 +303,16 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
})
}
- pub(crate) fn convert_data_type(&self, sql_type: &SQLDataType) -> Result<DataType> {
+ pub(crate) fn convert_data_type(
Review Comment:
Convert data types in the `AST` to logical data types.
##########
datafusion/core/src/execution/context/mod.rs:
##########
@@ -661,6 +667,28 @@ impl SessionContext {
}
}
+ async fn create_type(&self, cmd: CreateType) -> Result<DataFrame> {
+ let CreateType {
+ name, data_type, ..
+ } = cmd;
+
+ let extension_type = Arc::new(NamedLogicalType::new(name, data_type));
+ let name = extension_type.display_name();
+ let type_signature = extension_type.type_signature();
+
+ let exists_type = self.data_type(&type_signature)?;
+ if exists_type.is_some() {
+ return exec_err!("DataType '{name}' already exists");
+ }
+
+ self.register_data_type(
+ type_signature.to_owned_type_signature(),
+ extension_type,
+ )?;
Review Comment:
Register the extension types created through SQL UDT.
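If this lands, end-to-end usage could look roughly like the snippet below; the `CREATE TYPE ... AS ...` syntax and its interaction with `CAST` are assumptions based on this PR, not documented DataFusion behavior:

```rust
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();

    // Registers `ssn` as an extension type backed by Utf8 (assumed syntax).
    ctx.sql("CREATE TYPE ssn AS VARCHAR").await?;

    // The planner can now resolve the custom name through the type registry.
    let df = ctx.sql("SELECT CAST('123-45-6789' AS ssn) AS s").await?;
    df.show().await?;
    Ok(())
}
```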
##########
datafusion/optimizer/src/analyzer/type_coercion.rs:
##########
@@ -209,8 +211,9 @@ impl TreeNodeRewriter for TypeCoercionRewriter {
escape_char,
case_insensitive,
}) => {
- let left_type = expr.get_type(&self.schema)?;
- let right_type = pattern.get_type(&self.schema)?;
+ // FIXME like_coercion use LogicalType
+ let left_type = expr.get_type(&self.schema)?.physical_type();
+ let right_type = pattern.get_type(&self.schema)?.physical_type();
let coerced_type = like_coercion(&left_type, &right_type).ok_or_else(|| {
Review Comment:
TODO: Modify the function `like_coercion` to accept LogicalType as parameters.
##########
datafusion/sql/src/planner.rs:
##########
@@ -370,11 +379,21 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
};
make_decimal_type(precision, scale)
}
- SQLDataType::Bytea => Ok(DataType::Binary),
- SQLDataType::Interval => Ok(DataType::Interval(IntervalUnit::MonthDayNano)),
+ SQLDataType::Bytea => Ok(LogicalType::Binary),
+ SQLDataType::Interval => Ok(LogicalType::Interval(IntervalUnit::MonthDayNano)),
// Explicitly list all other types so that if sqlparser
// adds/changes the `SQLDataType` the compiler will tell us on upgrade
// and avoid bugs like https://github.com/apache/arrow-datafusion/issues/3059
+ SQLDataType::Custom(name, params) => {
+ let name = object_name_to_string(name);
+ let params = params.iter().map(Into::into).collect();
+ let type_signature = TypeSignature::new_with_params(name, params);
+ if let Some(data_type) = self.context_provider.get_data_type(&type_signature) {
+ return Ok(data_type);
+ }
+
+ plan_err!("User-Defined SQL type {sql_type:?} not found")
+ }
Review Comment:
Convert custom (user-defined) data types by looking them up through the context provider.
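For context, a toy, self-contained sketch of the lookup this arm performs, with simplified stand-ins for `TypeSignature`, the registered type, and `ContextProvider::get_data_type`:

```rust
use std::collections::HashMap;

/// Simplified stand-in for the PR's type signature: a name plus textual parameters.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TypeSignature {
    pub name: String,
    pub params: Vec<String>,
}

impl TypeSignature {
    pub fn new_with_params(name: String, params: Vec<String>) -> Self {
        Self { name, params }
    }
}

/// Simplified stand-in for the registered user-defined type.
#[derive(Debug, Clone)]
pub struct NamedLogicalType {
    pub name: String,
}

/// Stand-in for the `ContextProvider::get_data_type` lookup used by the planner.
pub struct Registry {
    types: HashMap<TypeSignature, NamedLogicalType>,
}

impl Registry {
    pub fn get_data_type(&self, sig: &TypeSignature) -> Option<NamedLogicalType> {
        self.types.get(sig).cloned()
    }
}

fn main() {
    // `CREATE TYPE ssn ...` would have registered the type; `CAST(x AS ssn)`
    // then resolves it through the same signature.
    let sig = TypeSignature::new_with_params("ssn".to_string(), vec![]);

    let mut types = HashMap::new();
    types.insert(sig.clone(), NamedLogicalType { name: "ssn".to_string() });
    let registry = Registry { types };

    match registry.get_data_type(&sig) {
        Some(t) => println!("CAST(x AS {}) resolves to {}", sig.name, t.name),
        None => println!("User-Defined SQL type {:?} not found", sig),
    }
}
```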
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]