devinjdangelo commented on code in PR #7896:
URL: https://github.com/apache/arrow-datafusion/pull/7896#discussion_r1367742418
##########
datafusion/core/src/datasource/file_format/write/demux.rs:
##########
@@ -310,7 +311,37 @@ fn compute_partition_keys_by_row<'a>(
for i in 0..rb.num_rows() {
partition_values.push(array.value(i));
}
- }
+ },
+ DataType::Dictionary(key_type, _) => {
+ match **key_type{
+ DataType::UInt16 => {
+ let dict_array =
as_dictionary_array::<UInt16Type>(col_array);
+ let array = dict_array.downcast_dict::<StringArray>()
+ .ok_or(DataFusionError::NotImplemented(format!("It
is not yet supported to write to hive partitioned with datatype {}", dtype)))?;
+ for val in array.into_iter() {
+ partition_values.push(
+
val.ok_or(DataFusionError::Execution("Partition values cannot be
null!".into()))?
+ );
+ }
+ },
+ DataType::Int32 => {
+ let dict_array =
as_dictionary_array::<Int32Type>(col_array);
+ let array = dict_array.downcast_dict::<StringArray>()
+ .ok_or(DataFusionError::NotImplemented(format!("It
is not yet supported to write to hive partitioned with datatype {}", dtype)))?;
+ for val in array.into_iter() {
+ partition_values.push(
+
val.ok_or(DataFusionError::Execution("Partition values cannot be
null!".into()))?
Review Comment:
Traditionally, null values are sent to `__HIVE_DEFAULT_PARTITION__`. IMO it is
also a reasonable choice to throw an error instead.
https://cwiki.apache.org/confluence/display/Hive/Tutorial#Tutorial-Dynamic-PartitionInsert
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]