Nagato-Yuzuru commented on code in PR #22702:
URL: https://github.com/apache/datafusion/pull/22702#discussion_r3355905794
##########
datafusion/core/src/dataframe/mod.rs:
##########
@@ -2471,6 +2469,65 @@ impl DataFrame {
&self,
value: ScalarValue,
columns: Vec<String>,
+ ) -> Result<DataFrame> {
+ self.fill_columns(value, &columns, coalesce(), |_| true)
+ }
+
+ // Helper to find columns from names
+ fn find_columns(&self, names: &[impl AsRef<str>]) -> Result<Vec<FieldRef>>
{
+ let schema = self.logical_plan().schema();
+ names
+ .iter()
+ .map(|name| {
+ let name = name.as_ref();
+ schema
+ .field_with_name(None, name)
+ .cloned()
+ .map_err(|_| plan_datafusion_err!("Column '{}' not found",
name))
+ })
+ .collect()
+ }
+
+ /// Fill NaN values in specified floating-point columns with a given value
+ /// If no columns are specified (empty slice), applies to all columns
+ /// Only floating-point columns are affected; other columns are left
unchanged
+ /// Only fills if the value can be cast to the column's type
+ ///
+ /// # Arguments
+ /// * `value` - Value to fill NaNs with
+ /// * `columns` - List of column names to fill. If empty, fills all
columns.
+ ///
+ /// # Example
+ /// ```
+ /// # use datafusion::prelude::*;
+ /// # use datafusion::error::Result;
+ /// # use datafusion_common::ScalarValue;
+ /// # #[tokio::main]
+ /// # async fn main() -> Result<()> {
+ /// let ctx = SessionContext::new();
+ /// let df = ctx
+ /// .read_csv("tests/data/example.csv", CsvReadOptions::new())
+ /// .await?;
+ /// // Fill NaN in only columns "a" and "c":
+ /// let df = df.fill_nan(ScalarValue::from(0.0), &["a", "c"])?;
+ /// // Fill NaN across all columns:
+ /// let df = df.fill_nan(ScalarValue::from(0.0), &[])?;
+ /// # Ok(())
+ /// # }
+ /// ```
+ pub fn fill_nan(&self, value: ScalarValue, columns: &[&str]) ->
Result<DataFrame> {
+ self.fill_columns(value, columns, nanvl(), |field| {
+ field.data_type().is_floating()
+ })
+ }
+
+ #[expect(clippy::needless_pass_by_value)]
Review Comment:
Thanks for the review. I moved the #[expect] to the public method.
If we keep the call and stay consistent with fill_null, there will always
be one #[expect], either here or on the helper.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]