timsaucer commented on code in PR #1036:
URL:
https://github.com/apache/datafusion-python/pull/1036#discussion_r2008766096
##########
src/dataframe.rs:
##########
@@ -111,56 +116,151 @@ impl PyDataFrame {
}
fn __repr__(&self, py: Python) -> PyDataFusionResult<String> {
- let df = self.df.as_ref().clone().limit(0, Some(10))?;
- let batches = wait_for_future(py, df.collect())?;
- let batches_as_string = pretty::pretty_format_batches(&batches);
- match batches_as_string {
- Ok(batch) => Ok(format!("DataFrame()\n{batch}")),
- Err(err) => Ok(format!("Error: {:?}", err.to_string())),
+ let (batches, has_more) = wait_for_future(
+ py,
+ collect_record_batches_to_display(self.df.as_ref().clone(), 10, 10),
+ )?;
+ if batches.is_empty() {
+ // This should not be reached, but do it for safety since we index into the vector below
+ return Ok("No data to display".to_string());
}
- }
- fn _repr_html_(&self, py: Python) -> PyDataFusionResult<String> {
- let mut html_str = "<table border='1'>\n".to_string();
+ let batches_as_displ =
+ pretty::pretty_format_batches(&batches).map_err(py_datafusion_err)?;
+
+ let additional_str = match has_more {
+ true => "\nData truncated.",
+ false => "",
+ };
- let df = self.df.as_ref().clone().limit(0, Some(10))?;
- let batches = wait_for_future(py, df.collect())?;
+ Ok(format!("DataFrame()\n{batches_as_displ}{additional_str}"))
+ }
+ fn _repr_html_(&self, py: Python) -> PyDataFusionResult<String> {
+ let (batches, has_more) = wait_for_future(
+ py,
+ collect_record_batches_to_display(
+ self.df.as_ref().clone(),
+ MIN_TABLE_ROWS_TO_DISPLAY,
+ usize::MAX,
+ ),
+ )?;
Review Comment:
Added to the follow-on issue:
https://github.com/apache/datafusion-python/issues/1078
##########
src/dataframe.rs:
##########
@@ -70,6 +72,9 @@ impl PyTableProvider {
PyTable::new(table_provider)
}
}
+const MAX_TABLE_BYTES_TO_DISPLAY: usize = 2 * 1024 * 1024; // 2 MB
Review Comment:
Added to issue https://github.com/apache/datafusion-python/issues/1078
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]