bjornjorgensen commented on PR #40913:
URL: https://github.com/apache/spark/pull/40913#issuecomment-1519137932
Add `Counter` to the existing `collections` import, then define `info` as follows:
```
from collections import defaultdict, namedtuple, Counter
def info(
    self,
    verbose: Optional[bool] = None,
    buf: Optional[IO[str]] = None,
    max_cols: Optional[int] = None,
) -> None:
    """
    Print a concise summary of the DataFrame.

    Parameters
    ----------
    verbose : bool, optional
        Whether to print the per-column table (position, name, non-null
        count and dtype). ``None`` is treated the same as ``True``.
        When ``False`` only the index range, the column range and the
        dtype summary are printed.
    buf : writable text buffer, optional
        Where the summary is written. ``None`` writes to ``sys.stdout``.
    max_cols : int, optional
        Accepted so the signature stays unchanged; this implementation
        does not use it.

    Returns
    -------
    None
        The summary is written to ``buf``; nothing is returned.
    """
    # TODO: should we have corresponding pandas-on-Spark configs?

    def emit(line: str) -> None:
        # print() falls back to sys.stdout when ``file`` is None, so a
        # missing ``buf`` keeps the plain-print behaviour.
        print(line, file=buf)

    columns = self.columns
    detailed = verbose is None or verbose

    # Both output modes end with the same per-dtype tally, so build it once.
    # Counter keeps first-seen order, which fixes the order of the summary.
    dtype_names = [str(self[column].dtype) for column in columns]
    dtypes_summary: str = ", ".join(
        f"{name}({count})" for name, count in Counter(dtype_names).items()
    )

    emit(f"<class '{self.__class__.__module__}.{self.__class__.__name__}'>")

    if detailed:
        index_type: str = type(self.index).__name__
        emit(
            f"{index_type}: {len(self)} entries, "
            f"{self.index.min()} to {self.index.max()}"
        )
        # Column header for the detailed per-column table.
        emit(f"Data columns (total {len(columns)} columns):")
        emit(f" # Column{' ' * 106}Non-Null Count Dtype")
        emit(f"--- ------{' ' * 106}-------------- -----")

        # Non-null counts are only shown in the detailed table, so they are
        # only computed on this path.
        non_null_counts: Dict = self.count().to_dict()
        for idx, (column, dtype_name) in enumerate(zip(columns, dtype_names)):
            # str(column) lets non-string labels (e.g. tuples) accept the
            # width spec; for string labels the output is unchanged.
            emit(
                f"{idx:<3} {str(column):<90} "
                f"{non_null_counts[column]:>30} non-null {dtype_name}"
            )

        emit(f"\ndtypes: {dtypes_summary}")
        return

    emit(
        f"Index: {len(self)} entries, "
        f"{self.index.min()} to {self.index.max()}"
    )
    if len(columns) > 0:
        emit(f"Columns: {len(columns)} entries, {columns[0]} to {columns[-1]}")
    else:
        # No first/last label to show; indexing would raise IndexError.
        emit(f"Columns: {len(columns)} entries")
    emit(f"dtypes: {dtypes_summary}")
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]