HyukjinKwon commented on code in PR #36509:
URL: https://github.com/apache/spark/pull/36509#discussion_r870139491
##########
python/docs/source/user_guide/pandas_on_spark/supported_status_rst_generator.py:
##########
@@ -0,0 +1,225 @@
+from enum import Enum, unique
+from inspect import getmembers, isclass, isfunction, signature
+from typing import Callable, Dict, List, Set, TextIO
+
+import pandas as pd
+import pandas.core.groupby as pdg
+import pandas.core.window as pdw
+import pyspark.pandas as ps
+import pyspark.pandas.groupby as psg
+import pyspark.pandas.window as psw
+
+MAX_MISSING_PARAMS_SIZE = 5
+COMMON_PARAMETER_SET = {"kwargs", "args", "cls"}
+FILE_PATH_PREFIX = "./user_guide/pandas_on_spark"
+HEADER_DOC_FILE = f"{FILE_PATH_PREFIX}/supported_pandas_api_header.txt"
+TARGET_DOC_FILE = f"{FILE_PATH_PREFIX}/supported_pandas_api.rst"
+MODULE_GROUP_MATCH = [(pd, ps), (pdw, psw), (pdg, psg)]
+
+
+@unique
+class Implemented(Enum):
+ IMPLEMENTED = "Y"
+ NOT_IMPLEMENTED = "N"
+ PARTIALLY_IMPLEMENTED = "P"
+
+
+class SupportedStatus:
+ def __init__(self, implemented: str, missing: str = ""):
+ self.implemented = implemented
+ self.missing = missing
+
+
+class SuppportedStatusRSTGenerator:
+ def __init__(self):
+ self.all_supported_status = {}
+
+ def execute(self) -> None:
+ for pd_module_group, ps_module_group in MODULE_GROUP_MATCH:
+ pd_modules = self.get_pd_modules(pd_module_group)
+ self.update_all_supported_status(
+ pd_modules, pd_module_group, ps_module_group
+ )
+ self.write_rst()
+
+ def create_supported_by_module(
+ self, module_name: str, pd_module_group, ps_module_group
+ ) -> Dict[str, SupportedStatus]:
+ pd_module = (
+ getattr(pd_module_group, module_name) if module_name else pd_module_group
+ )
+ try:
+ ps_module = (
+ getattr(ps_module_group, module_name)
+ if module_name
+ else ps_module_group
+ )
+ except AttributeError:
+ # module not implemented
+ return {}
+
+ pd_funcs = dict(
+ [m for m in getmembers(pd_module, isfunction) if not m[0].startswith("_")]
+ )
+ if not pd_funcs:
+ return {}
+
+ ps_funcs = dict(
+ [m for m in getmembers(ps_module, isfunction) if not m[0].startswith("_")]
+ )
+
+ return self.organize_by_implementation_status(
+ module_name, pd_funcs, ps_funcs, pd_module_group, ps_module_group
+ )
+
+ def organize_by_implementation_status(
+ self,
+ module_name: str,
+ pd_funcs: Dict[str, Callable],
+ ps_funcs: Dict[str, Callable],
+ pd_module_group,
+ ps_module_group,
+ ) -> Dict[str, SupportedStatus]:
+ pd_dict = {}
+ for pd_func_name, pd_func in pd_funcs.items():
+ ps_func = ps_funcs.get(pd_func_name)
+ if ps_func:
+ missing_set = (
+ set(signature(pd_func).parameters)
+ - set(signature(ps_func).parameters)
+ - COMMON_PARAMETER_SET
+ )
+ if missing_set:
+ # partially implemented
+ pd_dict[pd_func_name] = SupportedStatus(
+ Implemented.PARTIALLY_IMPLEMENTED.value,
+ self.transform_missing(
+ module_name,
+ pd_func_name,
+ missing_set,
+ pd_module_group.__name__,
+ ps_module_group.__name__,
+ ),
+ )
+ else:
+ # implemented, including all of its parameters
+ pd_dict[pd_func_name] = SupportedStatus(
+ Implemented.IMPLEMENTED.value
+ )
+ else:
+ # not implemented yet
+ pd_dict[pd_func_name] = SupportedStatus(
+ Implemented.NOT_IMPLEMENTED.value
+ )
+ return pd_dict
+
+ def transform_missing(
+ self,
+ module_name: str,
+ pd_func_name: str,
+ missing_set: Set[str],
+ pd_module_path: str,
+ ps_module_path: str,
+ ) -> str:
+ missing_str = " , ".join(
+ list(
+ map(lambda x: f"``{x}``", sorted(missing_set)[:MAX_MISSING_PARAMS_SIZE])
+ )
Review Comment:
maybe:
```suggestion
f"``{x}``" for x in sorted(missing_set)[:MAX_MISSING_PARAMS_SIZE]
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]