xinrong-databricks commented on a change in pull request #32775:
URL: https://github.com/apache/spark/pull/32775#discussion_r645717705



##########
File path: python/pyspark/pandas/internal.py
##########
@@ -73,6 +80,131 @@
 SPARK_DEFAULT_SERIES_NAME = str(DEFAULT_SERIES_NAME)
 
 
+class Field:
+    """
+    The field metadata to store the dtype as well as the Spark's StructField optionally.
+
+    Parameters
+    ----------
+    dtype : numpy.dtype or pandas' ExtensionDtype
+        The dtype for the field
+    struct_field : StructField, optional
+        The `StructField` for the field. If None, InternalFrame will properly set.
+    """
+    def __init__(self, dtype: Dtype, struct_field: Optional[StructField] = None):
+        self._dtype = dtype
+        self._struct_field = struct_field
+
+    @staticmethod
+    def from_struct_field(
+        struct_field: StructField, *, use_extension_dtypes: bool = False
+    ) -> "Field":
+        """
+        Returns a new Field object created from the given StructField.
+
+        The dtype will be inferred from the data type of the given StructField.
+
+        Parameters
+        ----------
+        struct_field : StructField
+            The StructField used to create a new Field object.
+        use_extension_dtypes : bool
+            If True, try to use the extension dtypes.
+
+        Returns
+        -------
+        Field
+        """
+        return Field(
+            dtype=spark_type_to_pandas_dtype(
+                struct_field.dataType, use_extension_dtypes=use_extension_dtypes
+            ),
+            struct_field=struct_field,
+        )
+
+    @property
+    def dtype(self) -> Dtype:
+        """ Return the dtype for the field. """
+        return self._dtype
+
+    @property
+    def struct_field(self) -> Optional[StructField]:
+        """ Return the StructField for the field. """
+        return self._struct_field
+
+    @property
+    def name(self) -> str:
+        """ Return the field name if the StructField exists. """
+        assert self.struct_field is not None
+        return self.struct_field.name
+
+    @property
+    def spark_type(self) -> DataType:
+        """ Return the spark data type for the field if the StructField exists. """
+        assert self.struct_field is not None
+        return self.struct_field.dataType
+
+    @property
+    def nullable(self) -> bool:
+        """ Return the nullability for the field if the StructField exists. """
+        assert self.struct_field is not None
+        return self.struct_field.nullable
+
+    @property
+    def metadata(self) -> Dict[str, Any]:
+        """ Return the metadata for the field if the StructField exists. """
+        assert self.struct_field is not None
+        return self.struct_field.metadata
+
+    @property
+    def is_extension_dtype(self) -> bool:
+        """ Return whether the dtype for the field is an extension type or not. """
+        return isinstance(self.dtype, extension_dtypes)
+
+    def normalize_spark_type(self) -> "Field":
+        """ Return a new Field object with normalized Spark data type. """

Review comment:
       Out of curiosity, is the definition of `normalize` universal?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to