Github user viirya commented on a diff in the pull request:
https://github.com/apache/spark/pull/19892#discussion_r157685008
--- Diff: python/pyspark/ml/feature.py ---
@@ -315,13 +315,19 @@ class BucketedRandomProjectionLSHModel(LSHModel,
JavaMLReadable, JavaMLWritable)
@inherit_doc
-class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol,
HasHandleInvalid,
- JavaMLReadable, JavaMLWritable):
- """
- Maps a column of continuous features to a column of feature buckets.
-
- >>> values = [(0.1,), (0.4,), (1.2,), (1.5,), (float("nan"),),
(float("nan"),)]
- >>> df = spark.createDataFrame(values, ["values"])
+class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol, HasInputCols,
HasOutputCols,
+ HasHandleInvalid, JavaMLReadable, JavaMLWritable):
+ """
+ Maps a column of continuous features to a column of feature buckets.
Since 2.3.0,
+ :py:class:`Bucketizer` can map multiple columns at once by setting the
:py:attr:`inputCols`
+ parameter. Note that when both the :py:attr:`inputCol` and
:py:attr:`inputCols` parameters
+ are set, a log warning will be printed and only :py:attr:`inputCol`
will take effect, while
--- End diff --
Note: there is work underway to change this behavior to throw an exception
instead of a log warning. Remember to update this documentation later.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]