GitHub user WeichenXu123 commented on a diff in the pull request:
https://github.com/apache/spark/pull/21265#discussion_r192000596
--- Diff: python/pyspark/ml/fpm.py ---
@@ -243,3 +244,105 @@ def setParams(self, minSupport=0.3,
minConfidence=0.8, itemsCol="items",
def _create_model(self, java_model):
return FPGrowthModel(java_model)
+
+
+class PrefixSpan(JavaParams):
+ """
+ .. note:: Experimental
+
+ A parallel PrefixSpan algorithm to mine frequent sequential patterns.
+ The PrefixSpan algorithm is described in J. Pei, et al., PrefixSpan:
Mining Sequential Patterns
+ Efficiently by Prefix-Projected Pattern Growth
+ (see <a href="http://doi.org/10.1109/ICDE.2001.914830">here</a>).
+ This class is not yet an Estimator/Transformer, use
:py:func:`findFrequentSequentialPatterns`
+ method to run the PrefixSpan algorithm.
+
+ @see <a
href="https://en.wikipedia.org/wiki/Sequential_Pattern_Mining">Sequential
Pattern Mining
+ (Wikipedia)</a>
+ .. versionadded:: 2.4.0
+
+ """
+
+ minSupport = Param(Params._dummy(), "minSupport", "The minimal support
level of the " +
+ "sequential pattern. Sequential pattern that
appears more than " +
+ "(minSupport * size-of-the-dataset) times will be
output. Must be >= 0.",
+ typeConverter=TypeConverters.toFloat)
+
+ maxPatternLength = Param(Params._dummy(), "maxPatternLength",
+ "The maximal length of the sequential
pattern. Must be > 0.",
+ typeConverter=TypeConverters.toInt)
+
+ maxLocalProjDBSize = Param(Params._dummy(), "maxLocalProjDBSize",
+ "The maximum number of items (including
delimiters used in the " +
+ "internal storage format) allowed in a
projected database before " +
+ "local processing. If a projected database
exceeds this size, " +
+ "another iteration of distributed prefix
growth is run. " +
+ "Must be > 0.",
+ typeConverter=TypeConverters.toInt)
--- End diff --
Just test that the range of Python's 'int' type is the same as that of Java's 'long' type.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]