Github user Krimit commented on a diff in the pull request:
https://github.com/apache/spark/pull/16811#discussion_r99612768
--- Diff: mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
---
@@ -232,19 +232,40 @@ class Word2VecModel private[ml] (
@Since("1.5.0")
def findSynonyms(word: String, num: Int): DataFrame = {
val spark = SparkSession.builder().getOrCreate()
- spark.createDataFrame(wordVectors.findSynonyms(word,
num)).toDF("word", "similarity")
+ spark.createDataFrame(findSynonymsLocal(word, num)).toDF("word",
"similarity")
}
/**
- * Find "num" number of words whose vector representation most similar
to the supplied vector.
+ * Find "num" number of words whose vector representation is most
similar to the supplied vector.
* If the supplied vector is the vector representation of a word in the
model's vocabulary,
* that word will be in the results. Returns a dataframe with the words
and the cosine
* similarities between the synonyms and the given word vector.
*/
@Since("2.0.0")
def findSynonyms(vec: Vector, num: Int): DataFrame = {
val spark = SparkSession.builder().getOrCreate()
- spark.createDataFrame(wordVectors.findSynonyms(vec, num)).toDF("word",
"similarity")
+ spark.createDataFrame(findSynonymsLocal(vec, num)).toDF("word",
"similarity")
+ }
+
+ /**
+ * Find "num" number of words whose vector representation is most
similar to the supplied vector.
+ * If the supplied vector is the vector representation of a word in the
model's vocabulary,
+ * that word will be in the results. Returns an array of the words and
the cosine
+ * similarities between the synonyms and the given word vector.
+ */
+ @Since("2.2.0")
+ def findSynonymsLocal(vec: Vector, num: Int): Array[(String, Double)] = {
+ wordVectors.findSynonyms(vec, num)
+ }
+
+ /**
+ * Find "num" number of words closest in similarity to the given word,
not
+ * including the word itself. Returns a dataframe with the words and the
--- End diff --
👍
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it to be, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]