Manybubbles has uploaded a new change for review.
https://gerrit.wikimedia.org/r/165514
Change subject: Javadoc
......................................................................
Javadoc
Change-Id: I1308b32dc7ac9bc3d59aa2fcd20cc35bf0ea2686
---
M src/main/java/org/wikimedia/search/extra/ExtraPlugin.java
A src/main/java/org/wikimedia/search/extra/package-info.java
M src/main/java/org/wikimedia/search/extra/regex/expression/Expression.java
M
src/main/java/org/wikimedia/search/extra/regex/expression/ExpressionSource.java
M src/main/java/org/wikimedia/search/extra/regex/expression/False.java
M src/main/java/org/wikimedia/search/extra/regex/expression/True.java
M
src/main/java/org/wikimedia/search/extra/regex/ngram/AutomatonTooComplexException.java
M src/main/java/org/wikimedia/search/extra/regex/ngram/NGramExtractor.java
A src/main/java/org/wikimedia/search/extra/util/package-info.java
9 files changed, 93 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/search/extra
refs/changes/14/165514/1
diff --git a/src/main/java/org/wikimedia/search/extra/ExtraPlugin.java
b/src/main/java/org/wikimedia/search/extra/ExtraPlugin.java
index 70f0c95..f0a5b92 100644
--- a/src/main/java/org/wikimedia/search/extra/ExtraPlugin.java
+++ b/src/main/java/org/wikimedia/search/extra/ExtraPlugin.java
@@ -4,6 +4,9 @@
import org.elasticsearch.plugins.AbstractPlugin;
import org.wikimedia.search.extra.regex.SourceRegexFilterParser;
+/**
+ * Sets up the Elasticsearch plugin.
+ */
public class ExtraPlugin extends AbstractPlugin {
@Override
public String description() {
@@ -15,6 +18,9 @@
return "wikimedia-extra";
}
+ /**
+ * Register our parsers.
+ */
public void onModule(IndicesQueriesModule module) {
module.addFilter(new SourceRegexFilterParser());
}
diff --git a/src/main/java/org/wikimedia/search/extra/package-info.java
b/src/main/java/org/wikimedia/search/extra/package-info.java
new file mode 100644
index 0000000..910721c
--- /dev/null
+++ b/src/main/java/org/wikimedia/search/extra/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Elasticsearch plugin entry point that registers the extra filter parsers.
+ */
+package org.wikimedia.search.extra;
\ No newline at end of file
diff --git
a/src/main/java/org/wikimedia/search/extra/regex/expression/Expression.java
b/src/main/java/org/wikimedia/search/extra/regex/expression/Expression.java
index 57d91f6..f123201 100644
--- a/src/main/java/org/wikimedia/search/extra/regex/expression/Expression.java
+++ b/src/main/java/org/wikimedia/search/extra/regex/expression/Expression.java
@@ -7,6 +7,8 @@
* transformed. Simplifying expressions eliminates extraneous terms and factors
* out common terms. Transformation allows client code to convert the expression
* to some other (maybe evaluable) form.
+ *
+ * @param <T> type stored in leaves
*/
public interface Expression<T> {
/**
@@ -38,14 +40,51 @@
/**
* Transform this expression into another form.
+ *
+ * @param <J> type of the result of the transformation
+ * @param transformer transformer applied to this expression
+ * @return result of the transformation
*/
<J> J transform(Transformer<T, J> transformer);
+ /**
+ * Transformer for expression components.
+ *
+ * @param <T> type stored in leaves
+ * @param <J> result of the transformation.
+ */
interface Transformer<T, J> {
+ /**
+ * Transform an expression that is always true.
+ */
J alwaysTrue();
+
+ /**
+ * Transform an expression that is always false.
+ */
J alwaysFalse();
+
+ /**
+ * Transform a leaf expression.
+ *
+ * @param t data stored in the leaf
+ * @return result of the transform
+ */
J leaf(T t);
+
+ /**
+ * Transform an and expression.
+ *
+ * @param js transformed sub-expressions
+ * @return result of the transform
+ */
J and(ImmutableSet<J> js);
+
+ /**
+ * Transform an or expression.
+ *
+ * @param js transformed sub-expressions
+ * @return result of the transform
+ */
J or(ImmutableSet<J> js);
}
}
diff --git
a/src/main/java/org/wikimedia/search/extra/regex/expression/ExpressionSource.java
b/src/main/java/org/wikimedia/search/extra/regex/expression/ExpressionSource.java
index fd0b9e1..9e824c5 100644
---
a/src/main/java/org/wikimedia/search/extra/regex/expression/ExpressionSource.java
+++
b/src/main/java/org/wikimedia/search/extra/regex/expression/ExpressionSource.java
@@ -1,5 +1,14 @@
package org.wikimedia.search.extra.regex.expression;
+/**
+ * Type that can be expressed as an expression.
+ *
+ * @param <T> type stored in leaves
+ */
public interface ExpressionSource<T> {
+ /**
+ * This object expressed as an expression. The result might not be simplified, so
+ * call simplify on it if you need it simplified.
+ */
Expression<T> expression();
}
diff --git
a/src/main/java/org/wikimedia/search/extra/regex/expression/False.java
b/src/main/java/org/wikimedia/search/extra/regex/expression/False.java
index c81d0a5..b5fcd75 100644
--- a/src/main/java/org/wikimedia/search/extra/regex/expression/False.java
+++ b/src/main/java/org/wikimedia/search/extra/regex/expression/False.java
@@ -6,6 +6,9 @@
public class False<T> implements Expression<T> {
private static final False<Object> TRUE = new False<>();
+ /**
+ * There is only one False.
+ */
@SuppressWarnings("unchecked")
public static <T> False<T> instance() {
return (False<T>) TRUE;
diff --git
a/src/main/java/org/wikimedia/search/extra/regex/expression/True.java
b/src/main/java/org/wikimedia/search/extra/regex/expression/True.java
index 734084a..3a38d53 100644
--- a/src/main/java/org/wikimedia/search/extra/regex/expression/True.java
+++ b/src/main/java/org/wikimedia/search/extra/regex/expression/True.java
@@ -6,6 +6,9 @@
public class True<T> implements Expression<T> {
private static final True<Object> TRUE = new True<>();
+ /**
+ * There is only one True.
+ */
@SuppressWarnings("unchecked")
public static <T> True<T> instance() {
return (True<T>) TRUE;
diff --git
a/src/main/java/org/wikimedia/search/extra/regex/ngram/AutomatonTooComplexException.java
b/src/main/java/org/wikimedia/search/extra/regex/ngram/AutomatonTooComplexException.java
index e0f2960..38cc9f0 100644
---
a/src/main/java/org/wikimedia/search/extra/regex/ngram/AutomatonTooComplexException.java
+++
b/src/main/java/org/wikimedia/search/extra/regex/ngram/AutomatonTooComplexException.java
@@ -1,8 +1,15 @@
package org.wikimedia.search.extra.regex.ngram;
+/**
+ * Thrown when the automaton is too complex to convert to ngrams (as measured by
+ * maxExpand).
+ */
public class AutomatonTooComplexException extends IllegalArgumentException {
private static final long serialVersionUID = -4686819368713525883L;
+ /**
+ * Build the exception with its fixed "too complex" message.
+ */
public AutomatonTooComplexException() {
super("The supplied automaton is too complex to extract ngrams");
}
diff --git
a/src/main/java/org/wikimedia/search/extra/regex/ngram/NGramExtractor.java
b/src/main/java/org/wikimedia/search/extra/regex/ngram/NGramExtractor.java
index 0c6f7e0..d8d25a7 100644
--- a/src/main/java/org/wikimedia/search/extra/regex/ngram/NGramExtractor.java
+++ b/src/main/java/org/wikimedia/search/extra/regex/ngram/NGramExtractor.java
@@ -7,17 +7,35 @@
import org.wikimedia.search.extra.regex.expression.Leaf;
import org.wikimedia.search.extra.regex.expression.True;
+/**
+ * Extracts ngrams from automata.
+ */
public class NGramExtractor {
private final int gramSize;
private final int maxExpand;
private final int maxStatesTraced;
+ /**
+ * Build it.
+ *
+ * @param gramSize size of the ngram. The "n" in ngram.
+ * @param maxExpand Maximum size of range transitions to expand into single
+ * transitions. It's roughly analogous to the number of characters
+ * in a character class before it is considered a wildcard for
+ * optimization purposes.
+ * @param maxStatesTraced maximum number of states traced during automaton
+ * functions. Higher numbers allow more complex automata to be
+ * converted to ngram expressions at the cost of more time.
+ */
public NGramExtractor(int gramSize, int maxExpand, int maxStatesTraced) {
this.gramSize = gramSize;
this.maxExpand = maxExpand;
this.maxStatesTraced = maxStatesTraced;
}
+ /**
+ * Extract an Expression containing ngrams from an automaton.
+ */
public Expression<String> extract(Automaton automaton) {
if (automaton.getSingleton() != null) {
int end = automaton.getSingleton().length() - gramSize + 1;
diff --git a/src/main/java/org/wikimedia/search/extra/util/package-info.java
b/src/main/java/org/wikimedia/search/extra/util/package-info.java
new file mode 100644
index 0000000..bbf237a
--- /dev/null
+++ b/src/main/java/org/wikimedia/search/extra/util/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Utilities.
+ */
+package org.wikimedia.search.extra.util;
\ No newline at end of file
--
To view, visit https://gerrit.wikimedia.org/r/165514
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I1308b32dc7ac9bc3d59aa2fcd20cc35bf0ea2686
Gerrit-PatchSet: 1
Gerrit-Project: search/extra
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits