Author: tommaso
Date: Sat Sep 12 07:21:11 2015
New Revision: 1702594
URL: http://svn.apache.org/r1702594
Log:
OPENNLP-817 - added a CFG runner (with samples), added pcfg parse rules / cfg
capabilities
Added:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGRunner.java
opennlp/sandbox/nlp-utils/src/main/resources/
opennlp/sandbox/nlp-utils/src/main/resources/opennlp/
opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/
opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/
opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adj.txt
opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adv.txt
opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/noun.txt
opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/verb.txt
opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/wn/
opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/wn/adj.txt
opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/wn/adv.txt
opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/wn/noun.txt
opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/wn/verb.txt
opennlp/sandbox/nlp-utils/src/test/resources/it-tb-news.txt
Modified:
opennlp/sandbox/nlp-utils/ (props changed)
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammarTest.java
Propchange: opennlp/sandbox/nlp-utils/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Sat Sep 12 07:21:11 2015
@@ -0,0 +1 @@
+*.iml
Added:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGRunner.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGRunner.java?rev=1702594&view=auto
==============================================================================
--- opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGRunner.java (added)
+++ opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGRunner.java Sat Sep 12 07:21:11 2015
@@ -0,0 +1,162 @@
+package org.apache.opennlp.utils.cfg;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.security.SecureRandom;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.Map;
+
+/**
+ * Runner for {@link ContextFreeGrammar}s
+ */
+public class CFGRunner {
+
+ public static void main(String[] args) throws Exception {
+ CFGBuilder builder = new CFGBuilder();
+
+ Arrays.sort(args);
+ boolean useWn = Arrays.binarySearch(args, "-wn") >= 0;
+
+ Collection<String> adverbsCollection;
+ Collection<String> verbsCollection;
+ Collection<String> adjectivesCollection;
+ Collection<String> nounsCollection;
+ if (useWn) {
+ adverbsCollection = getTokens("/opennlp/cfg/wn/adv.txt");
+ adjectivesCollection = getTokens("/opennlp/cfg/wn/adj.txt");
+ nounsCollection = getTokens("/opennlp/cfg/wn/noun.txt");
+ verbsCollection = getTokens("/opennlp/cfg/wn/verb.txt");
+ } else {
+ adverbsCollection = getTokens("/opennlp/cfg/an/adv.txt");
+ adjectivesCollection = getTokens("/opennlp/cfg/an/adj.txt");
+ nounsCollection = getTokens("/opennlp/cfg/an/noun.txt");
+ verbsCollection = getTokens("/opennlp/cfg/an/verb.txt");
+ }
+
+ Collection<String> terminals = new LinkedList<>();
+ terminals.addAll(adverbsCollection);
+ terminals.addAll(verbsCollection);
+ terminals.addAll(adjectivesCollection);
+ terminals.addAll(nounsCollection);
+
+ builder.withTerminals(terminals);
+
+ Collection<String> nonTerminals = new LinkedList<String>();
+ String startSymbol = "START_SYMBOL";
+ nonTerminals.add(startSymbol);
+ nonTerminals.add("NP");
+ nonTerminals.add("NN");
+ nonTerminals.add("Adv");
+ nonTerminals.add("Adj");
+ nonTerminals.add("VP");
+ nonTerminals.add("Vb");
+ builder.withNonTerminals(nonTerminals);
+
+ builder.withStartSymbol(startSymbol);
+
+ Collection<Rule> rules = new LinkedList<Rule>();
+ rules.add(new Rule(startSymbol, "VP", "NP"));
+ rules.add(new Rule("VP", "Adv", "Vb"));
+ rules.add(new Rule("NP", "Adj", "NN"));
+
+ for (String v : verbsCollection) {
+ rules.add(new Rule("Vb", v));
+ }
+ for (String adj : adjectivesCollection) {
+ rules.add(new Rule("Adj", adj));
+ }
+ for (String n : nounsCollection) {
+ rules.add(new Rule("NN", n));
+ }
+ for (String adv : adverbsCollection) {
+ rules.add(new Rule("Adv", adv));
+ }
+ builder.withRules(rules);
+ ContextFreeGrammar cfg = builder.withRandomExpansion(true).build();
+ String[] sentence = cfg.leftMostDerivation(startSymbol);
+ String toString = Arrays.toString(sentence);
+
+ if (toString.length() > 0) {
+ System.out.println(toString.substring(1, toString.length() - 1).replaceAll(",", ""));
+ }
+
+ boolean pt = Arrays.binarySearch(args, "-pt") >= 0;
+
+ if (pt) {
+ Map<Rule, Double> rulesMap = new HashMap<>();
+ rulesMap.put(new Rule(startSymbol, "VP", "NP"), 1d);
+ rulesMap.put(new Rule("VP", "Adv", "Vb"), 1d);
+ rulesMap.put(new Rule("NP", "Adj", "NN"), 1d);
+
+ SecureRandom secureRandom = new SecureRandom();
+
+ double remainingP = 1d;
+ for (String v : verbsCollection) {
+ double p = (double) secureRandom.nextInt(1000) / 1001d;
+ if (rulesMap.size() == verbsCollection.size() - 1) {
+ p = remainingP;
+ }
+ if (remainingP - p <= 0) {
+ p /= 10;
+ }
+ rulesMap.put(new Rule("Vb", v), p);
+ remainingP -= p;
+ }
+ for (String a : adjectivesCollection) {
+ double p = (double) secureRandom.nextInt(1000) / 1001d;
+ if (rulesMap.size() == adjectivesCollection.size() - 1) {
+ p = remainingP;
+ }
+ if (remainingP - p <= 0) {
+ p /= 10;
+ }
+ rulesMap.put(new Rule("Adj", a), p);
+ remainingP -= p;
+ }
+ for (String n : nounsCollection) {
+ double p = (double) secureRandom.nextInt(1000) / 1001d;
+ if (rulesMap.size() == nounsCollection.size() - 1) {
+ p = remainingP;
+ } else if (remainingP - p <= 0) {
+ p /= 10;
+ }
+ rulesMap.put(new Rule("NN", n), p);
+ remainingP -= p;
+ }
+ for (String a : adverbsCollection) {
+ double p = (double) secureRandom.nextInt(1000) / 1001d;
+ if (rulesMap.size() == adverbsCollection.size() - 1) {
+ p = remainingP;
+ }
+ if (remainingP - p <= 0) {
+ p /= 10;
+ }
+ rulesMap.put(new Rule("Adv", a), p);
+ remainingP -= p;
+ }
+ ProbabilisticContextFreeGrammar pcfg = new ProbabilisticContextFreeGrammar(cfg.getNonTerminalSymbols(), cfg.getTerminalSymbols(),
+ rulesMap, startSymbol, true);
+ ProbabilisticContextFreeGrammar.ParseTree parseTree = pcfg.cky(Arrays.asList(sentence));
+ System.out.println(parseTree);
+ }
+ }
+
+ private static Collection<String> getTokens(String s) throws IOException {
+ Collection<String> tokens = new LinkedList<>();
+ InputStream resourceStream = CFGRunner.class.getResourceAsStream(s);
+ BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(resourceStream));
+ String line;
+ while ((line = bufferedReader.readLine()) != null) {
+ tokens.add(line);
+ }
+ bufferedReader.close();
+ resourceStream.close();
+ return tokens;
+ }
+
+}
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java?rev=1702594&r1=1702593&r2=1702594&view=diff
==============================================================================
--- opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java (original)
+++ opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java Sat Sep 12 07:21:11 2015
@@ -19,11 +19,16 @@
package org.apache.opennlp.utils.cfg;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Random;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
/**
* a probabilistic CFG
@@ -36,6 +41,17 @@ public class ProbabilisticContextFreeGra
private final String startSymbol;
private boolean randomExpansion;
+ private static final Rule emptyRule = new Rule("E", "");
+
+ private static final String nonTerminalMatcher = "[\\w\\~\\*\\-\\.\\,\\'\\:\\_\\\"]";
+ private static final String terminalMatcher = "[òÃ
ùìèé\\|\\w\\'\\.\\,\\:\\_Ã\\?Ã\\%\\;Ã\\-\\\"]";
+
+ private static final Pattern terminalPattern = Pattern.compile("\\(("+nonTerminalMatcher+"+)\\s("+terminalMatcher+"+)\\)");
+ private static final Pattern nonTerminalPattern = Pattern.compile(
+ "\\(("+nonTerminalMatcher+"+)" + // source NT
+ "\\s("+nonTerminalMatcher+"+)(\\s("+nonTerminalMatcher+"+))*\\)" // expansion NTs
+ );
+
public ProbabilisticContextFreeGrammar(Collection<String>
nonTerminalSymbols, Collection<String> terminalSymbols,
Map<Rule, Double> rules, String
startSymbol, boolean randomExpansion) {
@@ -119,32 +135,32 @@ public class ProbabilisticContextFreeGra
}
}
- public BackPointer pi(List<String> sentence, int i, int j, String x) {
- BackPointer backPointer = new BackPointer(0, 0, null);
+ public ParseTree pi(List<String> sentence, int i, int j, String x) {
+ ParseTree parseTree = new ParseTree(0, 0, null);
if (i == j) {
Rule rule = new Rule(x, sentence.get(i));
double q = q(rule);
- backPointer = new BackPointer(q, i, rule);
+ parseTree = new ParseTree(q, i, rule);
} else {
double max = 0;
for (Rule rule : getNTRules()) {
for (int s = i; s < j; s++) {
double q = q(rule);
- BackPointer left = pi(sentence, i, s, rule.getExpansion()[0]);
- BackPointer right = pi(sentence, s + 1, j, rule.getExpansion()[1]);
+ ParseTree left = pi(sentence, i, s, rule.getExpansion()[0]);
+ ParseTree right = pi(sentence, s + 1, j, rule.getExpansion()[1]);
double cp = q * left.getProbability() * right.getProbability();
if (cp > max) {
max = cp;
- backPointer = new BackPointer(max, s, rule, left, right);
+ parseTree = new ParseTree(max, s, rule, left, right);
}
}
}
}
- return backPointer;
+ return parseTree;
}
- public BackPointer cky(List<String> sentence) {
- BackPointer backPointer = null;
+ public ParseTree cky(List<String> sentence) {
+ ParseTree parseTree = null;
int n = sentence.size();
for (int l = 1; l < n; l++) {
@@ -155,25 +171,26 @@ public class ProbabilisticContextFreeGra
for (Rule r : getRulesForNonTerminal(x)) {
for (int s = i; s < j - 1; s++) {
double q = q(r);
- BackPointer left = pi(sentence, i, s, r.getExpansion()[0]);
- BackPointer right = pi(sentence, s + 1, j, r.getExpansion()[1]);
+ ParseTree left = pi(sentence, i, s, r.getExpansion()[0]);
+ ParseTree right = pi(sentence, s + 1, j, r.getExpansion()[1]);
double cp = q * left.getProbability() * right.getProbability();
if (cp > max) {
max = cp;
- backPointer = new BackPointer(max, s, r, left, right);
+ parseTree = new ParseTree(max, s, r, left, right);
}
}
}
}
}
}
- return backPointer;
+ return parseTree;
}
private Collection<Rule> getRulesForNonTerminal(String x) {
LinkedList<Rule> ntRules = new LinkedList<Rule>();
for (Rule r : rules.keySet()) {
- if (x.equals(r.getEntry()) &&
nonTerminalSymbols.contains(r.getExpansion()[0]) &&
nonTerminalSymbols.contains(r.getExpansion()[1])) {
+ String[] expansion = r.getExpansion();
+ if (expansion.length == 2 && x.equals(r.getEntry()) &&
nonTerminalSymbols.contains(expansion[0]) &&
nonTerminalSymbols.contains(expansion[1])) {
ntRules.add(r);
}
}
@@ -183,7 +200,8 @@ public class ProbabilisticContextFreeGra
private Collection<Rule> getNTRules() {
Collection<Rule> ntRules = new LinkedList<Rule>();
for (Rule r : rules.keySet()) {
- if (nonTerminalSymbols.contains(r.getExpansion()[0]) &&
nonTerminalSymbols.contains(r.getExpansion()[1])) {
+ String[] expansion = r.getExpansion();
+ if (expansion.length == 2 && nonTerminalSymbols.contains(expansion[0])
&& nonTerminalSymbols.contains(expansion[1])) {
ntRules.add(r);
}
}
@@ -194,21 +212,21 @@ public class ProbabilisticContextFreeGra
return rules.keySet().contains(rule) ? rules.get(rule) : 0;
}
- public class BackPointer {
+ public class ParseTree {
private final double probability;
private final int splitPoint;
private final Rule rule;
- private BackPointer leftTree;
- private BackPointer rightTree;
+ private ParseTree leftTree;
+ private ParseTree rightTree;
- private BackPointer(double probability, int splitPoint, Rule rule) {
+ private ParseTree(double probability, int splitPoint, Rule rule) {
this.probability = probability;
this.splitPoint = splitPoint;
this.rule = rule;
}
- public BackPointer(double probability, int splitPoint, Rule rule,
BackPointer leftTree, BackPointer rightTree) {
+ public ParseTree(double probability, int splitPoint, Rule rule, ParseTree
leftTree, ParseTree rightTree) {
this.probability = probability;
this.splitPoint = splitPoint;
this.rule = rule;
@@ -228,24 +246,146 @@ public class ProbabilisticContextFreeGra
return rule;
}
- public BackPointer getLeftTree() {
+ public ParseTree getLeftTree() {
return leftTree;
}
- public BackPointer getRightTree() {
+ public ParseTree getRightTree() {
return rightTree;
}
@Override
public String toString() {
- return "(" +
- rule.getEntry() + " " +
- (leftTree != null && rightTree != null ?
- leftTree.toString() + " " + rightTree.toString() :
- rule.getExpansion()[0]
- ) +
- ')';
+ if (getRule() != emptyRule) {
+ return "(" +
+ rule.getEntry() + " " +
+ (leftTree != null && rightTree != null ?
+ leftTree.toString() + " " + rightTree.toString() :
+ rule.getExpansion()[0]
+ ) +
+ ')';
+ } else {
+ return "";
+ }
}
+
+ }
+
+ public static Map<Rule, Double> parseRules(String... parseTreeString) {
+ Map<Rule, Double> rules = new HashMap<>();
+ parseRules(rules, false, parseTreeString);
+ return rules;
+ }
+
+ public static void parseRules(Map<Rule, Double> rules, boolean trim,
String... parseStrings) {
+ parseGrammar(rules, "S", trim, parseStrings);
}
+ public static ProbabilisticContextFreeGrammar parseGrammar(boolean trim,
String... parseTreeStrings) {
+ return parseGrammar(new HashMap<Rule, Double>(), "S", trim,
parseTreeStrings);
+ }
+
+ public static ProbabilisticContextFreeGrammar parseGrammar(String...
parseTreeStrings) {
+ return parseGrammar(new HashMap<Rule, Double>(), "S", true,
parseTreeStrings);
+ }
+
+ public static ProbabilisticContextFreeGrammar parseGrammar(Map<Rule, Double>
rulesMap, String startSymbol, boolean trim, String... parseStrings) {
+
+ Map<Rule, Double> rules = new HashMap<>();
+
+ Collection<String> nonTerminals = new HashSet<>();
+ Collection<String> terminals = new HashSet<>();
+
+ for (String parseTreeString : parseStrings) {
+
+ if (trim) {
+ parseTreeString = parseTreeString.replaceAll("\n",
"").replaceAll("\t", "").replaceAll("\\s+", " ");
+ }
+
+ String toConsume = String.valueOf(parseTreeString);
+
+ Matcher m = terminalPattern.matcher(parseTreeString);
+ while (m.find()) {
+ String nt = m.group(1);
+ String t = m.group(2);
+ Rule key = new Rule(nt, t);
+ if (!rules.containsKey(key)) {
+ rules.put(key, 1d);
+ terminals.add(t);
+// System.err.println(key);
+ }
+ toConsume = toConsume.replace(m.group(), nt);
+ }
+
+ while (toConsume.contains(" ") && !toConsume.trim().equals("( " +
startSymbol + " )")) {
+ Matcher m2 = nonTerminalPattern.matcher(toConsume);
+ while (m2.find()) {
+ String nt = m2.group(1);
+ String t1 = m2.group(2);
+ String t2 = m2.group(3);
+
+ Rule key;
+ if (t2 != null) {
+ String[] t2s = t2.trim().split(" ");
+ String[] nts = new String[t2s.length + 1];
+ nts[0] = t1;
+ System.arraycopy(t2s, 0, nts, 1, t2s.length);
+ key = new Rule(nt, nts);
+ nonTerminals.addAll(Arrays.asList(nts));
+ } else {
+ key = new Rule(nt, t1);
+ nonTerminals.add(t1);
+ }
+ nonTerminals.add(key.getEntry());
+
+ if (!rules.containsKey(key)) {
+ rules.put(key, 1d);
+// startSymbol = key.getEntry();
+// System.err.println(key);
+ }
+ toConsume = toConsume.replace(m2.group(), nt);
+ }
+ }
+ }
+
+ // TODO : check/adjust rules to make them respect CNF
+ // TODO : adjust probabilities based on term frequencies
+ for (Map.Entry<Rule, Double> entry : rules.entrySet()) {
+ normalize(entry.getKey(), nonTerminals, terminals, rulesMap);
+ }
+
+ return new ProbabilisticContextFreeGrammar(nonTerminals, terminals,
rulesMap, startSymbol, true);
+ }
+
+ private static void normalize(Rule rule, Collection<String> nonTerminals,
Collection<String> terminals, Map<Rule, Double> rulesMap) {
+ String[] expansion = rule.getExpansion();
+ if (expansion.length == 1) {
+ if (!terminals.contains(expansion[0])) {
+ if (nonTerminals.contains(expansion[0])) {
+ // nt1 -> nt2 should be expanded in nt1 -> nt2,E
+ rulesMap.put(new Rule(rule.getEntry(), expansion[0],
emptyRule.getEntry()), 1d);
+ if (rulesMap.containsKey(emptyRule)) {
+ rulesMap.put(emptyRule, 1d);
+ }
+ } else {
+ throw new RuntimeException("rule "+rule+" expands to neither a
terminal or non terminal");
+ }
+ } else {
+ rulesMap.put(rule, 1d);
+ }
+ } else if (expansion.length > 2){
+ // nt1 -> nt2,nt3,...,ntn should be collapsed to a hierarchy of ntX ->
ntY,ntZ rules
+ String nt2 = expansion[0];
+ int seed = nonTerminals.size();
+ String generatedNT = "GEN~" + seed;
+ nonTerminals.add(generatedNT);
+ Rule newRule = new Rule(rule.getEntry(), nt2, generatedNT);
+ rulesMap.put(newRule, 1d);
+ Rule chainedRule = new Rule(generatedNT, Arrays.copyOfRange(expansion,
1, expansion.length - 1));
+ rulesMap.put(chainedRule, 1d);
+ normalize(chainedRule, nonTerminals, terminals, rulesMap);
+ } else {
+ rulesMap.put(rule, 1d);
+ }
+ }
}
Added: opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adj.txt
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adj.txt?rev=1702594&view=auto
==============================================================================
--- opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adj.txt (added)
+++ opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adj.txt Sat Sep 12 07:21:11 2015
@@ -0,0 +1,157 @@
+24/7
+24/365
+accurate
+adaptive
+alternative
+an expanded array of
+B2B
+B2C
+backend
+backward-compatible
+best-of-breed
+bleeding-edge
+bricks-and-clicks
+business
+clicks-and-mortar
+client-based
+client-centered
+client-centric
+client-focused
+collaborative
+compelling
+competitive
+cooperative
+corporate
+cost effective
+covalent
+cross functional
+cross-media
+cross-platform
+cross-unit
+customer directed
+customized
+cutting-edge
+distinctive
+distributed
+diverse
+dynamic
+e-business
+economically sound
+effective
+efficient
+elastic
+emerging
+empowered
+enabled
+end-to-end
+enterprise
+enterprise-wide
+equity invested
+error-free
+ethical
+excellent
+exceptional
+extensible
+extensive
+flexible
+focused
+frictionless
+front-end
+fully researched
+fully tested
+functional
+functionalized
+fungible
+future-proof
+global
+go forward
+goal-oriented
+granular
+high standards in
+high-payoff
+hyperscale
+high-quality
+highly efficient
+holistic
+impactful
+inexpensive
+innovative
+installed base
+integrated
+interactive
+interdependent
+intermandated
+interoperable
+intuitive
+just in time
+leading-edge
+leveraged
+long-term high-impact
+low-risk high-yield
+magnetic
+maintainable
+market positioning
+market-driven
+mission-critical
+multidisciplinary
+multifunctional
+multimedia based
+next-generation
+on-demand
+one-to-one
+open-source
+optimal
+orthogonal
+out-of-the-box
+pandemic
+parallel
+performance based
+plug-and-play
+premier
+premium
+principle-centered
+proactive
+process-centric
+professional
+progressive
+prospective
+quality
+real-time
+reliable
+resource sucking
+resource maximizing
+resource-leveling
+revolutionary
+robust
+scalable
+seamless
+stand-alone
+standardized
+standards compliant
+state of the art
+sticky
+strategic
+superior
+sustainable
+synergistic
+tactical
+team building
+team driven
+technically sound
+timely
+top-line
+transparent
+turnkey
+ubiquitous
+unique
+user-centric
+user friendly
+value-added
+vertical
+viral
+virtual
+visionary
+web-enabled
+wireless
+world-class
+worldwide
\ No newline at end of file
Added: opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adv.txt
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adv.txt?rev=1702594&view=auto
==============================================================================
--- opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adv.txt (added)
+++ opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adv.txt Sat Sep 12 07:21:11 2015
@@ -0,0 +1,34 @@
+appropriately
+assertively
+authoritatively
+collaboratively
+compellingly
+competently
+completely
+continually
+conveniently
+credibly
+distinctively
+dramatically
+dynamically
+efficiently
+energistically
+enthusiastically
+fungibly
+globally
+holisticly
+interactively
+intrinsically
+monotonectally
+objectively
+phosfluorescently
+proactively
+professionally
+progressively
+quickly
+rapidiously
+seamlessly
+synergistically
+uniquely"
+
+
Added: opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/noun.txt
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/noun.txt?rev=1702594&view=auto
==============================================================================
--- opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/noun.txt (added)
+++ opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/noun.txt Sat Sep 12 07:21:11 2015
@@ -0,0 +1,89 @@
+action items
+alignments
+applications
+architectures
+bandwidth
+benefits
+best practices
+catalysts for change
+channels
+clouds
+collaboration and idea-sharing
+communities
+content
+convergence
+core competencies
+customer service
+data
+deliverables
+e-business
+e-commerce
+e-markets
+e-tailers
+e-services
+experiences
+expertise
+functionalities
+fungibility
+growth strategies
+human capital
+ideas
+imperatives
+infomediaries
+information
+infrastructures
+initiatives
+innovation
+intellectual capital
+interfaces
+internal or \"organic\" sources
+leadership
+leadership skills
+manufactured products
+markets
+materials
+meta-services
+methodologies
+methods of empowerment
+metrics
+mindshare
+models
+networks
+niches
+niche markets
+nosql
+opportunities
+\"outside the box\" thinking
+outsourcing
+paradigms
+partnerships
+platforms
+portals
+potentialities
+rocess improvements
+processes
+products
+quality vectors
+relationships
+resources
+results
+ROI
+scenarios
+schemas
+services
+solutions
+sources
+strategic theme areas
+storage
+supply chains
+synergy
+systems
+technologies
+technology
+testing procedures
+total linkage
+users
+value
+vortals
+web-readiness
+web services
\ No newline at end of file
Added: opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/verb.txt
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/verb.txt?rev=1702594&view=auto
==============================================================================
--- opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/verb.txt (added)
+++ opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/verb.txt Sat Sep 12 07:21:11 2015
@@ -0,0 +1,97 @@
+actualize
+administrate
+aggregate
+architect
+benchmark
+brand
+build
+cloudify
+communicate
+conceptualize
+coordinate
+create
+cultivate
+customize
+deliver
+deploy
+develop
+dinintermediate disseminate
+drive
+embrace
+e-enable
+empower
+enable
+engage
+engineer
+enhance
+envisioneer
+evisculate
+evolve
+expedite
+exploit
+extend
+fabricate
+facilitate
+fashion
+formulate
+foster
+generate
+grow
+harness
+impact
+implement
+incentivize
+incubate
+initiate
+innovate
+integrate
+iterate
+leverage existing
+leverage other's
+maintain
+matrix
+maximize
+mesh
+monetize
+morph
+myocardinate
+negotiate
+network
+optimize
+orchestrate
+parallel task
+plagiarize
+pontificate
+predominate
+procrastinate
+productivate
+productize
+promote
+provide access to
+pursue
+recaptiualize
+reconceptualize
+redefine
+re-engineer
+reintermediate
+reinvent
+repurpose
+restore
+revolutionize
+scale
+seize
+simplify
+strategize
+streamline
+supply
+syndicate
+synergize
+synthesize
+target
+transform
+transition
+underwhelm
+unleash
+utilize
+visualize
+whiteboard
\ No newline at end of file