Author: tommaso
Date: Sat Sep 12 07:21:11 2015
New Revision: 1702594

URL: http://svn.apache.org/r1702594
Log:
OPENNLP-817 - added a CFG runner (with samples), added pcfg parse rules / cfg 
capabilities

Added:
    
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGRunner.java
    opennlp/sandbox/nlp-utils/src/main/resources/
    opennlp/sandbox/nlp-utils/src/main/resources/opennlp/
    opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/
    opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/
    opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adj.txt
    opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adv.txt
    opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/noun.txt
    opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/verb.txt
    opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/wn/
    opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/wn/adj.txt
    opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/wn/adv.txt
    opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/wn/noun.txt
    opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/wn/verb.txt
    opennlp/sandbox/nlp-utils/src/test/resources/it-tb-news.txt
Modified:
    opennlp/sandbox/nlp-utils/   (props changed)
    
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
    
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammarTest.java

Propchange: opennlp/sandbox/nlp-utils/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Sat Sep 12 07:21:11 2015
@@ -0,0 +1 @@
+*.iml

Added: 
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGRunner.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGRunner.java?rev=1702594&view=auto
==============================================================================
--- 
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGRunner.java
 (added)
+++ 
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGRunner.java
 Sat Sep 12 07:21:11 2015
@@ -0,0 +1,162 @@
+package org.apache.opennlp.utils.cfg;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.security.SecureRandom;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.Map;
+
+/**
+ * Runner for {@link ContextFreeGrammar}s
+ */
+public class CFGRunner {
+
+    /**
+     * Generates one random sentence from a small hard-coded grammar
+     * ({@code START_SYMBOL -> VP NP}, {@code VP -> Adv Vb}, {@code NP -> Adj NN}) whose terminals are
+     * read from word lists on the classpath, and prints it to standard output.
+     * <p>
+     * Recognised flags:
+     * <ul>
+     * <li>{@code -wn}: read the word lists under {@code /opennlp/cfg/wn/} instead of
+     * {@code /opennlp/cfg/an/}</li>
+     * <li>{@code -pt}: additionally build a {@link ProbabilisticContextFreeGrammar} over the same rules
+     * with random lexical probabilities and print the parse tree its {@code cky} method returns for
+     * the generated sentence</li>
+     * </ul>
+     *
+     * @param args command line flags (sorted in place by this method)
+     * @throws Exception if one of the word lists cannot be read
+     */
+    public static void main(String[] args) throws Exception {
+        CFGBuilder builder = new CFGBuilder();
+
+        // the binary searches below (for -wn and -pt) require a sorted array
+        Arrays.sort(args);
+        boolean useWn = Arrays.binarySearch(args, "-wn") >= 0;
+
+        Collection<String> adverbsCollection;
+        Collection<String> verbsCollection;
+        Collection<String> adjectivesCollection;
+        Collection<String> nounsCollection;
+        if (useWn) {
+            adverbsCollection = getTokens("/opennlp/cfg/wn/adv.txt");
+            adjectivesCollection = getTokens("/opennlp/cfg/wn/adj.txt");
+            nounsCollection = getTokens("/opennlp/cfg/wn/noun.txt");
+            verbsCollection = getTokens("/opennlp/cfg/wn/verb.txt");
+        } else {
+            adverbsCollection = getTokens("/opennlp/cfg/an/adv.txt");
+            adjectivesCollection = getTokens("/opennlp/cfg/an/adj.txt");
+            nounsCollection = getTokens("/opennlp/cfg/an/noun.txt");
+            verbsCollection = getTokens("/opennlp/cfg/an/verb.txt");
+        }
+
+        Collection<String> terminals = new LinkedList<>();
+        terminals.addAll(adverbsCollection);
+        terminals.addAll(verbsCollection);
+        terminals.addAll(adjectivesCollection);
+        terminals.addAll(nounsCollection);
+
+        builder.withTerminals(terminals);
+
+        Collection<String> nonTerminals = new LinkedList<String>();
+        String startSymbol = "START_SYMBOL";
+        nonTerminals.add(startSymbol);
+        nonTerminals.add("NP");
+        nonTerminals.add("NN");
+        nonTerminals.add("Adv");
+        nonTerminals.add("Adj");
+        nonTerminals.add("VP");
+        nonTerminals.add("Vb");
+        builder.withNonTerminals(nonTerminals);
+
+        builder.withStartSymbol(startSymbol);
+
+        Collection<Rule> rules = new LinkedList<Rule>();
+        rules.add(new Rule(startSymbol, "VP", "NP"));
+        rules.add(new Rule("VP", "Adv", "Vb"));
+        rules.add(new Rule("NP", "Adj", "NN"));
+
+        for (String v : verbsCollection) {
+            rules.add(new Rule("Vb", v));
+        }
+        for (String adj : adjectivesCollection) {
+            rules.add(new Rule("Adj", adj));
+        }
+        for (String n : nounsCollection) {
+            rules.add(new Rule("NN", n));
+        }
+        for (String adv : adverbsCollection) {
+            rules.add(new Rule("Adv", adv));
+        }
+        builder.withRules(rules);
+        ContextFreeGrammar cfg = builder.withRandomExpansion(true).build();
+        String[] sentence = cfg.leftMostDerivation(startSymbol);
+        String toString = Arrays.toString(sentence);
+
+        // strip the surrounding brackets and the separating commas of Arrays.toString
+        if (toString.length() > 0) {
+            System.out.println(toString.substring(1, toString.length() - 1).replaceAll(",", ""));
+        }
+
+        boolean pt = Arrays.binarySearch(args, "-pt") >= 0;
+
+        if (pt) {
+            Map<Rule, Double> rulesMap = new HashMap<>();
+            rulesMap.put(new Rule(startSymbol, "VP", "NP"), 1d);
+            rulesMap.put(new Rule("VP", "Adv", "Vb"), 1d);
+            rulesMap.put(new Rule("NP", "Adj", "NN"), 1d);
+
+            SecureRandom secureRandom = new SecureRandom();
+
+            // each pre-terminal gets its own, independent probability distribution
+            putRandomProbabilities(rulesMap, "Vb", verbsCollection, secureRandom);
+            putRandomProbabilities(rulesMap, "Adj", adjectivesCollection, secureRandom);
+            putRandomProbabilities(rulesMap, "NN", nounsCollection, secureRandom);
+            putRandomProbabilities(rulesMap, "Adv", adverbsCollection, secureRandom);
+
+            ProbabilisticContextFreeGrammar pcfg = new ProbabilisticContextFreeGrammar(cfg.getNonTerminalSymbols(), cfg.getTerminalSymbols(),
+                    rulesMap, startSymbol, true);
+            ProbabilisticContextFreeGrammar.ParseTree parseTree = pcfg.cky(Arrays.asList(sentence));
+            System.out.println(parseTree);
+        }
+    }
+
+    /**
+     * Adds one {@code entry -> token} rule per token to {@code rulesMap}, each with a random, strictly
+     * positive probability. The probabilities handed out by one call sum to 1 (a token occurring more
+     * than once keeps only the probability of its last occurrence).
+     *
+     * @param rulesMap the map receiving the rules
+     * @param entry    the left hand side shared by all generated rules
+     * @param tokens   the terminals to generate rules for
+     * @param random   the source of randomness
+     */
+    private static void putRandomProbabilities(Map<Rule, Double> rulesMap, String entry,
+                                               Collection<String> tokens, SecureRandom random) {
+        double[] weights = new double[tokens.size()];
+        double total = 0d;
+        for (int i = 0; i < weights.length; i++) {
+            // 1..1000, never zero, so that every generated sentence stays parseable
+            weights[i] = 1 + random.nextInt(1000);
+            total += weights[i];
+        }
+        int i = 0;
+        for (String token : tokens) {
+            rulesMap.put(new Rule(entry, token), weights[i++] / total);
+        }
+    }
+
+    /**
+     * Reads a classpath resource line by line and returns every non-blank line as one token.
+     *
+     * @param s the resource path, resolved through {@code CFGRunner.class.getResourceAsStream}
+     * @return the tokens in file order
+     * @throws IOException if the resource does not exist or cannot be read
+     */
+    private static Collection<String> getTokens(String s) throws IOException {
+        InputStream resourceStream = CFGRunner.class.getResourceAsStream(s);
+        if (resourceStream == null) {
+            // getResourceAsStream signals a missing resource with null rather than an exception
+            throw new IOException("resource not found on classpath: " + s);
+        }
+        Collection<String> tokens = new LinkedList<>();
+        // closing the reader also closes the wrapped stream, even when reading fails
+        try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(resourceStream))) {
+            String line;
+            while ((line = bufferedReader.readLine()) != null) {
+                // trailing blank lines would otherwise turn into empty-string terminals
+                if (!line.trim().isEmpty()) {
+                    tokens.add(line);
+                }
+            }
+        }
+        return tokens;
+    }
+
+}

Modified: 
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java?rev=1702594&r1=1702593&r2=1702594&view=diff
==============================================================================
--- 
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
 (original)
+++ 
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
 Sat Sep 12 07:21:11 2015
@@ -19,11 +19,16 @@
 package org.apache.opennlp.utils.cfg;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 /**
  * a probabilistic CFG
@@ -36,6 +41,17 @@ public class ProbabilisticContextFreeGra
   private final String startSymbol;
   private boolean randomExpansion;
 
+  private static final Rule emptyRule = new Rule("E", "");
+
+  private static final String nonTerminalMatcher = 
"[\\w\\~\\*\\-\\.\\,\\'\\:\\_\\\"]";
+  private static final String terminalMatcher = "[òà
ùìèé\\|\\w\\'\\.\\,\\:\\_Ù\\?È\\%\\;À\\-\\\"]";
+
+  private static final Pattern terminalPattern = 
Pattern.compile("\\(("+nonTerminalMatcher+"+)\\s("+terminalMatcher+"+)\\)");
+  private static final Pattern nonTerminalPattern = Pattern.compile(
+          "\\(("+nonTerminalMatcher+"+)" + // source NT
+                  
"\\s("+nonTerminalMatcher+"+)(\\s("+nonTerminalMatcher+"+))*\\)" // expansion 
NTs
+  );
+
   public ProbabilisticContextFreeGrammar(Collection<String> 
nonTerminalSymbols, Collection<String> terminalSymbols,
                                          Map<Rule, Double> rules, String 
startSymbol, boolean randomExpansion) {
 
@@ -119,32 +135,32 @@ public class ProbabilisticContextFreeGra
     }
   }
 
-  public BackPointer pi(List<String> sentence, int i, int j, String x) {
-    BackPointer backPointer = new BackPointer(0, 0, null);
+  public ParseTree pi(List<String> sentence, int i, int j, String x) {
+    ParseTree parseTree = new ParseTree(0, 0, null);
     if (i == j) {
       Rule rule = new Rule(x, sentence.get(i));
       double q = q(rule);
-      backPointer = new BackPointer(q, i, rule);
+      parseTree = new ParseTree(q, i, rule);
     } else {
       double max = 0;
       for (Rule rule : getNTRules()) {
         for (int s = i; s < j; s++) {
           double q = q(rule);
-          BackPointer left = pi(sentence, i, s, rule.getExpansion()[0]);
-          BackPointer right = pi(sentence, s + 1, j, rule.getExpansion()[1]);
+          ParseTree left = pi(sentence, i, s, rule.getExpansion()[0]);
+          ParseTree right = pi(sentence, s + 1, j, rule.getExpansion()[1]);
           double cp = q * left.getProbability() * right.getProbability();
           if (cp > max) {
             max = cp;
-            backPointer = new BackPointer(max, s, rule, left, right);
+            parseTree = new ParseTree(max, s, rule, left, right);
           }
         }
       }
     }
-    return backPointer;
+    return parseTree;
   }
 
-  public BackPointer cky(List<String> sentence) {
-    BackPointer backPointer = null;
+  public ParseTree cky(List<String> sentence) {
+    ParseTree parseTree = null;
 
     int n = sentence.size();
     for (int l = 1; l < n; l++) {
@@ -155,25 +171,26 @@ public class ProbabilisticContextFreeGra
           for (Rule r : getRulesForNonTerminal(x)) {
             for (int s = i; s < j - 1; s++) {
               double q = q(r);
-              BackPointer left = pi(sentence, i, s, r.getExpansion()[0]);
-              BackPointer right = pi(sentence, s + 1, j, r.getExpansion()[1]);
+              ParseTree left = pi(sentence, i, s, r.getExpansion()[0]);
+              ParseTree right = pi(sentence, s + 1, j, r.getExpansion()[1]);
               double cp = q * left.getProbability() * right.getProbability();
               if (cp > max) {
                 max = cp;
-                backPointer = new BackPointer(max, s, r, left, right);
+                parseTree = new ParseTree(max, s, r, left, right);
               }
             }
           }
         }
       }
     }
-    return backPointer;
+    return parseTree;
   }
 
   private Collection<Rule> getRulesForNonTerminal(String x) {
     LinkedList<Rule> ntRules = new LinkedList<Rule>();
     for (Rule r : rules.keySet()) {
-      if (x.equals(r.getEntry()) && 
nonTerminalSymbols.contains(r.getExpansion()[0]) && 
nonTerminalSymbols.contains(r.getExpansion()[1])) {
+      String[] expansion = r.getExpansion();
+      if (expansion.length == 2 && x.equals(r.getEntry()) && 
nonTerminalSymbols.contains(expansion[0]) && 
nonTerminalSymbols.contains(expansion[1])) {
         ntRules.add(r);
       }
     }
@@ -183,7 +200,8 @@ public class ProbabilisticContextFreeGra
   private Collection<Rule> getNTRules() {
     Collection<Rule> ntRules = new LinkedList<Rule>();
     for (Rule r : rules.keySet()) {
-      if (nonTerminalSymbols.contains(r.getExpansion()[0]) && 
nonTerminalSymbols.contains(r.getExpansion()[1])) {
+      String[] expansion = r.getExpansion();
+      if (expansion.length == 2 && nonTerminalSymbols.contains(expansion[0]) 
&& nonTerminalSymbols.contains(expansion[1])) {
         ntRules.add(r);
       }
     }
@@ -194,21 +212,21 @@ public class ProbabilisticContextFreeGra
     return rules.keySet().contains(rule) ? rules.get(rule) : 0;
   }
 
-  public class BackPointer {
+  public class ParseTree {
 
     private final double probability;
     private final int splitPoint;
     private final Rule rule;
-    private BackPointer leftTree;
-    private BackPointer rightTree;
+    private ParseTree leftTree;
+    private ParseTree rightTree;
 
-    private BackPointer(double probability, int splitPoint, Rule rule) {
+    private ParseTree(double probability, int splitPoint, Rule rule) {
       this.probability = probability;
       this.splitPoint = splitPoint;
       this.rule = rule;
     }
 
-    public BackPointer(double probability, int splitPoint, Rule rule, 
BackPointer leftTree, BackPointer rightTree) {
+    public ParseTree(double probability, int splitPoint, Rule rule, ParseTree 
leftTree, ParseTree rightTree) {
       this.probability = probability;
       this.splitPoint = splitPoint;
       this.rule = rule;
@@ -228,24 +246,146 @@ public class ProbabilisticContextFreeGra
       return rule;
     }
 
-    public BackPointer getLeftTree() {
+    public ParseTree getLeftTree() {
       return leftTree;
     }
 
-    public BackPointer getRightTree() {
+    public ParseTree getRightTree() {
       return rightTree;
     }
 
     @Override
     public String toString() {
-      return "(" +
-              rule.getEntry() + " " +
-              (leftTree != null && rightTree != null ?
-                      leftTree.toString() + " " + rightTree.toString() :
-                      rule.getExpansion()[0]
-              ) +
-              ')';
+      if (getRule() != emptyRule) {
+        return "(" +
+                rule.getEntry() + " " +
+                (leftTree != null && rightTree != null ?
+                        leftTree.toString() + " " + rightTree.toString() :
+                        rule.getExpansion()[0]
+                ) +
+                ')';
+      } else {
+        return "";
+      }
     }
+
+  }
+
+  /**
+   * Extracts the grammar rules found in the given bracketed parse tree strings, using "S" as the
+   * start symbol and without whitespace trimming.
+   *
+   * @param parseTreeString the parse tree strings to read
+   * @return a new map holding the extracted rules
+   */
+  public static Map<Rule, Double> parseRules(String... parseTreeString) {
+    Map<Rule, Double> extracted = new HashMap<>();
+    final boolean trim = false;
+    parseRules(extracted, trim, parseTreeString);
+    return extracted;
+  }
+
+  public static void parseRules(Map<Rule, Double> rules, boolean trim, 
String... parseStrings) {
+    parseGrammar(rules, "S", trim, parseStrings);
   }
 
+  public static ProbabilisticContextFreeGrammar parseGrammar(boolean trim, 
String... parseTreeStrings) {
+    return parseGrammar(new HashMap<Rule, Double>(), "S", trim, 
parseTreeStrings);
+  }
+
+  public static ProbabilisticContextFreeGrammar parseGrammar(String... 
parseTreeStrings) {
+    return parseGrammar(new HashMap<Rule, Double>(), "S", true, 
parseTreeStrings);
+  }
+
+  /**
+   * Builds a grammar from bracketed parse tree strings such as {@code (S (NP (NN dogs)) (VP (Vb bark)))}.
+   * <p>
+   * Each string is reduced bottom-up: first every {@code (NT terminal)} leaf is recorded as a rule and
+   * replaced by its non terminal, then every innermost {@code (NT NT1 ... NTn)} node is recorded and
+   * replaced by its non terminal, until nothing is left to reduce. The collected rules are finally
+   * passed through {@code normalize}, which stores them in {@code rulesMap}.
+   *
+   * @param rulesMap     the map receiving the normalized rules; it also backs the returned grammar
+   * @param startSymbol  the start symbol of the grammar
+   * @param trim         whether newlines, tabs and repeated whitespace are collapsed before parsing
+   * @param parseStrings the parse tree strings to read
+   * @return a grammar over the symbols and rules found in {@code parseStrings}
+   */
+  public static ProbabilisticContextFreeGrammar parseGrammar(Map<Rule, Double> rulesMap, String startSymbol, boolean trim, String... parseStrings) {
+
+    Map<Rule, Double> rules = new HashMap<>();
+
+    Collection<String> nonTerminals = new HashSet<>();
+    Collection<String> terminals = new HashSet<>();
+
+    for (String parseTreeString : parseStrings) {
+
+      if (trim) {
+        parseTreeString = parseTreeString.replaceAll("\n", "").replaceAll("\t", "").replaceAll("\\s+", " ");
+      }
+
+      String toConsume = String.valueOf(parseTreeString);
+
+      // pass 1: lexical rules, NT -> terminal
+      Matcher m = terminalPattern.matcher(parseTreeString);
+      while (m.find()) {
+        String nt = m.group(1);
+        String t = m.group(2);
+        Rule key = new Rule(nt, t);
+        if (!rules.containsKey(key)) {
+          rules.put(key, 1d);
+          terminals.add(t);
+        }
+        toConsume = toConsume.replace(m.group(), nt);
+      }
+
+      // pass 2: repeatedly reduce the innermost non terminal nodes
+      while (toConsume.contains(" ") && !toConsume.trim().equals("( " + startSymbol + " )")) {
+        boolean reduced = false;
+        Matcher m2 = nonTerminalPattern.matcher(toConsume);
+        while (m2.find()) {
+          // A repeated capturing group only retains its last repetition, so the children are read
+          // from the whole match instead: "(NT NT1 ... NTn)" -> [NT, NT1, ..., NTn].
+          String node = m2.group();
+          String[] symbols = node.substring(1, node.length() - 1).split("\\s");
+          String nt = symbols[0];
+          String[] expansion = Arrays.copyOfRange(symbols, 1, symbols.length);
+
+          Rule key = new Rule(nt, expansion);
+          nonTerminals.addAll(Arrays.asList(expansion));
+          nonTerminals.add(key.getEntry());
+
+          if (!rules.containsKey(key)) {
+            rules.put(key, 1d);
+          }
+          toConsume = toConsume.replace(node, nt);
+          reduced = true;
+        }
+        if (!reduced) {
+          // nothing matched: the remainder cannot be reduced any further, and looping again
+          // over the unchanged string would never terminate
+          break;
+        }
+      }
+    }
+
+    // TODO : check/adjust rules to make them respect CNF
+    // TODO : adjust probabilities based on term frequencies
+    for (Map.Entry<Rule, Double> entry : rules.entrySet()) {
+      normalize(entry.getKey(), nonTerminals, terminals, rulesMap);
+    }
+
+    return new ProbabilisticContextFreeGrammar(nonTerminals, terminals, rulesMap, startSymbol, true);
+  }
+
+  /**
+   * Stores {@code rule} in {@code rulesMap}, rewriting it first when its right hand side does not
+   * have the expected size:
+   * <ul>
+   * <li>{@code nt -> terminal} and binary rules are stored unchanged</li>
+   * <li>{@code nt1 -> nt2} is stored as {@code nt1 -> nt2 E}</li>
+   * <li>{@code nt1 -> nt2 nt3 ... ntn} is stored as {@code nt1 -> nt2 GEN~k} plus the (recursively
+   * normalized) rule {@code GEN~k -> nt3 ... ntn}, where {@code GEN~k} is a newly generated non
+   * terminal that is added to {@code nonTerminals}</li>
+   * </ul>
+   * Every stored rule gets probability 1.
+   *
+   * @param rule         the rule to store
+   * @param nonTerminals the known non terminals; generated symbols are added to it
+   * @param terminals    the known terminals
+   * @param rulesMap     the map receiving the resulting rules
+   * @throws RuntimeException if a unary rule expands to an unknown symbol
+   */
+  private static void normalize(Rule rule, Collection<String> nonTerminals, Collection<String> terminals, Map<Rule, Double> rulesMap) {
+    String[] expansion = rule.getExpansion();
+    if (expansion.length == 1) {
+      if (!terminals.contains(expansion[0])) {
+        if (nonTerminals.contains(expansion[0])) {
+          // nt1 -> nt2 should be expanded in nt1 -> nt2,E
+          rulesMap.put(new Rule(rule.getEntry(), expansion[0], emptyRule.getEntry()), 1d);
+          // NOTE(review): this only (re)sets the empty rule when it is already present, so it never
+          // registers it on its own; a negated condition may have been intended - confirm.
+          if (rulesMap.containsKey(emptyRule)) {
+            rulesMap.put(emptyRule, 1d);
+          }
+        } else {
+          throw new RuntimeException("rule "+rule+" expands to neither a terminal or non terminal");
+        }
+      } else {
+        rulesMap.put(rule, 1d);
+      }
+    } else if (expansion.length > 2){
+      // nt1 -> nt2,nt3,...,ntn should be collapsed to a hierarchy of ntX -> ntY,ntZ rules
+      String nt2 = expansion[0];
+      int seed = nonTerminals.size();
+      String generatedNT = "GEN~" + seed;
+      nonTerminals.add(generatedNT);
+      Rule newRule = new Rule(rule.getEntry(), nt2, generatedNT);
+      rulesMap.put(newRule, 1d);
+      // copyOfRange's upper bound is exclusive: it has to be expansion.length to keep the last symbol
+      Rule chainedRule = new Rule(generatedNT, Arrays.copyOfRange(expansion, 1, expansion.length));
+      rulesMap.put(chainedRule, 1d);
+      normalize(chainedRule, nonTerminals, terminals, rulesMap);
+    } else {
+      rulesMap.put(rule, 1d);
+    }
+  }
 }

Added: opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adj.txt
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adj.txt?rev=1702594&view=auto
==============================================================================
--- opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adj.txt (added)
+++ opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adj.txt Sat Sep 
12 07:21:11 2015
@@ -0,0 +1,157 @@
+24/7
+24/365
+accurate
+adaptive
+alternative
+an expanded array of
+B2B
+B2C
+backend
+backward-compatible
+best-of-breed
+bleeding-edge
+bricks-and-clicks
+business
+clicks-and-mortar
+client-based
+client-centered
+client-centric
+client-focused
+collaborative
+compelling
+competitive
+cooperative
+corporate
+cost effective
+covalent
+cross functional
+cross-media
+cross-platform
+cross-unit
+customer directed
+customized
+cutting-edge
+distinctive
+distributed
+diverse
+dynamic
+e-business
+economically sound
+effective
+efficient
+elastic
+emerging
+empowered
+enabled
+end-to-end
+enterprise
+enterprise-wide
+equity invested
+error-free
+ethical
+excellent
+exceptional
+extensible
+extensive
+flexible
+focused
+frictionless
+front-end
+fully researched
+fully tested
+functional
+functionalized
+fungible
+future-proof
+global
+go forward
+goal-oriented
+granular
+high standards in
+high-payoff
+hyperscale
+high-quality
+highly efficient
+holistic
+impactful
+inexpensive
+innovative
+installed base
+integrated
+interactive
+interdependent
+intermandated
+interoperable
+intuitive
+just in time
+leading-edge
+leveraged
+long-term high-impact
+low-risk high-yield
+magnetic
+maintainable
+market positioning
+market-driven
+mission-critical
+multidisciplinary
+multifunctional
+multimedia based
+next-generation
+on-demand
+one-to-one
+open-source
+optimal
+orthogonal
+out-of-the-box
+pandemic
+parallel
+performance based
+plug-and-play
+premier
+premium
+principle-centered
+proactive
+process-centric
+professional
+progressive
+prospective
+quality
+real-time
+reliable
+resource sucking
+resource maximizing
+resource-leveling
+revolutionary
+robust
+scalable
+seamless
+stand-alone
+standardized
+standards compliant
+state of the art
+sticky
+strategic
+superior
+sustainable
+synergistic
+tactical
+team building
+team driven
+technically sound
+timely
+top-line
+transparent
+turnkey
+ubiquitous
+unique
+user-centric
+user friendly
+value-added
+vertical
+viral
+virtual
+visionary
+web-enabled
+wireless
+world-class
+worldwide
\ No newline at end of file

Added: opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adv.txt
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adv.txt?rev=1702594&view=auto
==============================================================================
--- opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adv.txt (added)
+++ opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/adv.txt Sat Sep 
12 07:21:11 2015
@@ -0,0 +1,34 @@
+appropriately
+assertively
+authoritatively
+collaboratively
+compellingly
+competently
+completely
+continually
+conveniently
+credibly
+distinctively
+dramatically
+dynamically
+efficiently
+energistically
+enthusiastically
+fungibly
+globally
+holisticly
+interactively
+intrinsically
+monotonectally
+objectively
+phosfluorescently
+proactively
+professionally
+progressively
+quickly
+rapidiously
+seamlessly
+synergistically
+uniquely
+
+

Added: opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/noun.txt
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/noun.txt?rev=1702594&view=auto
==============================================================================
--- opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/noun.txt (added)
+++ opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/noun.txt Sat 
Sep 12 07:21:11 2015
@@ -0,0 +1,89 @@
+action items
+alignments
+applications
+architectures
+bandwidth
+benefits
+best practices
+catalysts for change
+channels
+clouds
+collaboration and idea-sharing
+communities
+content
+convergence
+core competencies
+customer service
+data
+deliverables
+e-business
+e-commerce
+e-markets
+e-tailers
+e-services
+experiences
+expertise
+functionalities
+fungibility
+growth strategies
+human capital
+ideas
+imperatives
+infomediaries
+information
+infrastructures
+initiatives
+innovation
+intellectual capital
+interfaces
+internal or \"organic\" sources
+leadership
+leadership skills
+manufactured products
+markets
+materials
+meta-services
+methodologies
+methods of empowerment
+metrics
+mindshare
+models
+networks
+niches
+niche markets
+nosql
+opportunities
+\"outside the box\" thinking
+outsourcing
+paradigms
+partnerships
+platforms
+portals
+potentialities
+process improvements
+processes
+products
+quality vectors
+relationships
+resources
+results
+ROI
+scenarios
+schemas
+services
+solutions
+sources
+strategic theme areas
+storage
+supply chains
+synergy
+systems
+technologies
+technology
+testing procedures
+total linkage
+users
+value
+vortals
+web-readiness
+web services
\ No newline at end of file

Added: opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/verb.txt
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/verb.txt?rev=1702594&view=auto
==============================================================================
--- opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/verb.txt (added)
+++ opennlp/sandbox/nlp-utils/src/main/resources/opennlp/cfg/an/verb.txt Sat 
Sep 12 07:21:11 2015
@@ -0,0 +1,97 @@
+actualize
+administrate
+aggregate
+architect
+benchmark
+brand
+build
+cloudify
+communicate
+conceptualize
+coordinate
+create
+cultivate
+customize
+deliver
+deploy
+develop
+disintermediate
+disseminate
+drive
+embrace
+e-enable
+empower
+enable
+engage
+engineer
+enhance
+envisioneer
+evisculate
+evolve
+expedite
+exploit
+extend
+fabricate
+facilitate
+fashion
+formulate
+foster
+generate
+grow
+harness
+impact
+implement
+incentivize
+incubate
+initiate
+innovate
+integrate
+iterate
+leverage existing
+leverage other's
+maintain
+matrix
+maximize
+mesh
+monetize
+morph
+myocardinate
+negotiate
+network
+optimize
+orchestrate
+parallel task
+plagiarize
+pontificate
+predominate
+procrastinate
+productivate
+productize
+promote
+provide access to
+pursue
+recaptiualize
+reconceptualize
+redefine
+re-engineer
+reintermediate
+reinvent
+repurpose
+restore
+revolutionize
+scale
+seize
+simplify
+strategize
+streamline
+supply
+syndicate
+synergize
+synthesize
+target
+transform
+transition
+underwhelm
+unleash
+utilize
+visualize
+whiteboard
\ No newline at end of file


Reply via email to