Author: joern
Date: Mon Mar  9 19:57:53 2015
New Revision: 1665334

URL: http://svn.apache.org/r1665334
Log:
OPENNLP-763 Parser is now using the new methods of the POS Tagger and Chunker 
for training.

Added:
    
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
   (with props)
Modified:
    
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
    
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java
    
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java

Added: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java?rev=1665334&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java Mon Mar  9 19:57:53 2015
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.parser;
+
+import opennlp.tools.chunker.ChunkerContextGenerator;
+import opennlp.tools.chunker.ChunkerFactory;
+import opennlp.tools.chunker.ChunkerME;
+import opennlp.tools.ml.model.MaxentModel;
+import opennlp.tools.util.SequenceValidator;
+
+public class ParserChunkerFactory extends ChunkerFactory {
+
+  @Override
+  public ChunkerContextGenerator getContextGenerator() {
+    return new ChunkContextGenerator(ChunkerME.DEFAULT_BEAM_SIZE);
+  }
+  
+  @Override
+  public SequenceValidator<String> getSequenceValidator() {
+    
+    MaxentModel model = (MaxentModel) artifactProvider.getArtifact("chunker.model");
+    
+    String outcomes[] = new String[model.getNumOutcomes()];
+    for (int i = 0; i < outcomes.length; i++) {
+      outcomes[i] = model.getOutcome(i);
+    }
+    
+    return new ParserChunkerSequenceValidator(outcomes);
+  }
+  
+}

Propchange: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java?rev=1665334&r1=1665333&r2=1665334&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java Mon Mar  9 19:57:53 2015
@@ -20,7 +20,6 @@ package opennlp.tools.parser;
 import java.util.HashMap;
 import java.util.Map;
 
-import opennlp.tools.chunker.ChunkerModel;
 import opennlp.tools.parser.chunking.Parser;
 import opennlp.tools.util.SequenceValidator;
 
@@ -28,12 +27,12 @@ public class ParserChunkerSequenceValida
 
   private Map<String, String> continueStartMap;
 
-  public ParserChunkerSequenceValidator(ChunkerModel model) {
+  public ParserChunkerSequenceValidator(String outcomes[]) {
 
     continueStartMap =
-        new HashMap<String, String>(model.getChunkerModel().getNumOutcomes());
-    for (int oi=0, on = model.getChunkerModel().getNumOutcomes(); oi<on; oi++) {
-      String outcome = model.getChunkerModel().getOutcome(oi);
+        new HashMap<String, String>(outcomes.length);
+    for (int oi=0, on = outcomes.length; oi<on; oi++) {
+      String outcome = outcomes[oi];
       if (outcome.startsWith(Parser.CONT)){
         continueStartMap.put(outcome,Parser.START+outcome.substring(
             Parser.CONT.length()));

Modified: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java?rev=1665334&r1=1665333&r2=1665334&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java Mon Mar  9 19:57:53 2015
@@ -28,6 +28,7 @@ import opennlp.tools.chunker.Chunker;
 import opennlp.tools.chunker.ChunkerME;
 import opennlp.tools.chunker.ChunkerModel;
 import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.ml.BeamSearch;
 import opennlp.tools.ml.model.AbstractModel;
 import opennlp.tools.ml.model.Event;
 import opennlp.tools.ml.model.MaxentModel;
@@ -38,6 +39,7 @@ import opennlp.tools.parser.ChunkContext
 import opennlp.tools.parser.ChunkSampleStream;
 import opennlp.tools.parser.HeadRules;
 import opennlp.tools.parser.Parse;
+import opennlp.tools.parser.ParserChunkerFactory;
 import opennlp.tools.parser.ParserChunkerSequenceValidator;
 import opennlp.tools.parser.ParserEventTypeEnum;
 import opennlp.tools.parser.ParserModel;
@@ -45,6 +47,7 @@ import opennlp.tools.parser.ParserType;
 import opennlp.tools.parser.PosSampleStream;
 import opennlp.tools.postag.POSModel;
 import opennlp.tools.postag.POSTagger;
+import opennlp.tools.postag.POSTaggerFactory;
 import opennlp.tools.postag.POSTaggerME;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.Span;
@@ -74,11 +77,8 @@ public class Parser extends AbstractBott
 
   public Parser(ParserModel model, int beamSize, double advancePercentage) {
     this(model.getBuildModel(), model.getCheckModel(),
-        new POSTaggerME(model.getParserTaggerModel(), 10, 0),
-        new ChunkerME(model.getParserChunkerModel(),
-            ChunkerME.DEFAULT_BEAM_SIZE,
-            new ParserChunkerSequenceValidator(model.getParserChunkerModel()),
-            new ChunkContextGenerator(ChunkerME.DEFAULT_BEAM_SIZE)),
+        new POSTaggerME(model.getParserTaggerModel()),
+        new ChunkerME(model.getParserChunkerModel()),
             model.getHeadRules(), beamSize, advancePercentage);
   }
 
@@ -287,15 +287,21 @@ public class Parser extends AbstractBott
     parseSamples.reset();
 
     // tag
+    TrainingParameters posTaggerParams = mlParams.getParameters("tagger");
+        
+    if (!posTaggerParams.getSettings().containsKey(BeamSearch.BEAM_SIZE_PARAMETER)) {
+      mlParams.put("tagger", BeamSearch.BEAM_SIZE_PARAMETER,
+          Integer.toString(10));
+    }
+    
     POSModel posModel = POSTaggerME.train(languageCode, new PosSampleStream(parseSamples),
-        mlParams.getParameters("tagger"), null, null);
+        mlParams.getParameters("tagger"), new POSTaggerFactory());
 
     parseSamples.reset();
 
     // chunk
     ChunkerModel chunkModel = ChunkerME.train(languageCode,
-        new ChunkSampleStream(parseSamples),
-        new ChunkContextGenerator(), mlParams.getParameters("chunker"));
+        new ChunkSampleStream(parseSamples), mlParams.getParameters("chunker"), new ParserChunkerFactory());
 
     parseSamples.reset();
 

Modified: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java?rev=1665334&r1=1665333&r2=1665334&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java Mon Mar  9 19:57:53 2015
@@ -39,6 +39,7 @@ import opennlp.tools.parser.ChunkContext
 import opennlp.tools.parser.ChunkSampleStream;
 import opennlp.tools.parser.HeadRules;
 import opennlp.tools.parser.Parse;
+import opennlp.tools.parser.ParserChunkerFactory;
 import opennlp.tools.parser.ParserChunkerSequenceValidator;
 import opennlp.tools.parser.ParserEventTypeEnum;
 import opennlp.tools.parser.ParserModel;
@@ -46,6 +47,7 @@ import opennlp.tools.parser.ParserType;
 import opennlp.tools.parser.PosSampleStream;
 import opennlp.tools.postag.POSModel;
 import opennlp.tools.postag.POSTagger;
+import opennlp.tools.postag.POSTaggerFactory;
 import opennlp.tools.postag.POSTaggerME;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.TrainingParameters;
@@ -101,10 +103,7 @@ public class Parser extends AbstractBott
   public Parser(ParserModel model, int beamSize, double advancePercentage) {
     this(model.getBuildModel(), model.getAttachModel(), model.getCheckModel(),
         new POSTaggerME(model.getParserTaggerModel()),
-        new ChunkerME(model.getParserChunkerModel(),
-        ChunkerME.DEFAULT_BEAM_SIZE,
-        new ParserChunkerSequenceValidator(model.getParserChunkerModel()),
-        new ChunkContextGenerator(ChunkerME.DEFAULT_BEAM_SIZE)),
+        new ChunkerME(model.getParserChunkerModel()),
         model.getHeadRules(),
         beamSize, advancePercentage);
   }
@@ -445,13 +444,13 @@ public class Parser extends AbstractBott
 
     // tag
     POSModel posModel = POSTaggerME.train(languageCode, new PosSampleStream(
-        parseSamples), mlParams.getParameters("tagger"), null, null);
+        parseSamples), mlParams.getParameters("tagger"), new POSTaggerFactory());
 
     parseSamples.reset();
 
     // chunk
     ChunkerModel chunkModel = ChunkerME.train(languageCode, new ChunkSampleStream(
-        parseSamples), new ChunkContextGenerator(), mlParams.getParameters("chunker"));
+        parseSamples), mlParams.getParameters("chunker"), new ParserChunkerFactory());
 
     parseSamples.reset();
 


Reply via email to