Author: joern
Date: Mon Mar 9 19:57:53 2015
New Revision: 1665334
URL: http://svn.apache.org/r1665334
Log:
OPENNLP-763 Parser is now using the new methods of the POS Tagger and Chunker
for training.
Added:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
(with props)
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java
Added:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java?rev=1665334&view=auto
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
(added)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
Mon Mar 9 19:57:53 2015
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.parser;
+
+import opennlp.tools.chunker.ChunkerContextGenerator;
+import opennlp.tools.chunker.ChunkerFactory;
+import opennlp.tools.chunker.ChunkerME;
+import opennlp.tools.ml.model.MaxentModel;
+import opennlp.tools.util.SequenceValidator;
+
+public class ParserChunkerFactory extends ChunkerFactory {
+
+ @Override
+ public ChunkerContextGenerator getContextGenerator() {
+ return new ChunkContextGenerator(ChunkerME.DEFAULT_BEAM_SIZE);
+ }
+
+ @Override
+ public SequenceValidator<String> getSequenceValidator() {
+
+ MaxentModel model = (MaxentModel) artifactProvider.getArtifact("chunker.model");
+
+ String outcomes[] = new String[model.getNumOutcomes()];
+ for (int i = 0; i < outcomes.length; i++) {
+ outcomes[i] = model.getOutcome(i);
+ }
+
+ return new ParserChunkerSequenceValidator(outcomes);
+ }
+
+}
Propchange:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java?rev=1665334&r1=1665333&r2=1665334&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
Mon Mar 9 19:57:53 2015
@@ -20,7 +20,6 @@ package opennlp.tools.parser;
import java.util.HashMap;
import java.util.Map;
-import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.parser.chunking.Parser;
import opennlp.tools.util.SequenceValidator;
@@ -28,12 +27,12 @@ public class ParserChunkerSequenceValida
private Map<String, String> continueStartMap;
- public ParserChunkerSequenceValidator(ChunkerModel model) {
+ public ParserChunkerSequenceValidator(String outcomes[]) {
continueStartMap =
- new HashMap<String, String>(model.getChunkerModel().getNumOutcomes());
- for (int oi=0, on = model.getChunkerModel().getNumOutcomes(); oi<on; oi++) {
- String outcome = model.getChunkerModel().getOutcome(oi);
+ new HashMap<String, String>(outcomes.length);
+ for (int oi=0, on = outcomes.length; oi<on; oi++) {
+ String outcome = outcomes[oi];
if (outcome.startsWith(Parser.CONT)){
continueStartMap.put(outcome,Parser.START+outcome.substring(
Parser.CONT.length()));
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java?rev=1665334&r1=1665333&r2=1665334&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java
Mon Mar 9 19:57:53 2015
@@ -28,6 +28,7 @@ import opennlp.tools.chunker.Chunker;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.ml.BeamSearch;
import opennlp.tools.ml.model.AbstractModel;
import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.MaxentModel;
@@ -38,6 +39,7 @@ import opennlp.tools.parser.ChunkContext
import opennlp.tools.parser.ChunkSampleStream;
import opennlp.tools.parser.HeadRules;
import opennlp.tools.parser.Parse;
+import opennlp.tools.parser.ParserChunkerFactory;
import opennlp.tools.parser.ParserChunkerSequenceValidator;
import opennlp.tools.parser.ParserEventTypeEnum;
import opennlp.tools.parser.ParserModel;
@@ -45,6 +47,7 @@ import opennlp.tools.parser.ParserType;
import opennlp.tools.parser.PosSampleStream;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTagger;
+import opennlp.tools.postag.POSTaggerFactory;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
@@ -74,11 +77,8 @@ public class Parser extends AbstractBott
public Parser(ParserModel model, int beamSize, double advancePercentage) {
this(model.getBuildModel(), model.getCheckModel(),
- new POSTaggerME(model.getParserTaggerModel(), 10, 0),
- new ChunkerME(model.getParserChunkerModel(),
- ChunkerME.DEFAULT_BEAM_SIZE,
- new ParserChunkerSequenceValidator(model.getParserChunkerModel()),
- new ChunkContextGenerator(ChunkerME.DEFAULT_BEAM_SIZE)),
+ new POSTaggerME(model.getParserTaggerModel()),
+ new ChunkerME(model.getParserChunkerModel()),
model.getHeadRules(), beamSize, advancePercentage);
}
@@ -287,15 +287,21 @@ public class Parser extends AbstractBott
parseSamples.reset();
// tag
+ TrainingParameters posTaggerParams = mlParams.getParameters("tagger");
+
+ if (!posTaggerParams.getSettings().containsKey(BeamSearch.BEAM_SIZE_PARAMETER)) {
+ mlParams.put("tagger", BeamSearch.BEAM_SIZE_PARAMETER,
+ Integer.toString(10));
+ }
+
POSModel posModel = POSTaggerME.train(languageCode, new PosSampleStream(parseSamples),
- mlParams.getParameters("tagger"), null, null);
+ mlParams.getParameters("tagger"), new POSTaggerFactory());
parseSamples.reset();
// chunk
ChunkerModel chunkModel = ChunkerME.train(languageCode,
- new ChunkSampleStream(parseSamples),
- new ChunkContextGenerator(), mlParams.getParameters("chunker"));
+ new ChunkSampleStream(parseSamples), mlParams.getParameters("chunker"), new ParserChunkerFactory());
parseSamples.reset();
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java?rev=1665334&r1=1665333&r2=1665334&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java
Mon Mar 9 19:57:53 2015
@@ -39,6 +39,7 @@ import opennlp.tools.parser.ChunkContext
import opennlp.tools.parser.ChunkSampleStream;
import opennlp.tools.parser.HeadRules;
import opennlp.tools.parser.Parse;
+import opennlp.tools.parser.ParserChunkerFactory;
import opennlp.tools.parser.ParserChunkerSequenceValidator;
import opennlp.tools.parser.ParserEventTypeEnum;
import opennlp.tools.parser.ParserModel;
@@ -46,6 +47,7 @@ import opennlp.tools.parser.ParserType;
import opennlp.tools.parser.PosSampleStream;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTagger;
+import opennlp.tools.postag.POSTaggerFactory;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.TrainingParameters;
@@ -101,10 +103,7 @@ public class Parser extends AbstractBott
public Parser(ParserModel model, int beamSize, double advancePercentage) {
this(model.getBuildModel(), model.getAttachModel(), model.getCheckModel(),
new POSTaggerME(model.getParserTaggerModel()),
- new ChunkerME(model.getParserChunkerModel(),
- ChunkerME.DEFAULT_BEAM_SIZE,
- new ParserChunkerSequenceValidator(model.getParserChunkerModel()),
- new ChunkContextGenerator(ChunkerME.DEFAULT_BEAM_SIZE)),
+ new ChunkerME(model.getParserChunkerModel()),
model.getHeadRules(),
beamSize, advancePercentage);
}
@@ -445,13 +444,13 @@ public class Parser extends AbstractBott
// tag
POSModel posModel = POSTaggerME.train(languageCode, new PosSampleStream(
- parseSamples), mlParams.getParameters("tagger"), null, null);
+ parseSamples), mlParams.getParameters("tagger"), new POSTaggerFactory());
parseSamples.reset();
// chunk
ChunkerModel chunkModel = ChunkerME.train(languageCode, new ChunkSampleStream(
- parseSamples), new ChunkContextGenerator(), mlParams.getParameters("chunker"));
+ parseSamples), mlParams.getParameters("chunker"), new ParserChunkerFactory());
parseSamples.reset();