Adding sentiment analysis code to OpenNLP: OPENNLP-840
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/56321aab Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/56321aab Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/56321aab Branch: refs/heads/master Commit: 56321aab51a470cd2004b76fb1f5330881b943c1 Parents: 8d7e1c3 Author: Menshikova <[email protected]> Authored: Thu Jun 15 09:59:19 2017 -0700 Committer: Menshikova <[email protected]> Committed: Wed Jun 21 14:06:41 2017 -0700 ---------------------------------------------------------------------- .../sentiment/SentimentCrossValidatorTool.java | 126 ++++++++++ .../SentimentDetailedFMeasureListener.java | 43 ++++ .../SentimentEvaluationErrorListener.java | 65 +++++ .../sentiment/SentimentEvaluatorTool.java | 154 ++++++++++++ .../cmdline/sentiment/SentimentModelLoader.java | 51 ++++ .../cmdline/sentiment/SentimentTrainerTool.java | 115 +++++++++ .../formats/SentimentSampleStreamFactory.java | 83 +++++++ .../java/opennlp/tools/sentiment/Sentiment.java | 30 +++ .../sentiment/SentimentContextGenerator.java | 83 +++++++ .../sentiment/SentimentCrossValidator.java | 240 +++++++++++++++++++ .../sentiment/SentimentEvaluationMonitor.java | 28 +++ .../tools/sentiment/SentimentEvaluator.java | 67 ++++++ .../tools/sentiment/SentimentEventStream.java | 80 +++++++ .../tools/sentiment/SentimentFactory.java | 73 ++++++ .../opennlp/tools/sentiment/SentimentME.java | 163 +++++++++++++ .../opennlp/tools/sentiment/SentimentModel.java | 124 ++++++++++ .../tools/sentiment/SentimentSample.java | 92 +++++++ .../tools/sentiment/SentimentSampleStream.java | 76 ++++++ .../sentiment/SentimentSampleTypeFilter.java | 68 ++++++ .../tools/sentiment/AbstractSentimentTest.java | 77 ++++++ .../sentiment/SentimentCrossValidatorTest.java | 35 +++ .../tools/sentiment/SentimentEvaluatorTest.java | 89 +++++++ .../sentiment/SentimentEventStreamTest.java | 46 ++++ .../tools/sentiment/SentimentMETest.java | 107 +++++++++ 
.../tools/sentiment/SentimentSampleTest.java | 45 ++++ .../tools/sentiment/en-netflix-sentiment.bin | Bin 0 -> 465780 bytes .../tools/sentiment/en-stanford-sentiment.bin | Bin 0 -> 664663 bytes .../opennlp/tools/sentiment/ht-lg-model-raw.bin | Bin 0 -> 347428 bytes .../tools/sentiment/ht-sentiment-bin.bin | Bin 0 -> 82083 bytes .../tools/sentiment/ht-sentiment-categ.bin | Bin 0 -> 161961 bytes .../opennlp/tools/sentiment/sample_train_categ | 100 ++++++++ .../opennlp/tools/sentiment/sample_train_categ2 | 100 ++++++++ 32 files changed, 2360 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentCrossValidatorTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentCrossValidatorTool.java new file mode 100755 index 0000000..05035a4 --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentCrossValidatorTool.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.cmdline.sentiment; + +import java.io.IOException; +import java.util.LinkedList; +import java.util.List; + +import opennlp.tools.cmdline.AbstractCrossValidatorTool; +import opennlp.tools.cmdline.CmdLineUtil; +import opennlp.tools.cmdline.TerminateToolException; +import opennlp.tools.cmdline.params.BasicTrainingParams; +import opennlp.tools.cmdline.params.CVParams; +import opennlp.tools.cmdline.params.DetailedFMeasureEvaluatorParams; +import opennlp.tools.cmdline.sentiment.SentimentCrossValidatorTool.CVToolParams; +import opennlp.tools.sentiment.SentimentCrossValidator; +import opennlp.tools.sentiment.SentimentEvaluationMonitor; +import opennlp.tools.sentiment.SentimentFactory; +import opennlp.tools.sentiment.SentimentSample; +import opennlp.tools.util.eval.EvaluationMonitor; +import opennlp.tools.util.model.ModelUtil; + +/** + * Class for helping perform cross validation on the Sentiment Analysis Parser. 
+ */ +public class SentimentCrossValidatorTool + extends AbstractCrossValidatorTool<SentimentSample, CVToolParams> { + + /** + * Command line parameters accepted by the cross validator tool + */ + interface CVToolParams + extends BasicTrainingParams, CVParams, DetailedFMeasureEvaluatorParams { + + } + + /** + * Creates the tool for SentimentSample input and CVToolParams arguments + */ + public SentimentCrossValidatorTool() { + super(SentimentSample.class, CVToolParams.class); + } + + /** + * Returns the short description of the tool + * + * @return short description + */ + public String getShortDescription() { + return "K-fold cross validator for the learnable Sentiment Analysis Parser"; + } + + /** + * Loads the training parameters, runs the cross validation and prints the result + * + * @param format + * the format to be used + * @param args + * the arguments + */ + public void run(String format, String[] args) { + super.run(format, args); + + mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true); + if (mlParams == null) { + mlParams = ModelUtil.createDefaultTrainingParameters(); + } + + List<EvaluationMonitor<SentimentSample>> listeners = new LinkedList<EvaluationMonitor<SentimentSample>>(); + if (params.getMisclassified()) { + listeners.add(new SentimentEvaluationErrorListener()); + } + SentimentDetailedFMeasureListener detailedFListener = null; + if (params.getDetailedF()) { + detailedFListener = new SentimentDetailedFMeasureListener(); + listeners.add(detailedFListener); + } + + SentimentFactory sentimentFactory = new SentimentFactory(); + + SentimentCrossValidator validator; + try { + validator = new SentimentCrossValidator(params.getLang(), mlParams, + sentimentFactory, + listeners.toArray(new SentimentEvaluationMonitor[listeners.size()])); + validator.evaluate(sampleStream, params.getFolds()); + } catch (IOException e) { + throw new TerminateToolException(-1, + "IO error while reading training data or indexing data: " + + e.getMessage(), + e); + } finally { + try { + sampleStream.close(); + } catch (IOException e) { + // best-effort close: a failure here is deliberately ignored + } + } + + System.out.println("done"); + + 
System.out.println(); + + if (detailedFListener == null) { + System.out.println(validator.getFMeasure()); + } else { + System.out.println(detailedFListener.toString()); + } + } + +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentDetailedFMeasureListener.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentDetailedFMeasureListener.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentDetailedFMeasureListener.java new file mode 100755 index 0000000..c99fcfc --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentDetailedFMeasureListener.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package opennlp.tools.cmdline.sentiment; + +import opennlp.tools.cmdline.DetailedFMeasureListener; +import opennlp.tools.sentiment.SentimentEvaluationMonitor; +import opennlp.tools.sentiment.SentimentSample; +import opennlp.tools.util.Span; + +/** + * Detailed F-Measure listener that scores sentiment predictions per sentiment label + */ +public class SentimentDetailedFMeasureListener + extends DetailedFMeasureListener<SentimentSample> + implements SentimentEvaluationMonitor { + + /** + * Represents a sample as one span over its whole sentence, typed by its sentiment + * + * @param sample + * the sentiment sample to convert + * @return a single-element span array, never null (a null result cannot be iterated) + */ + @Override + protected Span[] asSpanArray(SentimentSample sample) { + return new Span[] {new Span(0, sample.getSentence().length, sample.getSentiment())}; + } +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentEvaluationErrorListener.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentEvaluationErrorListener.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentEvaluationErrorListener.java new file mode 100755 index 0000000..443eb14 --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentEvaluationErrorListener.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.cmdline.sentiment; + +import java.io.OutputStream; + +import opennlp.tools.cmdline.EvaluationErrorPrinter; +import opennlp.tools.sentiment.SentimentEvaluationMonitor; +import opennlp.tools.sentiment.SentimentSample; +import opennlp.tools.util.eval.EvaluationMonitor; + +/** + * Class for creating an evaluation error listener. + */ +public class SentimentEvaluationErrorListener + extends EvaluationErrorPrinter<SentimentSample> + implements EvaluationMonitor<SentimentSample>, + SentimentEvaluationMonitor { + + /** + * Creates a listener that reports errors to System.err + */ + public SentimentEvaluationErrorListener() { + super(System.err); + } + + /** + * Creates a listener that reports errors to the given output stream + */ + public SentimentEvaluationErrorListener(OutputStream outputStream) { + super(outputStream); + } + + /** + * Prints the error in case of a misclassification in the evaluator + * + * @param reference + * the sentiment sample reference to be used + * @param prediction + * the sentiment sample prediction + */ + @Override + public void missclassified(SentimentSample reference, + SentimentSample prediction) { + printError(new String[] { reference.getSentiment() }, + new String[] { prediction.getSentiment() }, reference, prediction, + reference.getSentence()); + } + +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentEvaluatorTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentEvaluatorTool.java 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentEvaluatorTool.java new file mode 100755 index 0000000..4a773ef --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentEvaluatorTool.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package opennlp.tools.cmdline.sentiment; + +import java.io.IOException; +import java.util.LinkedList; +import java.util.List; + +import opennlp.tools.cmdline.AbstractEvaluatorTool; +import opennlp.tools.cmdline.ArgumentParser.OptionalParameter; +import opennlp.tools.cmdline.ArgumentParser.ParameterDescription; +import opennlp.tools.cmdline.PerformanceMonitor; +import opennlp.tools.cmdline.TerminateToolException; +import opennlp.tools.cmdline.params.DetailedFMeasureEvaluatorParams; +import opennlp.tools.cmdline.params.EvaluatorParams; +import opennlp.tools.cmdline.sentiment.SentimentEvaluatorTool.EvalToolParams; +import opennlp.tools.sentiment.SentimentEvaluationMonitor; +import opennlp.tools.sentiment.SentimentEvaluator; +import opennlp.tools.sentiment.SentimentME; +import opennlp.tools.sentiment.SentimentModel; +import opennlp.tools.sentiment.SentimentSample; +import opennlp.tools.sentiment.SentimentSampleTypeFilter; +import opennlp.tools.util.ObjectStream; +import opennlp.tools.util.eval.EvaluationMonitor; + +/** + * Class for creating an evaluation tool for sentiment analysis. 
+ */ +public class SentimentEvaluatorTool + extends AbstractEvaluatorTool<SentimentSample, EvalToolParams> { + + /** + * Interface for parameters to be used in evaluation + */ + interface EvalToolParams + extends EvaluatorParams, DetailedFMeasureEvaluatorParams { + @OptionalParameter + @ParameterDescription(valueName = "types", description = "name types to use for evaluation") + String getNameTypes(); + } + + /** + * Creates the tool for SentimentSample input and EvalToolParams arguments + */ + public SentimentEvaluatorTool() { + super(SentimentSample.class, EvalToolParams.class); + } + + /** + * Returns the short description of the tool + * + * @return short description + */ + public String getShortDescription() { + return "Measures the performance of the Sentiment model with the reference data"; + } + + /** + * Loads the model, evaluates it on the sample stream and prints the result + * + * @param format + * the format to be used + * @param args + * the arguments + */ + public void run(String format, String[] args) { + super.run(format, args); + + SentimentModel model = new SentimentModelLoader().load(params.getModel()); + // TODO: check EvalToolParams --> getNameTypes() + + List<EvaluationMonitor<SentimentSample>> listeners = new LinkedList<EvaluationMonitor<SentimentSample>>(); + if (params.getMisclassified()) { + listeners.add(new SentimentEvaluationErrorListener()); + } + SentimentDetailedFMeasureListener detailedFListener = null; + if (params.getDetailedF()) { + detailedFListener = new SentimentDetailedFMeasureListener(); + listeners.add(detailedFListener); + } + + if (params.getNameTypes() != null) { + String[] nameTypes = params.getNameTypes().split(","); + sampleStream = new SentimentSampleTypeFilter(nameTypes, sampleStream); + } + + SentimentEvaluator evaluator = new SentimentEvaluator( + new SentimentME(model), + listeners.toArray(new SentimentEvaluationMonitor[listeners.size()])); + + final PerformanceMonitor monitor = new PerformanceMonitor("sent"); + + ObjectStream<SentimentSample> measuredSampleStream = new ObjectStream<SentimentSample>() { + + public 
SentimentSample read() throws IOException { + SentimentSample sample = sampleStream.read(); + if (sample != null) { + monitor.incrementCounter(); + } + return sample; + } + + public void reset() throws IOException { + sampleStream.reset(); + } + + public void close() throws IOException { + sampleStream.close(); + } + }; + + monitor.startAndPrintThroughput(); + + try { + evaluator.evaluate(measuredSampleStream); + } catch (IOException e) { + System.err.println("failed"); + throw new TerminateToolException(-1, + "IO error while reading test data: " + e.getMessage(), e); + } finally { + try { + measuredSampleStream.close(); + } catch (IOException e) { + // best-effort close: a failure here is deliberately ignored + } + } + + monitor.stopAndPrintFinalResult(); + + System.out.println(); + + if (detailedFListener == null) { + System.out.println(evaluator.getFMeasure()); + } else { + System.out.println(detailedFListener.toString()); + } + } + +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentModelLoader.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentModelLoader.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentModelLoader.java new file mode 100755 index 0000000..8cf2874 --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentModelLoader.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.cmdline.sentiment; + +import java.io.IOException; +import java.io.InputStream; + +import opennlp.tools.cmdline.ModelLoader; +import opennlp.tools.sentiment.SentimentModel; +import opennlp.tools.util.InvalidFormatException; + +/** + * Class for loading a sentiment model. + */ +public class SentimentModelLoader extends ModelLoader<SentimentModel> { + + /** + * Creates the loader, passing the model name "Sentiment" to ModelLoader + */ + public SentimentModelLoader() { + super("Sentiment"); + } + + /** + * Builds a SentimentModel from the given input stream + * + * @param modelIn + * the input stream to read the model from + * @return the model + */ + @Override + protected SentimentModel loadModel(InputStream modelIn) + throws IOException, InvalidFormatException { + return new SentimentModel(modelIn); + } +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentTrainerTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentTrainerTool.java new file mode 100755 index 0000000..dd6ac44 --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentTrainerTool.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.cmdline.sentiment; + +import java.io.File; +import java.io.IOException; + +import opennlp.tools.cmdline.AbstractTrainerTool; +import opennlp.tools.cmdline.CLI; +import opennlp.tools.cmdline.CmdLineUtil; +import opennlp.tools.cmdline.TerminateToolException; +import opennlp.tools.cmdline.params.TrainingToolParams; +import opennlp.tools.sentiment.Sentiment; +import opennlp.tools.sentiment.SentimentFactory; +import opennlp.tools.sentiment.SentimentME; +import opennlp.tools.sentiment.SentimentModel; +import opennlp.tools.sentiment.SentimentSample; +import opennlp.tools.util.model.ModelUtil; + +/** + * Class for helping train a sentiment analysis model. 
+ */ +public class SentimentTrainerTool + extends AbstractTrainerTool<SentimentSample, TrainingToolParams> { + + /** + * Creates the tool for SentimentSample input and TrainingToolParams arguments + */ + public SentimentTrainerTool() { + super(SentimentSample.class, TrainingToolParams.class); + } + + /** + * Prints the help if no arguments are given; otherwise trains and writes the model + * + * @param format + * the format to be used + * @param args + * the arguments + */ + @Override + public void run(String format, String[] args) { + super.run(format, args); + if (0 == args.length) { + System.out.println(getHelp()); + } else { + + mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false); + if (mlParams == null) { + mlParams = ModelUtil.createDefaultTrainingParameters(); + } + + File modelOutFile = params.getModel(); + + CmdLineUtil.checkOutputFile("sentiment analysis model", modelOutFile); + + SentimentModel model; + try { + SentimentFactory factory = new SentimentFactory(); + Sentiment sentiment = new SentimentME(params.getLang(), mlParams, factory); + model = sentiment.train(sampleStream); + } catch (IOException e) { + throw new TerminateToolException(-1, + "IO error while reading training data or indexing data: " + + e.getMessage(), + e); + } + finally { + try { + sampleStream.close(); + } catch (IOException e) { + // best-effort close: a failure here is deliberately ignored + } + } + + CmdLineUtil.writeModel("sentiment analysis", modelOutFile, model); + } + } + + /** + * Returns the help message + * + * @return the message + */ + @Override + public String getHelp() { + return "Usage: " + CLI.CMD + " " + getName() + " model < documents"; + } + + /** + * Returns the short description of the tool + * + * @return the description + */ + @Override + public String getShortDescription() { + return "learnable sentiment analysis"; + } + +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/formats/SentimentSampleStreamFactory.java ---------------------------------------------------------------------- diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/SentimentSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/SentimentSampleStreamFactory.java new file mode 100644 index 0000000..3396740 --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/SentimentSampleStreamFactory.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.formats; + +import java.io.IOException; + +import opennlp.tools.cmdline.ArgumentParser; +import opennlp.tools.cmdline.CmdLineUtil; +import opennlp.tools.cmdline.StreamFactoryRegistry; +import opennlp.tools.cmdline.params.BasicFormatParams; +import opennlp.tools.sentiment.SentimentSample; +import opennlp.tools.sentiment.SentimentSampleStream; +import opennlp.tools.util.InputStreamFactory; +import opennlp.tools.util.ObjectStream; +import opennlp.tools.util.PlainTextByLineStream; + +/** + * Class for creating a sample stream factory for sentiment analysis. 
+ */ +public class SentimentSampleStreamFactory + extends AbstractSampleStreamFactory<SentimentSample> { + + /** + * Creates the factory for the given parameter class + * + * @param params + * the parameter class handed to the superclass + */ + protected <P> SentimentSampleStreamFactory(Class<P> params) { + super(params); + } + + /** + * Creates a sentiment sample stream over the lines of the data file given in the arguments + * + * @param args + * the arguments, parsed as BasicFormatParams + * @return the SentimentSample stream + */ + @Override + public ObjectStream<SentimentSample> create(String[] args) { + BasicFormatParams params = ArgumentParser.parse(args, + BasicFormatParams.class); + + CmdLineUtil.checkInputFile("Data", params.getData()); + InputStreamFactory sampleDataIn = CmdLineUtil + .createInputStreamFactory(params.getData()); + ObjectStream<String> lineStream = null; + try { + lineStream = new PlainTextByLineStream(sampleDataIn, + params.getEncoding()); + } catch (IOException ex) { + CmdLineUtil.handleCreateObjectStreamError(ex); + } + + return new SentimentSampleStream(lineStream); + } + + /** + * Registers this factory for SentimentSample under the default format + */ + public static void registerFactory() { + StreamFactoryRegistry.registerFactory(SentimentSample.class, + StreamFactoryRegistry.DEFAULT_FORMAT, + new SentimentSampleStreamFactory(BasicFormatParams.class)); + } + +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/Sentiment.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/Sentiment.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/Sentiment.java new file mode 100755 index 0000000..ac219a9 --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/Sentiment.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.sentiment; + +import java.io.IOException; + +import opennlp.tools.util.ObjectStream; + +public interface Sentiment { + // NOTE(review): presumably returns the sentiment label predicted for the tokens - confirm + String predict(String[] tokens); + // Trains on the given sample stream and returns the resulting model + SentimentModel train(ObjectStream<SentimentSample> samples) + throws IOException; +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentContextGenerator.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentContextGenerator.java new file mode 100755 index 0000000..4185747 --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentContextGenerator.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.sentiment; + +import opennlp.tools.util.BeamSearchContextGenerator; +import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator; + +/** + * Class for using a Context Generator for Sentiment Analysis. + */ +public class SentimentContextGenerator + implements BeamSearchContextGenerator<String> { + + private AdaptiveFeatureGenerator[] featureGenerators; + + public SentimentContextGenerator() { + this(new AdaptiveFeatureGenerator[0]); + } + + public SentimentContextGenerator( + AdaptiveFeatureGenerator[] featureGenerators) { + this.featureGenerators = featureGenerators; + } + + /** + * Returns the given text unchanged as the context + * + * @param text + * the given text to be returned as context + * @return the text (the context) + */ + public String[] getContext(String[] text) { + return text; + } + + /** + * Returns an empty context; none of the arguments are used + * + * @param index + * the index of the context + * @param sequence + * String sequence given + * @param priorDecisions + * decisions given earlier + * @param additionalContext + * any additional context + * @return an empty array + */ + @Override + public String[] getContext(int index, String[] sequence, + String[] priorDecisions, Object[] additionalContext) { + return new String[] {}; + } + + public void updateAdaptiveData(String[] tokens, String[] outcomes) { + + if (tokens != null && outcomes != null + && tokens.length != outcomes.length) { + throw new IllegalArgumentException( + "The tokens and outcome arrays MUST have the same size!"); + } + + for (AdaptiveFeatureGenerator featureGenerator : featureGenerators) { + 
featureGenerator.updateAdaptiveData(tokens, outcomes); + } + } + +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentCrossValidator.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentCrossValidator.java new file mode 100755 index 0000000..19af35a --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentCrossValidator.java @@ -0,0 +1,240 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.sentiment; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +import opennlp.tools.util.FilterObjectStream; +import opennlp.tools.util.ObjectStream; +import opennlp.tools.util.TrainingParameters; +import opennlp.tools.util.eval.CrossValidationPartitioner; +import opennlp.tools.util.eval.FMeasure; + +/** + * Class for performing cross validation on the Sentiment Analysis Parser. 
+ */ +public class SentimentCrossValidator { + + /** + * Class for creating a document sample + */ + private class DocumentSample { + + private SentimentSample[] samples; + + /** + * Constructor + */ + DocumentSample(SentimentSample[] samples) { + this.samples = samples; + } + + /** + * Returns the sentiment samples that make up this document + * + * @return the samples + */ + private SentimentSample[] getSamples() { + return samples; + } + } + + /** + * Reads Sentiment Samples to group them as a document based on the clear + * adaptive data flag. + */ + private class SentimentToDocumentSampleStream + extends FilterObjectStream<SentimentSample, DocumentSample> { + + private SentimentSample beginSample; + + /** + * Constructor + */ + protected SentimentToDocumentSampleStream( + ObjectStream<SentimentSample> samples) { + super(samples); + } + + /** + * Reads Sentiment Samples to group them as a document + * + * @return the resulting DocumentSample + */ + public DocumentSample read() throws IOException { + + List<SentimentSample> document = new ArrayList<SentimentSample>(); + + if (beginSample == null) { + // Assume that the clear flag is set + beginSample = samples.read(); + } + + // Underlying stream is exhausted! + if (beginSample == null) { + return null; + } + + document.add(beginSample); + + SentimentSample sample; + while ((sample = samples.read()) != null) { + + if (sample.isClearAdaptiveDataSet()) { + beginSample = sample; + break; + } + + document.add(sample); + } + + // Underlying stream is exhausted, + // next call must return null + if (sample == null) { + beginSample = null; + } + + return new DocumentSample( + document.toArray(new SentimentSample[document.size()])); + } + + /** + * Resets the underlying stream and clears the buffered begin sample + * + * @throws IOException if the underlying stream cannot be reset + */ + @Override + public void reset() throws IOException, UnsupportedOperationException { + super.reset(); + beginSample = null; + } + } + + /** + * Splits DocumentSample into SentimentSamples. 
+ */ + private class DocumentToSentimentSampleStream + extends FilterObjectStream<DocumentSample, SentimentSample> { + + /** + * Constructor + */ + protected DocumentToSentimentSampleStream( + ObjectStream<DocumentSample> samples) { + super(samples); + } + + private Iterator<SentimentSample> documentSamples = Collections + .<SentimentSample>emptyList().iterator(); + + /** + * Reads the next SentimentSample from the current DocumentSample + * + * @return the next SentimentSample, or null if the stream is exhausted + */ + public SentimentSample read() throws IOException { + + // Note: Empty document samples should be skipped + + if (documentSamples.hasNext()) { + return documentSamples.next(); + } else { + DocumentSample docSample = samples.read(); + + if (docSample != null) { + documentSamples = Arrays.asList(docSample.getSamples()).iterator(); + + return read(); + } else { + return null; + } + } + } + } + + private final String languageCode; + private final TrainingParameters params; + private SentimentEvaluationMonitor[] listeners; + + private SentimentFactory factory; + private FMeasure fmeasure = new FMeasure(); + + /** + * Constructor + */ + public SentimentCrossValidator(String lang, TrainingParameters params, + SentimentFactory factory, SentimentEvaluationMonitor[] monitors) { + + this.languageCode = lang; + this.factory = factory; + this.params = params; + this.listeners = monitors; + } + + /** + * Performs evaluation + * + * @param samples + * stream of SentimentSamples + * @param nFolds + * the number of folds to be used in cross validation + */ + public void evaluate(ObjectStream<SentimentSample> samples, int nFolds) + throws IOException { + + // Note: The sentiment samples need to be grouped on a document basis. 
+ + CrossValidationPartitioner<DocumentSample> partitioner = new CrossValidationPartitioner<DocumentSample>( + new SentimentToDocumentSampleStream(samples), nFolds); + + Sentiment sentiment = new SentimentME(languageCode, params, factory); + + while (partitioner.hasNext()) { + + CrossValidationPartitioner.TrainingSampleStream<DocumentSample> trainingSampleStream = partitioner + .next(); + + sentiment + .train(new DocumentToSentimentSampleStream(trainingSampleStream)); + + // do testing + SentimentEvaluator evaluator = new SentimentEvaluator(sentiment, + listeners); + + evaluator.evaluate(new DocumentToSentimentSampleStream( + trainingSampleStream.getTestSampleStream())); + + fmeasure.mergeInto(evaluator.getFMeasure()); + } + } + + /** + * Returns the F-Measure + * + * @return the F-Measure + */ + public FMeasure getFMeasure() { + return fmeasure; + } + +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEvaluationMonitor.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEvaluationMonitor.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEvaluationMonitor.java new file mode 100755 index 0000000..ab503f6 --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEvaluationMonitor.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.sentiment; + +import opennlp.tools.util.eval.EvaluationMonitor; + +/** + * Evaluation Monitor to be used by the evaluator + */ +public interface SentimentEvaluationMonitor + extends EvaluationMonitor<SentimentSample> { + +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEvaluator.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEvaluator.java new file mode 100755 index 0000000..8ece791 --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEvaluator.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.sentiment; + +import opennlp.tools.util.eval.Evaluator; +import opennlp.tools.util.eval.FMeasure; + +/** + * Class for performing evaluation on the Sentiment Analysis Parser. + */ +public class SentimentEvaluator extends Evaluator<SentimentSample> { + + private FMeasure fmeasure = new FMeasure(); + + private Sentiment sentiment; + + /** + * Constructor + */ + public SentimentEvaluator(Sentiment sentiment, + SentimentEvaluationMonitor... listeners) { + super(listeners); + this.sentiment = sentiment; + } + + /** + * Predicts the sentiment of the reference sample and updates the F-Measure + * + * @param reference + * the reference to the SentimentSample to be processed + * @return a new sample holding the predicted sentiment and the same sentence + */ + @Override + protected SentimentSample processSample(SentimentSample reference) { + String prediction = sentiment.predict(reference.getSentence()); + String label = reference.getSentiment(); + + fmeasure.updateScores(new String[] { label }, new String[] { prediction }); + + return new SentimentSample(prediction, reference.getSentence()); + } + + /** + * Returns the F-Measure + * + * @return the F-Measure + */ + public FMeasure getFMeasure() { + return fmeasure; + } + +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEventStream.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEventStream.java new file mode 100755 index 0000000..8043460 --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEventStream.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.sentiment; + +import java.util.Iterator; + +import opennlp.tools.ml.model.Event; +import opennlp.tools.util.AbstractEventStream; +import opennlp.tools.util.ObjectStream; + +/** + * Class for creating events for Sentiment Analysis that is later sent to + * MaxEnt. + */ +public class SentimentEventStream extends AbstractEventStream<SentimentSample> { + + private SentimentContextGenerator contextGenerator; + + /** + * Initializes the event stream. + * + * @param samples + * the sentiment samples to be used + * @param createContextGenerator + * the context generator to be used + */ + public SentimentEventStream(ObjectStream<SentimentSample> samples, + SentimentContextGenerator createContextGenerator) { + super(samples); + contextGenerator = createContextGenerator; + } + + /** + * Creates events. 
+ * + * @param sample + * the sentiment sample to be used + * @return event iterator + */ + @Override + protected Iterator<Event> createEvents(final SentimentSample sample) { + + return new Iterator<Event>() { + + private boolean isVirgin = true; + + public boolean hasNext() { + return isVirgin; + } + + public Event next() { + + isVirgin = false; + + return new Event(sample.getSentiment(), + contextGenerator.getContext(sample.getSentence())); + } + + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentFactory.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentFactory.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentFactory.java new file mode 100755 index 0000000..9c284e4 --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentFactory.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package opennlp.tools.sentiment; + +import opennlp.tools.tokenize.Tokenizer; +import opennlp.tools.tokenize.WhitespaceTokenizer; +import opennlp.tools.util.BaseToolFactory; +import opennlp.tools.util.InvalidFormatException; +import opennlp.tools.util.ext.ExtensionLoader; + +/** + * Class for creating sentiment factories for training. + */ +public class SentimentFactory extends BaseToolFactory { + + private static final String TOKENIZER_NAME = "sentiment.tokenizer"; + + private Tokenizer tokenizer; + + /** + * Validates the artifact map --> nothing to validate. + */ + @Override + public void validateArtifactMap() throws InvalidFormatException { + // nothing to validate + } + + /** + * Creates a new context generator. + * + * @return a context generator for Sentiment Analysis + */ + public SentimentContextGenerator createContextGenerator() { + return new SentimentContextGenerator(); + } + + /** + * Returns the tokenizer + * + * @return the tokenizer + */ + public Tokenizer getTokenizer() { + if (this.tokenizer == null) { + if (artifactProvider != null) { + String className = artifactProvider.getManifestProperty(TOKENIZER_NAME); + if (className != null) { + this.tokenizer = ExtensionLoader.instantiateExtension(Tokenizer.class, + className); + } + } + if (this.tokenizer == null) { // could not load using artifact provider + this.tokenizer = WhitespaceTokenizer.INSTANCE; + } + } + return tokenizer; + } + +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentME.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentME.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentME.java new file mode 100755 index 0000000..38c7ac9 --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentME.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) 
under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.sentiment; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +import opennlp.tools.ml.EventTrainer; +import opennlp.tools.ml.TrainerFactory; +import opennlp.tools.ml.model.Event; +import opennlp.tools.ml.model.MaxentModel; +import opennlp.tools.ml.model.SequenceClassificationModel; +import opennlp.tools.util.ObjectStream; +import opennlp.tools.util.Sequence; +import opennlp.tools.util.Span; +import opennlp.tools.util.TrainingParameters; + +/** + * Class for creating a maximum-entropy-based Sentiment Analysis model. + */ +public class SentimentME implements Sentiment { + + public static final int DEFAULT_BEAM_SIZE = 3; + + protected SentimentContextGenerator contextGenerator; + private Sequence bestSequence; + protected SequenceClassificationModel<String> model; + private SentimentFactory factory; + private MaxentModel maxentModel; + + private String lang; + private TrainingParameters params; + + /** + * Constructor, initializes. 
+ * + * @param sentModel + * sentiment analysis model + */ + public SentimentME(SentimentModel sentModel) { + model = sentModel.getSentimentModel(); + maxentModel = sentModel.getMaxentModel(); + factory = sentModel.getFactory(); + contextGenerator = factory.createContextGenerator(); + } + + public SentimentME(String lang, TrainingParameters params, + SentimentFactory factory) { + this.lang = Objects.requireNonNull(lang, "lang must be provided"); + this.params = Objects.requireNonNull(params, "params must be provided"); + this.factory = Objects.requireNonNull(factory, "factory must be provided"); + contextGenerator = factory.createContextGenerator(); + } + + /** + * Trains a Sentiment Analysis model and keeps it in this instance. + * + * The language code, the training parameters and the factory are + * not passed to this method; the values given to the constructor + * are used. + * + * @param samples + * the sentiment samples to be used + * @return a Sentiment Analysis model + * @throws IOException + * if training on the sample stream fails + */ + public SentimentModel train(ObjectStream<SentimentSample> samples) + throws IOException { + Map<String, String> entries = new HashMap<String, String>(); + ObjectStream<Event> eventStream = new SentimentEventStream(samples, + contextGenerator); + EventTrainer trainer = TrainerFactory.getEventTrainer(params, entries); + maxentModel = trainer.train(eventStream); + Map<String, String> manifestInfoEntries = new HashMap<String, String>(); + SentimentModel sentimentModel = new SentimentModel(lang, maxentModel, + manifestInfoEntries, factory); + model = sentimentModel.getSentimentModel(); + return sentimentModel; + } + + /** + * Makes a sentiment prediction + * + * @param tokens + * the tokens to be analyzed for its sentiment + * @return the predicted sentiment + */ + @Override + public String predict(String[] tokens) { + if (tokens == null || tokens.length == 0) { + throw new IllegalArgumentException("Tokens must be not empty"); + } + double[] prob = probabilities(tokens); + return 
getBestSentiment(prob); + } + + /** + * Returns the best chosen sentiment for the text predicted on + * + * @param outcome + * the outcome + * @return the best sentiment + */ + public String getBestSentiment(double[] outcome) { + return maxentModel.getBestOutcome(outcome); + } + + /** + * Returns the analysis probabilities + * + * @param text + * the text to categorize + */ + public double[] probabilities(String[] text) { + return maxentModel.eval(contextGenerator.getContext(text)); + } + + /** + * Returns an array of probabilities for each of the specified spans which is + * the arithmetic mean of the probabilities for each of the outcomes which + * make up the span. + * + * @param spans + * The spans of the sentiments for which probabilities are desired. + * @return an array of probabilities for each of the specified spans. + */ + public double[] probs(Span[] spans) { + double[] sprobs = new double[spans.length]; + double[] probs = bestSequence.getProbs(); + + for (int si = 0; si < spans.length; si++) { + double p = 0; + + for (int oi = spans[si].getStart(); oi < spans[si].getEnd(); oi++) { + p += probs[oi]; + } + + p /= spans[si].length(); + sprobs[si] = p; + } + + return sprobs; + } + +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentModel.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentModel.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentModel.java new file mode 100755 index 0000000..924148d --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentModel.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.sentiment; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.util.Map; +import java.util.Properties; + +import opennlp.tools.ml.BeamSearch; +import opennlp.tools.ml.model.MaxentModel; +import opennlp.tools.ml.model.SequenceClassificationModel; +import opennlp.tools.util.InvalidFormatException; +import opennlp.tools.util.model.BaseModel; + +/** + * Class for the basis of the Sentiment Analysis model. + */ +public class SentimentModel extends BaseModel { + + private static final String COMPONENT_NAME = "SentimentME"; + private static final String SENTIMENT_MODEL_ENTRY_NAME = "sentiment.model"; + + /** + * Initializes the Sentiment Analysis model. + * + * @param languageCode + * the code for the language of the text, e.g. "en" + * @param sentimentModel + * a MaxEnt sentiment model + * @param manifestInfoEntries + * additional information in the manifest + * @param factory + * a Sentiment Analysis factory + */ + public SentimentModel(String languageCode, MaxentModel sentimentModel, + Map<String, String> manifestInfoEntries, SentimentFactory factory) { + super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory); + artifactMap.put(SENTIMENT_MODEL_ENTRY_NAME, sentimentModel); + checkArtifactMap(); + } + + /** + * Initializes the Sentiment Analysis model. 
+ * + * @param modelURL + * the URL to a file required for the model + */ + public SentimentModel(URL modelURL) + throws IOException, InvalidFormatException { + super(COMPONENT_NAME, modelURL); + } + + /** + * Initializes the Sentiment Analysis model. + * + * @param file + * the file required for the model + */ + public SentimentModel(File file) throws InvalidFormatException, IOException { + super(COMPONENT_NAME, file); + } + + public SentimentModel(InputStream modelIn) + throws InvalidFormatException, IOException { + super(COMPONENT_NAME, modelIn); + } + + /** + * Return the model + * + * @return the model + */ + public SequenceClassificationModel<String> getSentimentModel() { + Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY); + + String beamSizeString = manifest + .getProperty(BeamSearch.BEAM_SIZE_PARAMETER); + + int beamSize = SentimentME.DEFAULT_BEAM_SIZE; + if (beamSizeString != null) { + beamSize = Integer.parseInt(beamSizeString); + } + + return new BeamSearch<>(beamSize, + (MaxentModel) artifactMap.get(SENTIMENT_MODEL_ENTRY_NAME)); + } + + /** + * Returns the sentiment factory + * + * @return the sentiment factory for the model + */ + public SentimentFactory getFactory() { + return (SentimentFactory) this.toolFactory; + } + + /** + * Returns the MaxEntropy model + * + * @return the MaxEnt model + */ + public MaxentModel getMaxentModel() { + return (MaxentModel) artifactMap.get(SENTIMENT_MODEL_ENTRY_NAME); + } + +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSample.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSample.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSample.java new file mode 100755 index 0000000..c0c4b20 --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSample.java @@ -0,0 +1,92 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.sentiment; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Objects; + +/** + * Class for holding text used for sentiment analysis. + */ +public class SentimentSample { + + private final String sentiment; + private final List<String> sentence; + private final boolean isClearAdaptiveData; + private final String id = null; + + /** + * Initializes the current instance. 
+ * + * @param sentiment + * training sentiment + * @param sentence + * training sentence + */ + public SentimentSample(String sentiment, String[] sentence) { + this(sentiment, sentence, true); + } + + public SentimentSample(String sentiment, String[] sentence, + boolean clearAdaptiveData) { + this.sentiment = Objects.requireNonNull(sentiment, + "sentiment must not be null"); + Objects.requireNonNull(sentence, "sentence must not be null"); + this.sentence = Collections.unmodifiableList(Arrays.asList(sentence)); + this.isClearAdaptiveData = clearAdaptiveData; + } + + /** + * Returns the sentiment + * + * @return the sentiment + */ + public String getSentiment() { + return sentiment; + } + + /** + * Returns the sentence used + * + * @return the sentence + */ + public String[] getSentence() { + return sentence.toArray(new String[0]); + } + + /** + * Returns the id + * + * @return the id + */ + public String getId() { + return id; + } + + /** + * Returns the value of isClearAdaptiveData + * + * @return true or false + */ + public boolean isClearAdaptiveDataSet() { + return isClearAdaptiveData; + } + +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSampleStream.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSampleStream.java new file mode 100755 index 0000000..8dac2ee --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSampleStream.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.sentiment; + +import java.io.IOException; + +import opennlp.tools.tokenize.WhitespaceTokenizer; +import opennlp.tools.util.FilterObjectStream; +import opennlp.tools.util.ObjectStream; + +/** + * Class for converting Strings through Data Stream to SentimentSample using + * tokenised text. + */ +public class SentimentSampleStream + extends FilterObjectStream<String, SentimentSample> { + + /** + * Initializes the sample stream. 
+ * + * @param samples + * the sentiment samples to be used + */ + public SentimentSampleStream(ObjectStream<String> samples) { + super(samples); + } + + /** + * Reads the text + * + * @return a ready-to-be-trained SentimentSample object + */ + @Override + public SentimentSample read() throws IOException { + String sentence = samples.read(); + + if (sentence != null) { + + // Whitespace tokenize entire string + String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(sentence); + + SentimentSample sample; + + if (tokens.length > 1) { + String sentiment = tokens[0]; + String[] sentTokens = new String[tokens.length - 1]; + System.arraycopy(tokens, 1, sentTokens, 0, tokens.length - 1); + + sample = new SentimentSample(sentiment, sentTokens); + } else { + throw new IOException( + "Empty lines, or lines with only a category string are not allowed!"); + } + + return sample; + } + + return null; + } + +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSampleTypeFilter.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSampleTypeFilter.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSampleTypeFilter.java new file mode 100755 index 0000000..68e7ecc --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSampleTypeFilter.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.sentiment; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import opennlp.tools.util.FilterObjectStream; +import opennlp.tools.util.ObjectStream; + +/** + * Class for creating a type filter + */ +public class SentimentSampleTypeFilter + extends FilterObjectStream<SentimentSample, SentimentSample> { + + private final Set<String> types; + + /** + * Constructor + */ + public SentimentSampleTypeFilter(String[] types, + ObjectStream<SentimentSample> samples) { + super(samples); + this.types = Collections + .unmodifiableSet(new HashSet<String>(Arrays.asList(types))); + } + + /** + * Constructor + */ + public SentimentSampleTypeFilter(Set<String> types, + ObjectStream<SentimentSample> samples) { + super(samples); + this.types = Collections.unmodifiableSet(new HashSet<String>(types)); + } + + /** + * Reads and returns sentiment samples. 
+ * + * @return the sentiment sample read + */ + @Override + public SentimentSample read() throws IOException { + SentimentSample sample = samples.read(); + return sample; + + } + +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/java/opennlp/tools/sentiment/AbstractSentimentTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentiment/AbstractSentimentTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentiment/AbstractSentimentTest.java new file mode 100644 index 0000000..1daca03 --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/sentiment/AbstractSentimentTest.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */

package opennlp.tools.sentiment;

import java.io.File;
import java.io.IOException;

import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;

/**
 * Shared fixture for the sentiment tests: builds untrained sentiment
 * instances, cross validators and sample streams over the bundled training
 * data set, and can persist a trained model to a temporary file.
 */
public class AbstractSentimentTest {

  protected static final String TRAINING_DATASET = "opennlp/tools/sentiment/sample_train_categ";
  protected static final String MODEL = "opennlp/tools/sentiment/sample_model";
  protected static final String ENCODING = "ISO-8859-1";
  protected static final String LANG = "en";

  private static final SentimentFactory factory = new SentimentFactory();

  /**
   * Builds the training parameters shared by all fixtures: 50 iterations and
   * a cutoff of 1.
   */
  private static TrainingParameters createTrainingParameters() {
    TrainingParameters params = new TrainingParameters();
    params.put(TrainingParameters.ITERATIONS_PARAM, 50);
    params.put(TrainingParameters.CUTOFF_PARAM, 1);
    return params;
  }

  /**
   * Creates a sentiment instance that has not been trained yet.
   *
   * @return a new {@link SentimentME} for {@link #LANG}
   */
  protected Sentiment createEmptySentiment() {
    return new SentimentME(LANG, createTrainingParameters(), factory);
  }

  /**
   * Creates a cross validator without evaluation monitors ({@code null} is
   * passed as the last constructor argument).
   *
   * @return a new {@link SentimentCrossValidator} for {@link #LANG}
   */
  protected SentimentCrossValidator createCrossValidation() {
    return new SentimentCrossValidator(LANG, createTrainingParameters(),
        factory, null);
  }

  /**
   * Tokenizes the text with the tokenizer supplied by the shared factory.
   *
   * @param txt
   *          the text to split
   * @return the tokens
   */
  protected String[] tokenize(String txt) {
    return factory.getTokenizer().tokenize(txt);
  }

  /**
   * Opens a sample stream over {@link #TRAINING_DATASET}, read line by line
   * using {@link #ENCODING}.
   *
   * @return the sample stream; the caller owns it
   * @throws IOException
   *           if the stream cannot be created
   */
  protected SentimentSampleStream createSampleStream() throws IOException {
    MockInputStreamFactory mockStream = new MockInputStreamFactory(
        new File(TRAINING_DATASET));
    return new SentimentSampleStream(
        new PlainTextByLineStream(mockStream, ENCODING));
  }

  /**
   * Loads a model from the given file and wraps it in a sentiment instance.
   *
   * @param modelFile
   *          the serialized model
   * @return a {@link SentimentME} backed by the loaded model
   * @throws InvalidFormatException
   *           if the model file is malformed
   * @throws IOException
   *           if the file cannot be read
   */
  protected Sentiment loadSentiment(File modelFile)
      throws InvalidFormatException, IOException {
    SentimentModel model = new SentimentModel(modelFile);
    return new SentimentME(model);
  }

  /**
   * Trains a model on the sample stream and writes it to a temporary file.
   *
   * @return the temporary file holding the model
   * @throws IOException
   *           if training data cannot be read or the file cannot be created
   */
  protected File saveTempModel() throws IOException {
    Sentiment sentiment = createEmptySentiment();
    SentimentSampleStream sampleStream = createSampleStream();
    SentimentModel model = sentiment.train(sampleStream);
    File temp = File.createTempFile("sample_model", ".tmp");
    // Test runs must not pile up model files in the temp directory.
    temp.deleteOnExit();
    CmdLineUtil.writeModel("sentiment analysis", temp, model);
    return temp;
  }
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentCrossValidatorTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentCrossValidatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentCrossValidatorTest.java new file mode 100755 index 0000000..c519c17 --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentCrossValidatorTest.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */

package opennlp.tools.sentiment;

import org.junit.Assert;
import org.junit.Test;

/**
 * Smoke test for {@link SentimentCrossValidator}.
 */
public class SentimentCrossValidatorTest extends AbstractSentimentTest {

  /**
   * Runs a cross validation over the training samples and checks that an
   * F-measure object is available afterwards.
   */
  @Test
  public void testWithNullResources() throws Exception {
    final SentimentSampleStream trainingSamples = createSampleStream();
    final SentimentCrossValidator validator = createCrossValidation();

    // The second argument is presumably the number of folds (verify against
    // SentimentCrossValidator.evaluate).
    validator.evaluate(trainingSamples, 2);

    Assert.assertNotNull(validator.getFMeasure());
  }

}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentEvaluatorTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentEvaluatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentEvaluatorTest.java new file mode 100644 index 0000000..1e6280b --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentEvaluatorTest.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */

package opennlp.tools.sentiment;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import opennlp.tools.cmdline.sentiment.SentimentEvaluationErrorListener;
import opennlp.tools.util.InvalidFormatException;

/**
 * Tests {@link SentimentEvaluator} together with
 * {@link SentimentEvaluationErrorListener}: correctly classified samples must
 * give an F-measure of 1.0 and leave the listener's output empty, a
 * misclassified sample must be reported on the listener's output.
 */
public class SentimentEvaluatorTest extends AbstractSentimentTest {

  private static final String LIKE_SENTENCE = "benefits from serendipity also reminds us of our "
      + "own responsibility to question what is told as the truth";

  private static final String ANGRY_SENTENCE = "Stupid , infantile , redundant , sloppy , "
      + "over-the-top , and amateurish . Yep";

  private static final String NEUTRAL = "stripped almost entirely of such tools as nudity , "
      + "profanity and violence";

  private Sentiment sentiment;

  /**
   * Trains a fresh sentiment instance on the training samples before each test.
   */
  @Before
  public void setup() throws IOException {
    sentiment = createEmptySentiment();
    SentimentSampleStream sampleStream = createSampleStream();
    sentiment.train(sampleStream);
  }

  /**
   * Evaluates two samples carrying their expected labels and checks after each
   * one that the F-measure is 1.0 and nothing was written to the error stream.
   */
  @Test
  public void testPositive() throws InvalidFormatException, IOException {
    String[] tokens = tokenize(LIKE_SENTENCE);
    SentimentSample sample = new SentimentSample("like", tokens, false);
    OutputStream stream = new ByteArrayOutputStream();
    SentimentEvaluationMonitor listener = new SentimentEvaluationErrorListener(
        stream);
    SentimentEvaluator eval = new SentimentEvaluator(sentiment, listener);

    eval.evaluateSample(sample);

    Assert.assertEquals(1.0, eval.getFMeasure().getFMeasure(), 0.0);

    Assert.assertEquals(0, stream.toString().length());

    tokens = tokenize(ANGRY_SENTENCE);
    sample = new SentimentSample("angry", tokens, false);

    // The second sample has to be evaluated as well; otherwise the assertions
    // below only re-check the state left behind by the first sample.
    eval.evaluateSample(sample);

    Assert.assertEquals(1.0, eval.getFMeasure().getFMeasure(), 0.0);
    Assert.assertEquals(0, stream.toString().length());
  }

  /**
   * Evaluates a sample labelled "like" whose text is the NEUTRAL sentence and
   * checks that the F-measure is -1.0 and that the listener wrote a report.
   */
  @Test
  public void testMissclassified() throws InvalidFormatException, IOException {
    OutputStream stream = new ByteArrayOutputStream();
    SentimentEvaluationMonitor listener = new SentimentEvaluationErrorListener(
        stream);

    String[] tokens = tokenize(NEUTRAL);
    SentimentSample sample = new SentimentSample("like", tokens, false);
    SentimentEvaluator eval = new SentimentEvaluator(sentiment, listener);

    eval.evaluateSample(sample);

    Assert.assertEquals(-1.0, eval.getFMeasure().getFMeasure(), 0.0);
    Assert.assertNotEquals(0, stream.toString().length());
  }

}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentEventStreamTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentEventStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentEventStreamTest.java new file mode 100755 index 0000000..2fb9ee2 --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentEventStreamTest.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */

package opennlp.tools.sentiment;

import org.junit.Assert;
import org.junit.Test;

import opennlp.tools.ml.model.Event;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.ObjectStreamUtils;

/**
 * Tests that {@link SentimentEventStream} turns a sample into an event whose
 * outcome is the sample's sentiment.
 */
public class SentimentEventStreamTest {

  private static final String[] SENTENCE = { "benefits", "from", "serendipity",
      "but", "also", "reminds", "us", "of", "our", "own", "responsibility",
      "to", "question", "what", "is", "told", "as", "the", "truth" };
  private static final String SENTIMENT = "like";
  private static final SentimentContextGenerator CG = new SentimentContextGenerator();

  /**
   * Wraps a single sample in an event stream and checks the outcome of the
   * first event read from it.
   */
  @Test
  public void testSentEventStream() throws Exception {
    SentimentSample sample = new SentimentSample(SENTIMENT, SENTENCE, false);
    ObjectStream<Event> eventStream = new SentimentEventStream(
        ObjectStreamUtils.createObjectStream(sample), CG);

    try {
      Assert.assertEquals(SENTIMENT, eventStream.read().getOutcome());
    } finally {
      // Close the stream even when the assertion above fails.
      eventStream.close();
    }
  }

}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentMETest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentMETest.java b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentMETest.java new file mode 100644 index 0000000..eb789ce --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentMETest.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */

package opennlp.tools.sentiment;

import java.io.File;

import org.junit.Assert;
import org.junit.Test;

/**
 * Tests training, prediction and model round-tripping of the sentiment
 * implementation created by {@link AbstractSentimentTest}.
 */
public class SentimentMETest extends AbstractSentimentTest {

  /**
   * Asserts the expected label for one fixed sentence per sentiment category.
   * Shared by the in-memory and the reloaded-model test so both check exactly
   * the same predictions.
   *
   * @param sentiment
   *          a trained sentiment instance
   */
  private void assertExpectedPredictions(Sentiment sentiment) {
    // "Angry"
    String[] tokens = tokenize(
        "Stupid , infantile , redundant , sloppy , over-the-top , and amateurish . Yep");
    Assert.assertEquals("angry", sentiment.predict(tokens));

    // "Sad"
    String[] tokens2 = tokenize(
        "Strong filmmaking requires a clear sense of purpose , and in that oh-so-important category , "
            + "The Four Feathers comes up short");
    Assert.assertEquals("sad", sentiment.predict(tokens2));

    // "Neutral"
    String[] tokens3 = tokenize(
        "to make its points about acceptance and growth");
    Assert.assertEquals("neutral", sentiment.predict(tokens3));

    // "Like"
    String[] tokens4 = tokenize("best performance");
    Assert.assertEquals("like", sentiment.predict(tokens4));

    // "Love"
    String[] tokens5 = tokenize("best short story writing");
    Assert.assertEquals("love", sentiment.predict(tokens5));
  }

  /**
   * Trains in memory and checks the predictions of the trained instance.
   */
  @Test
  public void testSentimentModel() throws Exception {
    Sentiment sentiment = createEmptySentiment();
    SentimentSampleStream sampleStream = createSampleStream();

    sentiment.train(sampleStream);

    assertExpectedPredictions(sentiment);
  }

  /**
   * Predicting with an instance that was never trained is expected to raise a
   * {@link NullPointerException}.
   */
  @Test(expected = NullPointerException.class)
  public void testEmptyModel() throws Exception {
    Sentiment sentiment = createEmptySentiment();
    String[] tokens = tokenize("best performance");
    sentiment.predict(tokens);
  }

  /**
   * Predicting on an empty token array is expected to raise an
   * {@link IllegalArgumentException}.
   */
  @Test(expected = IllegalArgumentException.class)
  public void testEmptySentiment() throws Exception {
    Sentiment sentiment = createEmptySentiment();
    SentimentSampleStream sampleStream = createSampleStream();

    sentiment.train(sampleStream);

    String[] tokens = new String[] {};
    sentiment.predict(tokens);
  }

  /**
   * Saves a trained model to a temporary file, loads it back and checks that
   * the reloaded model gives the expected predictions.
   */
  @Test
  public void testWorkingModel() throws Exception {
    File tempModel = saveTempModel();
    try {
      Sentiment sentiment = loadSentiment(tempModel);
      assertExpectedPredictions(sentiment);
    } finally {
      // Best-effort cleanup; a failed delete must not mask the test outcome,
      // so the boolean result is deliberately ignored.
      tempModel.delete();
    }
  }

}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentSampleTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentSampleTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentSampleTest.java new file mode 100755 index 0000000..6306f0d --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentSampleTest.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */

package opennlp.tools.sentiment;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.junit.Assert;
import org.junit.Test;

/**
 * Tests the accessors of {@link SentimentSample}.
 */
public class SentimentSampleTest {

  /**
   * Builds a sample labelled "like" and checks the sentiment and the
   * clear-adaptive-data flag it reports.
   */
  @Test
  public void testSentSample() throws Exception {
    final String label = "like";
    final String[] words = { "benefits", "from", "serendipity", "but", "also",
        "reminds", "us", "of", "our", "own", "responsibility", "to", "question",
        "what", "is", "told", "as", "the", "truth" };
    final List<String> wordList = Collections
        .unmodifiableList(Arrays.asList(words));

    final SentimentSample sample = new SentimentSample(label, words, false);

    Assert.assertEquals("like", sample.getSentiment());
    Assert.assertFalse(sample.isClearAdaptiveDataSet());
    // NOTE(review): this compares the input array with a list copy of itself
    // and never reads the sentence back from the sample - confirm whether the
    // sample's own sentence accessor was meant to be checked here.
    Assert.assertArrayEquals(words, wordList.toArray(new String[0]));
  }

}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/resources/opennlp/tools/sentiment/en-netflix-sentiment.bin ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/resources/opennlp/tools/sentiment/en-netflix-sentiment.bin b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/en-netflix-sentiment.bin new file mode 100644 index 0000000..0a34a33 Binary files /dev/null and
b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/en-netflix-sentiment.bin differ http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/resources/opennlp/tools/sentiment/en-stanford-sentiment.bin ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/resources/opennlp/tools/sentiment/en-stanford-sentiment.bin b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/en-stanford-sentiment.bin new file mode 100644 index 0000000..c9d79e8 Binary files /dev/null and b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/en-stanford-sentiment.bin differ http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-lg-model-raw.bin ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-lg-model-raw.bin b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-lg-model-raw.bin new file mode 100644 index 0000000..0e3511e Binary files /dev/null and b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-lg-model-raw.bin differ http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-sentiment-bin.bin ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-sentiment-bin.bin b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-sentiment-bin.bin new file mode 100644 index 0000000..253c9fd Binary files /dev/null and b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-sentiment-bin.bin differ http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-sentiment-categ.bin ---------------------------------------------------------------------- diff --git 
a/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-sentiment-categ.bin b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-sentiment-categ.bin new file mode 100644 index 0000000..3bb6cc6 Binary files /dev/null and b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-sentiment-categ.bin differ
