Repository: opennlp Updated Branches: refs/heads/trunk 554626de2 -> 727964d7e
Remove deprecated code from PlainTextByLineStream See issue OPENNLP-882 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/727964d7 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/727964d7 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/727964d7 Branch: refs/heads/trunk Commit: 727964d7efad28d6b61d6d0597386415b0ab7075 Parents: 554626d Author: William Colen <[email protected]> Authored: Tue Dec 20 22:44:16 2016 -0200 Committer: William Colen <[email protected]> Committed: Tue Dec 20 22:44:16 2016 -0200 ---------------------------------------------------------------------- .../formats/BioNLP2004NameSampleStream.java | 14 ----- .../BioNLP2004NameSampleStreamFactory.java | 10 +++- .../tools/formats/Conll02NameSampleStream.java | 19 ------- .../tools/formats/Conll03NameSampleStream.java | 26 ++------- .../tools/formats/EvalitaNameSampleStream.java | 20 ------- .../formats/NameFinderCensus90NameStream.java | 17 ------ .../tools/formats/ad/ADChunkSampleStream.java | 21 -------- .../tools/formats/ad/ADNameSampleStream.java | 32 +---------- .../tools/formats/ad/ADPOSSampleStream.java | 29 ---------- .../formats/ad/ADSentenceSampleStream.java | 25 --------- .../namefind/TokenNameFinderEvaluator.java | 56 -------------------- .../tools/util/PlainTextByLineStream.java | 47 ---------------- 12 files changed, 12 insertions(+), 304 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/727964d7/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java index 6825d80..14eb42e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java @@ -18,7 +18,6 @@ package opennlp.tools.formats; import java.io.IOException; -import java.io.InputStream; import java.io.PrintStream; import java.io.UnsupportedEncodingException; import java.nio.charset.Charset; @@ -67,19 +66,6 @@ public class BioNLP2004NameSampleStream implements ObjectStream<NameSample> { } - @Deprecated - public BioNLP2004NameSampleStream(InputStream in, int types) { - try { - this.lineStream = new PlainTextByLineStream(in, "UTF-8"); - System.setOut(new PrintStream(System.out, true, "UTF-8")); - } catch (UnsupportedEncodingException e) { - // UTF-8 is available on all JVMs, will never happen - throw new IllegalStateException(e); - } - - this.types = types; - } - public NameSample read() throws IOException { List<String> sentence = new ArrayList<>(); http://git-wip-us.apache.org/repos/asf/opennlp/blob/727964d7/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStreamFactory.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStreamFactory.java index 3920a20..6445a4d 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStreamFactory.java @@ -17,6 +17,8 @@ package opennlp.tools.formats; +import java.io.IOException; + import opennlp.tools.cmdline.ArgumentParser; import opennlp.tools.cmdline.ArgumentParser.ParameterDescription; import opennlp.tools.cmdline.CmdLineUtil; @@ -68,7 +70,11 @@ public class BioNLP2004NameSampleStreamFactory extends AbstractSampleStreamFacto BioNLP2004NameSampleStream.GENERATE_RNA_ENTITIES; } - return new BioNLP2004NameSampleStream( - CmdLineUtil.openInFile(params.getData()), typesToGenerate); + try { + return new BioNLP2004NameSampleStream( + CmdLineUtil.createInputStreamFactory(params.getData()), typesToGenerate); + } catch (IOException e) { + throw new IllegalStateException(e); + } } } http://git-wip-us.apache.org/repos/asf/opennlp/blob/727964d7/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java index 7cd3810..efb37a0 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java @@ -18,7 +18,6 @@ package opennlp.tools.formats; import java.io.IOException; -import java.io.InputStream; import java.io.PrintStream; import java.io.UnsupportedEncodingException; import java.util.ArrayList; @@ -84,24 +83,6 @@ public class Conll02NameSampleStream implements ObjectStream<NameSample>{ this.types = types; } - /** - * @param lang the language of the CONLL 02 data - * @param in an Input Stream to read data. - * @param types the entity types to include in the Name Samples - */ - @Deprecated - public Conll02NameSampleStream(LANGUAGE lang, InputStream in, int types) { - this.lang = lang; - try { - this.lineStream = new PlainTextByLineStream(in, "UTF-8"); - System.setOut(new PrintStream(System.out, true, "UTF-8")); - } catch (UnsupportedEncodingException e) { - // UTF-8 is available on all JVMs, will never happen - throw new IllegalStateException(e); - } - this.types = types; - } - static Span extract(int begin, int end, String beginTag) throws InvalidFormatException { String type = beginTag.substring(2); http://git-wip-us.apache.org/repos/asf/opennlp/blob/727964d7/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java index 07b62e8..a9e2e64 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java @@ -15,12 +15,14 @@ package opennlp.tools.formats; +import static opennlp.tools.formats.Conll02NameSampleStream.extract; + import java.io.IOException; -import java.io.InputStream; import java.io.PrintStream; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.List; + import opennlp.tools.namefind.NameSample; import opennlp.tools.util.InputStreamFactory; import opennlp.tools.util.ObjectStream; @@ -28,8 +30,6 @@ import opennlp.tools.util.PlainTextByLineStream; import opennlp.tools.util.Span; import opennlp.tools.util.StringUtil; -import static opennlp.tools.formats.Conll02NameSampleStream.extract; - /** * An import stream which can parse the CONLL03 data. */ @@ -70,26 +70,6 @@ public class Conll03NameSampleStream implements ObjectStream<NameSample>{ this.types = types; } - /** - * - * @param lang the language of the CONLL 03 data - * @param in the Input Stream to read the data file - * @param types the entity types to include in the Name Sample object stream - */ - @Deprecated - public Conll03NameSampleStream(LANGUAGE lang, InputStream in, int types) { - - this.lang = lang; - try { - this.lineStream = new PlainTextByLineStream(in, "UTF-8"); - System.setOut(new PrintStream(System.out, true, "UTF-8")); - } catch (UnsupportedEncodingException e) { - // UTF-8 is available on all JVMs, will never happen - throw new IllegalStateException(e); - } - this.types = types; - } - public NameSample read() throws IOException { List<String> sentence = new ArrayList<>(); http://git-wip-us.apache.org/repos/asf/opennlp/blob/727964d7/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java index 026b2a1..925a130 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java @@ -18,7 +18,6 @@ package opennlp.tools.formats; import java.io.IOException; -import java.io.InputStream; import java.io.PrintStream; import java.io.UnsupportedEncodingException; import java.util.ArrayList; @@ -92,25 +91,6 @@ public class EvalitaNameSampleStream implements ObjectStream<NameSample>{ this.types = types; } - /** - * @param lang the language of the Evalita data file - * @param in an Input Stream to read data. - * @param types the types of the entities which are included in the Name Sample stream - */ - @Deprecated - public EvalitaNameSampleStream(LANGUAGE lang, InputStream in, int types) { - - this.lang = lang; - try { - this.lineStream = new PlainTextByLineStream(in, "UTF-8"); - System.setOut(new PrintStream(System.out, true, "UTF-8")); - } catch (UnsupportedEncodingException e) { - // UTF-8 is available on all JVMs, will never happen - throw new IllegalStateException(e); - } - this.types = types; - } - static final Span extract(int begin, int end, String beginTag) throws InvalidFormatException { String type = beginTag.substring(2); http://git-wip-us.apache.org/repos/asf/opennlp/blob/727964d7/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java index ee3f933..7773e50 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java @@ -16,7 +16,6 @@ package opennlp.tools.formats; import java.io.IOException; -import java.io.InputStream; import java.nio.charset.Charset; import java.util.Locale; @@ -76,22 +75,6 @@ public class NameFinderCensus90NameStream implements ObjectStream<StringList> { this.lineStream = new PlainTextByLineStream(in, this.encoding); } - - /** - * This constructor takes an <code>InputStream</code> and a <code>Charset</code> - * and opens an associated stream object with the specified encoding specified. - * - * @param in an <code>InputStream</code> for the input file. - * @param encoding the <code>Charset</code> to apply to the input stream. - * - * @deprecated use {@link NameFinderCensus90NameStream#NameFinderCensus90NameStream(InputStreamFactory, Charset)} - */ - public NameFinderCensus90NameStream(InputStream in, Charset encoding) { - this.locale = new Locale("en"); // locale is English - this.encoding = encoding; - this.lineStream = new PlainTextByLineStream(in, this.encoding); - } - public StringList read() throws IOException { String line = lineStream.read(); StringList name = null; http://git-wip-us.apache.org/repos/asf/opennlp/blob/727964d7/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java index d176f8b..12faf4c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java @@ -18,7 +18,6 @@ package opennlp.tools.formats.ad; import java.io.IOException; -import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.List; @@ -91,26 +90,6 @@ public class ADChunkSampleStream implements ObjectStream<ChunkSample> { } } - /** - * Creates a new {@link NameSample} stream from a {@link InputStream} - * - * @param in - * the Corpus {@link InputStream} - * @param charsetName - * the charset of the Arvores Deitadas Corpus - */ - @Deprecated - public ADChunkSampleStream(InputStream in, String charsetName) { - - try { - this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream( - in, charsetName)); - } catch (UnsupportedEncodingException e) { - // UTF-8 is available on all JVMs, will never happen - throw new IllegalStateException(e); - } - } - public ChunkSample read() throws IOException { Sentence paragraph; http://git-wip-us.apache.org/repos/asf/opennlp/blob/727964d7/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java index abf6dab..97b6ab9 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java @@ -201,31 +201,6 @@ public class ADNameSampleStream implements ObjectStream<NameSample> { } } - /** - * Creates a new {@link NameSample} stream from a {@link InputStream} - * - * @param in - * the Corpus {@link InputStream} - * @param charsetName - * the charset of the Arvores Deitadas Corpus - * @param splitHyphenatedTokens - * if true hyphenated tokens will be separated: "carros-monstro" > - * "carros" "-" "monstro" - */ - @Deprecated - public ADNameSampleStream(InputStream in, String charsetName, - boolean splitHyphenatedTokens) { - - try { - this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream( - in, charsetName)); - this.splitHyphenatedTokens = splitHyphenatedTokens; - } catch (UnsupportedEncodingException e) { - // UTF-8 is available on all JVMs, will never happen - throw new IllegalStateException(e); - } - } - private int textID = -1; public NameSample read() throws IOException { @@ -347,17 +322,12 @@ public class ADNameSampleStream implements ObjectStream<NameSample> { // a NER. // we check if it is true, and expand the last NER int lastIndex = names.size() - 1; - boolean error = false; if (names.size() > 0) { Span last = names.get(lastIndex); if (last.getEnd() == sentence.size() - 1) { names.set(lastIndex, new Span(last.getStart(), sentence.size(), last.getType())); - } else { - error = true; - } - } else { - error = true; + } } } http://git-wip-us.apache.org/repos/asf/opennlp/blob/727964d7/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java index ff57d83..3f8fdb7 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java @@ -91,35 +91,6 @@ public class ADPOSSampleStream implements ObjectStream<POSSample> { } } - /** - * Creates a new {@link POSSample} stream from a {@link InputStream} - * - * @param in - * the Corpus {@link InputStream} - * @param charsetName - * the charset of the Arvores Deitadas Corpus - * @param expandME - * if true will expand the multiword expressions, each word of the - * expression will have the POS Tag that was attributed to the - * expression plus the prefix B- or I- (CONLL convention) - * @param includeFeatures - * if true will combine the POS Tag with the feature tags - */ - @Deprecated - public ADPOSSampleStream(InputStream in, String charsetName, - boolean expandME, boolean includeFeatures) { - - try { - this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream( - in, charsetName)); - this.expandME = expandME; - this.isIncludeFeatures = includeFeatures; - } catch (UnsupportedEncodingException e) { - // UTF-8 is available on all JVMs, will never happen - throw new IllegalStateException(e); - } - } - public POSSample read() throws IOException { Sentence paragraph; while ((paragraph = this.adSentenceStream.read()) != null) { http://git-wip-us.apache.org/repos/asf/opennlp/blob/727964d7/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java index d3a04df..db09bae 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java @@ -92,31 +92,6 @@ public class ADSentenceSampleStream implements ObjectStream<SentenceSample> { this.isIncludeTitles = includeHeadlines; } - /** - * Creates a new {@link SentenceSample} stream from a {@link FileInputStream} - * - * @param in - * input stream from the corpus - * @param charsetName - * the charset to use while reading the corpus - * @param includeHeadlines - * if true will output the sentences marked as news headlines - */ - @Deprecated - public ADSentenceSampleStream(FileInputStream in, String charsetName, - boolean includeHeadlines) { - try { - this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream( - in, charsetName)); - } catch (UnsupportedEncodingException e) { - // UTF-8 is available on all JVMs, will never happen - throw new IllegalStateException(e); - } - ptEosCharacters = Factory.ptEosCharacters; - Arrays.sort(ptEosCharacters); - this.isIncludeTitles = includeHeadlines; - } - // The Arvores Deitadas Corpus has information about texts and paragraphs. public SentenceSample read() throws IOException { http://git-wip-us.apache.org/repos/asf/opennlp/blob/727964d7/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderEvaluator.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderEvaluator.java index fb689ab..3c8c549 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderEvaluator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderEvaluator.java @@ -18,13 +18,6 @@ package opennlp.tools.namefind; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import opennlp.tools.cmdline.PerformanceMonitor; -import opennlp.tools.util.ObjectStream; -import opennlp.tools.util.PlainTextByLineStream; import opennlp.tools.util.Span; import opennlp.tools.util.eval.Evaluator; import opennlp.tools.util.eval.FMeasure; @@ -99,53 +92,4 @@ public class TokenNameFinderEvaluator extends Evaluator<NameSample> { public FMeasure getFMeasure() { return fmeasure; } - - @Deprecated - public static void main(String[] args) throws IOException { - - if (args.length == 4) { - - System.out.println("Loading name finder model ..."); - InputStream modelIn = new FileInputStream(args[3]); - - TokenNameFinderModel model = new TokenNameFinderModel(modelIn); - - TokenNameFinder nameFinder = new NameFinderME(model); - - System.out.println("Performing evaluation ..."); - TokenNameFinderEvaluator evaluator = new TokenNameFinderEvaluator(nameFinder); - - final NameSampleDataStream sampleStream = new NameSampleDataStream( - new PlainTextByLineStream(new InputStreamReader(new FileInputStream(args[2]), args[1]))); - - final PerformanceMonitor monitor = new PerformanceMonitor("sent"); - - monitor.startAndPrintThroughput(); - - ObjectStream<NameSample> iterator = new ObjectStream<NameSample>() { - - public NameSample read() throws IOException { - monitor.incrementCounter(); - return sampleStream.read(); - } - - public void reset() throws IOException { - sampleStream.reset(); - } - - public void close() throws IOException { - sampleStream.close(); - } - }; - - evaluator.evaluate(iterator); - - monitor.stopAndPrintFinalResult(); - - System.out.println(); - System.out.println("F-Measure: " + evaluator.getFMeasure().getFMeasure()); - System.out.println("Recall: " + evaluator.getFMeasure().getRecallScore()); - System.out.println("Precision: " + evaluator.getFMeasure().getPrecisionScore()); - } - } } http://git-wip-us.apache.org/repos/asf/opennlp/blob/727964d7/opennlp-tools/src/main/java/opennlp/tools/util/PlainTextByLineStream.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/PlainTextByLineStream.java b/opennlp-tools/src/main/java/opennlp/tools/util/PlainTextByLineStream.java index 2a3fd66..9c7cdc1 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/PlainTextByLineStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/PlainTextByLineStream.java @@ -20,10 +20,7 @@ package opennlp.tools.util; import java.io.BufferedReader; import java.io.IOException; -import java.io.InputStream; import java.io.InputStreamReader; -import java.io.Reader; -import java.io.UnsupportedEncodingException; import java.nio.channels.Channels; import java.nio.channels.FileChannel; import java.nio.charset.Charset; @@ -52,50 +49,6 @@ public class PlainTextByLineStream implements ObjectStream<String> { reset(); } - /** - * Initializes the current instance. - * - * @param in - * @deprecated Use {@link #PlainTextByLineStream(InputStreamFactory, Charset)} instead. - */ - public PlainTextByLineStream(Reader in) { - this.in = new BufferedReader(in); - this.channel = null; - this.encoding = null; - } - - /** - * @deprecated Use {@link #PlainTextByLineStream(InputStreamFactory, String)} instead. - */ - public PlainTextByLineStream(InputStream in, String charsetName) throws UnsupportedEncodingException { - this(new InputStreamReader(in, charsetName)); - } - - /** - * @deprecated Use {@link #PlainTextByLineStream(InputStreamFactory, Charset)} instead. - */ - public PlainTextByLineStream(InputStream in, Charset charset) { - this(new InputStreamReader(in, charset)); - } - - /** - * @deprecated Use {@link #PlainTextByLineStream(InputStreamFactory, String)} instead. - */ - public PlainTextByLineStream(FileChannel channel, String charsetName) { - this.encoding = charsetName; - this.channel = channel; - - // TODO: Why isn't reset called here ? - in = new BufferedReader(Channels.newReader(channel, encoding)); - } - - /** - * @deprecated Use {@link #PlainTextByLineStream(InputStreamFactory, Charset)} instead. - */ - public PlainTextByLineStream(FileChannel channel, Charset encoding) { - this(channel, encoding.name()); - } - public String read() throws IOException { return in.readLine(); }
