[opennlp-sandbox] 01/01: sanitize some TODOs and unhealthy code

mawiesne Fri, 24 Feb 2023 10:01:52 -0800

This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch sanitize_some_TODOs_and_unhealthy_code
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git


commit 5348212e75d579dc4a95343f6e7840b8c5ee6ede
Author: Martin Wiesner <[email protected]>
AuthorDate: Fri Feb 24 19:00:29 2023 +0100

    sanitize some TODOs and unhealthy code
    
    - addresses open TODO in `PredictTest` which is now only displayed if 
platform arch is 'aarch64' (aka Apple Silicon)
    - fixes incomplete resource handling of IO streams
    - adds Override annotation where useful
    - extracts regex Patterns instead of inline recompilation of those in loops
    - removes "dead" (unused) `FileHandler` class which was flawed for several 
reasons anyway
    - removes unused ImageIO / AWT bound code from `Utils` class
    - clears some unused variables/fields
    - removes some unclear TODO leftovers
    - fixes some broken indentation along the way
---
 .../opennlp/tools/coref/mention/DefaultParse.java  |  34 +-
 .../muc/Muc6FullParseCorefSampleStreamFactory.java |   3 +-
 .../formats/muc/NameFinderCorefEnhancerStream.java |   1 +
 .../JSMLearnerOnLatticeWithAbduction.java          |  21 +-
 .../tools/similarity/apps/ContentGenerator.java    |   3 +-
 .../similarity/apps/ContentGeneratorSupport.java   |  29 +-
 .../similarity/apps/RelatedSentenceFinder.java     | 111 +++---
 .../similarity/apps/RelatedSentenceFinderML.java   |   4 -
 .../solr/WordDocBuilderSingleImageSearchCall.java  |  68 ++--
 .../taxo_builder/TaxoQuerySnapshotMatcher.java     |   4 +-
 .../taxo_builder/TaxonomyExtenderViaMebMining.java |  33 +-
 .../tools/similarity/apps/utils/FileHandler.java   | 373 ---------------------
 .../opennlp/tools/similarity/apps/utils/Utils.java |  36 +-
 .../cmdline/disambiguator/DisambiguatorTool.java   |   6 +-
 .../disambiguator/IMSWSDContextGenerator.java      |  10 +-
 .../disambiguator/OSCCWSDContextGenerator.java     |   7 +-
 .../tools/disambiguator/WSDDefaultParameters.java  |   4 +-
 .../java/opennlp/tools/disambiguator/WSDModel.java |   3 +-
 .../tools/disambiguator/WSDSampleStream.java       |   3 +-
 .../disambiguator/WSDisambiguatorFactory.java      |   3 +-
 .../tools/disambiguator/WSDisambiguatorME.java     |   4 +-
 .../java/opennlp/tools/disambiguator/WordPOS.java  |   4 +-
 .../main/java/org/apache/opennlp/ModelUtil.java    |  22 +-
 .../org/apache/opennlp/namefinder/PredictTest.java |  16 +-
 24 files changed, 178 insertions(+), 624 deletions(-)

diff --git 
a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java 
b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java
index 725a213..d3566e1 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java
@@ -23,7 +23,6 @@ import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Set;
-import java.util.Stack;
 
 import opennlp.tools.parser.Parse;
 import opennlp.tools.parser.chunking.Parser;
@@ -56,10 +55,12 @@ public class DefaultParse extends AbstractParse {
     // Should we just maintain a parse id map !?
   }
 
+  @Override
   public int getSentenceNumber() {
     return sentenceNumber;
   }
 
+  @Override
   public List<opennlp.tools.coref.mention.Parse> getNamedEntities() {
     List<Parse> names = new ArrayList<>();
     List<Parse> kids = new LinkedList<>(Arrays.asList(parse.getChildren()));
@@ -75,10 +76,12 @@ public class DefaultParse extends AbstractParse {
     return createParses(names.toArray(new Parse[names.size()]));
   }
 
+  @Override
   public List<opennlp.tools.coref.mention.Parse> getChildren() {
     return createParses(parse.getChildren());
   }
 
+  @Override
   public List<opennlp.tools.coref.mention.Parse> getSyntacticChildren() {
     List<Parse> kids = new ArrayList<>(Arrays.asList(parse.getChildren()));
     for (int ci = 0; ci < kids.size(); ci++) {
@@ -92,6 +95,7 @@ public class DefaultParse extends AbstractParse {
     return createParses(kids.toArray(new Parse[kids.size()]));
   }
 
+  @Override
   public List<opennlp.tools.coref.mention.Parse> getTokens() {
     List<Parse> tokens = new ArrayList<>();
     List<Parse> kids = new LinkedList<>(Arrays.asList(parse.getChildren()));
@@ -107,6 +111,7 @@ public class DefaultParse extends AbstractParse {
     return createParses(tokens.toArray(new Parse[tokens.size()]));
   }
 
+  @Override
   public String getSyntacticType() {
     if (ENTITY_SET.contains(parse.getType())) {
       return null;
@@ -129,6 +134,7 @@ public class DefaultParse extends AbstractParse {
     return newParses;
   }
 
+  @Override
   public String getEntityType() {
     if (ENTITY_SET.contains(parse.getType())) {
       return parse.getType();
@@ -138,6 +144,7 @@ public class DefaultParse extends AbstractParse {
     }
   }
 
+  @Override
   public boolean isParentNAC() {
     Parse parent = parse.getParent();
     while (parent != null) {
@@ -149,6 +156,7 @@ public class DefaultParse extends AbstractParse {
     return false;
   }
 
+  @Override
   public opennlp.tools.coref.mention.Parse getParent() {
     Parse parent = parse.getParent();
     if (parent == null) {
@@ -159,32 +167,32 @@ public class DefaultParse extends AbstractParse {
     }
   }
 
+  @Override
   public boolean isNamedEntity() {
     
     // TODO: We should use here a special tag to, where
     // the type can be extracted from. Then it just depends
     // on the training data and not the values inside NAME_TYPES.
-    
-    if (ENTITY_SET.contains(parse.getType())) {
-      return true;
-    }
-    else {
-      return false;
-    }
+
+    return ENTITY_SET.contains(parse.getType());
   }
 
+  @Override
   public boolean isNounPhrase() {
     return parse.getType().equals("NP") || parse.getType().startsWith("NP#");
   }
 
+  @Override
   public boolean isSentence() {
     return parse.getType().equals(Parser.TOP_NODE);
   }
 
+  @Override
   public boolean isToken() {
     return parse.isPosTag();
   }
 
+  @Override
   public int getEntityId() {
     
     String type = parse.getType();
@@ -198,16 +206,17 @@ public class DefaultParse extends AbstractParse {
     }
   }
 
+  @Override
   public Span getSpan() {
     return parse.getSpan();
   }
 
+  @Override
   public int compareTo(opennlp.tools.coref.mention.Parse p) {
 
     if (p == this) {
       return 0;
     }
-
     if (getSentenceNumber() < p.getSentenceNumber()) {
       return -1;
     }
@@ -221,11 +230,6 @@ public class DefaultParse extends AbstractParse {
 
         System.out.println("Maybe incorrect measurement!");
         
-        Stack<Parse> parents = new Stack<>();
-        
-        
-        
-        
         // get parent and update distance
         // if match return distance
         // if not match do it again
@@ -241,6 +245,7 @@ public class DefaultParse extends AbstractParse {
   }
 
 
+  @Override
   public opennlp.tools.coref.mention.Parse getPreviousToken() {
     Parse parent = parse.getParent();
     Parse node = parse;
@@ -267,6 +272,7 @@ public class DefaultParse extends AbstractParse {
     }
   }
 
+  @Override
   public opennlp.tools.coref.mention.Parse getNextToken() {
     Parse parent = parse.getParent();
     Parse node = parse;
diff --git 
a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/Muc6FullParseCorefSampleStreamFactory.java
 
b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/Muc6FullParseCorefSampleStreamFactory.java
index d715871..9f5a9d0 100644
--- 
a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/Muc6FullParseCorefSampleStreamFactory.java
+++ 
b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/Muc6FullParseCorefSampleStreamFactory.java
@@ -73,6 +73,7 @@ public class Muc6FullParseCorefSampleStreamFactory extends 
AbstractSampleStreamF
     super(Parameters.class);
   }
 
+  @Override
   public ObjectStream<CorefSample> create(String[] args) {
     
     Parameters params = ArgumentParser.parse(args, Parameters.class);
@@ -85,7 +86,7 @@ public class Muc6FullParseCorefSampleStreamFactory extends 
AbstractSampleStreamF
     
     ObjectStream<String> mucDocStream = new FileToStringSampleStream(
         new DirectorySampleStream(params.getData(), new FileFilter() {
-          
+          @Override
           public boolean accept(File file) {
             return file.getName().toLowerCase().endsWith(".sgm");
           }
diff --git 
a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/NameFinderCorefEnhancerStream.java
 
b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/NameFinderCorefEnhancerStream.java
index e9e0bc4..4e24777 100644
--- 
a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/NameFinderCorefEnhancerStream.java
+++ 
b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/NameFinderCorefEnhancerStream.java
@@ -43,6 +43,7 @@ public class NameFinderCorefEnhancerStream extends 
FilterObjectStream<RawCorefSa
     this.tags = tags;
   }
 
+  @Override
   public RawCorefSample read() throws IOException {
     
     RawCorefSample sample = samples.read();
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeWithAbduction.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeWithAbduction.java
index 76ae8ed..cd7c818 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeWithAbduction.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeWithAbduction.java
@@ -20,25 +20,17 @@ package opennlp.tools.jsmlearning;
 import java.util.Arrays;
 import java.util.List;
 
-public class JSMLearnerOnLatticeWithAbduction extends 
JSMLearnerOnLatticeWithDeduction{
-
-
-
+public class JSMLearnerOnLatticeWithAbduction extends 
JSMLearnerOnLatticeWithDeduction {
 
+       @Override
        public JSMDecision buildLearningModel(List<String> posTexts, 
List<String> negTexts, 
                        String unknown, String[] separationKeywords){
-               JSMDecision decision = super.buildLearningModel(posTexts, 
negTexts, unknown, separationKeywords);
-               // verify each hypothesis
-               //TODO
-                               return decision;
-
+               //TODO verify each hypothesis
+               return super.buildLearningModel(posTexts, negTexts, unknown, 
separationKeywords);
        }
 
 
-       
-
        public static void main (String[] args) {
-
                String[] posArr = new String[] {"I rent an office space. This 
office is for my business. I can deduct office rental expense from my business 
profit to calculate net income. ",
                                "To run my business, I have to rent an office. 
The net business profit is calculated as follows. Rental expense needs to be 
subtracted from revenue. ",
                                "To store goods for my retail business I rent 
some space. When I calculate the net income, I take revenue and subtract 
business expenses such as office rent. ",
@@ -60,10 +52,5 @@ public class JSMLearnerOnLatticeWithAbduction extends 
JSMLearnerOnLatticeWithDed
                // Finally, do prediction
                JSMDecision dec = // may be determined by ...
                                jsm.buildLearningModel(Arrays.asList(posArr), 
Arrays.asList(negArr), unknown , new String[]{"property"});
-               
-               
-               
-
-
        }
 }
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java
index b71d0b2..00a6d33 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java
@@ -93,8 +93,7 @@ public class ContentGenerator /*extends 
RelatedSentenceFinder*/ {
                                for (HitBase item : searchResult) { // got some 
text from .html
                                        if (item.getAbstractText() != null
                                                        && 
!(item.getUrl().indexOf(".pdf") > 0)) { // exclude pdf
-                                               opinionSentencesToAdd
-                                               
.add(buildParagraphOfGeneratedText(item, sentence, null));
+                                               
opinionSentencesToAdd.add(buildParagraphOfGeneratedText(item, sentence, null));
                                        }
                                }
                        }
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorSupport.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorSupport.java
index 4389ab6..0575bbd 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorSupport.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorSupport.java
@@ -21,6 +21,7 @@ import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.List;
 import java.util.logging.Logger;
+import java.util.regex.Pattern;
 
 import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;
 import opennlp.tools.similarity.apps.utils.Utils;
@@ -30,15 +31,16 @@ import 
opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcess
 
 import org.apache.commons.lang.StringUtils;
 
-/*
- * This class supports content generation by static functions
- * 
+/**
+ * This class supports content generation by static functions.
  */
-
 public class ContentGeneratorSupport {
        private static final Logger LOG = Logger
                        
.getLogger("opennlp.tools.similarity.apps.ContentGeneratorSupport");
 
+       //TODO - verify regexp!!
+       private static final Pattern SPACES_PATTERN = 
Pattern.compile("([a-z])(\\s{2,3})([A-Z])");
+
        /**
         * Takes a sentence and extracts noun phrases and entity names to from 
search
         * queries for finding relevant sentences on the web, which are then 
subject
@@ -50,10 +52,7 @@ public class ContentGeneratorSupport {
         * @return List<String> of search expressions
         */
        public static List<String> buildSearchEngineQueryFromSentence(String 
sentence) {
-               ParseTreeChunk matcher = new ParseTreeChunk();
-               ParserChunker2MatcherProcessor pos = 
ParserChunker2MatcherProcessor
-                               .getInstance();
-               List<List<ParseTreeChunk>> sent1GrpLst = null;
+               ParserChunker2MatcherProcessor pos = 
ParserChunker2MatcherProcessor.getInstance();
 
                List<ParseTreeChunk> nPhrases = pos
                                
.formGroupedPhrasesFromChunksForSentence(sentence).get(0);
@@ -135,10 +134,11 @@ public class ContentGeneratorSupport {
 
        public static String cleanSpacesInCleanedHTMLpage(String pageContent){ 
//was 4 spaces 
                //was 3 spaces => now back to 2
-               //TODO - verify regexp!!
-               pageContent = 
pageContent.trim().replaceAll("([a-z])(\\s{2,3})([A-Z])", "$1. $3")
-                               .replace("..", ".").replace(". . .", " ").
-                               replace(".    .",". ").trim(); // sometimes   
html breaks are converted into ' ' (two spaces), so
+               pageContent = pageContent.trim();
+               pageContent = 
SPACES_PATTERN.matcher(pageContent).replaceAll("$1. $3")
+                               .replace("..", ".").replace(". . .", " ")
+                               .replace(".    .",". ").trim();
+               // sometimes html breaks are converted into ' ' (two spaces), so
                // we need to put '.'
                return pageContent;
        }
@@ -209,12 +209,11 @@ public class ContentGeneratorSupport {
                                                for (Fragment f2 : fragmList2) {
                                                        String sf1 = 
f1.getResultText();
                                                        String sf2 = 
f2.getResultText();
-                                                       if 
(StringUtils.isEmpty(sf1) || StringUtils.isEmpty(sf1))
+                                                       if 
(StringUtils.isEmpty(sf1) || StringUtils.isEmpty(sf2))
                                                                continue;
                                                        if 
(meas.measureStringDistance(sf1, sf2) > dupeThresh) {
                                                                
fragmList2Results.remove(f2);
-                                                               
LOG.info("Removed duplicates from formed fragments list: "
-                                                                               
+ sf2);
+                                                               
LOG.info("Removed duplicates from formed fragments list: " + sf2);
                                                        }
                                                }
 
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
index 45bcbdb..80f02ed 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
@@ -311,8 +311,7 @@ public class RelatedSentenceFinder {
         * @param hits List<HitBase> of search results objects
         * @return List<String> of search results objects where dupes are 
removed
         */
-       public static List<HitBase> removeDuplicatesFromResultantHits(
-                       List<HitBase> hits) {
+       public static List<HitBase> 
removeDuplicatesFromResultantHits(List<HitBase> hits) {
                StringDistanceMeasurer meas = new StringDistanceMeasurer();
                double dupeThresh = // 0.8; // if more similar, then considered 
dupes was
                                0.7;
@@ -447,7 +446,7 @@ public class RelatedSentenceFinder {
                                // or get original snippet
                                pageSentence = fragment;
                        if (pageSentence != null)
-                               pageSentence.replace("_should_find_orig_", "");
+                               pageSentence = 
pageSentence.replace("_should_find_orig_", "");
 
                        // resultant sentence SHOULD NOT be longer than for 
times the size of
                        // snippet fragment
@@ -463,9 +462,7 @@ public class RelatedSentenceFinder {
                                                        + " " + title, 
originalSentence);
                                        List<List<ParseTreeChunk>> match = 
matchRes.getMatchResult();
                                        if (!matchRes.isVerbExists() || 
matchRes.isImperativeVerb()) {
-                                               System.out
-                                               .println("Rejected Sentence : 
No verb OR Yes imperative verb :"
-                                                               + pageSentence);
+                                               System.out.println("Rejected 
Sentence : No verb OR Yes imperative verb :" + pageSentence);
                                                continue;
                                        }
 
@@ -520,12 +517,9 @@ public class RelatedSentenceFinder {
                                                                        + "| 
with title= " + title);
                                                        System.out.println("For 
fragment = " + fragment);
                                                } else
-                                                       System.out
-                                                       .println("Rejected 
sentence due to wrong area at webpage: "
-                                                                       + 
pageSentence);
+                                                       
System.out.println("Rejected sentence due to wrong area at webpage: " + 
pageSentence);
                                        } else
-                                               System.out.println("Rejected 
sentence due to low score: "
-                                                               + pageSentence);
+                                               System.out.println("Rejected 
sentence due to low score: " + pageSentence);
                                        // }
                                } catch (Throwable t) {
                                        t.printStackTrace();
@@ -902,63 +896,58 @@ public class RelatedSentenceFinder {
                        t.printStackTrace();
                }
 
-       return result;
-}
+               return result;
+       }
 
-public HitBase buildParagraphOfGeneratedText(HitBase item,
-               String originalSentence, List<String> sentsAll) {
-       List<Fragment> results = new ArrayList<>() ;
-       
-       Triple<List<String>, String, String[]> fragmentExtractionResults = 
formCandidateFragmentsForPage(item, originalSentence, sentsAll);
+       public HitBase buildParagraphOfGeneratedText(HitBase item,
+                       String originalSentence, List<String> sentsAll) {
+               List<Fragment> results = new ArrayList<>() ;
 
-       List<String> allFragms = fragmentExtractionResults.getFirst();
+               Triple<List<String>, String, String[]> 
fragmentExtractionResults = formCandidateFragmentsForPage(item, 
originalSentence, sentsAll);
 
-       for (String fragment : allFragms) {
-               String[] candidateSentences = formCandidateSentences(fragment, 
fragmentExtractionResults);
-               if (candidateSentences == null)
-                       continue;
-               Fragment res = 
verifyCandidateSentencesAndFormParagraph(candidateSentences, item, fragment, 
originalSentence, sentsAll);
-               if (res!=null)
-                       results.add(res);
+               List<String> allFragms = fragmentExtractionResults.getFirst();
 
+               for (String fragment : allFragms) {
+                       String[] candidateSentences = 
formCandidateSentences(fragment, fragmentExtractionResults);
+                       if (candidateSentences == null)
+                               continue;
+                       Fragment res = 
verifyCandidateSentencesAndFormParagraph(candidateSentences, item, fragment, 
originalSentence, sentsAll);
+                       if (res!=null)
+                               results.add(res);
+
+               }
+               item.setFragments(results);
+               return item;
        }
-       
-       item.setFragments(results );
-       return item;
-}
-
-
-
-
-public static void main(String[] args) {
-       RelatedSentenceFinder f = new RelatedSentenceFinder();
-
-       List<HitBase> hits;
-       try {
-               // uncomment the sentence you would like to serve as a seed 
sentence for
-               // content generation for an event description
-
-               // uncomment the sentence you would like to serve as a seed 
sentence for
-               // content generation for an event description
-               hits = f.generateContentAbout("Albert Einstein"
-                               // "Britney Spears - The Femme Fatale Tour"
-                               // "Rush Time Machine",
-                               // "Blue Man Group" ,
-                               // "Belly Dance With Zaharah",
-                               // "Hollander Musicology Lecture: Danielle 
Fosler-Lussier, Guest Lecturer",
-                               // "Jazz Master and arguably the most famous 
jazz musician alive, trumpeter Wynton Marsalis",
-                               );
-               System.out.println(HitBase.toString(hits));
-               System.out.println(HitBase.toResultantString(hits));
-               // WordFileGenerator.createWordDoc("Essey about Albert 
Einstein",
-               // hits.get(0).getTitle(), hits);
-
-       } catch (Exception e) {
-               e.printStackTrace();
-       }
 
-}
 
+       public static void main(String[] args) {
+               RelatedSentenceFinder f = new RelatedSentenceFinder();
+
+               List<HitBase> hits;
+               try {
+                       // uncomment the sentence you would like to serve as a 
seed sentence for
+                       // content generation for an event description
+
+                       // uncomment the sentence you would like to serve as a 
seed sentence for
+                       // content generation for an event description
+                       hits = f.generateContentAbout("Albert Einstein"
+                                       // "Britney Spears - The Femme Fatale 
Tour"
+                                       // "Rush Time Machine",
+                                       // "Blue Man Group" ,
+                                       // "Belly Dance With Zaharah",
+                                       // "Hollander Musicology Lecture: 
Danielle Fosler-Lussier, Guest Lecturer",
+                                       // "Jazz Master and arguably the most 
famous jazz musician alive, trumpeter Wynton Marsalis",
+                                       );
+                       System.out.println(HitBase.toString(hits));
+                       System.out.println(HitBase.toResultantString(hits));
+                       // WordFileGenerator.createWordDoc("Essey about Albert 
Einstein",
+                       // hits.get(0).getTitle(), hits);
+
+               } catch (Exception e) {
+                       e.printStackTrace();
+               }
 
+       }
 
 }
\ No newline at end of file
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinderML.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinderML.java
index a075bc2..dbc93f5 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinderML.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinderML.java
@@ -20,7 +20,6 @@ package opennlp.tools.similarity.apps;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-import java.util.logging.Logger;
 
 import opennlp.tools.similarity.apps.utils.Utils;
 import opennlp.tools.textsimilarity.TextProcessor;
@@ -29,8 +28,6 @@ import opennlp.tools.textsimilarity.TextProcessor;
  * This class does content generation in ES, DE etc
  */
 public class RelatedSentenceFinderML extends RelatedSentenceFinder{
-       private static final Logger LOG = 
Logger.getLogger("opennlp.tools.similarity.apps.RelatedSentenceFinderML");
-
 
        public RelatedSentenceFinderML(int ms, int msr, float thresh, String 
key) {
                this.MAX_STEPS = ms;
@@ -45,7 +42,6 @@ public class RelatedSentenceFinderML extends 
RelatedSentenceFinder{
        public List<HitBase> generateContentAbout(String sentence) throws 
Exception {
                List<HitBase> opinionSentencesToAdd = new ArrayList<>();
                System.out.println(" \n=== Entity to write about = " + 
sentence);
-               List<String> nounPhraseQueries = new ArrayList<>();
 
                List<HitBase> searchResult = yrunner.runSearch(sentence, 100);
                if (MAX_SEARCH_RESULTS<searchResult.size())
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/WordDocBuilderSingleImageSearchCall.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/WordDocBuilderSingleImageSearchCall.java
index 79aa5d1..b0eaa29 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/WordDocBuilderSingleImageSearchCall.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/WordDocBuilderSingleImageSearchCall.java
@@ -16,7 +16,6 @@
  */
 package opennlp.tools.similarity.apps.solr;
 
-
 import java.io.File;
 import java.util.ArrayList;
 import java.util.List;
@@ -25,13 +24,15 @@ import 
net.billylieurance.azuresearch.AzureSearchImageResult;
 import net.billylieurance.azuresearch.AzureSearchResultSet;
 
 import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
+import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
 
 import opennlp.tools.similarity.apps.ContentGeneratorSupport;
 import opennlp.tools.similarity.apps.Fragment;
 import opennlp.tools.similarity.apps.HitBase;
 
 public class WordDocBuilderSingleImageSearchCall extends WordDocBuilder{
-       
+
+       @Override
        public String buildWordDoc(List<HitBase> content, String title){
                
                String outputDocFilename =  absPath+"/written/"+ 
title.replace(' ','_').replace('\"', ' ').trim()+ ".docx";
@@ -41,20 +42,20 @@ public class WordDocBuilderSingleImageSearchCall extends 
WordDocBuilder{
                int count=0;
                try {
                        wordMLPackage = WordprocessingMLPackage.createPackage();
-                       
wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Title", 
title.toUpperCase());
+                       MainDocumentPart mdp = 
wordMLPackage.getMainDocumentPart();
+                       mdp.addStyledParagraphOfText("Title", 
title.toUpperCase());
                        for(HitBase para: content){
                                if (para.getFragments()==null || 
para.getFragments().size()<1) // no found content in this hit
                                                continue;
                                try {
                                        if (!para.getTitle().endsWith("..") 
/*|| StringUtils.isAlphanumeric(para.getTitle())*/){
                                                String sectTitle = 
ContentGeneratorSupport.getPortionOfTitleWithoutDelimiters(para.getTitle());
-                                               
wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Subtitle",
-                                                       sectTitle);
+                                               
mdp.addStyledParagraphOfText("Subtitle", sectTitle);
                                        }
                                        String paraText = 
para.getFragments().toString().replace("[", "").replace("]", "").replace(" | ", 
"")
                                                        .replace(".,", 
".").replace(".\"", "\"").replace(". .", ".")
                                                        .replace(",.", ".");
-                                       
wordMLPackage.getMainDocumentPart().addParagraphOfText(paraText);
+                                       mdp.addParagraphOfText(paraText);
                                        
                                        try {
                                                
addImageByImageURLToPackage(count, wordMLPackage, imageURLs);
@@ -67,33 +68,22 @@ public class WordDocBuilderSingleImageSearchCall extends WordDocBuilder{
                                count++;
                        }
                        // now add URLs
-                       
wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Subtitle", 
"REFERENCES");
+                       mdp.addStyledParagraphOfText("Subtitle", "REFERENCES");
                        for(HitBase para: content){
                                if (para.getFragments()==null || 
para.getFragments().size()<1) // no found content in this hit
                                                continue;
                                try {
-                                       
wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Subtitle",
-                                                       para.getTitle());
+                                       
mdp.addStyledParagraphOfText("Subtitle", para.getTitle());
                                        String paraText = para.getUrl();
-                                       
wordMLPackage.getMainDocumentPart().addParagraphOfText(paraText);
-                                       
-                                       
+                                       mdp.addParagraphOfText(paraText);
+
                                } catch (Exception e) {
                                        e.printStackTrace();
                                }
                        }
-       
                
                        wordMLPackage.save(new File(outputDocFilename));
                        System.out.println("Finished creating docx ="+outputDocFilename);
-               //TODO pdf export
-                       /*
-                       FOSettings foSettings = Docx4J.createFOSettings();
-            foSettings.setWmlPackage(wordMLPackage);
-            OutputStream os = new 
java.io.FileOutputStream(outputDocFilename.replace(".docx", ".pdf"));
-            Docx4J.toFO(foSettings, os, Docx4J.FLAG_NONE);
-               System.out.println("Finished creating docx's PDF 
="+outputDocFilename);
-       */      
                        
                } catch (Exception e) {
                        e.printStackTrace();
@@ -102,9 +92,8 @@ public class WordDocBuilderSingleImageSearchCall extends WordDocBuilder{
                return outputDocFilename;
        }
        
-       protected void addImageByImageURLToPackage(int count,
-                       WordprocessingMLPackage wordMLPackage,
-                       List<String>  imageURLs) {
+       protected void addImageByImageURLToPackage(int count, 
WordprocessingMLPackage wordMLPackage,
+                                                                               
                                                                                
                 List<String>  imageURLs) {
                if (count>imageURLs.size()-1)
                        return;
                
@@ -112,7 +101,7 @@ public class WordDocBuilderSingleImageSearchCall extends WordDocBuilder{
                String destinationFile = url.replace("http://", "").replace("/", "_");
                saveImageFromTheWeb(url, absPath+IMG_REL_PATH+destinationFile);
                File file = new File(absPath+IMG_REL_PATH+destinationFile);
-        try {
+               try {
                        byte[] bytes = convertImageToByteArray(file);
                        addImageToPackage(wordMLPackage, bytes);
                } catch (Exception e) {
@@ -130,20 +119,19 @@ public class WordDocBuilderSingleImageSearchCall extends WordDocBuilder{
                return imageURLs;
                
        }
-
     
-    public static void main(String[] args){
-       WordDocBuilderSingleImageSearchCall b = new 
WordDocBuilderSingleImageSearchCall();
-       List<HitBase> content = new ArrayList<>();
-       for(int i = 0; i<10; i++){
-               HitBase h = new HitBase();
-               h.setTitle("albert einstein "+i);
-               List<Fragment> frs = new ArrayList<>();
-               frs.add(new Fragment(" content "+i, 0));
-               h.setFragments(frs);
-               content.add(h);
-       }
-       
-       b.buildWordDoc(content, "albert einstein");
-    }
+       public static void main(String[] args){
+               WordDocBuilderSingleImageSearchCall b = new 
WordDocBuilderSingleImageSearchCall();
+               List<HitBase> content = new ArrayList<>();
+               for(int i = 0; i<10; i++){
+                       HitBase h = new HitBase();
+                       h.setTitle("albert einstein "+i);
+                       List<Fragment> frs = new ArrayList<>();
+                       frs.add(new Fragment(" content "+i, 0));
+                       h.setFragments(frs);
+                       content.add(h);
+               }
+
+               b.buildWordDoc(content, "albert einstein");
+       }
 }
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java
index 1be923e..fa205d7 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java
@@ -19,7 +19,6 @@ package opennlp.tools.similarity.apps.taxo_builder;
 
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
@@ -55,8 +54,7 @@ public class TaxoQuerySnapshotMatcher {
    */
   public int getTaxoScore(String query, String snapshot) {
 
-    lemma_ExtendedAssocWords = (HashMap<String, List<List<String>>>) taxo
-        .getLemma_ExtendedAssocWords();
+    lemma_ExtendedAssocWords = taxo.getLemma_ExtendedAssocWords();
 
     query = query.toLowerCase();
     snapshot = snapshot.toLowerCase();
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java
index 2f53a7d..e780330 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java
@@ -27,7 +27,6 @@ import opennlp.tools.similarity.apps.HitBase;
 import opennlp.tools.similarity.apps.utils.StringCleaner;
 import opennlp.tools.stemmer.PStemmer;
 import opennlp.tools.textsimilarity.ParseTreeChunk;
-import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;
 import opennlp.tools.textsimilarity.SentencePairMatchResult;
 import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;
 
@@ -39,11 +38,9 @@ import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcess
  * derived list output map 2) for such manual list of words -> derived list of
  * words
  */
-
 public class TaxonomyExtenderViaMebMining extends BingQueryRunner {
 
-  private final ParseTreeChunkListScorer parseTreeChunkListScorer = new 
ParseTreeChunkListScorer();
-  ParserChunker2MatcherProcessor sm;
+  private ParserChunker2MatcherProcessor sm;
 
   private Map<String, List<List<String>>> lemma_ExtendedAssocWords = new 
HashMap<>();
   private final Map<List<String>, List<List<String>>> 
assocWords_ExtendedAssocWords = new HashMap<>();
@@ -145,19 +142,17 @@ public class TaxonomyExtenderViaMebMining extends BingQueryRunner {
       List<HitBase> resultList = runSearch(query, numbOfHits);
 
       for (int i = 0; i < resultList.size(); i++) {
-        {
-          for (int j = i + 1; j < resultList.size(); j++) {
-            HitBase h1 = resultList.get(i);
-            HitBase h2 = resultList.get(j);
-            String snapshot1 = StringCleaner.processSnapshotForMatching(h1
-                .getTitle() + " . " + h1.getAbstractText());
-            String snapshot2 = StringCleaner.processSnapshotForMatching(h2
-                .getTitle() + " . " + h2.getAbstractText());
-            SentencePairMatchResult matchRes = sm.assessRelevance(snapshot1,
-                snapshot2);
-            List<List<ParseTreeChunk>> matchResult = matchRes.getMatchResult();
-            genResult.addAll(matchResult);
-          }
+        for (int j = i + 1; j < resultList.size(); j++) {
+          HitBase h1 = resultList.get(i);
+          HitBase h2 = resultList.get(j);
+          String snapshot1 = StringCleaner.processSnapshotForMatching(h1
+              .getTitle() + " . " + h1.getAbstractText());
+          String snapshot2 = StringCleaner.processSnapshotForMatching(h2
+              .getTitle() + " . " + h2.getAbstractText());
+          SentencePairMatchResult matchRes = sm.assessRelevance(snapshot1,
+              snapshot2);
+          List<List<ParseTreeChunk>> matchResult = matchRes.getMatchResult();
+          genResult.addAll(matchResult);
         }
       }
 
@@ -175,9 +170,7 @@ public class TaxonomyExtenderViaMebMining extends BingQueryRunner {
 
   public static void main(String[] args) {
     TaxonomyExtenderViaMebMining self = new TaxonomyExtenderViaMebMining();
-    self.extendTaxonomy("src/test/resources/taxonomies/irs_dom.ari", "tax",
-        "en");
-
+    self.extendTaxonomy("src/test/resources/taxonomies/irs_dom.ari", "tax", 
"en");
   }
 
 }
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/FileHandler.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/FileHandler.java
deleted file mode 100644
index 21bdafb..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/FileHandler.java
+++ /dev/null
@@ -1,373 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.similarity.apps.utils;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
-import java.io.EOFException;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.logging.Logger;
-
-/**
- * This class responsible to save data to files as well as read out! It is
- * capable to handle text and binary files.
- */
-public class FileHandler {
-
-  private static final Logger LOG = Logger
-      .getLogger("opennlp.tools.similarity.apps.utils.FileHandler");
-
-  public void writeToTextFile(String data, String filepath, boolean append)
-      throws IOException {
-    try {
-      BufferedWriter out = new BufferedWriter(new FileWriter(filepath, 
append));
-      out.write(data + "\n");
-      out.close();
-    } catch (IOException e) {
-      LOG.severe(e.toString());
-      e.printStackTrace();
-    }
-  }
-
-  /**
-   * Writes data from an arrayList<String> to a text-file where each line of 
the
-   * text represented by an element in the list.
-   * 
-   * @param list
-   * @param filePath
-   * @param append
-   * @throws Exception
-   */
-  public void writeToTextFile(ArrayList<String> list, String filePath, boolean 
append) throws Exception {
-    FileWriter outFile;
-    Iterator<String> it = list.iterator();
-    if (!append) {
-      outFile = new FileWriter(filePath);
-      PrintWriter out = new PrintWriter(outFile);
-      while (it.hasNext()) {
-        out.println(it.next());
-      }
-      outFile.close();
-    } else {
-      int tmp = 0;
-      while (it.hasNext()) {
-        if (tmp == 0) {
-          appendtofile("\n" + it.next(), filePath);
-        } else {
-          appendtofile(it.next(), filePath);
-        }
-        tmp++;
-      }
-    }
-  }
-
-  public void writeObjectToFile(Object obj, String filepath, boolean append) {
-    if (!isFileOrDirectoryExists(getDirPathfromFullPath(filepath))) {
-      createFolder(getDirPathfromFullPath(filepath));
-    }
-    ObjectOutputStream outputStream;
-    try {
-      outputStream = new ObjectOutputStream(new FileOutputStream(filepath));
-      outputStream.writeObject(obj);
-    } catch (IOException e) {
-      LOG.severe(e.toString());
-    }
-  }
-
-  public Object readObjectfromFile(String filePath) {
-    ObjectInputStream inputStream = null;
-    try {
-      // Construct the ObjectInputStream object
-      inputStream = new ObjectInputStream(new FileInputStream(filePath));
-      Object obj;
-      while ((obj = inputStream.readObject()) != null) {
-        return obj;
-      }
-    } catch (EOFException ex) { // This exception will be caught when EOF is
-                                // reached
-      LOG.severe("End of file reached.\n" + ex);
-    } catch (ClassNotFoundException | IOException ex) {
-      LOG.severe(ex.toString());
-    } finally {
-      // Close the ObjectInputStream
-      try {
-        if (inputStream != null) {
-          inputStream.close();
-        }
-      } catch (IOException ex) {
-        LOG.severe(ex.toString());
-      }
-    }
-    return null;
-  }
-
-  /**
-   * Creates a byte array from any object.
-   * <p>
-   * I wanted to use it when I write out object to files! (This is not in use
-   * right now, I may move it into other class)
-   * 
-   * @param obj
-   * @return
-   * @throws java.io.IOException
-   */
-  public byte[] getBytes(Object obj) throws java.io.IOException {
-    ByteArrayOutputStream bos = new ByteArrayOutputStream();
-    ObjectOutputStream oos = new ObjectOutputStream(bos);
-    oos.writeObject(obj);
-    oos.flush();
-    oos.close();
-    bos.close();
-    return bos.toByteArray();
-  }
-
-  /**
-   * Fetches all content from a text file, and return it as a String.
-   * 
-   * @return
-   */
-  public String readFromTextFile(String filePath) {
-    StringBuilder contents = new StringBuilder();
-    // ...checks on aFile are edited
-    File aFile = new File(filePath);
-
-    try {
-      // use buffering, reading one line at a time
-      // FileReader always assumes default encoding is OK!
-      // TODO be sure that the default encoding is OK!!!!! Otherwise
-      // change it
-
-      try (BufferedReader input = new BufferedReader(new FileReader(aFile))) {
-        String line; // not declared within while loop
-        /*
-         * readLine is a bit quirky : it returns the content of a line MINUS 
the
-         * newline. it returns null only for the END of the stream. it returns
-         * an empty String if two newlines appear in a row.
-         */
-        while ((line = input.readLine()) != null) {
-          contents.append(line);
-          contents.append(System.getProperty("line.separator"));
-        }
-      }
-    } catch (IOException ex) {
-      LOG.severe("fileName: " + filePath +"\n " + ex);
-    }
-    return contents.toString();
-  }
-
-  /**
-   * Reads text file line-wise each line will be an element in the resulting
-   * list
-   * 
-   * @param filePath
-   * @return
-   */
-  public List<String> readLinesFromTextFile(String filePath) {
-    List<String> lines = new ArrayList<>();
-    // ...checks on aFile are edited
-    File aFile = new File(filePath);
-    try {
-      // use buffering, reading one line at a time
-      // FileReader always assumes default encoding is OK!
-      // TODO be sure that the default encoding is OK!!!!! Otherwise
-      // change it
-
-      BufferedReader input = new BufferedReader(new FileReader(aFile));
-      try {
-        String line; // not declared within while loop
-        /*
-         * readLine is a bit quirky : it returns the content of a line MINUS 
the
-         * newline. it returns null only for the END of the stream. it returns
-         * an empty String if two newlines appear in a row.
-         */
-        while ((line = input.readLine()) != null) {
-          lines.add(line);
-        }
-      } finally {
-        input.close();
-      }
-    } catch (IOException ex) {
-      LOG.severe(ex.toString());
-    }
-    return lines;
-  }
-
-  private void appendtofile(String data, String filePath) {
-    try (BufferedWriter out = new BufferedWriter(new FileWriter(filePath, 
true))) {
-      out.write(data + "\n");
-    } catch (IOException e) {
-    }
-  }
-
-  public void createFolder(String path) {
-    if (!isFileOrDirectoryExists(path)) {
-      File file = new File(path);
-      try {
-        file.mkdirs();
-      } catch (Exception e) {
-        LOG.severe("Directory already exists or the file-system is read only");
-      }
-    }
-  }
-
-  public boolean isFileOrDirectoryExists(String path) {
-    File file = new File(path);
-    return file.exists();
-  }
-
-  /**
-   * Separates the directory-path from a full file-path
-   * 
-   * @param filePath
-   * @return
-   */
-  private String getDirPathfromFullPath(String filePath) {
-    String dirPath = "";
-    if (filePath != null) {
-      if (filePath.contains("\\"))
-        dirPath = filePath.substring(0, filePath.lastIndexOf("\\"));
-    }
-    return dirPath;
-  }
-
-  /**
-   * Returns the file-names of the files in a folder (not paths only names) 
(Not
-   * recursive)
-   * 
-   * @param dirPath
-   * @return
-   */
-  public ArrayList<String> getFileNamesInFolder(String dirPath) {
-    ArrayList<String> fileNames = new ArrayList<>();
-
-    File folder = new File(dirPath);
-    File[] listOfFiles = folder.listFiles();
-
-    for (File listOfFile : listOfFiles) {
-      if (listOfFile.isFile()) {
-        fileNames.add(listOfFile.getName());
-      } else if (listOfFile.isDirectory()) {
-        // TODO if I want to use it recursive I should handle this case
-      }
-    }
-    return fileNames;
-  }
-
-  public void deleteAllfilesinDir(String dirName) {
-    ArrayList<String> fileNameList = getFileNamesInFolder(dirName);
-    if (fileNameList != null) {
-      for (String s : fileNameList) {
-        try {
-          deleteFile(dirName + s);
-        } catch (IllegalArgumentException e) {
-          LOG.severe("No way to delete file: " + dirName + s + "\n" +
-                  e);
-        }
-      }
-    }
-  }
-
-  public void deleteFile(String filePath) throws IllegalArgumentException {
-    // A File object to represent the filename
-    File f = new File(filePath);
-    // Make sure the file or directory exists and isn't write protected
-    if (!f.exists())
-      throw new IllegalArgumentException("Delete: no such file or directory: "
-          + filePath);
-
-    if (!f.canWrite())
-      throw new IllegalArgumentException("Delete: write protected: " + 
filePath);
-    // If it is a directory, make sure it is empty
-    if (f.isDirectory()) {
-      String[] files = f.list();
-      if (files.length > 0)
-        throw new IllegalArgumentException("Delete: directory not empty: "
-            + filePath);
-    }
-    // Attempt to delete it
-    boolean success = f.delete();
-    if (!success)
-      throw new IllegalArgumentException("Delete: deletion failed");
-  }
-
-  public boolean deleteDirectory(File path) {
-    if (path.exists()) {
-      File[] files = path.listFiles();
-      for (File file : files) {
-        if (file.isDirectory()) {
-          deleteDirectory(file);
-        } else {
-          file.delete();
-        }
-      }
-    }
-    return (path.delete());
-  }
-
-  /**
-   * Returns the absolute-file-paths of the files in a directory (not 
recursive)
-   * 
-   * @param dirPath
-   * @return
-   */
-  public ArrayList<String> getFilePathsInFolder(String dirPath) {
-    ArrayList<String> filePaths = new ArrayList<>();
-
-    File folder = new File(dirPath);
-    File[] listOfFiles = folder.listFiles();
-    if (listOfFiles == null)
-      return null;
-    for (File listOfFile : listOfFiles) {
-      if (listOfFile.isFile()) {
-        filePaths.add(listOfFile.getAbsolutePath());
-      } else if (listOfFile.isDirectory()) {
-        // TODO if I want to use it recursive I should handle this case
-      }
-    }
-    return filePaths;
-  }
-
-  /**
-   * Returns the number of individual files in a directory (Not recursive)
-   * 
-   * @param dirPath
-   * @return
-   */
-  public int getFileNumInFolder(String dirPath) {
-    int num;
-    try {
-      num = getFileNamesInFolder(dirPath).size();
-    } catch (Exception e) {
-      num = 0;
-    }
-    return num;
-  }
-
-}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java
index bae6357..4fd8a17 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java
@@ -17,29 +17,19 @@
 
 package opennlp.tools.similarity.apps.utils;
 
-import java.awt.Graphics2D;
-import java.awt.geom.AffineTransform;
-import java.awt.image.BufferedImage;
-import java.io.File;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Hashtable;
 import java.util.List;
-import java.util.logging.Logger;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import javax.imageio.ImageIO;
-
 import org.apache.commons.lang.StringUtils;
 
 public class Utils {
 
-  private static final Logger LOG = Logger
-      .getLogger("opennlp.tools.similarity.apps.utils.Utils");
-
   protected static final ArrayList<String[]> CHARACTER_MAPPINGS = new 
ArrayList<>();
 
   static {
@@ -237,30 +227,6 @@ public class Utils {
     }
   }
 
-  public static boolean createResizedCopy(String originalImage,
-      String newImage, int scaledWidth, int scaledHeight) {
-    boolean retVal = true;
-    try {
-      File o = new File(originalImage);
-      BufferedImage bsrc = ImageIO.read(o);
-      BufferedImage bdest = new BufferedImage(scaledWidth, scaledHeight,
-          BufferedImage.TYPE_INT_RGB);
-
-      Graphics2D g = bdest.createGraphics();
-      AffineTransform at = AffineTransform.getScaleInstance(
-          (double) scaledWidth / bsrc.getWidth(),
-          (double) scaledHeight / bsrc.getHeight());
-      g.drawRenderedImage(bsrc, at);
-      ImageIO.write(bdest, "jpeg", new File(newImage));
-
-    } catch (Exception e) {
-      retVal = false;
-      LOG.severe("Failed creating thumbnail for image: " + originalImage + e);
-    }
-
-    return retVal;
-  }
-
   private static int minimum(int a, int b, int c) {
     int mi;
 
@@ -676,7 +642,7 @@ public class Utils {
 
   public static boolean isLatinWord(String word) {
     for (int i = 0; i < word.length(); i++) {
-      int asciiCode = (int) word.charAt(i);
+      int asciiCode = word.charAt(i);
       if (asciiCode > 128)
         return false;
     }
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java b/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
index 98f32bd..0a4554f 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
@@ -43,13 +43,11 @@ import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.ParagraphStream;
 import opennlp.tools.util.PlainTextByLineStream;
 
-/*
- * Command line tool for disambiguator supports MFS for now
- * 
+/**
+ * Command line tool for disambiguator supports MFS for now.
  */
 public class DisambiguatorTool extends CmdLineTool {
 
-  // TODO CmdLineTool should be an interface not abstract class
   @Override
   public String getName() {
     return "Disambiguator";
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
index 7cc7015..11d8f9e 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
@@ -21,6 +21,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
+import java.util.regex.Pattern;
 
 public class IMSWSDContextGenerator implements WSDContextGenerator {
 
@@ -48,13 +49,14 @@ public class IMSWSDContextGenerator implements WSDContextGenerator {
     // TODO consider the windowSize
     List<String> contextWords = new ArrayList<>();
 
+    final Pattern pattern = Pattern.compile("[^a-z_]");
+
     for (int i = 0; i < toks.length; i++) {
       if (lemmas != null) {
-        if (!WSDHelper.STOP_WORDS.contains(toks[i].toLowerCase()) && (index
-          != i)) {
+        if (!WSDHelper.STOP_WORDS.contains(toks[i].toLowerCase()) && (index != i)) {
 
-          String lemma = lemmas[i].toLowerCase().replaceAll("[^a-z_]", "")
-            .trim();
+          String lemma = lemmas[i].toLowerCase();
+          lemma = pattern.matcher(lemma).replaceAll("").trim();
 
           if (lemma.length() > 1) {
             contextWords.add(lemma);
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
index 8c52c9d..6e13523 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
@@ -24,6 +24,7 @@ import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
+import java.util.regex.Pattern;
 
 import net.sf.extjwnl.data.Synset;
 
@@ -38,14 +39,16 @@ public class OSCCWSDContextGenerator implements WSDContextGenerator {
     // TODO consider windowSize
     ArrayList<String> contextClusters = new ArrayList<>();
 
+    final Pattern pattern = Pattern.compile("[^a-z_]");
+
     for (int i = 0; i < toks.length; i++) {
       if (lemmas != null) {
 
         if (!WSDHelper.STOP_WORDS.contains(toks[i].toLowerCase()) && (index
           != i)) {
 
-          String lemma = lemmas[i].toLowerCase().replaceAll("[^a-z_]", "")
-            .trim();
+          String lemma = lemmas[i].toLowerCase();
+          lemma = pattern.matcher(lemma).replaceAll("").trim();
 
           WordPOS word = new WordPOS(lemma, tags[i]);
 
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java
index e65bccb..446b46c 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java
@@ -98,8 +98,8 @@ public class WSDDefaultParameters extends WSDParameters {
     this.trainingDataDirectory = trainingDataDirectory;
   }
 
-  @Override public boolean areValid() {
-    // TODO recheck this pattern
+  @Override
+  public boolean areValid() {
     return true;
   }
 
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java
index a51b656..90afbbf 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java
@@ -126,7 +126,8 @@ public class WSDModel extends BaseModel {
     return true;
   }
 
-  @Override protected void validateArtifactMap() throws InvalidFormatException {
+  @Override
+  protected void validateArtifactMap() throws InvalidFormatException {
     super.validateArtifactMap();
 
     if (!(artifactMap.get(WSD_MODEL_ENTRY_NAME) instanceof AbstractModel)) {
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
index d8667d2..fc060f3 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
@@ -61,8 +61,7 @@ public class WSDSampleStream extends FilterObjectStream<String, WSDSample> {
       } catch (InvalidFormatException e) {
 
         if (LOG.isLoggable(Level.WARNING)) {
-          LOG
-              .warning("Error during parsing, ignoring sentence: " + sentence);
+          LOG.warning("Error during parsing, ignoring sentence: " + sentence);
         }
 
         sample = null; // new WSDSample(new String[]{}, new String[]{},0);
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorFactory.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorFactory.java
index b222f52..f75d9b7 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorFactory.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorFactory.java
@@ -50,7 +50,8 @@ public class WSDisambiguatorFactory extends BaseToolFactory {
     }
   }
 
-  @Override public void validateArtifactMap() throws InvalidFormatException {
+  @Override
+  public void validateArtifactMap() throws InvalidFormatException {
     // no additional artifacts
   }
 
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
index c8aa549..b70bd42 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
@@ -115,7 +115,8 @@ public class WSDisambiguatorME extends WSDisambiguator {
     return surroundingWordsModel;
   }
 
-  @Override public String disambiguate(WSDSample sample) {
+  @Override
+  public String disambiguate(WSDSample sample) {
     if (WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {
       String wordTag = sample.getTargetWordTag();
 
@@ -202,6 +203,7 @@ public class WSDisambiguatorME extends WSDisambiguator {
    * @param index            : the index of the word to disambiguate
    * @return an array of the senses of the word to disambiguate
    */
+  @Override
   public String disambiguate(String[] tokenizedContext, String[] tokenTags,
     String[] lemmas, int index) {
     return disambiguate(
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
index 7ada773..5a2ff78 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
@@ -36,7 +36,6 @@ public class WordPOS {
   private List<String> stems;
   private final POS pos;
   private String posTag;
-  public boolean isTarget = false;
 
   public WordPOS(String word, String tag) throws IllegalArgumentException {
     if (word == null || tag == null) {
@@ -82,8 +81,7 @@ public class WordPOS {
     try {
       indexWord = WSDHelper.getDictionary().lookupIndexWord(pos, word);
       if (indexWord == null) {
-        WSDHelper
-            .print("NULL synset probably a POS tagger mistake ! :: [POS] : "
+        WSDHelper.print("NULL synset probably a POS tagger mistake ! :: [POS] : "
                 + pos.getLabel() + " [word] : " + word);
         return null;
       }
diff --git a/tf-ner-poc/src/main/java/org/apache/opennlp/ModelUtil.java b/tf-ner-poc/src/main/java/org/apache/opennlp/ModelUtil.java
index 76e5c8a..1f5b2d2 100644
--- a/tf-ner-poc/src/main/java/org/apache/opennlp/ModelUtil.java
+++ b/tf-ner-poc/src/main/java/org/apache/opennlp/ModelUtil.java
@@ -29,21 +29,21 @@ public class ModelUtil {
   public static Path writeModelToTmpDir(InputStream modelIn) throws IOException {
     Path tmpDir = Files.createTempDirectory("opennlp2");
 
-    ZipInputStream zis = new ZipInputStream(modelIn);
-    ZipEntry zipEntry = zis.getNextEntry();
-    while(zipEntry != null){
-      Path newFile = tmpDir.resolve(zipEntry.getName());
+    try (ZipInputStream zis = new ZipInputStream(modelIn)) {
+      ZipEntry zipEntry = zis.getNextEntry();
+      while(zipEntry != null){
+        Path newFile = tmpDir.resolve(zipEntry.getName());
 
-      Files.createDirectories(newFile.getParent());
-      Files.copy(zis, newFile);
+        Files.createDirectories(newFile.getParent());
+        Files.copy(zis, newFile);
 
-      // TODO: How to delete the tmp directory after we are done loading from it ?!
-      newFile.toFile().deleteOnExit();
+        // TODO: How to delete the tmp directory after we are done loading from it ?!
+        newFile.toFile().deleteOnExit();
 
-      zipEntry = zis.getNextEntry();
+        zipEntry = zis.getNextEntry();
+      }
+      zis.closeEntry();
     }
-    zis.closeEntry();
-    zis.close();
 
     return tmpDir;
   }
diff --git a/tf-ner-poc/src/test/java/org/apache/opennlp/namefinder/PredictTest.java b/tf-ner-poc/src/test/java/org/apache/opennlp/namefinder/PredictTest.java
index 4c7b906..0bdae56 100644
--- a/tf-ner-poc/src/test/java/org/apache/opennlp/namefinder/PredictTest.java
+++ b/tf-ner-poc/src/test/java/org/apache/opennlp/namefinder/PredictTest.java
@@ -19,20 +19,20 @@
 
 package org.apache.opennlp.namefinder;
 
-import org.junit.jupiter.api.Disabled;
+import java.io.IOException;
+import java.nio.file.Path;
+
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.DisabledIfSystemProperty;
 
 import opennlp.tools.util.Span;
 
-import java.io.IOException;
-import java.nio.file.Path;
-
 class PredictTest {
 
-  @Test
-  @Disabled // TODO This test is not platform neutral and, for instance, fails with
-    //  "Cannot find TensorFlow native library for OS: darwin, architecture: aarch64"
-    //  We need JUnit 5 in the sandbox to circumvent this, so it can be run in supported environments
+  // Note: Atm, this test is not platform neutral and, for instance, fails with
+  //  "Cannot find TensorFlow native library for OS: darwin, architecture: aarch64"
+  // That's why it is disabled via the architecture system property.
+  @Test @DisabledIfSystemProperty(named = "os.arch", matches = "aarch64")
   void testFindTokens() throws IOException {
 
     // can be changed to File or InputStream

[opennlp-sandbox] 01/01: sanitize some TODOs and unhealthy code

Reply via email to