(opennlp) branch main updated: OPENNLP-1447: Reenable Cmdline Tool execution tests (#720)

mawiesne Sun, 22 Dec 2024 22:17:17 -0800

This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git



The following commit(s) were added to refs/heads/main by this push:
     new ed2682cc OPENNLP-1447: Reenable Cmdline Tool execution tests (#720)
ed2682cc is described below

commit ed2682ccdb9553970acf683fe0f724d5c57e7c9d
Author: Martin Wiesner <[email protected]>
AuthorDate: Mon Dec 23 07:15:23 2024 +0100

    OPENNLP-1447: Reenable Cmdline Tool execution tests (#720)
    
    - removes @Disabled from multiple cmdline execution tests
    - adjusts TokenizerTrainerTool to handle existing yet "empty" abb-dict files better
---
 .../cmdline/tokenizer/TokenizerTrainerTool.java    |  12 ++-
 .../tools/cmdline/TokenNameFinderToolTest.java     | 105 ++++++++++----------
 .../tokenizer/TokenizerTrainerToolTest.java        | 107 ++++++++++++---------
 opennlp-tools/src/test/resources/logback-test.xml  |   6 +-
 4 files changed, 131 insertions(+), 99 deletions(-)

diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
index f51b8c67..4f5389ab 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
@@ -21,6 +21,7 @@ import java.io.BufferedInputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.InputStream;
 
 import opennlp.tools.cmdline.AbstractTrainerTool;
 import opennlp.tools.cmdline.CmdLineUtil;
@@ -33,6 +34,7 @@ import opennlp.tools.ml.TrainerFactory.TrainerType;
 import opennlp.tools.tokenize.TokenSample;
 import opennlp.tools.tokenize.TokenizerFactory;
 import opennlp.tools.tokenize.TokenizerModel;
+import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.TrainingParameters;
 import opennlp.tools.util.model.ModelUtil;
 
@@ -53,9 +55,15 @@ public final class TokenizerTrainerTool
 
   static Dictionary loadDict(File f) throws IOException {
     Dictionary dict = null;
-    if (f != null) {
+    if (f != null && f.exists()) {
       CmdLineUtil.checkInputFile("abb dict", f);
-      dict = new Dictionary(new BufferedInputStream(new FileInputStream(f)));
+      try (InputStream in = new BufferedInputStream(new FileInputStream(f))) {
+        if (in.available() == 0) {
+          throw new InvalidFormatException("Encountered an empty dictionary 
file?!");
+        } else {
+          dict = new Dictionary(in);
+        }
+      }
     }
     return dict;
   }
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
index e8594c59..190fa9d9 100644
--- 
a/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
@@ -19,19 +19,22 @@ package opennlp.tools.cmdline;
 
 import java.io.BufferedOutputStream;
 import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
-import java.io.PrintStream;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Disabled;
+import ch.qos.logback.classic.Level;
+import ch.qos.logback.classic.Logger;
+import ch.qos.logback.classic.LoggerContext;
+import nl.altindag.log.LogCaptor;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
+import org.slf4j.LoggerFactory;
 
 import opennlp.tools.cmdline.namefind.TokenNameFinderTool;
 import opennlp.tools.namefind.NameFinderME;
@@ -44,75 +47,80 @@ import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 import opennlp.tools.util.TrainingParameters;
 
-public class TokenNameFinderToolTest {
-
-  @Test
-  //TODO OPENNLP-1447
-  @Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
-          "We need to find a way to redirect log output (i.e. implement " +
-          "a custom log adapter and plug it in, if we want to do such tests.")
-  void run() throws IOException {
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
-    File model1 = trainModel();
+public class TokenNameFinderToolTest {
 
-    String[] args = new String[] {model1.getAbsolutePath()};
+  /*
+   * Programmatically raises the log level of the name finder cmdline package to INFO,
+   * so that the log messages emitted by the tool can be captured and verified.
+   */
+  @BeforeAll
+  public static void prepare() {
+    LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
+    Logger logger = context.getLogger("opennlp.tools.cmdline.namefind");
+    logger.setLevel(Level.INFO);
+  }
 
-    final String in = "It is Stefanie Schmidt.\n\nNothing in this sentence.";
-    InputStream stream = new 
ByteArrayInputStream(in.getBytes(StandardCharsets.UTF_8));
+  /*
+   * Programmatically restores the default log level (= OFF) after all tests have run.
+   */
+  @AfterAll
+  public static void cleanup() {
+    LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
+    Logger logger = context.getLogger("opennlp.tools.cmdline.namefind");
+    logger.setLevel(Level.OFF);
+  }
 
-    System.setIn(stream);
+  @Test
+  void run() throws IOException {
+    try (LogCaptor logCaptor = LogCaptor.forClass(TokenNameFinderTool.class)) {
+      File model1 = trainModel();
+      String[] args = new String[] {model1.getAbsolutePath()};
 
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    PrintStream ps = new PrintStream(baos);
-    System.setOut(ps);
+      final String in = "It is Stefanie Schmidt.\n";
+      InputStream stream = new 
ByteArrayInputStream(in.getBytes(StandardCharsets.UTF_8));
 
-    TokenNameFinderTool tool = new TokenNameFinderTool();
-    tool.run(args);
+      System.setIn(stream);
 
-    final String content = baos.toString(StandardCharsets.UTF_8);
-    Assertions.assertTrue(content.contains("It is <START:person> Stefanie 
Schmidt. <END>"));
+      TokenNameFinderTool tool = new TokenNameFinderTool();
+      tool.run(args);
 
-    Assertions.assertTrue(model1.delete());
+      assertEquals(1, logCaptor.getInfoLogs().size());
+      final String content = logCaptor.getInfoLogs().get(0);
+      logCaptor.clearLogs();
+      assertEquals("It is <START:person> Stefanie Schmidt. <END>", content);
+      assertTrue(model1.delete());
+    }
   }
 
   @Test
   void invalidModel() {
-
-    Assertions.assertThrows(TerminateToolException.class, () -> {
-
+    assertThrows(TerminateToolException.class, () -> {
       String[] args = new String[] {"invalidmodel.bin"};
-
       TokenNameFinderTool tool = new TokenNameFinderTool();
       tool.run(args);
 
     });
-
-
   }
 
   @Test
-  //TODO OPENNLP-1447
-  @Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
-          "We need to find a way to redirect log output (i.e. implement " +
-          "a custom log adapter and plug it in, if we want to do such tests.")
   void usage() {
+    try (LogCaptor logCaptor = LogCaptor.forClass(TokenNameFinderTool.class)) {
+      String[] args = new String[] {};
 
-    String[] args = new String[] {};
-
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    PrintStream ps = new PrintStream(baos);
-    System.setOut(ps);
-
-    TokenNameFinderTool tool = new TokenNameFinderTool();
-    tool.run(args);
-
-    final String content = baos.toString(StandardCharsets.UTF_8);
-    Assertions.assertEquals(tool.getHelp(), content.trim());
+      TokenNameFinderTool tool = new TokenNameFinderTool();
+      tool.run(args);
 
+      assertEquals(1, logCaptor.getInfoLogs().size());
+      final String content = logCaptor.getInfoLogs().get(0);
+      assertEquals(tool.getHelp(), content.trim());
+    }
   }
 
   private File trainModel() throws IOException {
-
     ObjectStream<String> lineStream =
         new PlainTextByLineStream(new MockInputStreamFactory(
             new 
File("opennlp/tools/namefind/AnnotatedSentencesWithTypes.txt")),
@@ -123,7 +131,6 @@ public class TokenNameFinderToolTest {
     params.put(TrainingParameters.CUTOFF_PARAM, 1);
 
     TokenNameFinderModel model;
-
     TokenNameFinderFactory nameFinderFactory = new TokenNameFinderFactory();
 
     try (ObjectStream<NameSample> sampleStream = new 
NameSampleDataStream(lineStream)) {
@@ -132,12 +139,10 @@ public class TokenNameFinderToolTest {
     }
 
     File modelFile = Files.createTempFile("model", ".bin").toFile();
-
     try (OutputStream modelOut =
              new BufferedOutputStream(new FileOutputStream(modelFile))) {
       model.serialize(modelOut);
     }
-
     return modelFile;
   }
 
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerToolTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerToolTest.java
index 65993fb0..6059a2b1 100644
--- 
a/opennlp-tools/src/test/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerToolTest.java
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerToolTest.java
@@ -18,33 +18,40 @@
 package opennlp.tools.cmdline.tokenizer;
 
 import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.PrintStream;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.StandardOpenOption;
 
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Disabled;
+import ch.qos.logback.classic.Level;
+import ch.qos.logback.classic.Logger;
+import ch.qos.logback.classic.LoggerContext;
+import nl.altindag.log.LogCaptor;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
+import org.slf4j.LoggerFactory;
 
 import opennlp.tools.AbstractTempDirTest;
+import opennlp.tools.cmdline.CmdLineUtil;
 import opennlp.tools.cmdline.StreamFactoryRegistry;
 import opennlp.tools.cmdline.TerminateToolException;
 import opennlp.tools.dictionary.Dictionary;
 import opennlp.tools.util.InvalidFormatException;
 
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
 /**
  * Tests for the {@link TokenizerTrainerTool} class.
  */
 public class TokenizerTrainerToolTest extends AbstractTempDirTest {
 
-  private TokenizerTrainerTool tokenizerTrainerTool;
-
   private final String sampleSuccessData =
       "Pierre Vinken<SPLIT>, 61 years old<SPLIT>, will join the board as a 
nonexecutive " +
           "director Nov. 29<SPLIT>.\n" +
@@ -54,10 +61,31 @@ public class TokenizerTrainerToolTest extends 
AbstractTempDirTest {
 
   private final String sampleFailureData = "It is Fail Test Case.\n\nNothing 
in this sentence.";
 
+  /*
+   * Programmatically raises the log level of the CmdLineUtil logger to INFO,
+   * so that the log messages emitted during a tool run can be captured and verified.
+   */
+  @BeforeAll
+  public static void prepare() {
+    LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
+    Logger logger = context.getLogger("opennlp.tools.cmdline.CmdLineUtil");
+    logger.setLevel(Level.INFO);
+  }
+
+  /*
+   * Programmatically restores the default log level (= OFF) after all tests have run.
+   */
+  @AfterAll
+  public static void cleanup() {
+    LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
+    Logger logger = context.getLogger("opennlp.tools.cmdline.CmdLineUtil");
+    logger.setLevel(Level.OFF);
+  }
+
   @Test
   public void testGetShortDescription() {
-    tokenizerTrainerTool = new TokenizerTrainerTool();
-    Assertions.assertEquals("Trainer for the learnable tokenizer",
+    TokenizerTrainerTool tokenizerTrainerTool = new TokenizerTrainerTool();
+    assertEquals("Trainer for the learnable tokenizer",
             tokenizerTrainerTool.getShortDescription());
   }
 
@@ -65,44 +93,38 @@ public class TokenizerTrainerToolTest extends 
AbstractTempDirTest {
   public void testLoadDictHappyCase() throws IOException {
     File dictFile = new File("lang/ga/abb_GA.xml");
     Dictionary dict = TokenizerTrainerTool.loadDict(dictFile);
-    Assertions.assertNotNull(dict);
+    assertNotNull(dict);
   }
 
   @Test
   public void testLoadDictFailCase() {
-    Assertions.assertThrows(InvalidFormatException.class , () ->
+    assertThrows(InvalidFormatException.class , () ->
             TokenizerTrainerTool.loadDict(prepareDataFile("")));
   }
 
-  //TODO OPENNLP-1447
-  @Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
-          "We need to find a way to redirect log output (i.e. implement " +
-          "a custom log adapter and plug it in, if we want to do such tests.")
+  @Test
   public void testTestRunHappyCase() throws IOException {
-    File model = tempDir.resolve("model-en.bin").toFile();
-
-    String[] args =
-        new String[] { "-model" , model.getAbsolutePath() , "-alphaNumOpt" , 
"false" , "-lang" , "en" ,
-            "-data" , String.valueOf(prepareDataFile(sampleSuccessData)) , 
"-encoding" , "UTF-8" };
-
-    InputStream stream = new 
ByteArrayInputStream(sampleSuccessData.getBytes(StandardCharsets.UTF_8));
-    System.setIn(stream);
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    PrintStream ps = new PrintStream(baos);
-    System.setOut(ps);
-
-    tokenizerTrainerTool = new TokenizerTrainerTool();
-    tokenizerTrainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);
-
-    final String content = baos.toString(StandardCharsets.UTF_8);
-    Assertions.assertTrue(content.contains("Number of Event Tokens: 171"));
-    Assertions.assertTrue(model.delete());
+    try (LogCaptor logCaptor = LogCaptor.forClass(CmdLineUtil.class)) {
+      File model = tempDir.resolve("model-en.bin").toFile();
+  
+      String[] args =
+          new String[] { "-model" , model.getAbsolutePath() , "-alphaNumOpt" , 
"false" , "-lang" , "en" ,
+              "-data" , String.valueOf(prepareDataFile(sampleSuccessData)) , 
"-encoding" , "UTF-8" };
+  
+      InputStream stream = new 
ByteArrayInputStream(sampleSuccessData.getBytes(StandardCharsets.UTF_8));
+      System.setIn(stream);
+  
+      TokenizerTrainerTool trainerTool = new TokenizerTrainerTool();
+      trainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);
+  
+      assertEquals(3, logCaptor.getInfoLogs().size());
+      final String content = logCaptor.getInfoLogs().get(2);
+      assertTrue(content.startsWith("Wrote tokenizer model to path:"));
+      assertTrue(model.delete());
+    }
   }
 
-  //TODO OPENNLP-1447
-  @Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
-          "We need to find a way to redirect log output (i.e. implement " +
-          "a custom log adapter and plug it in, if we want to do such tests.")
+  @Test
   public void testTestRunExceptionCase() throws IOException {
     File model = tempDir.resolve("model-en.bin").toFile();
     model.deleteOnExit();
@@ -111,17 +133,10 @@ public class TokenizerTrainerToolTest extends 
AbstractTempDirTest {
         new String[] { "-model" , model.getAbsolutePath() , "-alphaNumOpt" , 
"false" , "-lang" , "en" ,
             "-data" , String.valueOf(prepareDataFile(sampleFailureData)) , 
"-encoding" , "UTF-8" };
 
-    InputStream stream = new 
ByteArrayInputStream(sampleFailureData.getBytes(StandardCharsets.UTF_8));
-    System.setIn(stream);
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    PrintStream ps = new PrintStream(baos);
-    System.setOut(ps);
-
-    Assertions.assertThrows(TerminateToolException.class , () -> {
-      tokenizerTrainerTool = new TokenizerTrainerTool();
-      tokenizerTrainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);
+    assertThrows(TerminateToolException.class , () -> {
+      TokenizerTrainerTool trainerTool = new TokenizerTrainerTool();
+      trainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);
     });
-
   }
 
   // This is guaranteed to be deleted after the test finishes.
diff --git a/opennlp-tools/src/test/resources/logback-test.xml 
b/opennlp-tools/src/test/resources/logback-test.xml
index b3cbcf20..1baae291 100644
--- a/opennlp-tools/src/test/resources/logback-test.xml
+++ b/opennlp-tools/src/test/resources/logback-test.xml
@@ -23,12 +23,16 @@
 
     <appender name="consoleAppender" 
class="ch.qos.logback.core.ConsoleAppender">
         <encoder>
-            <pattern>%date{HH:mm:ss.SSS} [%thread] %-5level 
%class{36}.%method:%line - %msg%n</pattern>
+            <pattern>%date{HH:mm:ss.SSS} [%thread] %-4level 
%class{36}.%method:%line - %msg%n</pattern>
         </encoder>
     </appender>
 
     <logger name="opennlp" level="off"/>
 
+    <logger name="opennlp.tools.cmdline.namefind" level="off"/>
+    
+    <logger name="opennlp.tools.cmdline.CmdLineUtil" level="off"/>
+
     <root level="off">
         <appender-ref ref="consoleAppender" />
     </root>

(opennlp) branch main updated: OPENNLP-1447: Reenable Cmdline Tool execution tests (#720)

Reply via email to