This is an automated email from the ASF dual-hosted git repository. rzo1 pushed a commit to branch OPENNLP-1589 in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit 43e6d393fe615a5d56b46a755a5ccec9914b8a74 Author: Richard Zowalla <r...@apache.org> AuthorDate: Thu Jul 4 14:00:23 2024 +0200 OPENNLP-1589 - Use Arrays.equals(...) --- .../tools/namefind/TokenNameFinderFactory.java | 4 +-- .../util/featuregen/CachedFeatureGenerator.java | 13 ++++----- .../featuregen/CachedFeatureGeneratorTest.java | 34 ++++++++++++++++++---- 3 files changed, 36 insertions(+), 15 deletions(-) diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java index 6f31f0fe..da7277a4 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java @@ -212,13 +212,13 @@ public class TokenNameFinderFactory extends BaseToolFactory { AdaptiveFeatureGenerator featureGenerator = createFeatureGenerators(); if (featureGenerator == null) { - featureGenerator = new CachedFeatureGenerator( + featureGenerator = new CachedFeatureGenerator(new AggregatedFeatureGenerator( new WindowFeatureGenerator(new TokenFeatureGenerator(), 2, 2), new WindowFeatureGenerator(new TokenClassFeatureGenerator(true), 2, 2), new OutcomePriorFeatureGenerator(), new PreviousMapFeatureGenerator(), new BigramNameFeatureGenerator(), - new SentenceFeatureGenerator(true, false)); + new SentenceFeatureGenerator(true, false))); } return new DefaultNameContextGenerator(featureGenerator); diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGenerator.java index e26ffbdd..560c1e4a 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGenerator.java @@ -19,6 +19,7 @@ package opennlp.tools.util.featuregen; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import opennlp.tools.util.Cache; @@ -37,15 +38,13 @@ public class CachedFeatureGenerator implements AdaptiveFeatureGenerator { private long numberOfCacheHits; private long numberOfCacheMisses; - @Deprecated - public CachedFeatureGenerator(AdaptiveFeatureGenerator... generators) { - this.generator = new AggregatedFeatureGenerator(generators); - contextsCache = new Cache<>(100); + public CachedFeatureGenerator(AdaptiveFeatureGenerator generator, int cacheSize) { + this.generator = generator; + contextsCache = new Cache<>(cacheSize); } public CachedFeatureGenerator(AdaptiveFeatureGenerator generator) { - this.generator = generator; - contextsCache = new Cache<>(100); + this(generator, 100); } @Override @@ -54,7 +53,7 @@ public class CachedFeatureGenerator implements AdaptiveFeatureGenerator { List<String> cacheFeatures; - if (tokens == prevTokens) { + if (Arrays.equals(prevTokens, tokens)) { cacheFeatures = contextsCache.get(index); if (cacheFeatures != null) { diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java index 0dcb10a8..c17549c3 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorTest.java @@ -48,6 +48,31 @@ public class CachedFeatureGeneratorTest { features = new ArrayList<>(); } + @Test + void testCachingOfRealWorldSentence() { + CachedFeatureGenerator generator = new CachedFeatureGenerator(identityGenerator); + final String[] sentence = "He belongs to Apache \n Software Foundation .".split(" "); + int testIndex = 0; + + // after this call features are cached for testIndex + generator.createFeatures(features, sentence, testIndex, null); + Assertions.assertEquals(1, generator.getNumberOfCacheMisses()); + Assertions.assertEquals(0, generator.getNumberOfCacheHits()); + + generator.createFeatures(features, sentence, testIndex, null); + Assertions.assertEquals(1, generator.getNumberOfCacheMisses()); + Assertions.assertEquals(1, generator.getNumberOfCacheHits()); + + generator.createFeatures(features, sentence, testIndex + 1, null); + Assertions.assertEquals(2, generator.getNumberOfCacheMisses()); + Assertions.assertEquals(1, generator.getNumberOfCacheHits()); + + generator.createFeatures(features, sentence, testIndex + 1, null); + Assertions.assertEquals(2, generator.getNumberOfCacheMisses()); + Assertions.assertEquals(2, generator.getNumberOfCacheHits()); + + } + /** * Tests if cache works for one sentence and two different token indexes. */ @@ -85,7 +110,7 @@ public class CachedFeatureGeneratorTest { int testIndex2 = testIndex + 1; - generator.createFeatures(features, Arrays.copyOf(testSentence1, testSentence1.length), testIndex2, null); + generator.createFeatures(features, Arrays.copyOf(testSentence1, testSentence1.length), testIndex2, null); Assertions.assertEquals(2, generator.getNumberOfCacheMisses()); Assertions.assertEquals(1, generator.getNumberOfCacheHits()); @@ -115,7 +140,7 @@ public class CachedFeatureGeneratorTest { features.clear(); // use another sentence but same index - generator.createFeatures(features, testSentence2, testIndex, null); + generator.createFeatures(features, Arrays.copyOf(testSentence2, testSentence2.length), testIndex, null); Assertions.assertEquals(2, generator.getNumberOfCacheMisses()); Assertions.assertEquals(0, generator.getNumberOfCacheHits()); @@ -127,10 +152,7 @@ public class CachedFeatureGeneratorTest { // check if features are really cached final String expectedToken = testSentence2[testIndex]; - - testSentence2[testIndex] = null; - - generator.createFeatures(features, testSentence2, testIndex, null); + generator.createFeatures(features, Arrays.copyOf(testSentence2, testSentence2.length), testIndex, null); Assertions.assertTrue(features.contains(expectedToken)); Assertions.assertEquals(1, features.size());