This is an automated email from the ASF dual-hosted git repository. dweiss pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/solr.git
commit 4ac15e221bbd0a7ba747a61907f0ce1bc5af876d Author: Dawid Weiss <[email protected]> AuthorDate: Sun Jun 5 21:48:39 2022 +0200 SOLR-16225: Upgrade Carrot2 to 4.4.2 and HPPC to 0.9.1 (#884) --- solr/CHANGES.txt | 2 ++ solr/licenses/carrot2-core-4.0.4.jar.sha1 | 1 - solr/licenses/carrot2-core-4.4.2.jar.sha1 | 1 + solr/licenses/hppc-0.8.2.jar.sha1 | 1 - solr/licenses/hppc-0.9.1.jar.sha1 | 1 + solr/modules/clustering/build.gradle | 1 + .../testCustomLanguageResources/english.label-filters.json | 9 +++++++++ .../conf/testCustomLanguageResources/english.stoplabels.utf8 | 1 - .../conf/testCustomLanguageResources/english.stopwords.utf8 | 2 -- .../testCustomLanguageResources/english.word-filters.json | 10 ++++++++++ ...ClusteringComponentDistributedTest-testLingoAlgorithm.txt | 11 +++++------ .../ClusteringComponentDistributedTest-testStcAlgorithm.txt | 1 + .../ClusteringComponentTest-testLingoAlgorithm.txt | 11 +++++------ .../clustering/ClusteringComponentTest-testStcAlgorithm.txt | 3 ++- .../clustering/ClusteringComponentDistributedTest.java | 2 ++ .../solr/handler/clustering/ClusteringComponentTest.java | 2 ++ .../solr/handler/clustering/ResourceCheckAlgorithm.java | 12 +++++++----- versions.lock | 4 ++-- versions.props | 4 ++-- 19 files changed, 52 insertions(+), 27 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 7227324bc55..dd00dfbdc51 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -48,6 +48,8 @@ Improvements * SOLR-16181: Initialize the LogWatcher earlier in CoreContainer#load() (janhoy) +* SOLR-16225: Upgrade dependencies (Carrot2, HPPC) (Dawid Weiss) + Optimizations --------------------- * SOLR-16120: Optimise hl.fl expansion. (Christine Poerschke, David Smiley, Mike Drob) diff --git a/solr/licenses/carrot2-core-4.0.4.jar.sha1 b/solr/licenses/carrot2-core-4.0.4.jar.sha1 deleted file mode 100644 index 0d26e39096c..00000000000 --- a/solr/licenses/carrot2-core-4.0.4.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -fb60ab80cfd69abe6cad1939f24bd5210501b177 diff --git a/solr/licenses/carrot2-core-4.4.2.jar.sha1 b/solr/licenses/carrot2-core-4.4.2.jar.sha1 new file mode 100644 index 00000000000..9bdf9a6c06e --- /dev/null +++ b/solr/licenses/carrot2-core-4.4.2.jar.sha1 @@ -0,0 +1 @@ +20f170227ddbbf1e012bf504a3ab21f5696f19d2 diff --git a/solr/licenses/hppc-0.8.2.jar.sha1 b/solr/licenses/hppc-0.8.2.jar.sha1 deleted file mode 100644 index a73358b9c66..00000000000 --- a/solr/licenses/hppc-0.8.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -ccb3ef933ead6b5d766fa571582ddb9b447e48c4 diff --git a/solr/licenses/hppc-0.9.1.jar.sha1 b/solr/licenses/hppc-0.9.1.jar.sha1 new file mode 100644 index 00000000000..5cedaaa6433 --- /dev/null +++ b/solr/licenses/hppc-0.9.1.jar.sha1 @@ -0,0 +1 @@ +4bf4c51e06aec600894d841c4c004566b20dd357 diff --git a/solr/modules/clustering/build.gradle b/solr/modules/clustering/build.gradle index fa0811dda7d..652a7d60528 100644 --- a/solr/modules/clustering/build.gradle +++ b/solr/modules/clustering/build.gradle @@ -29,6 +29,7 @@ dependencies { implementation 'org.slf4j:slf4j-api' testImplementation project(':solr:test-framework') + testImplementation 'org.apache.lucene:lucene-test-framework' testImplementation 'com.carrotsearch.randomizedtesting:randomizedtesting-runner' testImplementation 'junit:junit' testImplementation 'org.hamcrest:hamcrest' diff --git a/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.label-filters.json b/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.label-filters.json new file mode 100644 index 00000000000..7c805464b2b --- /dev/null +++ b/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.label-filters.json @@ -0,0 +1,9 @@ +{ + "glob": [ + ], + "regexp": [ + "ba.+" + ], + "exact": [ + ] +} diff --git a/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.stoplabels.utf8 b/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.stoplabels.utf8 deleted file mode 100644 index 1471282e6f8..00000000000 --- a/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.stoplabels.utf8 +++ /dev/null @@ -1 +0,0 @@ -ba.+ \ No newline at end of file diff --git a/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.stopwords.utf8 b/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.stopwords.utf8 deleted file mode 100644 index 3bd1f0e2974..00000000000 --- a/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.stopwords.utf8 +++ /dev/null @@ -1,2 +0,0 @@ -foo -bar diff --git a/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.word-filters.json b/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.word-filters.json new file mode 100644 index 00000000000..7746028055e --- /dev/null +++ b/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.word-filters.json @@ -0,0 +1,10 @@ +{ + "exact": [ + "foo", + "bar" + ], + "glob": [ + ], + "regexp": [ + ] +} diff --git a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testLingoAlgorithm.txt b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testLingoAlgorithm.txt index ee8b3842d0d..3f35f30ecf5 100644 --- a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testLingoAlgorithm.txt +++ b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testLingoAlgorithm.txt @@ -1,17 +1,16 @@ - Knowledge Discovery [6] -- Patterns [6] - Data Mining Applications [5] - Statistical Analysis [4] +- Analysis Techniques [3] - Computer [3] -- Creating [3] - Data Mining Solutions [3] - Known as Data Mining [3] - Text Mining [3] -- Databases KDD [2] -- Extraction of Hidden Predictive [2] +- Courses [2] +- Extraction of Hidden Predictive Information [2] - Information from Large [2] +- Machine Learning [2] +- Neural Networks [2] - Open [2] - Powers [2] - Searching [2] -- Tools [2] -- Other topics [1] \ No newline at end of file diff --git a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testStcAlgorithm.txt b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testStcAlgorithm.txt index 1ee2114eeb2..aef40034692 100644 --- a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testStcAlgorithm.txt +++ b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testStcAlgorithm.txt @@ -7,4 +7,5 @@ - Businesses [4] - Predictive [4] - Process [4] +- Hidden Predictive Information [2] - Other topics [2] diff --git a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testLingoAlgorithm.txt b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testLingoAlgorithm.txt index 107b60f82f2..3f35f30ecf5 100644 --- a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testLingoAlgorithm.txt +++ b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testLingoAlgorithm.txt @@ -1,17 +1,16 @@ - Knowledge Discovery [6] -- Patterns [6] - Data Mining Applications [5] - Statistical Analysis [4] +- Analysis Techniques [3] - Computer [3] -- Creating [3] - Data Mining Solutions [3] - Known as Data Mining [3] - Text Mining [3] -- Databases KDD [2] -- Extraction of Hidden Predictive [2] +- Courses [2] +- Extraction of Hidden Predictive Information [2] - Information from Large [2] +- Machine Learning [2] +- Neural Networks [2] - Open [2] - Powers [2] - Searching [2] -- Tools [2] -- Other topics [1] diff --git a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testStcAlgorithm.txt b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testStcAlgorithm.txt index 1ee2114eeb2..a6d985098e2 100644 --- a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testStcAlgorithm.txt +++ b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testStcAlgorithm.txt @@ -4,7 +4,8 @@ - Analysis [5] - Applications [5] - Software [5] -- Businesses [4] +- Business [4] - Predictive [4] - Process [4] +- Hidden Predictive Information [2] - Other topics [2] diff --git a/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest.java b/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest.java index 1a9398a6ba5..743bbfa2b86 100644 --- a/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest.java +++ b/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.List; import java.util.function.Consumer; import java.util.stream.Collectors; +import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.solr.BaseDistributedSearchTestCase; import org.apache.solr.SolrTestCaseJ4.SuppressSSL; import org.apache.solr.client.solrj.response.Cluster; @@ -72,6 +73,7 @@ public class ClusteringComponentDistributedTest extends BaseDistributedSearchTes })); } + @LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/carrot2/carrot2/issues/149") @Test @ShardsFixed(num = 2) public void testStcAlgorithm() throws Exception { diff --git a/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java b/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java index 0c6c968444b..8506f7f1726 100644 --- a/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java +++ b/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java @@ -31,6 +31,7 @@ import java.util.function.Consumer; import java.util.function.Function; import java.util.stream.Collectors; import org.apache.commons.io.FileUtils; +import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.client.solrj.response.ClusteringResponse; import org.apache.solr.common.SolrDocument; @@ -86,6 +87,7 @@ public class ClusteringComponentTest extends SolrTestCaseJ4 { compareToExpected(clusters("lingo", QUERY_TESTSET_SAMPLE_DOCUMENTS)); } + @LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/carrot2/carrot2/issues/149") @Test public void testStcAlgorithm() throws Exception { compareToExpected(clusters("stc", QUERY_TESTSET_SAMPLE_DOCUMENTS)); diff --git a/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ResourceCheckAlgorithm.java b/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ResourceCheckAlgorithm.java index a7bc78c07cb..16c275951c7 100644 --- a/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ResourceCheckAlgorithm.java +++ b/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ResourceCheckAlgorithm.java @@ -28,8 +28,9 @@ import org.carrot2.attrs.AttrString; import org.carrot2.clustering.Cluster; import org.carrot2.clustering.ClusteringAlgorithm; import org.carrot2.clustering.Document; +import org.carrot2.language.LabelFilter; import org.carrot2.language.LanguageComponents; -import org.carrot2.language.LexicalData; +import org.carrot2.language.StopwordFilter; /** * Creates synthetic clusters with diagnostics of {@link LanguageComponents} passed to the @@ -42,7 +43,7 @@ class ResourceCheckAlgorithm extends AttrComposite implements ClusteringAlgorith @Override public Set<Class<?>> requiredLanguageComponents() { - return Set.of(LexicalData.class); + return Set.of(StopwordFilter.class, LabelFilter.class); } @Override @@ -57,7 +58,8 @@ class ResourceCheckAlgorithm extends AttrComposite implements ClusteringAlgorith cluster = new Cluster<>(); clusters.add(cluster); - LexicalData lexicalData = languageComponents.get(LexicalData.class); + StopwordFilter stopwordFilter = languageComponents.get(StopwordFilter.class); + LabelFilter labelFilter = languageComponents.get(LabelFilter.class); cluster.addLabel( Arrays.stream(text.get().trim().split("[\\s]+")) .map( @@ -66,8 +68,8 @@ class ResourceCheckAlgorithm extends AttrComposite implements ClusteringAlgorith Locale.ROOT, "%s[%s, %s]", term, - lexicalData.ignoreWord(term) ? "ignoredWord" : "-", - lexicalData.ignoreLabel(term) ? "ignoredLabel" : "-")) + stopwordFilter.test(term) ? "-" : "ignoredWord", + labelFilter.test(term) ? "-" : "ignoredLabel")) .collect(Collectors.joining(" "))); return clusters; diff --git a/versions.lock b/versions.lock index ceeb6b1f9a9..ec3866498d7 100644 --- a/versions.lock +++ b/versions.lock @@ -1,6 +1,6 @@ # Run ./gradlew --write-locks to regenerate this file com.beust:jcommander:1.82 (2 constraints: 2b123714) -com.carrotsearch:hppc:0.8.2 (2 constraints: ad0fc5a6) +com.carrotsearch:hppc:0.9.1 (2 constraints: ac0fc8a6) com.carrotsearch.randomizedtesting:randomizedtesting-runner:2.7.9 (2 constraints: da1558e3) com.cybozu.labs:langdetect:1.1-20120112 (1 constraints: 5c066d5e) com.epam:parso:2.0.14 (1 constraints: 8e0c750e) @@ -210,7 +210,7 @@ org.bouncycastle:bcpkix-jdk15on:1.70 (2 constraints: ce1b11b3) org.bouncycastle:bcprov-jdk15on:1.70 (4 constraints: 1f34ee12) org.bouncycastle:bcutil-jdk15on:1.70 (2 constraints: 961ad454) org.brotli:dec:0.1.2 (1 constraints: 5a0ce101) -org.carrot2:carrot2-core:4.0.4 (1 constraints: 0a050336) +org.carrot2:carrot2-core:4.4.2 (1 constraints: 0c050d36) org.carrot2:morfologik-fsa:2.1.8 (1 constraints: da0d9b36) org.carrot2:morfologik-polish:2.1.8 (1 constraints: d212531e) org.carrot2:morfologik-stemming:2.1.8 (2 constraints: d61f8b00) diff --git a/versions.props b/versions.props index bdaaf42b5dd..ecf354b9477 100644 --- a/versions.props +++ b/versions.props @@ -1,6 +1,6 @@ com.adobe.testing:s3mock-junit4=2.1.34 com.carrotsearch.randomizedtesting:*=2.7.9 -com.carrotsearch:hppc=0.8.2 +com.carrotsearch:hppc=0.9.1 com.cybozu.labs:langdetect=1.1-20120112 com.fasterxml.jackson:jackson-bom=2.13.3 com.fasterxml.woodstox:woodstox-core=6.2.8 @@ -53,7 +53,7 @@ org.apache.opennlp:opennlp-tools=1.9.4 org.apache.tika:*=1.28.1 org.apache.zookeeper:*=3.7.0 org.bitbucket.b_c:jose4j=0.7.9 -org.carrot2:carrot2-core=4.0.4 +org.carrot2:carrot2-core=4.4.2 org.codehaus.woodstox:stax2-api=4.2.1 org.eclipse.jetty*:*=9.4.44.v20210927 org.hamcrest:*=2.2
