This is an automated email from the ASF dual-hosted git repository. dweiss pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/solr.git
commit fbc22720d519b09f388e72d1c09b0bee456c832e Author: Dawid Weiss <[email protected]> AuthorDate: Sun Jun 5 21:48:19 2022 +0200 Revert "Upgrade Carrot2 to 4.4.2 and HPPC to 0.9.1 (#884)" This reverts commit 48f1f8cbc9a494652244e143911af8443996fb59. --- solr/CHANGES.txt | 2 -- solr/licenses/carrot2-core-4.0.4.jar.sha1 | 1 + solr/licenses/carrot2-core-4.4.2.jar.sha1 | 1 - solr/licenses/hppc-0.8.2.jar.sha1 | 1 + solr/licenses/hppc-0.9.1.jar.sha1 | 1 - solr/modules/clustering/build.gradle | 1 - .../testCustomLanguageResources/english.label-filters.json | 9 --------- .../conf/testCustomLanguageResources/english.stoplabels.utf8 | 1 + .../conf/testCustomLanguageResources/english.stopwords.utf8 | 2 ++ .../testCustomLanguageResources/english.word-filters.json | 10 ---------- ...ClusteringComponentDistributedTest-testLingoAlgorithm.txt | 11 ++++++----- .../ClusteringComponentDistributedTest-testStcAlgorithm.txt | 1 - .../ClusteringComponentTest-testLingoAlgorithm.txt | 11 ++++++----- .../clustering/ClusteringComponentTest-testStcAlgorithm.txt | 3 +-- .../clustering/ClusteringComponentDistributedTest.java | 2 -- .../solr/handler/clustering/ClusteringComponentTest.java | 2 -- .../solr/handler/clustering/ResourceCheckAlgorithm.java | 12 +++++------- versions.lock | 4 ++-- versions.props | 4 ++-- 19 files changed, 27 insertions(+), 52 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index dd00dfbdc51..7227324bc55 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -48,8 +48,6 @@ Improvements * SOLR-16181: Initialize the LogWatcher earlier in CoreContainer#load() (janhoy) -* SOLR-16225: Upgrade dependencies (Carrot2, HPPC) (Dawid Weiss) - Optimizations --------------------- * SOLR-16120: Optimise hl.fl expansion. (Christine Poerschke, David Smiley, Mike Drob) diff --git a/solr/licenses/carrot2-core-4.0.4.jar.sha1 b/solr/licenses/carrot2-core-4.0.4.jar.sha1 new file mode 100644 index 00000000000..0d26e39096c --- /dev/null +++ b/solr/licenses/carrot2-core-4.0.4.jar.sha1 @@ -0,0 +1 @@ +fb60ab80cfd69abe6cad1939f24bd5210501b177 diff --git a/solr/licenses/carrot2-core-4.4.2.jar.sha1 b/solr/licenses/carrot2-core-4.4.2.jar.sha1 deleted file mode 100644 index 9bdf9a6c06e..00000000000 --- a/solr/licenses/carrot2-core-4.4.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -20f170227ddbbf1e012bf504a3ab21f5696f19d2 diff --git a/solr/licenses/hppc-0.8.2.jar.sha1 b/solr/licenses/hppc-0.8.2.jar.sha1 new file mode 100644 index 00000000000..a73358b9c66 --- /dev/null +++ b/solr/licenses/hppc-0.8.2.jar.sha1 @@ -0,0 +1 @@ +ccb3ef933ead6b5d766fa571582ddb9b447e48c4 diff --git a/solr/licenses/hppc-0.9.1.jar.sha1 b/solr/licenses/hppc-0.9.1.jar.sha1 deleted file mode 100644 index 5cedaaa6433..00000000000 --- a/solr/licenses/hppc-0.9.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -4bf4c51e06aec600894d841c4c004566b20dd357 diff --git a/solr/modules/clustering/build.gradle b/solr/modules/clustering/build.gradle index 652a7d60528..fa0811dda7d 100644 --- a/solr/modules/clustering/build.gradle +++ b/solr/modules/clustering/build.gradle @@ -29,7 +29,6 @@ dependencies { implementation 'org.slf4j:slf4j-api' testImplementation project(':solr:test-framework') - testImplementation 'org.apache.lucene:lucene-test-framework' testImplementation 'com.carrotsearch.randomizedtesting:randomizedtesting-runner' testImplementation 'junit:junit' testImplementation 'org.hamcrest:hamcrest' diff --git a/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.label-filters.json b/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.label-filters.json deleted file mode 100644 index 7c805464b2b..00000000000 --- a/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.label-filters.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "glob": [ - ], - "regexp": [ - "ba.+" - ], - "exact": [ - ] -} diff --git a/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.stoplabels.utf8 b/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.stoplabels.utf8 new file mode 100644 index 00000000000..1471282e6f8 --- /dev/null +++ b/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.stoplabels.utf8 @@ -0,0 +1 @@ +ba.+ \ No newline at end of file diff --git a/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.stopwords.utf8 b/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.stopwords.utf8 new file mode 100644 index 00000000000..3bd1f0e2974 --- /dev/null +++ b/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.stopwords.utf8 @@ -0,0 +1,2 @@ +foo +bar diff --git a/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.word-filters.json b/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.word-filters.json deleted file mode 100644 index 7746028055e..00000000000 --- a/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.word-filters.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "exact": [ - "foo", - "bar" - ], - "glob": [ - ], - "regexp": [ - ] -} diff --git a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testLingoAlgorithm.txt b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testLingoAlgorithm.txt index 3f35f30ecf5..ee8b3842d0d 100644 --- a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testLingoAlgorithm.txt +++ b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testLingoAlgorithm.txt @@ -1,16 +1,17 @@ - Knowledge Discovery [6] +- Patterns [6] - Data Mining Applications [5] - Statistical Analysis [4] -- Analysis Techniques [3] - Computer [3] +- Creating [3] - Data Mining Solutions [3] - Known as Data Mining [3] - Text Mining [3] -- Courses [2] -- Extraction of Hidden Predictive Information [2] +- Databases KDD [2] +- Extraction of Hidden Predictive [2] - Information from Large [2] -- Machine Learning [2] -- Neural Networks [2] - Open [2] - Powers [2] - Searching [2] +- Tools [2] +- Other topics [1] \ No newline at end of file diff --git a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testStcAlgorithm.txt b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testStcAlgorithm.txt index aef40034692..1ee2114eeb2 100644 --- a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testStcAlgorithm.txt +++ b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testStcAlgorithm.txt @@ -7,5 +7,4 @@ - Businesses [4] - Predictive [4] - Process [4] -- Hidden Predictive Information [2] - Other topics [2] diff --git a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testLingoAlgorithm.txt b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testLingoAlgorithm.txt index 3f35f30ecf5..107b60f82f2 100644 --- a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testLingoAlgorithm.txt +++ b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testLingoAlgorithm.txt @@ -1,16 +1,17 @@ - Knowledge Discovery [6] +- Patterns [6] - Data Mining Applications [5] - Statistical Analysis [4] -- Analysis Techniques [3] - Computer [3] +- Creating [3] - Data Mining Solutions [3] - Known as Data Mining [3] - Text Mining [3] -- Courses [2] -- Extraction of Hidden Predictive Information [2] +- Databases KDD [2] +- Extraction of Hidden Predictive [2] - Information from Large [2] -- Machine Learning [2] -- Neural Networks [2] - Open [2] - Powers [2] - Searching [2] +- Tools [2] +- Other topics [1] diff --git a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testStcAlgorithm.txt b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testStcAlgorithm.txt index a6d985098e2..1ee2114eeb2 100644 --- a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testStcAlgorithm.txt +++ b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testStcAlgorithm.txt @@ -4,8 +4,7 @@ - Analysis [5] - Applications [5] - Software [5] -- Business [4] +- Businesses [4] - Predictive [4] - Process [4] -- Hidden Predictive Information [2] - Other topics [2] diff --git a/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest.java b/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest.java index 743bbfa2b86..1a9398a6ba5 100644 --- a/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest.java +++ b/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.util.List; import java.util.function.Consumer; import java.util.stream.Collectors; -import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.solr.BaseDistributedSearchTestCase; import org.apache.solr.SolrTestCaseJ4.SuppressSSL; import org.apache.solr.client.solrj.response.Cluster; @@ -73,7 +72,6 @@ public class ClusteringComponentDistributedTest extends BaseDistributedSearchTes })); } - @LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/carrot2/carrot2/issues/149") @Test @ShardsFixed(num = 2) public void testStcAlgorithm() throws Exception { diff --git a/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java b/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java index 8506f7f1726..0c6c968444b 100644 --- a/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java +++ b/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java @@ -31,7 +31,6 @@ import java.util.function.Consumer; import java.util.function.Function; import java.util.stream.Collectors; import org.apache.commons.io.FileUtils; -import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.client.solrj.response.ClusteringResponse; import org.apache.solr.common.SolrDocument; @@ -87,7 +86,6 @@ public class ClusteringComponentTest extends SolrTestCaseJ4 { compareToExpected(clusters("lingo", QUERY_TESTSET_SAMPLE_DOCUMENTS)); } - @LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/carrot2/carrot2/issues/149") @Test public void testStcAlgorithm() throws Exception { compareToExpected(clusters("stc", QUERY_TESTSET_SAMPLE_DOCUMENTS)); diff --git a/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ResourceCheckAlgorithm.java b/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ResourceCheckAlgorithm.java index 16c275951c7..a7bc78c07cb 100644 --- a/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ResourceCheckAlgorithm.java +++ b/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ResourceCheckAlgorithm.java @@ -28,9 +28,8 @@ import org.carrot2.attrs.AttrString; import org.carrot2.clustering.Cluster; import org.carrot2.clustering.ClusteringAlgorithm; import org.carrot2.clustering.Document; -import org.carrot2.language.LabelFilter; import org.carrot2.language.LanguageComponents; -import org.carrot2.language.StopwordFilter; +import org.carrot2.language.LexicalData; /** * Creates synthetic clusters with diagnostics of {@link LanguageComponents} passed to the @@ -43,7 +42,7 @@ class ResourceCheckAlgorithm extends AttrComposite implements ClusteringAlgorith @Override public Set<Class<?>> requiredLanguageComponents() { - return Set.of(StopwordFilter.class, LabelFilter.class); + return Set.of(LexicalData.class); } @Override @@ -58,8 +57,7 @@ class ResourceCheckAlgorithm extends AttrComposite implements ClusteringAlgorith cluster = new Cluster<>(); clusters.add(cluster); - StopwordFilter stopwordFilter = languageComponents.get(StopwordFilter.class); - LabelFilter labelFilter = languageComponents.get(LabelFilter.class); + LexicalData lexicalData = languageComponents.get(LexicalData.class); cluster.addLabel( Arrays.stream(text.get().trim().split("[\\s]+")) .map( @@ -68,8 +66,8 @@ class ResourceCheckAlgorithm extends AttrComposite implements ClusteringAlgorith Locale.ROOT, "%s[%s, %s]", term, - stopwordFilter.test(term) ? "-" : "ignoredWord", - labelFilter.test(term) ? "-" : "ignoredLabel")) + lexicalData.ignoreWord(term) ? "ignoredWord" : "-", + lexicalData.ignoreLabel(term) ? "ignoredLabel" : "-")) .collect(Collectors.joining(" "))); return clusters; diff --git a/versions.lock b/versions.lock index ec3866498d7..ceeb6b1f9a9 100644 --- a/versions.lock +++ b/versions.lock @@ -1,6 +1,6 @@ # Run ./gradlew --write-locks to regenerate this file com.beust:jcommander:1.82 (2 constraints: 2b123714) -com.carrotsearch:hppc:0.9.1 (2 constraints: ac0fc8a6) +com.carrotsearch:hppc:0.8.2 (2 constraints: ad0fc5a6) com.carrotsearch.randomizedtesting:randomizedtesting-runner:2.7.9 (2 constraints: da1558e3) com.cybozu.labs:langdetect:1.1-20120112 (1 constraints: 5c066d5e) com.epam:parso:2.0.14 (1 constraints: 8e0c750e) @@ -210,7 +210,7 @@ org.bouncycastle:bcpkix-jdk15on:1.70 (2 constraints: ce1b11b3) org.bouncycastle:bcprov-jdk15on:1.70 (4 constraints: 1f34ee12) org.bouncycastle:bcutil-jdk15on:1.70 (2 constraints: 961ad454) org.brotli:dec:0.1.2 (1 constraints: 5a0ce101) -org.carrot2:carrot2-core:4.4.2 (1 constraints: 0c050d36) +org.carrot2:carrot2-core:4.0.4 (1 constraints: 0a050336) org.carrot2:morfologik-fsa:2.1.8 (1 constraints: da0d9b36) org.carrot2:morfologik-polish:2.1.8 (1 constraints: d212531e) org.carrot2:morfologik-stemming:2.1.8 (2 constraints: d61f8b00) diff --git a/versions.props b/versions.props index ecf354b9477..bdaaf42b5dd 100644 --- a/versions.props +++ b/versions.props @@ -1,6 +1,6 @@ com.adobe.testing:s3mock-junit4=2.1.34 com.carrotsearch.randomizedtesting:*=2.7.9 -com.carrotsearch:hppc=0.9.1 +com.carrotsearch:hppc=0.8.2 com.cybozu.labs:langdetect=1.1-20120112 com.fasterxml.jackson:jackson-bom=2.13.3 com.fasterxml.woodstox:woodstox-core=6.2.8 @@ -53,7 +53,7 @@ org.apache.opennlp:opennlp-tools=1.9.4 org.apache.tika:*=1.28.1 org.apache.zookeeper:*=3.7.0 org.bitbucket.b_c:jose4j=0.7.9 -org.carrot2:carrot2-core=4.4.2 +org.carrot2:carrot2-core=4.0.4 org.codehaus.woodstox:stax2-api=4.2.1 org.eclipse.jetty*:*=9.4.44.v20210927 org.hamcrest:*=2.2
