This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push: new 8d9c77fd1 NUTCH-2999 -- upgrade lucene to latest 8.x throughout new 39d59f42b Merge pull request #771 from tballison/NUTCH-2999 8d9c77fd1 is described below commit 8d9c77fd1b044f7c8fc51b70e34321cb9260cfbb Author: tballison <talli...@apache.org> AuthorDate: Wed Aug 30 14:34:48 2023 -0400 NUTCH-2999 -- upgrade lucene to latest 8.x throughout --- src/plugin/indexer-elastic/ivy.xml | 15 ++++ src/plugin/indexer-elastic/plugin.xml | 89 +++++++++++----------- .../howto_upgrade_opensearch.txt | 33 ++++++++ src/plugin/indexer-opensearch-1x/ivy.xml | 15 ++++ src/plugin/indexer-opensearch-1x/plugin.xml | 88 ++++++++++----------- 5 files changed, 152 insertions(+), 88 deletions(-) diff --git a/src/plugin/indexer-elastic/ivy.xml b/src/plugin/indexer-elastic/ivy.xml index abdcceae2..e5cdfdf65 100644 --- a/src/plugin/indexer-elastic/ivy.xml +++ b/src/plugin/indexer-elastic/ivy.xml @@ -40,7 +40,22 @@ <exclude org="commons-logging" name="commons-logging" /> <exclude org="com.tdunning" name="t-digest" /> <exclude org="org.apache.logging.log4j" name="log4j-api" /> + <exclude org="org.apache.lucene" name="*"/> </dependency> + <dependency org="org.apache.lucene" name="lucene-analyzers-common" rev="8.11.2"/> + <dependency org="org.apache.lucene" name="lucene-backward-codecs" rev="8.11.2"/> + <dependency org="org.apache.lucene" name="lucene-core" rev="8.11.2"/> + <dependency org="org.apache.lucene" name="lucene-grouping" rev="8.11.2"/> + <dependency org="org.apache.lucene" name="lucene-highlighter" rev="8.11.2"/> + <dependency org="org.apache.lucene" name="lucene-join" rev="8.11.2"/> + <dependency org="org.apache.lucene" name="lucene-memory" rev="8.11.2"/> + <dependency org="org.apache.lucene" name="lucene-misc" rev="8.11.2"/> + <dependency org="org.apache.lucene" name="lucene-queries" rev="8.11.2"/> + <dependency org="org.apache.lucene" name="lucene-queryparser" rev="8.11.2"/> + <dependency org="org.apache.lucene" 
name="lucene-sandbox" rev="8.11.2"/> + <dependency org="org.apache.lucene" name="lucene-spatial-extras" rev="8.11.2"/> + <dependency org="org.apache.lucene" name="lucene-spatial3d" rev="8.11.2"/> + <dependency org="org.apache.lucene" name="lucene-suggest" rev="8.11.2"/> </dependencies> </ivy-module> diff --git a/src/plugin/indexer-elastic/plugin.xml b/src/plugin/indexer-elastic/plugin.xml index 679979d32..fc3723a60 100644 --- a/src/plugin/indexer-elastic/plugin.xml +++ b/src/plugin/indexer-elastic/plugin.xml @@ -22,50 +22,51 @@ </library> <!-- Elastic Rest Client Dependencies --> <!-- end of Elastic Rest Client dependencies --> - <library name="HdrHistogram-2.1.9.jar" /> - <library name="aggs-matrix-stats-client-7.13.2.jar" /> - <library name="compiler-0.9.6.jar" /> - <library name="elasticsearch-7.13.2.jar" /> - <library name="elasticsearch-cli-7.13.2.jar" /> - <library name="elasticsearch-core-7.13.2.jar" /> - <library name="elasticsearch-geo-7.13.2.jar" /> - <library name="elasticsearch-plugin-classloader-7.13.2.jar" /> - <library name="elasticsearch-rest-client-7.13.2.jar" /> - <library name="elasticsearch-rest-high-level-client-7.13.2.jar" /> - <library name="elasticsearch-secure-sm-7.13.2.jar" /> - <library name="elasticsearch-x-content-7.13.2.jar" /> - <library name="hppc-0.8.1.jar" /> - <library name="httpasyncclient-4.1.4.jar" /> - <library name="httpclient-4.5.10.jar" /> - <library name="httpcore-4.4.12.jar" /> - <library name="httpcore-nio-4.4.12.jar" /> - <library name="jackson-core-2.10.4.jar" /> - <library name="jackson-dataformat-cbor-2.10.4.jar" /> - <library name="jackson-dataformat-smile-2.10.4.jar" /> - <library name="jackson-dataformat-yaml-2.10.4.jar" /> - <library name="jna-5.7.0-1.jar" /> - <library name="joda-time-2.10.10.jar" /> - <library name="jopt-simple-5.0.2.jar" /> - <library name="lang-mustache-client-7.13.2.jar" /> - <library name="log4j-api-2.11.1.jar" /> - <library name="lucene-analyzers-common-8.11.2.jar" /> - <library 
name="lucene-backward-codecs-8.11.2.jar" /> - <library name="lucene-core-8.11.2.jar" /> - <library name="lucene-grouping-8.11.2.jar" /> - <library name="lucene-highlighter-8.11.2.jar" /> - <library name="lucene-join-8.11.2.jar" /> - <library name="lucene-memory-8.11.2.jar" /> - <library name="lucene-misc-8.11.2.jar" /> - <library name="lucene-queries-8.11.2.jar" /> - <library name="lucene-queryparser-8.11.2.jar" /> - <library name="lucene-sandbox-8.11.2.jar" /> - <library name="lucene-spatial-extras-8.11.2.jar" /> - <library name="lucene-spatial3d-8.11.2.jar" /> - <library name="lucene-suggest-8.11.2.jar" /> - <library name="mapper-extras-client-7.13.2.jar" /> - <library name="parent-join-client-7.13.2.jar" /> - <library name="rank-eval-client-7.13.2.jar" /> - <library name="snakeyaml-1.26.jar" /> + <library name="HdrHistogram-2.1.9.jar"/> + <library name="aggs-matrix-stats-client-7.13.2.jar"/> + <library name="compiler-0.9.6.jar"/> + <library name="elasticsearch-7.13.2.jar"/> + <library name="elasticsearch-cli-7.13.2.jar"/> + <library name="elasticsearch-core-7.13.2.jar"/> + <library name="elasticsearch-geo-7.13.2.jar"/> + <library name="elasticsearch-plugin-classloader-7.13.2.jar"/> + <library name="elasticsearch-rest-client-7.13.2.jar"/> + <library name="elasticsearch-rest-high-level-client-7.13.2.jar"/> + <library name="elasticsearch-secure-sm-7.13.2.jar"/> + <library name="elasticsearch-x-content-7.13.2.jar"/> + <library name="hppc-0.8.1.jar"/> + <library name="httpasyncclient-4.1.4.jar"/> + <library name="httpclient-4.5.10.jar"/> + <library name="httpcore-4.4.12.jar"/> + <library name="httpcore-nio-4.4.12.jar"/> + <library name="jackson-core-2.10.4.jar"/> + <library name="jackson-dataformat-cbor-2.10.4.jar"/> + <library name="jackson-dataformat-smile-2.10.4.jar"/> + <library name="jackson-dataformat-yaml-2.10.4.jar"/> + <library name="jna-5.7.0-1.jar"/> + <library name="joda-time-2.10.10.jar"/> + <library name="jopt-simple-5.0.2.jar"/> + <library 
name="lang-mustache-client-7.13.2.jar"/> + <library name="lucene-analyzers-common-8.11.2.jar"/> + <library name="lucene-backward-codecs-8.11.2.jar"/> + <library name="lucene-core-8.11.2.jar"/> + <library name="lucene-grouping-8.11.2.jar"/> + <library name="lucene-highlighter-8.11.2.jar"/> + <library name="lucene-join-8.11.2.jar"/> + <library name="lucene-memory-8.11.2.jar"/> + <library name="lucene-misc-8.11.2.jar"/> + <library name="lucene-queries-8.11.2.jar"/> + <library name="lucene-queryparser-8.11.2.jar"/> + <library name="lucene-sandbox-8.11.2.jar"/> + <library name="lucene-spatial-extras-8.11.2.jar"/> + <library name="lucene-spatial3d-8.11.2.jar"/> + <library name="lucene-suggest-8.11.2.jar"/> + <library name="mapper-extras-client-7.13.2.jar"/> + <library name="parent-join-client-7.13.2.jar"/> + <library name="rank-eval-client-7.13.2.jar"/> + <library name="s2-geometry-library-java-1.0.0.jar"/> + <library name="snakeyaml-1.26.jar"/> + <library name="spatial4j-0.7.jar"/> </runtime> <requires> <import plugin="nutch-extensionpoints" /> diff --git a/src/plugin/indexer-opensearch-1x/howto_upgrade_opensearch.txt b/src/plugin/indexer-opensearch-1x/howto_upgrade_opensearch.txt new file mode 100644 index 000000000..072590044 --- /dev/null +++ b/src/plugin/indexer-opensearch-1x/howto_upgrade_opensearch.txt @@ -0,0 +1,33 @@ +1. Upgrade OpenSearch dependency in src/plugin/indexer-opensearch-1x/ivy.xml + +2. Upgrade the OpenSearch specific dependencies in src/plugin/indexer-opensearch-1x/plugin.xml + To get the list of dependencies and their versions execute: + $ cd src/plugin/indexer-opensearch-1x/ + $ ant -f ./build-ivy.xml + $ ls lib | sed 's/^/ <library name="/g' | sed 's/$/"\/>/g' + + In the plugin.xml replace all lines between + <!-- OpenSearch Rest Client dependencies --> + and + <!-- end of OpenSearch Rest Client dependencies --> + with the output of the command above. + +4. 
(Optionally) remove overlapping dependencies between indexer-opensearch-1x and Nutch core dependencies: + - check for libs present both in + build/lib + and + build/plugins/indexer-opensearch-1x/ + (eventually with different versions) + - duplicated libs can be added to the exclusions of transitive dependencies in + build/plugins/indexer-opensearch-1x/ivy.xml + - but it should be made sure that the library versions in ivy/ivy.xml correspond to + those required by Tika + +5. Remove the locally "installed" dependencies in src/plugin/indexer-opensearch-1x/lib/: + + $ rm -rf lib/ + +6. Build Nutch and run all unit tests: + + $ cd ../../../ + $ ant clean runtime test \ No newline at end of file diff --git a/src/plugin/indexer-opensearch-1x/ivy.xml b/src/plugin/indexer-opensearch-1x/ivy.xml index 1505ad3c8..ae5d91e41 100644 --- a/src/plugin/indexer-opensearch-1x/ivy.xml +++ b/src/plugin/indexer-opensearch-1x/ivy.xml @@ -40,7 +40,22 @@ <exclude org="commons-logging" name="commons-logging" /> <exclude org="com.tdunning" name="t-digest" /> <exclude org="org.apache.logging.log4j" name="log4j-api" /> + <exclude org="org.apache.lucene" name="*" /> </dependency> + <dependency org="org.apache.lucene" name="lucene-analyzers-common" rev="8.11.2" /> + <dependency org="org.apache.lucene" name="lucene-backward-codecs" rev="8.11.2" /> + <dependency org="org.apache.lucene" name="lucene-core" rev="8.11.2" /> + <dependency org="org.apache.lucene" name="lucene-grouping" rev="8.11.2" /> + <dependency org="org.apache.lucene" name="lucene-highlighter" rev="8.11.2" /> + <dependency org="org.apache.lucene" name="lucene-join" rev="8.11.2" /> + <dependency org="org.apache.lucene" name="lucene-memory" rev="8.11.2" /> + <dependency org="org.apache.lucene" name="lucene-misc" rev="8.11.2" /> + <dependency org="org.apache.lucene" name="lucene-queries" rev="8.11.2" /> + <dependency org="org.apache.lucene" name="lucene-queryparser" rev="8.11.2" /> + <dependency org="org.apache.lucene" 
name="lucene-sandbox" rev="8.11.2" /> + <dependency org="org.apache.lucene" name="lucene-spatial-extras" rev="8.11.2" /> + <dependency org="org.apache.lucene" name="lucene-spatial3d" rev="8.11.2" /> + <dependency org="org.apache.lucene" name="lucene-suggest" rev="8.11.2" /> </dependencies> </ivy-module> diff --git a/src/plugin/indexer-opensearch-1x/plugin.xml b/src/plugin/indexer-opensearch-1x/plugin.xml index e1dde463d..ee0d45dc2 100644 --- a/src/plugin/indexer-opensearch-1x/plugin.xml +++ b/src/plugin/indexer-opensearch-1x/plugin.xml @@ -22,50 +22,50 @@ </library> <!-- OpenSearch Rest Client Dependencies --> <!-- end of OpenSearch Rest Client dependencies --> - <library name="HdrHistogram-2.1.9.jar" /> - <library name="aggs-matrix-stats-client-1.3.8.jar" /> - <library name="compiler-0.9.10.jar" /> - <library name="opensearch-1.3.8.jar" /> - <library name="opensearch-cli-1.3.8.jar" /> - <library name="opensearch-core-1.3.8.jar" /> - <library name="opensearch-geo-1.3.8.jar" /> - <library name="opensearch-plugin-classloader-1.3.8.jar" /> - <library name="opensearch-rest-client-1.3.8.jar" /> - <library name="opensearch-rest-high-level-client-1.3.8.jar" /> - <library name="opensearch-secure-sm-1.3.8.jar" /> - <library name="opensearch-x-content-1.3.8.jar" /> - <library name="hppc-0.8.1.jar" /> - <library name="httpasyncclient-4.1.4.jar" /> - <library name="httpclient-4.5.13.jar" /> - <library name="httpcore-4.4.13.jar" /> - <library name="httpcore-nio-4.4.14.jar" /> - <library name="jackson-core-2.12.7.jar" /> - <library name="jackson-dataformat-cbor-2.12.7.jar" /> - <library name="jackson-dataformat-smile-2.12.7.jar" /> - <library name="jackson-dataformat-yaml-2.12.7.jar" /> - <library name="jna-5.5.0.jar" /> - <library name="joda-time-2.10.12.jar" /> - <library name="jopt-simple-5.0.4.jar" /> - <library name="lang-mustache-client-1.3.8.jar" /> - <library name="log4j-api-2.17.1.jar" /> - <library name="lucene-analyzers-common-8.11.2.jar" /> - <library 
name="lucene-backward-codecs-8.11.2.jar" /> - <library name="lucene-core-8.11.2.jar" /> - <library name="lucene-grouping-8.11.2.jar" /> - <library name="lucene-highlighter-8.11.2.jar" /> - <library name="lucene-join-8.11.2.jar" /> - <library name="lucene-memory-8.11.2.jar" /> - <library name="lucene-misc-8.11.2.jar" /> - <library name="lucene-queries-8.11.2.jar" /> - <library name="lucene-queryparser-8.11.2.jar" /> - <library name="lucene-sandbox-8.11.2.jar" /> - <library name="lucene-spatial-extras-8.11.2.jar" /> - <library name="lucene-spatial3d-8.11.2.jar" /> - <library name="lucene-suggest-8.11.2.jar" /> - <library name="mapper-extras-client-1.3.8.jar" /> - <library name="parent-join-client-1.3.8.jar" /> - <library name="rank-eval-client-1.3.8.jar" /> - <library name="snakeyaml-1.32.jar" /> + <library name="HdrHistogram-2.1.9.jar"/> + <library name="aggs-matrix-stats-client-1.3.8.jar"/> + <library name="compiler-0.9.10.jar"/> + <library name="hppc-0.8.1.jar"/> + <library name="httpasyncclient-4.1.4.jar"/> + <library name="httpclient-4.5.13.jar"/> + <library name="httpcore-4.4.12.jar"/> + <library name="httpcore-nio-4.4.12.jar"/> + <library name="jackson-core-2.14.1.jar"/> + <library name="jackson-dataformat-cbor-2.14.1.jar"/> + <library name="jackson-dataformat-smile-2.14.1.jar"/> + <library name="jackson-dataformat-yaml-2.14.1.jar"/> + <library name="jna-5.5.0.jar"/> + <library name="joda-time-2.10.12.jar"/> + <library name="jopt-simple-5.0.4.jar"/> + <library name="lang-mustache-client-1.3.8.jar"/> + <library name="lucene-analyzers-common-8.11.2.jar"/> + <library name="lucene-backward-codecs-8.11.2.jar"/> + <library name="lucene-core-8.11.2.jar"/> + <library name="lucene-grouping-8.11.2.jar"/> + <library name="lucene-highlighter-8.11.2.jar"/> + <library name="lucene-join-8.11.2.jar"/> + <library name="lucene-memory-8.11.2.jar"/> + <library name="lucene-misc-8.11.2.jar"/> + <library name="lucene-queries-8.11.2.jar"/> + <library 
name="lucene-queryparser-8.11.2.jar"/> + <library name="lucene-sandbox-8.11.2.jar"/> + <library name="lucene-spatial-extras-8.11.2.jar"/> + <library name="lucene-spatial3d-8.11.2.jar"/> + <library name="lucene-suggest-8.11.2.jar"/> + <library name="mapper-extras-client-1.3.8.jar"/> + <library name="opensearch-1.3.8.jar"/> + <library name="opensearch-cli-1.3.8.jar"/> + <library name="opensearch-core-1.3.8.jar"/> + <library name="opensearch-geo-1.3.8.jar"/> + <library name="opensearch-rest-client-1.3.8.jar"/> + <library name="opensearch-rest-high-level-client-1.3.8.jar"/> + <library name="opensearch-secure-sm-1.3.8.jar"/> + <library name="opensearch-x-content-1.3.8.jar"/> + <library name="parent-join-client-1.3.8.jar"/> + <library name="rank-eval-client-1.3.8.jar"/> + <library name="s2-geometry-library-java-1.0.0.jar"/> + <library name="snakeyaml-1.32.jar"/> + <library name="spatial4j-0.7.jar"/> </runtime> <requires> <import plugin="nutch-extensionpoints" />