This is an automated email from the ASF dual-hosted git repository. jnioche pushed a commit to branch 990 in repository https://gitbox.apache.org/repos/asf/incubator-stormcrawler.git
commit 2a5197fb559cb3855ea409e385e0619192c39804 Merge: 82b40dba bdc34cbc Author: Julien Nioche <[email protected]> AuthorDate: Thu Mar 28 15:31:03 2024 +0000 Merge from main Signed-off-by: Julien Nioche <[email protected]> .github/workflows/code_coverage.yml | 29 ++ .github/workflows/maven.yml | 6 +- DISCLAIMER | 10 + NOTICE | 4 +- README.md | 36 +- THIRD-PARTY.properties | 4 + THIRD-PARTY.txt | 546 +++++++++++++++++++++ archetype/pom.xml | 6 +- .../META-INF/maven/archetype-metadata.xml | 71 +-- .../main/resources/archetype-resources/README.md | 2 +- .../archetype-resources/crawler-conf.yaml | 58 ++- .../resources/archetype-resources/crawler.flux | 20 +- .../src/main/resources/archetype-resources/pom.xml | 24 +- .../src/main/java/CrawlTopology.java | 24 +- .../src/main/resources/jsoupfilters.json | 6 +- .../src/main/resources/parsefilters.json | 8 +- .../src/main/resources/urlfilters.json | 18 +- core/pom.xml | 52 +- .../stormcrawler/protocol/Protocol.java | 40 -- .../selenium/DelegatorRemoteDriverProtocol.java | 81 --- .../protocol/selenium/RemoteDriverProtocol.java | 87 ---- .../apache}/stormcrawler/ConfigurableTopology.java | 6 +- .../apache}/stormcrawler/Constants.java | 2 +- .../apache}/stormcrawler/JSONResource.java | 2 +- .../apache}/stormcrawler/Metadata.java | 20 +- .../apache}/stormcrawler/bolt/FeedParserBolt.java | 28 +- .../apache}/stormcrawler/bolt/FetcherBolt.java | 108 ++-- .../apache}/stormcrawler/bolt/JSoupParserBolt.java | 48 +- .../stormcrawler/bolt/SimpleFetcherBolt.java | 45 +- .../stormcrawler/bolt/SiteMapParserBolt.java | 28 +- .../stormcrawler/bolt/StatusEmitterBolt.java | 26 +- .../apache}/stormcrawler/bolt/URLFilterBolt.java | 12 +- .../stormcrawler/bolt/URLPartitionerBolt.java | 8 +- .../apache}/stormcrawler/filtering/URLFilter.java | 6 +- .../apache}/stormcrawler/filtering/URLFilters.java | 84 +++- .../filtering/basic/BasicURLFilter.java | 6 +- .../filtering/basic/BasicURLNormalizer.java | 7 +- .../filtering/basic/SelfURLFilter.java | 6 +- .../filtering/depth/MaxDepthFilter.java | 8 +- .../stormcrawler/filtering/host/HostURLFilter.java | 6 +- .../filtering/metadata/MetadataFilter.java | 6 +- .../filtering/regex/FastURLFilter.java | 10 +- .../stormcrawler/filtering/regex/RegexRule.java | 2 +- .../filtering/regex/RegexURLFilter.java | 2 +- .../filtering/regex/RegexURLFilterBase.java | 6 +- .../filtering/regex/RegexURLNormalizer.java | 6 +- .../filtering/robots/RobotsFilter.java | 14 +- .../filtering/sitemap/SitemapFilter.java | 10 +- .../stormcrawler/indexing/AbstractIndexerBolt.java | 143 ++++-- .../stormcrawler/indexing/DummyIndexer.java | 8 +- .../stormcrawler/indexing/StdOutIndexer.java | 8 +- .../stormcrawler/jsoup/LDJsonParseFilter.java | 12 +- .../stormcrawler/jsoup/LinkParseFilter.java | 20 +- .../apache}/stormcrawler/jsoup/XPathFilter.java | 12 +- .../parse/DocumentFragmentBuilder.java | 2 +- .../apache}/stormcrawler/parse/JSoupFilter.java | 7 +- .../apache}/stormcrawler/parse/JSoupFilters.java | 10 +- .../apache}/stormcrawler/parse/Outlink.java | 4 +- .../apache}/stormcrawler/parse/ParseData.java | 4 +- .../apache}/stormcrawler/parse/ParseFilter.java | 8 +- .../apache}/stormcrawler/parse/ParseFilters.java | 8 +- .../apache}/stormcrawler/parse/ParseResult.java | 4 +- .../apache}/stormcrawler/parse/TextExtractor.java | 4 +- .../parse/filter/CollectionTagger.java | 10 +- .../CommaSeparatedToMultivaluedMetadata.java | 8 +- .../parse/filter/DebugParseFilter.java | 6 +- .../parse/filter/DomainParseFilter.java | 12 +- .../parse/filter/LDJsonParseFilter.java | 10 +- .../stormcrawler/parse/filter/LinkParseFilter.java | 20 +- .../parse/filter/MD5SignatureParseFilter.java | 10 +- .../parse/filter/MimeTypeNormalization.java | 8 +- .../stormcrawler/parse/filter/XPathFilter.java | 10 +- .../persistence/AbstractQueryingSpout.java | 9 +- .../persistence/AbstractStatusUpdaterBolt.java | 10 +- .../persistence/AdaptiveScheduler.java | 22 +- .../stormcrawler/persistence/DefaultScheduler.java | 14 +- .../persistence/EmptyQueueListener.java | 2 +- .../persistence/MemoryStatusUpdater.java | 6 +- .../stormcrawler/persistence/Scheduler.java | 8 +- .../apache}/stormcrawler/persistence/Status.java | 2 +- .../persistence/StdOutStatusUpdater.java | 4 +- .../persistence/urlbuffer/AbstractURLBuffer.java | 8 +- .../persistence/urlbuffer/PriorityURLBuffer.java | 4 +- .../persistence/urlbuffer/SchedulingURLBuffer.java | 4 +- .../persistence/urlbuffer/SimpleURLBuffer.java | 2 +- .../persistence/urlbuffer/URLBuffer.java | 12 +- .../protocol/AbstractHttpProtocol.java | 118 +---- .../stormcrawler/protocol/DelegatorProtocol.java | 159 ++++-- .../apache}/stormcrawler/protocol/HttpHeaders.java | 2 +- .../protocol/HttpRobotRulesParser.java | 87 +++- .../org/apache/stormcrawler/protocol/Protocol.java | 156 ++++++ .../stormcrawler/protocol/ProtocolFactory.java | 17 +- .../stormcrawler/protocol/ProtocolResponse.java | 10 +- .../apache}/stormcrawler/protocol/RobotRules.java | 2 +- .../stormcrawler/protocol/RobotRulesParser.java | 84 +++- .../stormcrawler/protocol/file/FileProtocol.java | 16 +- .../stormcrawler/protocol/file/FileResponse.java | 8 +- .../protocol/httpclient/HttpProtocol.java | 25 +- .../protocol/okhttp/DNSResolutionListener.java | 2 +- .../stormcrawler/protocol/okhttp/HttpProtocol.java | 24 +- .../protocol/selenium/NavigationFilter.java | 8 +- .../protocol/selenium/NavigationFilters.java | 14 +- .../protocol/selenium/RemoteDriverProtocol.java | 131 +++++ .../protocol/selenium/SeleniumProtocol.java | 26 +- .../stormcrawler/proxy/MultiProxyManager.java | 8 +- .../apache}/stormcrawler/proxy/ProxyManager.java | 4 +- .../apache}/stormcrawler/proxy/SCProxy.java | 14 +- .../stormcrawler/proxy/SingleProxyManager.java | 13 +- .../apache}/stormcrawler/spout/FileSpout.java | 32 +- .../apache}/stormcrawler/spout/MemorySpout.java | 10 +- .../stormcrawler/util/AbstractConfigurable.java | 2 +- .../stormcrawler/util/CharsetIdentification.java | 6 +- .../stormcrawler/util/CollectionMetric.java | 2 +- .../apache}/stormcrawler/util/ConfUtils.java | 83 +++- .../apache}/stormcrawler/util/Configurable.java | 2 +- .../stormcrawler/util/ConfigurableHelper.java | 2 +- .../apache}/stormcrawler/util/CookieConverter.java | 2 +- .../stormcrawler/util/InitialisationUtil.java | 2 +- .../stormcrawler/util/MetadataTransfer.java | 30 +- .../stormcrawler/util/PerSecondReducer.java | 2 +- .../apache}/stormcrawler/util/RefreshTag.java | 2 +- .../apache}/stormcrawler/util/RobotsTags.java | 4 +- .../apache}/stormcrawler/util/StringTabScheme.java | 4 +- .../apache}/stormcrawler/util/URLPartitioner.java | 28 +- .../stormcrawler/util/URLStreamGrouping.java | 8 +- .../apache}/stormcrawler/util/URLUtil.java | 2 +- core/src/main/resources/crawler-default.yaml | 97 +++- .../apache/stormcrawler/MetadataTest.java} | 26 +- .../stormcrawler/TestMetadataSerialization.java | 2 +- .../apache}/stormcrawler/TestOutputCollector.java | 2 +- .../apache}/stormcrawler/TestUtil.java | 2 +- .../stormcrawler/bolt/AbstractFetcherBoltTest.java | 12 +- .../stormcrawler/bolt/FeedParserBoltTest.java | 16 +- .../apache}/stormcrawler/bolt/FetcherBoltTest.java | 2 +- .../stormcrawler/bolt/JSoupParserBoltTest.java | 16 +- .../stormcrawler/bolt/SimpleFetcherBoltTest.java | 2 +- .../stormcrawler/bolt/SiteMapParserBoltTest.java | 95 ++-- .../stormcrawler/filtering/BasicURLFilterTest.java | 6 +- .../filtering/BasicURLNormalizerTest.java | 6 +- .../stormcrawler/filtering/FastURLFilterTest.java | 6 +- .../stormcrawler/filtering/HostURLFilterTest.java | 6 +- .../stormcrawler/filtering/MaxDepthFilterTest.java | 8 +- .../stormcrawler/filtering/MetadataFilterTest.java | 6 +- .../stormcrawler/filtering/RegexFilterTest.java | 6 +- .../ClassInheritingFomAbstractAndInterface.java | 6 +- .../ClassInheritingFromAbstractClassOnly.java | 4 +- .../ClassInheritingFromOpenClass.java | 4 +- .../ClassWithoutValidConstructor.java | 4 +- .../initialisation/FinalClassToInitialize.java | 2 +- .../helper/initialisation/SimpleOpenClass.java | 2 +- .../helper/initialisation/base/AbstractClass.java | 2 +- .../helper/initialisation/base/ITestInterface.java | 2 +- .../OpenClassWithAbstractClassAndInterface.java | 2 +- .../stormcrawler/indexer/BasicIndexingTest.java | 27 +- .../apache}/stormcrawler/indexer/DummyIndexer.java | 6 +- .../stormcrawler/indexer/IndexerTester.java | 10 +- .../apache}/stormcrawler/json/JsoupFilterTest.java | 10 +- .../stormcrawler/jsoup/JSoupFiltersTest.java | 10 +- .../stormcrawler/parse/DuplicateLinksTest.java | 10 +- .../apache}/stormcrawler/parse/ParsingTester.java | 8 +- .../stormcrawler/parse/StackOverflowTest.java | 8 +- .../stormcrawler/parse/TextExtractorTest.java | 2 +- .../parse/filter/CSVMetadataFilterTest.java | 8 +- .../parse/filter/CollectionTaggerTest.java | 4 +- .../parse/filter/SubDocumentsFilterTest.java | 8 +- .../parse/filter/SubDocumentsParseFilter.java | 8 +- .../stormcrawler/parse/filter/XPathFilterTest.java | 8 +- .../persistence/AdaptiveSchedulerTest.java | 19 +- .../persistence/DefaultSchedulerTest.java | 4 +- .../stormcrawler/persistence/URLBufferTest.java | 10 +- .../protocol/AbstractProtocolTest.java | 96 ++++ .../protocol/DelegationProtocolTest.java | 41 +- .../stormcrawler/protocol/DummyProtocol.java} | 28 +- .../stormcrawler/protocol/HttpHeadersTest.java | 2 +- .../protocol/HttpRobotRulesParserTest.java | 282 +++++++++++ .../protocol/selenium/ProtocolTest.java | 166 +++++++ .../stormcrawler/proxy/MultiProxyManagerTest.java | 2 +- .../apache}/stormcrawler/proxy/SCProxyTest.java | 2 +- .../stormcrawler/proxy/SingleProxyManagerTest.java | 2 +- .../apache/stormcrawler/util/ConfUtilsTest.java | 64 +++ .../stormcrawler/util/CookieConverterTest.java | 2 +- .../stormcrawler/util/InitialisationUtilTest.java | 6 +- .../stormcrawler/util/MetadataTransferTest.java | 61 ++- .../apache}/stormcrawler/util/RefreshTagTest.java | 2 +- .../apache}/stormcrawler/util/RobotsTagsTest.java | 4 +- core/src/test/resources/basicurlnormalizer.json | 4 +- core/src/test/resources/delegator-conf.yaml | 21 +- core/src/test/resources/test.jsoupfilters.json | 8 +- core/src/test/resources/test.parsefilters.json | 8 +- core/src/test/resources/test.subdocfilter.json | 6 +- .../test/resources/tripadvisor.sitemap.index.xml | 22 + core/src/test/resources/tripadvisor.sitemap.xml.gz | Bin 0 -> 1537978 bytes external/aws/README.md | 2 +- external/aws/pom.xml | 8 +- .../aws/bolt/CloudSearchConstants.java | 2 +- .../aws/bolt/CloudSearchIndexerBolt.java | 12 +- .../stormcrawler/aws/bolt/CloudSearchUtils.java | 2 +- .../stormcrawler/aws/s3/AbstractS3CacheBolt.java | 4 +- .../stormcrawler/aws/s3/S3CacheChecker.java | 6 +- .../apache}/stormcrawler/aws/s3/S3Cacher.java | 6 +- .../stormcrawler/aws/s3/S3ContentCacher.java | 4 +- external/elasticsearch/README.md | 20 +- external/elasticsearch/archetype/pom.xml | 4 +- .../META-INF/maven/archetype-metadata.xml | 30 +- .../archetype-resources/crawler-conf.yaml | 58 ++- .../resources/archetype-resources/es-conf.yaml | 2 +- .../resources/archetype-resources/es-crawler.flux | 22 +- .../archetype-resources/es-injection.flux | 8 +- .../archetype-resources/kibana/importKibana.sh | 8 +- .../src/main/resources/archetype-resources/pom.xml | 24 +- .../src/main/java/ESCrawlTopology.java | 36 +- .../src/main/resources/jsoupfilters.json | 6 +- .../src/main/resources/parsefilters.json | 8 +- .../src/main/resources/urlfilters.json | 18 +- external/elasticsearch/pom.xml | 9 +- .../BulkItemResponseToFailedFlag.java | 2 +- .../elasticsearch/ElasticSearchConnection.java | 4 +- .../elasticsearch/bolt/DeletionBolt.java | 23 +- .../elasticsearch/bolt/IndexerBolt.java | 20 +- .../filtering/JSONURLFilterWrapper.java | 14 +- .../elasticsearch/metrics/MetricsConsumer.java | 8 +- .../elasticsearch/metrics/StatusMetricsBolt.java | 6 +- .../parse/filter/JSONResourceWrapper.java | 14 +- .../elasticsearch/persistence/AbstractSpout.java | 10 +- .../persistence/AggregationSpout.java | 8 +- .../elasticsearch/persistence/CollapsingSpout.java | 4 +- .../elasticsearch/persistence/HybridSpout.java | 6 +- .../elasticsearch/persistence/ScrollSpout.java | 10 +- .../persistence/StatusUpdaterBolt.java | 50 +- .../elasticsearch/bolt/IndexerBoltTest.java | 12 +- .../elasticsearch/bolt/StatusBoltTest.java | 14 +- external/langid/pom.xml | 6 +- .../stormcrawler/parse/filter/LanguageID.java | 12 +- external/opensearch/OS_IndexInit.sh | 23 - external/opensearch/README.md | 17 +- external/opensearch/archetype/pom.xml | 4 +- .../META-INF/archetype-post-generate.groovy | 5 +- .../META-INF/maven/archetype-metadata.xml | 32 +- .../resources/archetype-resources/OS_IndexInit.sh | 25 + .../main/resources/archetype-resources/README.md | 11 +- .../archetype-resources/crawler-conf.yaml | 58 ++- .../resources/archetype-resources/crawler.flux | 22 +- .../dashboards/importDashboards.sh | 8 +- .../resources/archetype-resources/injection.flux | 8 +- .../archetype-resources/opensearch-conf.yaml | 12 +- .../src/main/resources/archetype-resources/pom.xml | 24 +- .../src/main/resources/indexer.mapping | 0 .../src/main/resources/jsoupfilters.json | 6 +- .../src/main/resources/metrics.mapping | 0 .../src/main/resources/parsefilters.json | 8 +- .../src/main/resources/queues.mapping | 0 .../src/main/resources/status.mapping | 0 .../src/main/resources/urlfilters.json | 18 +- external/opensearch/opensearch-conf.yaml | 12 +- external/opensearch/pom.xml | 24 +- .../stormcrawler/opensearch/bolt/DeletionBolt.java | 94 ---- .../opensearch/BulkItemResponseToFailedFlag.java | 6 +- .../apache}/stormcrawler/opensearch/Constants.java | 2 +- .../stormcrawler/opensearch/IndexCreation.java | 2 +- .../opensearch/OpenSearchConnection.java | 90 ++-- .../stormcrawler/opensearch/bolt/DeletionBolt.java | 308 ++++++++++++ .../stormcrawler/opensearch/bolt/IndexerBolt.java | 36 +- .../opensearch/filtering/JSONURLFilterWrapper.java | 14 +- .../opensearch/metrics/MetricsConsumer.java | 10 +- .../opensearch/metrics/StatusMetricsBolt.java | 10 +- .../parse/filter/JSONResourceWrapper.java | 14 +- .../opensearch/persistence/AbstractSpout.java | 16 +- .../opensearch/persistence/AggregationSpout.java | 12 +- .../opensearch/persistence/HybridSpout.java | 10 +- .../opensearch/persistence/QueueBolt.java | 12 +- .../opensearch/persistence/StatusUpdaterBolt.java | 58 ++- .../opensearch/bolt/AbstractOpenSearchTest.java | 46 ++ .../opensearch/bolt/IndexerBoltTest.java | 30 +- .../opensearch/bolt/StatusBoltTest.java | 38 +- .../src/{main => test}/resources/indexer.mapping | 0 .../src/{main => test}/resources/metrics.mapping | 0 .../src/{main => test}/resources/status.mapping | 0 external/pom.xml | 23 +- external/solr/README.md | 2 +- external/solr/pom.xml | 14 +- external/solr/solr-conf.yaml | 2 +- .../apache}/stormcrawler/solr/SeedInjector.java | 10 +- .../apache}/stormcrawler/solr/SolrConnection.java | 4 +- .../stormcrawler/solr/SolrCrawlTopology.java | 26 +- .../stormcrawler/solr/bolt/DeletionBolt.java | 86 ++++ .../stormcrawler/solr/bolt/IndexerBolt.java | 13 +- .../stormcrawler/solr/metrics/MetricsConsumer.java | 6 +- .../stormcrawler/solr/persistence/SolrSpout.java | 11 +- .../solr/persistence/StatusUpdaterBolt.java | 17 +- external/sql/pom.xml | 6 +- external/sql/sql-conf.yaml | 2 +- .../apache}/stormcrawler/sql/Constants.java | 2 +- .../apache}/stormcrawler/sql/IndexerBolt.java | 12 +- .../apache}/stormcrawler/sql/SQLSpout.java | 10 +- .../apache}/stormcrawler/sql/SQLUtil.java | 2 +- .../stormcrawler/sql/StatusUpdaterBolt.java | 33 +- .../stormcrawler/sql/metrics/MetricsConsumer.java | 18 +- external/tika/README.md | 4 +- external/tika/pom.xml | 12 +- .../apache}/stormcrawler/tika/DOMBuilder.java | 2 +- .../apache}/stormcrawler/tika/ParserBolt.java | 38 +- .../apache}/stormcrawler/tika/RedirectionBolt.java | 4 +- .../stormcrawler/tika/XMLCharacterRecognizer.java | 2 +- .../apache}/stormcrawler/tika/ParserBoltTest.java | 16 +- external/urlfrontier/README.md | 2 +- external/urlfrontier/pom.xml | 9 +- .../stormcrawler/urlfrontier/Constants.java | 2 +- .../urlfrontier/ManagedChannelUtil.java | 4 +- .../apache}/stormcrawler/urlfrontier/Spout.java | 10 +- .../urlfrontier/StatusUpdaterBolt.java | 14 +- .../urlfrontier/StatusUpdaterBoltTest.java | 16 +- .../urlfrontier/URLFrontierContainer.java | 2 +- .../urlfrontier/URLFrontierContainerConfig.java | 2 +- external/warc/README.md | 43 +- external/warc/pom.xml | 20 +- .../warc/FileTimeSizeRotationPolicy.java | 2 +- .../apache}/stormcrawler/warc/GzipHdfsBolt.java | 2 +- .../stormcrawler/warc/WARCFileNameFormat.java | 2 +- .../apache}/stormcrawler/warc/WARCHdfsBolt.java | 6 +- .../stormcrawler/warc/WARCRecordFormat.java | 20 +- .../stormcrawler/warc/WARCRequestRecordFormat.java | 8 +- .../apache}/stormcrawler/warc/WARCSpout.java | 65 ++- .../stormcrawler/warc/WARCHdfsBoltTest.java | 10 +- .../stormcrawler/warc/WARCRecordFormatTest.java | 8 +- .../apache/stormcrawler/warc/WARCSpoutTest.java | 70 +++ external/warc/src/test/resources/test.warc.gz | Bin 0 -> 301243 bytes .../src/test/resources/unparsable-date.warc.gz | Bin 0 -> 938 bytes external/warc/src/test/resources/warc.inputs | 2 + pom.xml | 264 ++++++++-- 329 files changed, 4971 insertions(+), 2219 deletions(-) diff --cc THIRD-PARTY.txt index 00000000,8f2ea7b5..98cd4b9c mode 000000,100644..100644 --- a/THIRD-PARTY.txt +++ b/THIRD-PARTY.txt @@@ -1,0 -1,547 +1,546 @@@ + + List of third-party dependencies grouped by their license type. + + + 3-Clause BSD License + + * Kryo (com.esotericsoftware:kryo:5.6.0 - https://github.com/EsotericSoftware/kryo/kryo) + * MinLog (com.esotericsoftware:minlog:1.3.1 - https://github.com/EsotericSoftware/minlog) + * ReflectASM (com.esotericsoftware:reflectasm:1.11.9 - https://github.com/EsotericSoftware/reflectasm) + + Apache License + + * Log4j Implemented Over SLF4J (org.slf4j:log4j-over-slf4j:1.7.36 - http://www.slf4j.org) + + Apache License, Version 2.0 + + * aggs-matrix-stats (org.opensearch.plugin:aggs-matrix-stats-client:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) + * Apache Avro (org.apache.avro:avro:1.11.3 - https://avro.apache.org) + * Apache Commons BeanUtils (commons-beanutils:commons-beanutils:1.9.4 - https://commons.apache.org/proper/commons-beanutils/) + * Apache Commons CLI (commons-cli:commons-cli:1.6.0 - https://commons.apache.org/proper/commons-cli/) + * Apache Commons Codec (commons-codec:commons-codec:1.11 - http://commons.apache.org/proper/commons-codec/) + * Apache Commons Codec (commons-codec:commons-codec:1.15 - https://commons.apache.org/proper/commons-codec/) + * Apache Commons Codec (commons-codec:commons-codec:1.16.0 - https://commons.apache.org/proper/commons-codec/) + * Apache Commons Collections (commons-collections:commons-collections:3.2.2 - http://commons.apache.org/collections/) + * Apache Commons Collections (org.apache.commons:commons-collections4:4.4 - https://commons.apache.org/proper/commons-collections/) + * Apache Commons Compress (org.apache.commons:commons-compress:1.21 - https://commons.apache.org/proper/commons-compress/) + * Apache Commons Compress (org.apache.commons:commons-compress:1.24.0 - https://commons.apache.org/proper/commons-compress/) + * Apache Commons Configuration (org.apache.commons:commons-configuration2:2.8.0 - https://commons.apache.org/proper/commons-configuration/) + * Apache Commons Crypto (org.apache.commons:commons-crypto:1.1.0 - https://commons.apache.org/proper/commons-crypto/) + * Apache Commons CSV (org.apache.commons:commons-csv:1.10.0 - https://commons.apache.org/proper/commons-csv/) + * Apache Commons Exec (org.apache.commons:commons-exec:1.3 - http://commons.apache.org/proper/commons-exec/) + * Apache Commons IO (commons-io:commons-io:2.11.0 - https://commons.apache.org/proper/commons-io/) + * Apache Commons Lang (org.apache.commons:commons-lang3:3.12.0 - https://commons.apache.org/proper/commons-lang/) + * Apache Commons Lang (org.apache.commons:commons-lang3:3.13.0 - https://commons.apache.org/proper/commons-lang/) + * Apache Commons Logging (commons-logging:commons-logging:1.2 - http://commons.apache.org/proper/commons-logging/) + * Apache Commons Math (org.apache.commons:commons-math3:3.6.1 - http://commons.apache.org/proper/commons-math/) + * Apache Commons Net (commons-net:commons-net:3.9.0 - https://commons.apache.org/proper/commons-net/) + * Apache Commons Text (org.apache.commons:commons-text:1.10.0 - https://commons.apache.org/proper/commons-text) + * Apache FontBox (org.apache.pdfbox:fontbox:2.0.29 - http://pdfbox.apache.org/) + * Apache Hadoop Annotations (org.apache.hadoop:hadoop-annotations:3.3.6 - no url defined) + * Apache Hadoop Auth (org.apache.hadoop:hadoop-auth:3.3.6 - no url defined) + * Apache Hadoop Client Aggregator (org.apache.hadoop:hadoop-client:3.3.6 - no url defined) + * Apache Hadoop Common (org.apache.hadoop:hadoop-common:3.3.6 - no url defined) + * Apache Hadoop HDFS (org.apache.hadoop:hadoop-hdfs:3.3.6 - no url defined) + * Apache Hadoop HDFS Client (org.apache.hadoop:hadoop-hdfs-client:3.3.6 - no url defined) + * Apache Hadoop MapReduce Common (org.apache.hadoop:hadoop-mapreduce-client-common:3.3.6 - no url defined) + * Apache Hadoop MapReduce Core (org.apache.hadoop:hadoop-mapreduce-client-core:3.3.6 - no url defined) + * Apache Hadoop MapReduce JobClient (org.apache.hadoop:hadoop-mapreduce-client-jobclient:3.3.6 - no url defined) + * Apache Hadoop shaded Guava (org.apache.hadoop.thirdparty:hadoop-shaded-guava:1.1.1 - https://www.apache.org/hadoop-thirdparty/hadoop-shaded-guava/) + * Apache Hadoop shaded Protobuf 3.7 (org.apache.hadoop.thirdparty:hadoop-shaded-protobuf_3_7:1.1.1 - https://www.apache.org/hadoop-thirdparty/hadoop-shaded-protobuf_3_7/) + * Apache Hadoop YARN API (org.apache.hadoop:hadoop-yarn-api:3.3.6 - no url defined) + * Apache Hadoop YARN Client (org.apache.hadoop:hadoop-yarn-client:3.3.6 - no url defined) + * Apache Hadoop YARN Common (org.apache.hadoop:hadoop-yarn-common:3.3.6 - no url defined) + * Apache HBase - Annotations (org.apache.hbase:hbase-annotations:2.5.6-hadoop3 - https://hbase.apache.org/hbase-annotations) + * Apache HBase - Client (org.apache.hbase:hbase-client:2.5.6-hadoop3 - https://hbase.apache.org/hbase-build-configuration/hbase-client) + * Apache HBase - Common (org.apache.hbase:hbase-common:2.5.6-hadoop3 - https://hbase.apache.org/hbase-build-configuration/hbase-common) + * Apache HBase - Hadoop Compatibility (org.apache.hbase:hbase-hadoop-compat:2.5.6-hadoop3 - https://hbase.apache.org/hbase-build-configuration/hbase-hadoop-compat) + * Apache HBase - Hadoop Two Compatibility (org.apache.hbase:hbase-hadoop2-compat:2.5.6-hadoop3 - https://hbase.apache.org/hbase-build-configuration/hbase-hadoop2-compat) + * Apache HBase - Logging (org.apache.hbase:hbase-logging:2.5.6-hadoop3 - https://hbase.apache.org/hbase-build-configuration/hbase-logging) + * Apache HBase - Metrics API (org.apache.hbase:hbase-metrics-api:2.5.6-hadoop3 - https://hbase.apache.org/hbase-build-configuration/hbase-metrics-api) + * Apache HBase - Metrics Implementation (org.apache.hbase:hbase-metrics:2.5.6-hadoop3 - https://hbase.apache.org/hbase-build-configuration/hbase-metrics) + * Apache HBase Patched and Relocated (Shaded) Protobuf (org.apache.hbase.thirdparty:hbase-shaded-protobuf:4.1.5 - https://hbase.apache.org/hbase-shaded-protobuf) + * Apache HBase - Protocol (org.apache.hbase:hbase-protocol:2.5.6-hadoop3 - https://hbase.apache.org/hbase-build-configuration/hbase-protocol) + * Apache HBase Relocated (Shaded) GSON Libs (org.apache.hbase.thirdparty:hbase-shaded-gson:4.1.5 - https://hbase.apache.org/hbase-shaded-gson) + * Apache HBase Relocated (Shaded) Netty Libs (org.apache.hbase.thirdparty:hbase-shaded-netty:4.1.5 - https://hbase.apache.org/hbase-shaded-netty) + * Apache HBase Relocated (Shaded) Third-party Miscellaneous Libs (org.apache.hbase.thirdparty:hbase-shaded-miscellaneous:4.1.5 - https://hbase.apache.org/hbase-shaded-miscellaneous) + * Apache HBase - Shaded Protocol (org.apache.hbase:hbase-protocol-shaded:2.5.6-hadoop3 - https://hbase.apache.org/hbase-build-configuration/hbase-protocol-shaded) + * Apache HBase Unsafe Wrapper (org.apache.hbase.thirdparty:hbase-unsafe:4.1.5 - https://hbase.apache.org/hbase-unsafe) + * Apache HttpAsyncClient (org.apache.httpcomponents:httpasyncclient:4.1.4 - http://hc.apache.org/httpcomponents-asyncclient) + * Apache HttpAsyncClient (org.apache.httpcomponents:httpasyncclient:4.1.5 - http://hc.apache.org/httpcomponents-asyncclient) + * Apache HttpClient (org.apache.httpcomponents:httpclient:4.5.10 - http://hc.apache.org/httpcomponents-client) + * Apache HttpClient (org.apache.httpcomponents:httpclient:4.5.14 - http://hc.apache.org/httpcomponents-client-ga) + * Apache HttpClient Mime (org.apache.httpcomponents:httpmime:4.5.14 - http://hc.apache.org/httpcomponents-client-ga) + * Apache HttpCore (org.apache.httpcomponents:httpcore:4.4.12 - http://hc.apache.org/httpcomponents-core-ga) + * Apache HttpCore (org.apache.httpcomponents:httpcore:4.4.16 - http://hc.apache.org/httpcomponents-core-ga) + * Apache HttpCore NIO (org.apache.httpcomponents:httpcore-nio:4.4.12 - http://hc.apache.org/httpcomponents-core-ga) + * Apache HttpCore NIO (org.apache.httpcomponents:httpcore-nio:4.4.16 - http://hc.apache.org/httpcomponents-core-ga) + * Apache James :: Mime4j :: Core (org.apache.james:apache-mime4j-core:0.8.9 - http://james.apache.org/mime4j/apache-mime4j-core) + * Apache James :: Mime4j :: DOM (org.apache.james:apache-mime4j-dom:0.8.9 - http://james.apache.org/mime4j/apache-mime4j-dom) + * Apache JempBox (org.apache.pdfbox:jempbox:1.8.17 - http://www.apache.org/pdfbox-parent/jempbox/) + * Apache Log4j API (org.apache.logging.log4j:log4j-api:2.23.0 - https://logging.apache.org/log4j/2.x/log4j/log4j-api/) + * Apache Log4j Core (org.apache.logging.log4j:log4j-core:2.23.0 - https://logging.apache.org/log4j/2.x/log4j/log4j-core/) + * Apache Log4j JUL Adapter (org.apache.logging.log4j:log4j-jul:2.21.0 - https://logging.apache.org/log4j/2.x/log4j/log4j-jul/) + * Apache Log4j SLF4J Binding (org.apache.logging.log4j:log4j-slf4j-impl:2.23.0 - https://logging.apache.org/log4j/2.x/log4j/log4j-slf4j-impl/) + * Apache Lucene (module: backward-codecs) (org.apache.lucene:lucene-backward-codecs:9.9.2 - https://lucene.apache.org/) + * Apache Lucene (module: common) (org.apache.lucene:lucene-analysis-common:9.9.2 - https://lucene.apache.org/) + * Apache Lucene (module: core) (org.apache.lucene:lucene-core:9.9.2 - https://lucene.apache.org/) + * Apache Lucene (module: grouping) (org.apache.lucene:lucene-grouping:9.9.2 - https://lucene.apache.org/) + * Apache Lucene (module: highlighter) (org.apache.lucene:lucene-highlighter:9.9.2 - https://lucene.apache.org/) + * Apache Lucene (module: join) (org.apache.lucene:lucene-join:9.9.2 - https://lucene.apache.org/) + * Apache Lucene (module: memory) (org.apache.lucene:lucene-memory:9.9.2 - https://lucene.apache.org/) + * Apache Lucene (module: misc) (org.apache.lucene:lucene-misc:9.9.2 - https://lucene.apache.org/) + * Apache Lucene (module: queries) (org.apache.lucene:lucene-queries:9.9.2 - https://lucene.apache.org/) + * Apache Lucene (module: queryparser) (org.apache.lucene:lucene-queryparser:9.9.2 - https://lucene.apache.org/) + * Apache Lucene (module: sandbox) (org.apache.lucene:lucene-sandbox:9.9.2 - https://lucene.apache.org/) + * Apache Lucene (module: spatial3d) (org.apache.lucene:lucene-spatial3d:9.9.2 - https://lucene.apache.org/) + * Apache Lucene (module: spatial-extras) (org.apache.lucene:lucene-spatial-extras:9.9.2 - https://lucene.apache.org/) + * Apache Lucene (module: suggest) (org.apache.lucene:lucene-suggest:9.9.2 - https://lucene.apache.org/) + * Apache PDFBox (org.apache.pdfbox:pdfbox:2.0.29 - https://www.apache.org/pdfbox-parent/pdfbox/) + * Apache PDFBox tools (org.apache.pdfbox:pdfbox-tools:2.0.29 - https://www.apache.org/pdfbox-parent/pdfbox-tools/) + * Apache POI (org.apache.poi:poi-ooxml-lite:5.2.3 - https://poi.apache.org/) + * Apache POI (org.apache.poi:poi-scratchpad:5.2.3 - https://poi.apache.org/) + * Apache POI - API based on OPC and OOXML schemas (org.apache.poi:poi-ooxml:5.2.3 - https://poi.apache.org/) + * Apache POI - Common (org.apache.poi:poi:5.2.3 - https://poi.apache.org/) + * Apache Solr (module: api) (org.apache.solr:solr-api:9.5.0 - https://solr.apache.org/) + * Apache Solr (module: solrj) (org.apache.solr:solr-solrj:9.5.0 - https://solr.apache.org/) + * Apache Solr (module: solrj-streaming) (org.apache.solr:solr-solrj-streaming:9.5.0 - https://solr.apache.org/) + * Apache Solr (module: solrj-zookeeper) (org.apache.solr:solr-solrj-zookeeper:9.5.0 - https://solr.apache.org/) + * Apache Tika Apple parser module (org.apache.tika:tika-parser-apple-module:2.9.1 - https://tika.apache.org/tika-parser-apple-module/) + * Apache Tika audiovideo parser module (org.apache.tika:tika-parser-audiovideo-module:2.9.1 - https://tika.apache.org/tika-parser-audiovideo-module/) + * Apache Tika cad parser module (org.apache.tika:tika-parser-cad-module:2.9.1 - https://tika.apache.org/tika-parser-cad-module/) + * Apache Tika code parser module (org.apache.tika:tika-parser-code-module:2.9.1 - https://tika.apache.org/tika-parser-code-module/) + * Apache Tika core (org.apache.tika:tika-core:2.9.1 - https://tika.apache.org/) + * Apache Tika crypto parser module (org.apache.tika:tika-parser-crypto-module:2.9.1 - https://tika.apache.org/tika-parser-crypto-module/) + * Apache Tika digest commons (org.apache.tika:tika-parser-digest-commons:2.9.1 - https://tika.apache.org/tika-parser-digest-commons/) + * Apache Tika font parser module (org.apache.tika:tika-parser-font-module:2.9.1 - https://tika.apache.org/tika-parser-font-module/) + * Apache Tika html parser module (org.apache.tika:tika-parser-html-module:2.9.1 - https://tika.apache.org/tika-parser-html-module/) + * Apache Tika image parser module (org.apache.tika:tika-parser-image-module:2.9.1 - https://tika.apache.org/tika-parser-image-module/) + * Apache Tika mail commons (org.apache.tika:tika-parser-mail-commons:2.9.1 - https://tika.apache.org/tika-parser-mail-commons/) + * Apache Tika mail parser module (org.apache.tika:tika-parser-mail-module:2.9.1 - https://tika.apache.org/tika-parser-mail-module/) + * Apache Tika Microsoft parser module (org.apache.tika:tika-parser-microsoft-module:2.9.1 - https://tika.apache.org/tika-parser-microsoft-module/) + * Apache Tika miscellaneous office format parser module (org.apache.tika:tika-parser-miscoffice-module:2.9.1 - https://tika.apache.org/tika-parser-miscoffice-module/) + * Apache Tika news parser module (org.apache.tika:tika-parser-news-module:2.9.1 - https://tika.apache.org/tika-parser-news-module/) + * Apache Tika OCR parser module (org.apache.tika:tika-parser-ocr-module:2.9.1 - https://tika.apache.org/tika-parser-ocr-module/) + * Apache Tika package parser module (org.apache.tika:tika-parser-pkg-module:2.9.1 - https://tika.apache.org/tika-parser-pkg-module/) + * Apache Tika PDF parser module (org.apache.tika:tika-parser-pdf-module:2.9.1 - https://tika.apache.org/tika-parser-pdf-module/) + * Apache Tika plugin for Ogg, Vorbis and FLAC (org.gagravarr:vorbis-java-tika:0.8 - https://github.com/Gagravarr/VorbisJava) + * Apache Tika standard parser package (org.apache.tika:tika-parsers-standard-package:2.9.1 - https://tika.apache.org/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/) + * Apache Tika text parser module (org.apache.tika:tika-parser-text-module:2.9.1 - https://tika.apache.org/tika-parser-text-module/) + * Apache Tika WARC parser module (org.apache.tika:tika-parser-webarchive-module:2.9.1 - https://tika.apache.org/tika-parser-webarchive-module/) + * Apache Tika XML parser module (org.apache.tika:tika-parser-xml-module:2.9.1 - https://tika.apache.org/tika-parser-xml-module/) + * Apache Tika XMP commons (org.apache.tika:tika-parser-xmp-commons:2.9.1 - https://tika.apache.org/tika-parser-xmp-commons/) + * Apache Tika ZIP commons (org.apache.tika:tika-parser-zip-commons:2.9.1 - https://tika.apache.org/tika-parser-zip-commons/) + * Apache XmpBox (org.apache.pdfbox:xmpbox:2.0.29 - https://www.apache.org/pdfbox-parent/xmpbox/) + * Apache Yetus - Audience Annotations (org.apache.yetus:audience-annotations:0.5.0 - https://yetus.apache.org/audience-annotations) + * Apache ZooKeeper - Jute (org.apache.zookeeper:zookeeper-jute:3.6.3 - http://zookeeper.apache.org/zookeeper-jute) + * Apache ZooKeeper - Jute (org.apache.zookeeper:zookeeper-jute:3.9.1 - http://zookeeper.apache.org/zookeeper-jute) + * Apache ZooKeeper - Server (org.apache.zookeeper:zookeeper:3.6.3 - http://zookeeper.apache.org/zookeeper) + * Apache ZooKeeper - Server (org.apache.zookeeper:zookeeper:3.9.1 - http://zookeeper.apache.org/zookeeper) + * AutoService (com.google.auto.service:auto-service-annotations:1.1.1 - https://github.com/google/auto/tree/main/service) + * AWS Java SDK for Amazon CloudSearch (com.amazonaws:aws-java-sdk-cloudsearch:1.12.663 - https://aws.amazon.com/sdkforjava) + * AWS Java SDK for Amazon S3 (com.amazonaws:aws-java-sdk-s3:1.12.663 - https://aws.amazon.com/sdkforjava) + * AWS Java SDK for AWS KMS (com.amazonaws:aws-java-sdk-kms:1.12.663 - https://aws.amazon.com/sdkforjava) + * AWS SDK for Java - Core (com.amazonaws:aws-java-sdk-core:1.12.663 - https://aws.amazon.com/sdkforjava) + * Byte Buddy (without dependencies) (net.bytebuddy:byte-buddy:1.14.11 - https://bytebuddy.net/byte-buddy) + * Caffeine cache (com.github.ben-manes.caffeine:caffeine:3.1.8 - https://github.com/ben-manes/caffeine) + * com.drewnoakes:metadata-extractor (com.drewnoakes:metadata-extractor:2.18.0 - https://drewnoakes.com/code/exif/) + * Commons Daemon (commons-daemon:commons-daemon:1.0.13 - http://commons.apache.org/daemon/) + * Commons Lang (commons-lang:commons-lang:2.6 - http://commons.apache.org/lang/) + * Commons Logging (commons-logging:commons-logging:1.1.3 - http://commons.apache.org/proper/commons-logging/) + * Commons Math (org.apache.commons:commons-math3:3.1.1 - http://commons.apache.org/math/) + * compiler (com.github.spullara.mustache.java:compiler:0.9.10 - http://github.com/spullara/mustache.java) + * compiler (com.github.spullara.mustache.java:compiler:0.9.6 - http://github.com/spullara/mustache.java) + * Crawler-commons (com.github.crawler-commons:crawler-commons:1.4 - https://github.com/crawler-commons/crawler-commons) + * Curator Client (org.apache.curator:curator-client:5.2.0 - http://curator.apache.org/curator-client) + * Curator Framework (org.apache.curator:curator-framework:5.2.0 - http://curator.apache.org/curator-framework) + * Curator Recipes (org.apache.curator:curator-recipes:5.2.0 - http://curator.apache.org/curator-recipes) + * error-prone annotations (com.google.errorprone:error_prone_annotations:2.14.0 - https://errorprone.info/error_prone_annotations) + * error-prone annotations (com.google.errorprone:error_prone_annotations:2.21.1 - https://errorprone.info/error_prone_annotations) + * Failsafe (dev.failsafe:failsafe:3.3.2 - https://failsafe.dev/failsafe) + * FindBugs-jsr305 (com.google.code.findbugs:jsr305:3.0.2 - http://findbugs.sourceforge.net/) + * Google Android Annotations Library (com.google.android:annotations:4.1.1.4 - http://source.android.com/) + * Graphite Integration for Metrics (io.dropwizard.metrics:metrics-graphite:3.2.6 - http://metrics.dropwizard.io/metrics-graphite/) + * Gson (com.google.code.gson:gson:2.9.0 - https://github.com/google/gson/gson) + * Guava: Google Core Libraries for Java (com.google.guava:guava:18.0 - http://code.google.com/p/guava-libraries/guava) + * Guava: Google Core Libraries for Java (com.google.guava:guava:31.1-android - https://github.com/google/guava) + * Guava: Google Core Libraries for Java (com.google.guava:guava:32.1.3-jre - https://github.com/google/guava) + * Guava: Google Core Libraries for Java (com.google.guava:guava:33.0.0-jre - https://github.com/google/guava) + * Guava InternalFutureFailureAccess and InternalFutures (com.google.guava:failureaccess:1.0.1 - https://github.com/google/guava/failureaccess) + * Guava InternalFutureFailureAccess and InternalFutures (com.google.guava:failureaccess:1.0.2 - https://github.com/google/guava/failureaccess) + * Guava ListenableFuture only (com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava - https://github.com/google/guava/listenablefuture) + * HPPC Collections (com.carrotsearch:hppc:0.8.1 - http://labs.carrotsearch.com/hppc.html/hppc) + * IntelliJ IDEA Annotations (com.intellij:annotations:12.0 - http://www.jetbrains.org) + * io.grpc:grpc-api (io.grpc:grpc-api:1.50.2 - https://github.com/grpc/grpc-java) + * io.grpc:grpc-context (io.grpc:grpc-context:1.50.2 - https://github.com/grpc/grpc-java) + * io.grpc:grpc-core (io.grpc:grpc-core:1.50.2 - https://github.com/grpc/grpc-java) + * io.grpc:grpc-netty-shaded (io.grpc:grpc-netty-shaded:1.50.2 - https://github.com/grpc/grpc-java) + * io.grpc:grpc-protobuf (io.grpc:grpc-protobuf:1.50.2 - https://github.com/grpc/grpc-java) + * io.grpc:grpc-protobuf-lite (io.grpc:grpc-protobuf-lite:1.50.2 - https://github.com/grpc/grpc-java) + * io.grpc:grpc-stub (io.grpc:grpc-stub:1.50.2 - https://github.com/grpc/grpc-java) + * J2ObjC Annotations (com.google.j2objc:j2objc-annotations:1.3 - https://github.com/google/j2objc/) + * J2ObjC Annotations (com.google.j2objc:j2objc-annotations:2.8 - https://github.com/google/j2objc/) + * Jackcess (com.healthmarketscience.jackcess:jackcess:4.0.5 - https://jackcess.sourceforge.io) + * Jackcess Encrypt (com.healthmarketscience.jackcess:jackcess-encrypt:4.0.2 - http://jackcessencrypt.sf.net) + * Jackson-annotations (com.fasterxml.jackson.core:jackson-annotations:2.15.2 - https://github.com/FasterXML/jackson) + * Jackson-core (com.fasterxml.jackson.core:jackson-core:2.10.4 - https://github.com/FasterXML/jackson-core) + * Jackson-core (com.fasterxml.jackson.core:jackson-core:2.15.2 - https://github.com/FasterXML/jackson-core) + * Jackson-core (com.fasterxml.jackson.core:jackson-core:2.16.1 - https://github.com/FasterXML/jackson-core) + * jackson-databind (com.fasterxml.jackson.core:jackson-databind:2.15.2 - https://github.com/FasterXML/jackson) + * Jackson dataformat: CBOR (com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:2.10.4 - http://github.com/FasterXML/jackson-dataformats-binary) + * Jackson dataformat: CBOR (com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:2.12.6 - http://github.com/FasterXML/jackson-dataformats-binary) + * Jackson dataformat: CBOR (com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:2.16.1 - https://github.com/FasterXML/jackson-dataformats-binary) + * Jackson dataformat: Smile (com.fasterxml.jackson.dataformat:jackson-dataformat-smile:2.10.4 - http://github.com/FasterXML/jackson-dataformats-binary) + * Jackson dataformat: Smile (com.fasterxml.jackson.dataformat:jackson-dataformat-smile:2.16.1 - https://github.com/FasterXML/jackson-dataformats-binary) + * Jackson-dataformat-YAML (com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.10.4 - https://github.com/FasterXML/jackson-dataformats-text) + * Jackson-dataformat-YAML (com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.16.1 - https://github.com/FasterXML/jackson-dataformats-text) + * Jackson-JAXRS-base (com.fasterxml.jackson.jaxrs:jackson-jaxrs-base:2.12.7 - http://github.com/FasterXML/jackson-jaxrs-providers/jackson-jaxrs-base) + * Jackson-JAXRS-JSON (com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:2.12.7 - http://github.com/FasterXML/jackson-jaxrs-providers/jackson-jaxrs-json-provider) + * Jackson module: JAXB Annotations (com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.12.7 - https://github.com/FasterXML/jackson-modules-base) + * java-libpst (com.pff:java-libpst:0.9.3 - https://github.com/rjohnsondev/java-libpst) + * JCIP Annotations under Apache License (com.github.stephenc.jcip:jcip-annotations:1.0-1 - http://stephenc.github.com/jcip-annotations) + * JCL 1.2 implemented over SLF4J (org.slf4j:jcl-over-slf4j:2.0.10 - http://www.slf4j.org) + * JCL 1.2 implemented over SLF4J (org.slf4j:jcl-over-slf4j:2.0.9 - http://www.slf4j.org) + * JetBrains Java Annotations (org.jetbrains:annotations:24.1.0 - https://github.com/JetBrains/java-annotations) + * Jettison (org.codehaus.jettison:jettison:1.1 - no url defined) + * JMES Path Query library (com.amazonaws:jmespath-java:1.12.663 - https://aws.amazon.com/sdkforjava) + * Joda-Time (joda-time:joda-time:2.10.10 - https://www.joda.org/joda-time/) + * Joda-Time (joda-time:joda-time:2.12.2 - https://www.joda.org/joda-time/) + * Joda-Time (joda-time:joda-time:2.8.1 - http://www.joda.org/joda-time/) + * jsonic (net.arnx:jsonic:1.2.11 - http://jsonic.sourceforge.jp/) + * JVM Integration for Metrics (io.dropwizard.metrics:metrics-jvm:3.2.6 - http://metrics.dropwizard.io/metrics-jvm/) + * jwarc (org.netpreserve:jwarc:0.28.3 - https://github.com/iipc/jwarc) + * jwarc (org.netpreserve:jwarc:0.29.0 - https://github.com/iipc/jwarc) + * Kerb Simple Kdc (org.apache.kerby:kerb-simplekdc:1.0.1 - http://directory.apache.org/kerby/kerby-kerb/kerb-simplekdc) + * Kerby ASN1 Project (org.apache.kerby:kerby-asn1:1.0.1 - http://directory.apache.org/kerby/kerby-common/kerby-asn1) + * Kerby Config (org.apache.kerby:kerby-config:1.0.1 - http://directory.apache.org/kerby/kerby-common/kerby-config) + * Kerby-kerb Admin (org.apache.kerby:kerb-admin:1.0.1 - http://directory.apache.org/kerby/kerby-kerb/kerb-admin) + * Kerby-kerb Client (org.apache.kerby:kerb-client:1.0.1 - http://directory.apache.org/kerby/kerby-kerb/kerb-client) + * Kerby-kerb Common (org.apache.kerby:kerb-common:1.0.1 - http://directory.apache.org/kerby/kerby-kerb/kerb-common) + * Kerby-kerb core (org.apache.kerby:kerb-core:1.0.1 - http://directory.apache.org/kerby/kerby-kerb/kerb-core) + * Kerby-kerb Crypto (org.apache.kerby:kerb-crypto:1.0.1 - http://directory.apache.org/kerby/kerby-kerb/kerb-crypto) + * Kerby-kerb Identity (org.apache.kerby:kerb-identity:1.0.1 - http://directory.apache.org/kerby/kerby-kerb/kerb-identity) + * Kerby-kerb Server (org.apache.kerby:kerb-server:1.0.1 - http://directory.apache.org/kerby/kerby-kerb/kerb-server) + * Kerby-kerb Util (org.apache.kerby:kerb-util:1.0.1 - http://directory.apache.org/kerby/kerby-kerb/kerb-util) + * Kerby PKIX Project (org.apache.kerby:kerby-pkix:1.0.1 - http://directory.apache.org/kerby/kerby-pkix) + * Kerby Util (org.apache.kerby:kerby-util:1.0.1 - http://directory.apache.org/kerby/kerby-common/kerby-util) + * Kerby XDR Project (org.apache.kerby:kerby-xdr:1.0.1 - http://directory.apache.org/kerby/kerby-common/kerby-xdr) + * Kotlin Stdlib (org.jetbrains.kotlin:kotlin-stdlib:1.8.21 - https://kotlinlang.org/) + * Kotlin Stdlib Common (org.jetbrains.kotlin:kotlin-stdlib-common:1.9.10 - https://kotlinlang.org/) + * Kotlin Stdlib Jdk7 (org.jetbrains.kotlin:kotlin-stdlib-jdk7:1.8.21 - https://kotlinlang.org/) + * Kotlin Stdlib Jdk8 (org.jetbrains.kotlin:kotlin-stdlib-jdk8:1.8.21 - https://kotlinlang.org/) + * lang-mustache (org.opensearch.plugin:lang-mustache-client:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) + * language-detector (com.optimaize.languagedetector:language-detector:0.6 - https://github.com/optimaize/language-detector) + * Lucene Common Analyzers (org.apache.lucene:lucene-analyzers-common:8.11.1 - https://lucene.apache.org/lucene-parent/lucene-analyzers-common) + * Lucene Core (org.apache.lucene:lucene-core:8.11.1 - https://lucene.apache.org/lucene-parent/lucene-core) + * Lucene Grouping (org.apache.lucene:lucene-grouping:8.11.1 - https://lucene.apache.org/lucene-parent/lucene-grouping) + * Lucene Highlighter (org.apache.lucene:lucene-highlighter:8.11.1 - https://lucene.apache.org/lucene-parent/lucene-highlighter) + * Lucene Join (org.apache.lucene:lucene-join:8.11.1 - https://lucene.apache.org/lucene-parent/lucene-join) + * Lucene Memory (org.apache.lucene:lucene-backward-codecs:8.11.1 - https://lucene.apache.org/lucene-parent/lucene-backward-codecs) + * Lucene Memory (org.apache.lucene:lucene-memory:8.11.1 - https://lucene.apache.org/lucene-parent/lucene-memory) + * Lucene Miscellaneous (org.apache.lucene:lucene-misc:8.11.1 - https://lucene.apache.org/lucene-parent/lucene-misc) + * Lucene Queries (org.apache.lucene:lucene-queries:8.11.1 - https://lucene.apache.org/lucene-parent/lucene-queries) + * Lucene QueryParsers (org.apache.lucene:lucene-queryparser:8.11.1 - https://lucene.apache.org/lucene-parent/lucene-queryparser) + * Lucene Sandbox (org.apache.lucene:lucene-sandbox:8.11.1 - https://lucene.apache.org/lucene-parent/lucene-sandbox) + * Lucene Spatial 3D (org.apache.lucene:lucene-spatial3d:8.11.1 - https://lucene.apache.org/lucene-parent/lucene-spatial3d) + * Lucene Suggest (org.apache.lucene:lucene-suggest:8.11.1 - https://lucene.apache.org/lucene-parent/lucene-suggest) + * LZ4 and xxHash (org.lz4:lz4-java:1.8.0 - https://github.com/lz4/lz4-java) + * mapper-extras (org.opensearch.plugin:mapper-extras-client:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) + * Metrics Core (io.dropwizard.metrics:metrics-core:3.2.6 - http://metrics.dropwizard.io/metrics-core/) + * Netty/All-in-One (io.netty:netty-all:4.1.89.Final - https://netty.io/netty-all/) + * Netty/Buffer (io.netty:netty-buffer:4.1.89.Final - https://netty.io/netty-buffer/) + * Netty/Buffer (io.netty:netty-buffer:4.1.94.Final - https://netty.io/netty-buffer/) + * Netty/Codec/DNS (io.netty:netty-codec-dns:4.1.89.Final - https://netty.io/netty-codec-dns/) + * Netty/Codec/HAProxy (io.netty:netty-codec-haproxy:4.1.89.Final - https://netty.io/netty-codec-haproxy/) + * Netty/Codec/HTTP (io.netty:netty-codec-http:4.1.89.Final - https://netty.io/netty-codec-http/) + * Netty/Codec/HTTP2 (io.netty:netty-codec-http2:4.1.89.Final - https://netty.io/netty-codec-http2/) + * Netty/Codec/Memcache (io.netty:netty-codec-memcache:4.1.89.Final - https://netty.io/netty-codec-memcache/) + * Netty/Codec/MQTT (io.netty:netty-codec-mqtt:4.1.89.Final - https://netty.io/netty-codec-mqtt/) + * Netty/Codec/Redis (io.netty:netty-codec-redis:4.1.89.Final - https://netty.io/netty-codec-redis/) + * Netty/Codec/SMTP (io.netty:netty-codec-smtp:4.1.89.Final - https://netty.io/netty-codec-smtp/) + * Netty/Codec/Socks (io.netty:netty-codec-socks:4.1.89.Final - https://netty.io/netty-codec-socks/) + * Netty/Codec/Stomp (io.netty:netty-codec-stomp:4.1.89.Final - https://netty.io/netty-codec-stomp/) + * Netty/Codec/XML (io.netty:netty-codec-xml:4.1.89.Final - https://netty.io/netty-codec-xml/) + * Netty/Codec (io.netty:netty-codec:4.1.89.Final - https://netty.io/netty-codec/) + * Netty/Codec (io.netty:netty-codec:4.1.94.Final - https://netty.io/netty-codec/) + * Netty/Common (io.netty:netty-common:4.1.89.Final - https://netty.io/netty-common/) + * Netty/Common (io.netty:netty-common:4.1.94.Final - https://netty.io/netty-common/) + * Netty/Handler/Proxy (io.netty:netty-handler-proxy:4.1.89.Final - https://netty.io/netty-handler-proxy/) + * Netty/Handler/Ssl/Ocsp (io.netty:netty-handler-ssl-ocsp:4.1.89.Final - https://netty.io/netty-handler-ssl-ocsp/) + * Netty/Handler (io.netty:netty-handler:4.1.89.Final - https://netty.io/netty-handler/) + * Netty/Handler (io.netty:netty-handler:4.1.94.Final - https://netty.io/netty-handler/) + * Netty/Resolver/DNS/Classes/MacOS (io.netty:netty-resolver-dns-classes-macos:4.1.89.Final - https://netty.io/netty-resolver-dns-classes-macos/) + * Netty/Resolver/DNS/Native/MacOS (io.netty:netty-resolver-dns-native-macos:4.1.89.Final - https://netty.io/netty-resolver-dns-native-macos/) + * Netty/Resolver/DNS (io.netty:netty-resolver-dns:4.1.89.Final - https://netty.io/netty-resolver-dns/) + * Netty/Resolver (io.netty:netty-resolver:4.1.89.Final - https://netty.io/netty-resolver/) + * Netty/Resolver (io.netty:netty-resolver:4.1.94.Final - https://netty.io/netty-resolver/) + * Netty/TomcatNative [BoringSSL - Static] (io.netty:netty-tcnative-boringssl-static:2.0.61.Final - https://github.com/netty/netty-tcnative/netty-tcnative-boringssl-static/) + * Netty/TomcatNative [OpenSSL - Classes] (io.netty:netty-tcnative-classes:2.0.61.Final - https://github.com/netty/netty-tcnative/netty-tcnative-classes/) + * Netty/Transport/Classes/Epoll (io.netty:netty-transport-classes-epoll:4.1.89.Final - https://netty.io/netty-transport-classes-epoll/) + * Netty/Transport/Classes/Epoll (io.netty:netty-transport-classes-epoll:4.1.94.Final - https://netty.io/netty-transport-classes-epoll/) + * Netty/Transport/Classes/KQueue (io.netty:netty-transport-classes-kqueue:4.1.89.Final - https://netty.io/netty-transport-classes-kqueue/) + * Netty/Transport/Native/Epoll (io.netty:netty-transport-native-epoll:4.1.63.Final - https://netty.io/netty-transport-native-epoll/) + * Netty/Transport/Native/Epoll (io.netty:netty-transport-native-epoll:4.1.89.Final - https://netty.io/netty-transport-native-epoll/) + * Netty/Transport/Native/Epoll (io.netty:netty-transport-native-epoll:4.1.94.Final - https://netty.io/netty-transport-native-epoll/) + * Netty/Transport/Native/KQueue (io.netty:netty-transport-native-kqueue:4.1.89.Final - https://netty.io/netty-transport-native-kqueue/) + * Netty/Transport/Native/Unix/Common (io.netty:netty-transport-native-unix-common:4.1.89.Final - https://netty.io/netty-transport-native-unix-common/) + * Netty/Transport/Native/Unix/Common (io.netty:netty-transport-native-unix-common:4.1.94.Final - https://netty.io/netty-transport-native-unix-common/) + * Netty/Transport/RXTX (io.netty:netty-transport-rxtx:4.1.89.Final - https://netty.io/netty-transport-rxtx/) + * Netty/Transport/SCTP (io.netty:netty-transport-sctp:4.1.89.Final - https://netty.io/netty-transport-sctp/) + * Netty/Transport/UDT (io.netty:netty-transport-udt:4.1.89.Final - https://netty.io/netty-transport-udt/) + * Netty/Transport (io.netty:netty-transport:4.1.89.Final - https://netty.io/netty-transport/) + * Netty/Transport (io.netty:netty-transport:4.1.94.Final - https://netty.io/netty-transport/) + * Netty (io.netty:netty:3.10.6.Final - http://netty.io/) + * Nimbus JOSE+JWT (com.nimbusds:nimbus-jose-jwt:9.8.1 - https://bitbucket.org/connect2id/nimbus-jose-jwt) + * Non-Blocking Reactive Foundation for the JVM (io.projectreactor:reactor-core:3.5.14 - https://github.com/reactor/reactor-core) + * Objenesis (org.objenesis:objenesis:3.3 - http://objenesis.org/objenesis) + * Ogg and Vorbis for Java, Core (org.gagravarr:vorbis-java-core:0.8 - https://github.com/Gagravarr/VorbisJava) + * okhttp (com.squareup.okhttp3:okhttp:4.12.0 - https://square.github.io/okhttp/) + * okhttp-brotli (com.squareup.okhttp3:okhttp-brotli:4.12.0 - https://square.github.io/okhttp/) + * okio (com.squareup.okio:okio:3.6.0 - https://github.com/square/okio/) + * okio (com.squareup.okio:okio-jvm:3.6.0 - https://github.com/square/okio/) + * opensearch-cli (org.opensearch:opensearch-cli:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) + * opensearch-common (org.opensearch:opensearch-common:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) + * opensearch-compress (org.opensearch:opensearch-compress:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) + * opensearch-core (org.opensearch:opensearch-core:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) + * opensearch-geo (org.opensearch:opensearch-geo:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) + * opensearch-secure-sm (org.opensearch:opensearch-secure-sm:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) + * opensearch-telemetry (org.opensearch:opensearch-telemetry:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) + * opensearch-x-content (org.opensearch:opensearch-x-content:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) + * OpenTelemetry Java (io.opentelemetry:opentelemetry-api:1.35.0 - https://github.com/open-telemetry/opentelemetry-java) + * OpenTelemetry Java (io.opentelemetry:opentelemetry-api-events:1.35.0-alpha - https://github.com/open-telemetry/opentelemetry-java) + * OpenTelemetry Java (io.opentelemetry:opentelemetry-context:1.35.0 - https://github.com/open-telemetry/opentelemetry-java) + * OpenTelemetry Java (io.opentelemetry:opentelemetry-exporter-logging:1.35.0 - https://github.com/open-telemetry/opentelemetry-java) + * OpenTelemetry Java (io.opentelemetry:opentelemetry-extension-incubator:1.35.0-alpha - https://github.com/open-telemetry/opentelemetry-java) + * OpenTelemetry Java (io.opentelemetry:opentelemetry-sdk:1.35.0 - https://github.com/open-telemetry/opentelemetry-java) + * OpenTelemetry Java (io.opentelemetry:opentelemetry-sdk-common:1.35.0 - https://github.com/open-telemetry/opentelemetry-java) + * OpenTelemetry Java (io.opentelemetry:opentelemetry-sdk-extension-autoconfigure:1.35.0 - https://github.com/open-telemetry/opentelemetry-java) + * OpenTelemetry Java (io.opentelemetry:opentelemetry-sdk-extension-autoconfigure-spi:1.35.0 - https://github.com/open-telemetry/opentelemetry-java) + * OpenTelemetry Java (io.opentelemetry:opentelemetry-sdk-logs:1.35.0 - https://github.com/open-telemetry/opentelemetry-java) + * OpenTelemetry Java (io.opentelemetry:opentelemetry-sdk-metrics:1.35.0 - https://github.com/open-telemetry/opentelemetry-java) + * OpenTelemetry Java (io.opentelemetry:opentelemetry-sdk-trace:1.35.0 - https://github.com/open-telemetry/opentelemetry-java) + * OpenTelemetry Java (io.opentelemetry:opentelemetry-semconv:1.15.0-alpha - https://github.com/open-telemetry/opentelemetry-java) + * OpenTelemetry Semantic Conventions Java (io.opentelemetry.semconv:opentelemetry-semconv:1.23.1-alpha - https://github.com/open-telemetry/semantic-conventions-java) + * org.seleniumhq.selenium:selenium-api (org.seleniumhq.selenium:selenium-api:4.18.1 - https://selenium.dev/) + * org.seleniumhq.selenium:selenium-http (org.seleniumhq.selenium:selenium-http:4.18.1 - https://selenium.dev/) + * org.seleniumhq.selenium:selenium-json (org.seleniumhq.selenium:selenium-json:4.18.1 - https://selenium.dev/) + * org.seleniumhq.selenium:selenium-manager (org.seleniumhq.selenium:selenium-manager:4.18.1 - https://selenium.dev/) + * org.seleniumhq.selenium:selenium-os (org.seleniumhq.selenium:selenium-os:4.18.1 - https://selenium.dev/) + * org.seleniumhq.selenium:selenium-remote-driver (org.seleniumhq.selenium:selenium-remote-driver:4.18.1 - https://selenium.dev/) + * org.seleniumhq.selenium:selenium-support (org.seleniumhq.selenium:selenium-support:4.18.1 - https://selenium.dev/) + * parent-join (org.opensearch.plugin:parent-join-client:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) + * parso (com.epam:parso:2.0.14 - https://github.com/epam/parso) + * PDFBox JBIG2 ImageIO plugin (org.apache.pdfbox:jbig2-imageio:3.0.4 - https://www.apache.org/jbig2-imageio/) + * perfmark:perfmark-api (io.perfmark:perfmark-api:0.25.0 - https://github.com/perfmark/perfmark) + * proto-google-common-protos (com.google.api.grpc:proto-google-common-protos:2.9.0 - https://github.com/googleapis/java-iam/proto-google-common-protos) + * rank-eval (org.opensearch.plugin:rank-eval-client:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) + * rest (org.elasticsearch.client:elasticsearch-rest-client:7.17.7 - https://github.com/elastic/elasticsearch) + * rest (org.opensearch.client:opensearch-rest-client:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) + * rest-high-level (org.opensearch.client:opensearch-rest-high-level-client:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) + * rome (com.rometools:rome:2.1.0 - http://rometools.com/rome) + * rome-utils (com.rometools:rome-utils:2.1.0 - http://rometools.com/rome-utils) + * server (org.opensearch:opensearch:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) + * Shaded Deps for Storm Client (org.apache.storm:storm-shaded-deps:2.6.1 - https://storm.apache.org/storm-shaded-deps) + * SnakeYAML (org.yaml:snakeyaml:2.2 - https://bitbucket.org/snakeyaml/snakeyaml) + * snappy-java (org.xerial.snappy:snappy-java:1.1.8.2 - https://github.com/xerial/snappy-java) + * sniffer (org.elasticsearch.client:elasticsearch-rest-client-sniffer:7.17.7 - https://github.com/elastic/elasticsearch) + * sniffer (org.opensearch.client:opensearch-rest-client-sniffer:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) + * SparseBitSet (com.zaxxer:SparseBitSet:1.2 - https://github.com/brettwooldridge/SparseBitSet) + * storm-autocreds (org.apache.storm:storm-autocreds:2.6.1 - https://storm.apache.org/external/storm-autocreds) + * Storm Client (org.apache.storm:storm-client:2.6.1 - https://storm.apache.org/storm-client) - * storm-crawler-core (com.digitalpebble.stormcrawler:storm-crawler-core:2.12-SNAPSHOT - https://github.com/DigitalPebble/storm-crawler/tree/master/core) + * storm-hdfs (org.apache.storm:storm-hdfs:2.6.1 - https://storm.apache.org/external/storm-hdfs) + * swagger-annotations-jakarta (io.swagger.core.v3:swagger-annotations-jakarta:2.2.17 - https://github.com/swagger-api/swagger-core/modules/swagger-annotations-jakarta) + * TagSoup (org.ccil.cowan.tagsoup:tagsoup:1.2.1 - http://home.ccil.org/~cowan/XML/tagsoup/) + * T-Digest (com.tdunning:t-digest:3.2 - https://github.com/tdunning/t-digest) + * Token provider (org.apache.kerby:token-provider:1.0.1 - http://directory.apache.org/kerby/kerby-provider/token-provider) + * urlfrontier-API (com.github.crawler-commons:urlfrontier-API:2.3.1 - https://github.com/crawler-commons/url-frontier/urlfrontier-API) + * Woodstox (com.fasterxml.woodstox:woodstox-core:5.4.0 - https://github.com/FasterXML/woodstox) + * Xerces2-j (xerces:xercesImpl:2.12.2 - https://xerces.apache.org/xerces2-j/) + * XmlBeans (org.apache.xmlbeans:xmlbeans:5.1.1 - https://xmlbeans.apache.org/) + + Apache License, Version 2.0, Eclipse Public License - Version 1.0 + + * Jetty :: Asynchronous HTTP Client (org.eclipse.jetty:jetty-client:9.4.51.v20230217 - https://eclipse.org/jetty/jetty-client) + * Jetty :: Http Utility (org.eclipse.jetty:jetty-http:9.4.51.v20230217 - https://eclipse.org/jetty/jetty-http) + * Jetty :: IO Utility (org.eclipse.jetty:jetty-io:9.4.51.v20230217 - https://eclipse.org/jetty/jetty-io) + * Jetty :: Security (org.eclipse.jetty:jetty-security:9.4.51.v20230217 - https://eclipse.org/jetty/jetty-security) + * Jetty :: Server Core (org.eclipse.jetty:jetty-server:9.4.51.v20230217 - https://eclipse.org/jetty/jetty-server) + * Jetty :: Servlet Handling (org.eclipse.jetty:jetty-servlet:9.4.51.v20230217 - https://eclipse.org/jetty/jetty-servlet) + * Jetty :: Utilities :: Ajax(JSON) (org.eclipse.jetty:jetty-util-ajax:9.4.51.v20230217 - https://eclipse.org/jetty/jetty-util-ajax) + * Jetty :: Utilities (org.eclipse.jetty:jetty-util:9.4.51.v20230217 - https://eclipse.org/jetty/jetty-util) + * Jetty :: Webapp Application Support (org.eclipse.jetty:jetty-webapp:9.4.51.v20230217 - https://eclipse.org/jetty/jetty-webapp) + * Jetty :: Websocket :: API (org.eclipse.jetty.websocket:websocket-api:9.4.51.v20230217 - https://eclipse.org/jetty/websocket-parent/websocket-api) + * Jetty :: Websocket :: Client (org.eclipse.jetty.websocket:websocket-client:9.4.51.v20230217 - https://eclipse.org/jetty/websocket-parent/websocket-client) + * Jetty :: Websocket :: Common (org.eclipse.jetty.websocket:websocket-common:9.4.51.v20230217 - https://eclipse.org/jetty/websocket-parent/websocket-common) + * Jetty :: XML utilities (org.eclipse.jetty:jetty-xml:9.4.51.v20230217 - https://eclipse.org/jetty/jetty-xml) + + Apache License, Version 2.0, Eclipse Public License - Version 2.0 + + * Jetty :: ALPN :: Client (org.eclipse.jetty:jetty-alpn-client:10.0.19 - https://eclipse.dev/jetty/jetty-alpn-parent/jetty-alpn-client) + * Jetty :: ALPN :: JDK9 Client Implementation (org.eclipse.jetty:jetty-alpn-java-client:10.0.19 - https://eclipse.dev/jetty/jetty-alpn-parent/jetty-alpn-java-client) + * Jetty :: Asynchronous HTTP Client (org.eclipse.jetty:jetty-client:10.0.19 - https://eclipse.dev/jetty/jetty-client) + * Jetty :: HTTP2 :: Client (org.eclipse.jetty.http2:http2-client:10.0.19 - https://eclipse.dev/jetty/http2-parent/http2-client) + * Jetty :: HTTP2 :: Common (org.eclipse.jetty.http2:http2-common:10.0.19 - https://eclipse.dev/jetty/http2-parent/http2-common) + * Jetty :: HTTP2 :: HPACK (org.eclipse.jetty.http2:http2-hpack:10.0.19 - https://eclipse.dev/jetty/http2-parent/http2-hpack) + * Jetty :: HTTP2 :: HTTP Client Transport (org.eclipse.jetty.http2:http2-http-client-transport:10.0.19 - https://eclipse.dev/jetty/http2-parent/http2-http-client-transport) + * Jetty :: Http Utility (org.eclipse.jetty:jetty-http:10.0.19 - https://eclipse.dev/jetty/jetty-http) + * Jetty :: IO Utility (org.eclipse.jetty:jetty-io:10.0.19 - https://eclipse.dev/jetty/jetty-io) + * Jetty :: Utilities (org.eclipse.jetty:jetty-util:10.0.19 - https://eclipse.dev/jetty/jetty-util) + + Apache License, Version 2.0, LGPL-2.1-or-later + + * Java Native Access (net.java.dev.jna:jna:5.10.0 - https://github.com/java-native-access/jna) + * Java Native Access (net.java.dev.jna:jna:5.13.0 - https://github.com/java-native-access/jna) + + Bouncy Castle Licence + + * Bouncy Castle ASN.1 Extension and Utility APIs (org.bouncycastle:bcutil-jdk18on:1.76 - https://www.bouncycastle.org/java.html) + * Bouncy Castle PKIX, CMS, EAC, TSP, PKCS, OCSP, CMP, and CRMF APIs (org.bouncycastle:bcpkix-jdk18on:1.76 - https://www.bouncycastle.org/java.html) + * Bouncy Castle Provider (org.bouncycastle:bcprov-jdk18on:1.76 - https://www.bouncycastle.org/java.html) + * Bouncy Castle S/MIME API (org.bouncycastle:bcmail-jdk18on:1.76 - https://www.bouncycastle.org/java.html) + + BSD-2-Clause, Public Domain, per Creative Commons CC0 + + * HdrHistogram (org.hdrhistogram:HdrHistogram:2.1.12 - http://hdrhistogram.github.io/HdrHistogram/) + + BSD 2-Clause License + + * dnsjava (dnsjava:dnsjava:2.1.7 - http://www.dnsjava.org) + * zstd-jni (com.github.luben:zstd-jni:1.5.5-5 - https://github.com/luben/zstd-jni) + + BSD 3-Clause License + + * Adobe XMPCore (com.adobe.xmp:xmpcore:6.1.11 - https://www.adobe.com/devnet/xmp/library/eula-xmp-library-java.html) + * asm (org.ow2.asm:asm:9.6 - http://asm.ow2.io/) + * leveldbjni-all (org.fusesource.leveldbjni:leveldbjni-all:1.8 - http://leveldbjni.fusesource.org/leveldbjni-all) + * Protocol Buffer Java API (com.google.protobuf:protobuf-java:2.5.0 - http://code.google.com/p/protobuf) + * Protocol Buffers [Core] (com.google.protobuf:protobuf-java:3.21.7 - https://developers.google.com/protocol-buffers/protobuf-java/) + * Protocol Buffers [Core] (com.google.protobuf:protobuf-java:3.22.3 - https://developers.google.com/protocol-buffers/protobuf-java/) + + BSD 3-clause License w/nuclear disclaimer + + * Java Advanced Imaging Image I/O Tools API core (standalone) (com.github.jai-imageio:jai-imageio-core:1.4.0 - https://github.com/jai-imageio/jai-imageio-core) + + BSD License + + * curvesapi (com.github.virtuald:curvesapi:1.07 - https://github.com/virtuald/curvesapi) + * JLine Bundle (org.jline:jline:3.9.0 - http://nexus.sonatype.org/oss-repository-hosting.html/jline-parent/jline) + * JMatIO (org.tallison:jmatio:1.5 - https://github.com/tballison/jmatio) + * JZlib (com.jcraft:jzlib:1.1.3 - http://www.jcraft.com/jzlib/) + * Stax2 API (org.codehaus.woodstox:stax2-api:4.2.1 - http://github.com/FasterXML/stax2-api) + + CDDL, v1.0, LGPL, v2.1 or later + + * JHighlight (org.codelibs:jhighlight:1.1.0 - https://github.com/codelibs/jhighlight) + + CDDL/GPLv2+CE + + * JavaBeans Activation Framework API jar (javax.activation:javax.activation-api:1.2.0 - http://java.net/all/javax.activation-api/) + + Common Development and Distribution License + + * Expression Language 3.0 (org.glassfish:javax.el:3.0.1-b12 - http://uel.java.net) + * Java Servlet API (javax.servlet:javax.servlet-api:3.1.0 - http://servlet-spec.java.net) + * javax.annotation API (javax.annotation:javax.annotation-api:1.3.2 - http://jcp.org/en/jsr/detail?id=250) + * jsr311-api (javax.ws.rs:jsr311-api:1.1.1 - https://jsr311.dev.java.net) + + Common Development and Distribution License (CDDL) v1.1, The GNU General Public License (GPL), Version 2, With Classpath Exception + + * jaxb-api (javax.xml.bind:jaxb-api:2.3.0 - https://github.com/javaee/jaxb-spec/jaxb-api) + * JAXB RI (com.sun.xml.bind:jaxb-impl:2.2.3-1 - http://jaxb.java.net/) + * jersey-client (com.sun.jersey:jersey-client:1.19.4 - https://jersey.java.net/jersey-client/) + * jersey-core (com.sun.jersey:jersey-core:1.19.4 - https://jersey.java.net/jersey-core/) + * jersey-json (com.github.pjfanning:jersey-json:1.20 - https://github.com/pjfanning/jersey-1.x) + * jersey-server (com.sun.jersey:jersey-server:1.19.4 - https://jersey.java.net/jersey-server/) + * jersey-servlet (com.sun.jersey:jersey-servlet:1.19.4 - https://jersey.java.net/jersey-servlet/) + * jsp-api (javax.servlet.jsp:jsp-api:2.1 - no url defined) + + Eclipse Distribution License, Version 1.0 + + * istack common utility code runtime (com.sun.istack:istack-commons-runtime:3.0.12 - https://projects.eclipse.org/projects/ee4j/istack-commons/istack-commons-runtime) + * jakarta.xml.bind-api (jakarta.xml.bind:jakarta.xml.bind-api:2.3.2 - https://github.com/eclipse-ee4j/jaxb-api/jakarta.xml.bind-api) + * JavaBeans Activation Framework (com.sun.activation:jakarta.activation:1.2.1 - https://github.com/eclipse-ee4j/jaf/jakarta.activation) + * JavaBeans Activation Framework API jar (jakarta.activation:jakarta.activation-api:1.2.1 - https://github.com/eclipse-ee4j/jaf/jakarta.activation-api) + * JAXB Runtime (org.glassfish.jaxb:jaxb-runtime:2.3.6 - https://eclipse-ee4j.github.io/jaxb-ri/) + * TXW2 Runtime (org.glassfish.jaxb:txw2:2.3.6 - https://eclipse-ee4j.github.io/jaxb-ri/) + + Eclipse Public License, Version 2.0, GPL-2.0-with-classpath-exception + + * Jakarta RESTful WS API (jakarta.ws.rs:jakarta.ws.rs-api:3.1.0 - https://github.com/eclipse-ee4j/jaxrs-api) + + Eclipse Public License, Version 2.0, The GNU General Public License (GPL), Version 2, With Classpath Exception + + * Jakarta Annotations API (jakarta.annotation:jakarta.annotation-api:1.3.5 - https://projects.eclipse.org/projects/ee4j.ca) + + Elastic License 2.0 + + * rest-high-level (org.elasticsearch.client:elasticsearch-rest-high-level-client:7.17.7 - https://github.com/elastic/elasticsearch) + + Elastic License 2.0, Server Side Public License, v 1 + + * aggs-matrix-stats (org.elasticsearch.plugin:aggs-matrix-stats-client:7.17.7 - https://github.com/elastic/elasticsearch) + * elasticsearch-cli (org.elasticsearch:elasticsearch-cli:7.17.7 - https://github.com/elastic/elasticsearch) + * elasticsearch-core (org.elasticsearch:elasticsearch-core:7.17.7 - https://github.com/elastic/elasticsearch) + * elasticsearch-geo (org.elasticsearch:elasticsearch-geo:7.17.7 - https://github.com/elastic/elasticsearch) + * elasticsearch-lz4 (org.elasticsearch:elasticsearch-lz4:7.17.7 - https://github.com/elastic/elasticsearch) + * elasticsearch-plugin-classloader (org.elasticsearch:elasticsearch-plugin-classloader:7.17.7 - https://github.com/elastic/elasticsearch) + * elasticsearch-secure-sm (org.elasticsearch:elasticsearch-secure-sm:7.17.7 - https://github.com/elastic/elasticsearch) + * elasticsearch-x-content (org.elasticsearch:elasticsearch-x-content:7.17.7 - https://github.com/elastic/elasticsearch) + * lang-mustache (org.elasticsearch.plugin:lang-mustache-client:7.17.7 - https://github.com/elastic/elasticsearch) + * mapper-extras (org.elasticsearch.plugin:mapper-extras-client:7.17.7 - https://github.com/elastic/elasticsearch) + * parent-join (org.elasticsearch.plugin:parent-join-client:7.17.7 - https://github.com/elastic/elasticsearch) + * rank-eval (org.elasticsearch.plugin:rank-eval-client:7.17.7 - https://github.com/elastic/elasticsearch) + * server (org.elasticsearch:elasticsearch:7.17.7 - https://github.com/elastic/elasticsearch) + + GENERAL PUBLIC LICENSE, version 3 (GPL-3.0), GNU LESSER GENERAL PUBLIC LICENSE, version 3 (LGPL-3.0), Mozilla Public License Version 1.1 + + * juniversalchardet (com.github.albfernandez:juniversalchardet:2.4.0 - https://github.com/albfernandez/juniversalchardet) + + MIT-0 + + * reactive-streams (org.reactivestreams:reactive-streams:1.0.4 - http://www.reactive-streams.org/) + + MIT License + + * Animal Sniffer Annotations (org.codehaus.mojo:animal-sniffer-annotations:1.21 - https://www.mojohaus.org/animal-sniffer/animal-sniffer-annotations) + * Checker Qual (org.checkerframework:checker-qual:3.37.0 - https://checkerframework.org/) + * dd-plist (com.googlecode.plist:dd-plist:1.27 - http://www.github.com/3breadt/dd-plist) + * JCodings (org.jruby.jcodings:jcodings:1.0.55 - http://nexus.sonatype.org/oss-repository-hosting.html/jcodings) + * Joni (org.jruby.joni:joni:2.1.31 - http://nexus.sonatype.org/oss-repository-hosting.html/joni) + * JOpt Simple (net.sf.jopt-simple:jopt-simple:5.0.2 - http://pholser.github.io/jopt-simple) + * JOpt Simple (net.sf.jopt-simple:jopt-simple:5.0.4 - http://jopt-simple.github.io/jopt-simple) + * jsoup Java HTML Parser (org.jsoup:jsoup:1.17.2 - https://jsoup.org/) + * org.brotli:dec (org.brotli:dec:0.1.2 - http://brotli.org/dec) + * semver4j (org.semver4j:semver4j:5.2.2 - https://github.com/semver4j/semver4j) + * SLF4J API Module (org.slf4j:slf4j-api:1.7.36 - http://www.slf4j.org) + * SLF4J API Module (org.slf4j:slf4j-api:1.7.6 - http://www.slf4j.org) + * SLF4J API Module (org.slf4j:slf4j-api:2.0.10 - http://www.slf4j.org) + * xsoup (us.codecraft:xsoup:0.3.7 - https://github.com/code4craft/xsoup/) + + Public Domain + + * XZ for Java (org.tukaani:xz:1.9 - https://tukaani.org/xz/java.html) + + Public Domain, per Creative Commons CC0 + + * HdrHistogram (org.hdrhistogram:HdrHistogram:2.1.9 - http://hdrhistogram.github.io/HdrHistogram/) + + Revised BSD + + * JSch (com.jcraft:jsch:0.1.55 - http://www.jcraft.com/jsch/) + + Similar to Apache License but with the acknowledgment clause removed + + * JDOM (org.jdom:jdom2:2.0.6.1 - http://www.jdom.org) + + The Go license + + * re2j (com.google.re2j:re2j:1.1 - http://github.com/google/re2j) + + Unicode/ICU License + + * icu4j (com.ibm.icu:icu4j:74.2 - https://icu.unicode.org/main/icu4j/) + + UnRar License + + * Java Unrar (com.github.junrar:junrar:7.5.5 - https://github.com/junrar/junrar) diff --cc external/opensearch/archetype/src/main/resources/archetype-resources/injection.flux index 56556426,060c1052..2f66da4e --- a/external/opensearch/archetype/src/main/resources/archetype-resources/injection.flux +++ b/external/opensearch/archetype/src/main/resources/archetype-resources/injection.flux @@@ -28,13 -28,9 +28,13 @@@ bolts parallelism: 1 - id: "status" - className: "com.digitalpebble.stormcrawler.opensearch.persistence.StatusUpdaterBolt" + className: "org.apache.stormcrawler.opensearch.persistence.StatusUpdaterBolt" parallelism: 1 + - id: "queues" + className: "com.digitalpebble.stormcrawler.opensearch.persistence.QueueBolt" + parallelism: 1 + streams: - from: "filespout" to: "filter" @@@ -49,13 -45,6 +49,13 @@@ streamId: "status" type: CUSTOM customClass: - className: "com.digitalpebble.stormcrawler.util.URLStreamGrouping" + className: "org.apache.stormcrawler.util.URLStreamGrouping" constructorArgs: - "byDomain" + + - from: "status" + to: "queues" + grouping: + type: FIELDS + args: ["key"] + streamId: "queue" diff --cc external/opensearch/archetype/src/main/resources/archetype-resources/opensearch-conf.yaml index 731d0827,f37b8d5a..d4145481 --- a/external/opensearch/archetype/src/main/resources/archetype-resources/opensearch-conf.yaml +++ b/external/opensearch/archetype/src/main/resources/archetype-resources/opensearch-conf.yaml @@@ -13,13 -21,9 +21,13 @@@ config opensearch.indexer.concurrentRequests: 1 # MetricsConsumer - opensearch.metrics.addresses: "http://localhost:9200" + # opensearch.metrics.addresses: "http://localhost:9200" opensearch.metrics.index.name: "metrics" + # Queues index + opensearch.queues.addresses: "http://localhost:9200" + opensearch.queues.index.name: "queues" + # Spout and persistence bolt opensearch.status.addresses: "http://localhost:9200" opensearch.status.index.name: "status" diff --cc external/opensearch/archetype/src/main/resources/archetype-resources/src/main/resources/queues.mapping index 7c880b3c,00000000..7c880b3c mode 100644,000000..100644 --- a/external/opensearch/archetype/src/main/resources/archetype-resources/src/main/resources/queues.mapping +++ b/external/opensearch/archetype/src/main/resources/archetype-resources/src/main/resources/queues.mapping diff --cc external/opensearch/opensearch-conf.yaml index 3be058aa,f37b8d5a..2279ec89 --- a/external/opensearch/opensearch-conf.yaml +++ b/external/opensearch/opensearch-conf.yaml @@@ -13,12 -21,8 +21,12 @@@ config opensearch.indexer.concurrentRequests: 1 # MetricsConsumer - opensearch.metrics.addresses: "http://localhost:9200" + # opensearch.metrics.addresses: "http://localhost:9200" opensearch.metrics.index.name: "metrics" + + # Queues index + opensearch.queues.addresses: "http://localhost:9200" + opensearch.queues.index.name: "queues" # Spout and persistence bolt opensearch.status.addresses: "http://localhost:9200" diff --cc external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/persistence/QueueBolt.java index dd4e0ff1,00000000..b866c1bf mode 100644,000000..100644 --- a/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/persistence/QueueBolt.java +++ b/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/persistence/QueueBolt.java @@@ -1,124 -1,0 +1,122 @@@ +/** + * Licensed to DigitalPebble Ltd under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. + * DigitalPebble licenses this file to You under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy of the + * License at + * + * <p>http://www.apache.org/licenses/LICENSE-2.0 + * + * <p>Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing permissions and + * limitations under the License. + */ - package com.digitalpebble.stormcrawler.opensearch.persistence; ++package org.apache.stormcrawler.opensearch.persistence; + - import com.digitalpebble.stormcrawler.opensearch.IndexCreation; - import com.digitalpebble.stormcrawler.opensearch.OpenSearchConnection; - import com.digitalpebble.stormcrawler.util.ConfUtils; +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import org.apache.storm.task.OutputCollector; +import org.apache.storm.task.TopologyContext; +import org.apache.storm.topology.OutputFieldsDeclarer; +import org.apache.storm.topology.base.BaseRichBolt; +import org.apache.storm.tuple.Tuple; ++import org.apache.stormcrawler.opensearch.IndexCreation; ++import org.apache.stormcrawler.opensearch.OpenSearchConnection; ++import org.apache.stormcrawler.util.ConfUtils; +import org.joda.time.Instant; +import org.opensearch.action.index.IndexRequest; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Sends information about the queues into an OpenSearch index. This has to be connected to a status + * updater bolt. + */ +public class QueueBolt extends BaseRichBolt { + + private static final Logger LOG = LoggerFactory.getLogger(QueueBolt.class); + + private static final String OSBoltType = "queues"; + + static final String ESIndexNameParamName = - com.digitalpebble.stormcrawler.opensearch.Constants.PARAMPREFIX - + OSBoltType - + ".index.name"; ++ org.apache.stormcrawler.opensearch.Constants.PARAMPREFIX + OSBoltType + ".index.name"; + + private OutputCollector _collector; + + private String indexName; + + private OpenSearchConnection connection; + + private Cache<String, String> knownQueue; + + public QueueBolt() {} + + /** Sets the index name instead of taking it from the configuration. * */ + public QueueBolt(String indexName) { + this.indexName = indexName; + } + + @Override + public void prepare( + Map<String, Object> conf, TopologyContext context, OutputCollector collector) { + _collector = collector; + if (indexName == null) { + indexName = ConfUtils.getString(conf, QueueBolt.ESIndexNameParamName, OSBoltType); + } + try { + connection = OpenSearchConnection.getConnection(conf, OSBoltType); + } catch (Exception e1) { + LOG.error("Can't connect to OpenSearch", e1); + throw new RuntimeException(e1); + } + try { + IndexCreation.checkOrCreateIndex(connection.getClient(), indexName, OSBoltType, LOG); + } catch (IOException e) { + throw new RuntimeException(e); + } + + knownQueue = Caffeine.newBuilder().maximumSize(10000).build(); + } + + @Override + public void cleanup() { + if (connection != null) connection.close(); + } + + @Override + public void execute(Tuple tuple) { + + final String key = tuple.getStringByField("key"); + + // check whether this key is already known + if (knownQueue.getIfPresent(key) != null) { + _collector.ack(tuple); + return; + } + + final String docID = org.apache.commons.codec.digest.DigestUtils.sha256Hex(key); + + final HashMap<String, String> fields = new HashMap<>(); + fields.put("key", key); + fields.put("lastUpdated", Instant.now().toString()); + + final IndexRequest indexRequest = + new IndexRequest(indexName).source(fields).id(docID).create(true); + + knownQueue.put(key, docID); + + connection.addToProcessor(indexRequest); + + // ack no matter what + _collector.ack(tuple); + } + + @Override + public void declareOutputFields(OutputFieldsDeclarer declarer) { + // nothing to do here - this bolt is the last of a topology + } +} diff --cc external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java index 74b08e57,abdf5574..0185a75a --- a/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java +++ b/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java @@@ -45,7 -37,16 +37,17 @@@ import org.apache.storm.metric.api.Mult import org.apache.storm.task.OutputCollector; import org.apache.storm.task.TopologyContext; import org.apache.storm.tuple.Tuple; +import org.apache.storm.tuple.Values; + import org.apache.stormcrawler.Metadata; + import org.apache.stormcrawler.opensearch.BulkItemResponseToFailedFlag; + import org.apache.stormcrawler.opensearch.Constants; + import org.apache.stormcrawler.opensearch.IndexCreation; + import org.apache.stormcrawler.opensearch.OpenSearchConnection; + import org.apache.stormcrawler.persistence.AbstractStatusUpdaterBolt; + import org.apache.stormcrawler.persistence.Status; + import org.apache.stormcrawler.util.ConfUtils; + import org.apache.stormcrawler.util.PerSecondReducer; + import org.apache.stormcrawler.util.URLPartitioner; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import org.opensearch.action.DocWriteRequest; @@@ -230,12 -244,6 +245,11 @@@ public class StatusUpdaterBolt extends partitionKey = "_DEFAULT_"; } + // send a tuple on the queue stream in case a bolt + // wants to handle it + super._collector.emit( - com.digitalpebble.stormcrawler.Constants.QUEUE_STREAM_NAME, - new Values(partitionKey)); ++ org.apache.stormcrawler.Constants.QUEUE_STREAM_NAME, new Values(partitionKey)); + // store routing key in metadata? if (StringUtils.isNotBlank(fieldNameForRoutingKey) && routingFieldNameInMetadata) { builder.field(fieldNameForRoutingKey, partitionKey);
