This is an automated email from the ASF dual-hosted git repository. jnioche pushed a commit to branch 1176 in repository https://gitbox.apache.org/repos/asf/incubator-stormcrawler.git
commit c79bcc61b3f219c1425b5e0ae59c8065241110c4 Author: Julien Nioche <[email protected]> AuthorDate: Fri Mar 29 10:33:15 2024 +0000 Remove hyphen in storm-crawler, including in artefacts generated by POMs Signed-off-by: Julien Nioche <[email protected]> --- README.md | 6 +++--- archetype/pom.xml | 6 +++--- .../src/main/resources/META-INF/maven/archetype-metadata.xml | 2 +- archetype/src/main/resources/archetype-resources/pom.xml | 6 +++--- core/pom.xml | 8 ++++---- .../java/org/apache/stormcrawler/bolt/JSoupParserBolt.java | 2 +- external/aws/README.md | 8 ++++---- external/aws/pom.xml | 6 +++--- external/elasticsearch/README.md | 4 ++-- external/elasticsearch/archetype/pom.xml | 4 ++-- .../src/main/resources/META-INF/maven/archetype-metadata.xml | 2 +- .../archetype/src/main/resources/archetype-resources/pom.xml | 6 +++--- external/elasticsearch/pom.xml | 8 ++++---- external/langid/pom.xml | 6 +++--- external/opensearch/README.md | 4 ++-- external/opensearch/archetype/pom.xml | 4 ++-- .../src/main/resources/META-INF/maven/archetype-metadata.xml | 2 +- .../archetype/src/main/resources/archetype-resources/README.md | 2 +- .../archetype/src/main/resources/archetype-resources/pom.xml | 6 +++--- external/opensearch/pom.xml | 8 ++++---- external/pom.xml | 8 ++++---- external/solr/README.md | 8 ++++---- external/solr/pom.xml | 8 ++++---- external/sql/README.md | 4 ++-- external/sql/pom.xml | 6 +++--- external/tika/pom.xml | 8 ++++---- external/urlfrontier/README.md | 2 +- external/urlfrontier/pom.xml | 8 ++++---- external/warc/README.md | 4 ++-- external/warc/pom.xml | 10 +++++----- pom.xml | 6 +++--- 31 files changed, 86 insertions(+), 86 deletions(-) diff --git a/README.md b/README.md index 183a05b8..c709daf7 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ -[](http://stormcrawler.net/) +[](http://stormcrawler.net/) ============= [](http://www.apache.org/licenses/LICENSE-2.0)  -[](https://javadoc.io/doc/org.apache.stormcrawler/storm-crawler-core/) +[](https://javadoc.io/doc/org.apache.stormcrawler/stormcrawler-core/) [](https://coveralls.io/github/apache/incubator-stormcrawler?branch=master) StormCrawler is an open source collection of resources for building low-latency, scalable web crawlers on [Apache Storm](http://storm.apache.org/). It is provided under [Apache License](http://www.apache.org/licenses/LICENSE-2.0) and is written mostly in Java. @@ -18,7 +18,7 @@ The version of Apache Storm to install must match the one defined in the pom.xml Once Storm is installed, the easiest way to get started is to generate a brand new StormCrawler project using \: -`mvn archetype:generate -DarchetypeGroupId=com.digitalpebble.stormcrawler -DarchetypeArtifactId=storm-crawler-archetype -DarchetypeVersion=2.11` +`mvn archetype:generate -DarchetypeGroupId=com.digitalpebble.stormcrawler -DarchetypeArtifactId=stormcrawler-archetype -DarchetypeVersion=2.11` You'll be asked to enter a groupId (e.g. com.mycompany.crawler), an artefactId (e.g. stormcrawler), a version, a package name and details about the user agent to use. diff --git a/archetype/pom.xml b/archetype/pom.xml index 190efebd..a7d4edd5 100644 --- a/archetype/pom.xml +++ b/archetype/pom.xml @@ -23,11 +23,11 @@ under the License. <modelVersion>4.0.0</modelVersion> <parent> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler</artifactId> + <artifactId>stormcrawler</artifactId> <version>2.12-SNAPSHOT</version> </parent> - <artifactId>storm-crawler-archetype</artifactId> - <name>storm-crawler-archetype</name> + <artifactId>stormcrawler-archetype</artifactId> + <name>stormcrawler-archetype</name> <build> <resources> <resource> diff --git a/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml b/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml index aa61c331..97cc2910 100644 --- a/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml +++ b/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml @@ -23,7 +23,7 @@ under the License. xmlns="https://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="https://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.1.0 http://maven.apache.org/xsd/archetype-descriptor-1.1.0.xsd" - name="storm-crawler-core"> + name="stormcrawler-core"> <requiredProperties> <requiredProperty key="http-agent-name"> diff --git a/archetype/src/main/resources/archetype-resources/pom.xml b/archetype/src/main/resources/archetype-resources/pom.xml index aeb0e7b4..d33d0dd1 100644 --- a/archetype/src/main/resources/archetype-resources/pom.xml +++ b/archetype/src/main/resources/archetype-resources/pom.xml @@ -119,7 +119,7 @@ under the License. <dependencies> <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-core</artifactId> + <artifactId>stormcrawler-core</artifactId> <version>${stormcrawler.version}</version> </dependency> <dependency> @@ -135,12 +135,12 @@ under the License. </dependency> <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-tika</artifactId> + <artifactId>stormcrawler-tika</artifactId> <version>${stormcrawler.version}</version> </dependency> <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-urlfrontier</artifactId> + <artifactId>stormcrawler-urlfrontier</artifactId> <version>${stormcrawler.version}</version> </dependency> <dependency> diff --git a/core/pom.xml b/core/pom.xml index 4b92d3c9..9886cfa8 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -24,15 +24,15 @@ under the License. <parent> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler</artifactId> + <artifactId>stormcrawler</artifactId> <version>2.12-SNAPSHOT</version> </parent> - <artifactId>storm-crawler-core</artifactId> + <artifactId>stormcrawler-core</artifactId> <packaging>jar</packaging> - <name>storm-crawler-core</name> - <url>https://github.com/apache/incubator-stormcrawler/storm-crawler/tree/master/core</url> + <name>stormcrawler-core</name> + <url>https://github.com/apache/incubator-stormcrawler/tree/master/core</url> <description>StormCrawler core Java API.</description> <properties> diff --git a/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java b/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java index 2824c8f1..67aa673e 100644 --- a/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java +++ b/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java @@ -68,7 +68,7 @@ import org.w3c.dom.DocumentFragment; /** * Parser for HTML documents only which uses ICU4J to detect the charset encoding. Kindly donated to - * storm-crawler by shopstyle.com. + * stormcrawler by shopstyle.com. */ public class JSoupParserBolt extends StatusEmitterBolt { diff --git a/external/aws/README.md b/external/aws/README.md index 6edd4885..49119e3d 100644 --- a/external/aws/README.md +++ b/external/aws/README.md @@ -1,16 +1,16 @@ # stormcrawler-aws ================================ -AWS resources for Storm-Crawler, currently contains an indexer bolt for [CloudSearch](https://aws.amazon.com/cloudsearch/) and another bolt for storing and retrieving web pages to/from [S3](https://aws.amazon.com/s3/). +AWS resources for StormCrawler, currently contains an indexer bolt for [CloudSearch](https://aws.amazon.com/cloudsearch/) and another bolt for storing and retrieving web pages to/from [S3](https://aws.amazon.com/s3/). ## Prerequisites -Add storm-crawler-aws to the dependencies of your project\: +Add stormcrawler-aws to the dependencies of your project\: ```xml <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-aws</artifactId> + <artifactId>stormcrawler-aws</artifactId> <version>XXXX</version> </dependency> ``` @@ -21,7 +21,7 @@ Edit `~/.aws/credentials`, see [http://docs.aws.amazon.com/cli/latest/userguide/ * How to use? -Add storm-crawler-aws as a Maven dependency, use the class CloudSearchIndexWriter in your Storm topology alongside the core StormCrawler components and create a yaml configuration file (see below). +Add stormcrawler-aws as a Maven dependency, use the class CloudSearchIndexWriter in your Storm topology alongside the core StormCrawler components and create a yaml configuration file (see below). * AWS credentials diff --git a/external/aws/pom.xml b/external/aws/pom.xml index 0b331463..ca29ef15 100644 --- a/external/aws/pom.xml +++ b/external/aws/pom.xml @@ -24,15 +24,15 @@ under the License. <parent> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-external</artifactId> + <artifactId>stormcrawler-external</artifactId> <version>2.12-SNAPSHOT</version> <relativePath>../pom.xml</relativePath> </parent> - <artifactId>storm-crawler-aws</artifactId> + <artifactId>stormcrawler-aws</artifactId> <packaging>jar</packaging> - <name>storm-crawler-aws</name> + <name>stormcrawler-aws</name> <url>https://github.com/apache/incubator-stormcrawler/tree/master/external/aws</url> <description>AWS resources for StormCrawler</description> diff --git a/external/elasticsearch/README.md b/external/elasticsearch/README.md index a8cfe1a9..792456da 100644 --- a/external/elasticsearch/README.md +++ b/external/elasticsearch/README.md @@ -1,4 +1,4 @@ -storm-crawler-elasticsearch +stormcrawler-elasticsearch =========================== A collection of resources for [Elasticsearch](https://www.elastic.co/products/elasticsearch): @@ -16,7 +16,7 @@ Getting started Use the archetype for Elasticsearch with: -`mvn archetype:generate -DarchetypeGroupId=org.apache.stormcrawler -DarchetypeArtifactId=storm-crawler-elasticsearch-archetype -DarchetypeVersion=2.11` +`mvn archetype:generate -DarchetypeGroupId=org.apache.stormcrawler -DarchetypeArtifactId=stormcrawler-elasticsearch-archetype -DarchetypeVersion=2.11` You'll be asked to enter a groupId (e.g. com.mycompany.crawler), an artefactId (e.g. stormcrawler), a version, a package name and details about the user agent to use. diff --git a/external/elasticsearch/archetype/pom.xml b/external/elasticsearch/archetype/pom.xml index 499173ee..8e7244fa 100644 --- a/external/elasticsearch/archetype/pom.xml +++ b/external/elasticsearch/archetype/pom.xml @@ -24,12 +24,12 @@ under the License. <parent> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler</artifactId> + <artifactId>stormcrawler</artifactId> <version>2.12-SNAPSHOT</version> <relativePath>../../../pom.xml</relativePath> </parent> - <artifactId>storm-crawler-elasticsearch-archetype</artifactId> + <artifactId>stormcrawler-elasticsearch-archetype</artifactId> <packaging>maven-archetype</packaging> diff --git a/external/elasticsearch/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml b/external/elasticsearch/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml index c3a0c6d2..ae9bcd04 100644 --- a/external/elasticsearch/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml +++ b/external/elasticsearch/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml @@ -23,7 +23,7 @@ under the License. xmlns="https://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="https://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.1.0 http://maven.apache.org/xsd/archetype-descriptor-1.1.0.xsd" - name="storm-crawler-core"> + name="stormcrawler-core"> <requiredProperties> <requiredProperty key="http-agent-name"> diff --git a/external/elasticsearch/archetype/src/main/resources/archetype-resources/pom.xml b/external/elasticsearch/archetype/src/main/resources/archetype-resources/pom.xml index cca05f97..144ad528 100644 --- a/external/elasticsearch/archetype/src/main/resources/archetype-resources/pom.xml +++ b/external/elasticsearch/archetype/src/main/resources/archetype-resources/pom.xml @@ -121,12 +121,12 @@ under the License. <dependencies> <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-core</artifactId> + <artifactId>stormcrawler-core</artifactId> <version>${stormcrawler.version}</version> </dependency> <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-elasticsearch</artifactId> + <artifactId>stormcrawler-elasticsearch</artifactId> <version>${stormcrawler.version}</version> </dependency> <dependency> @@ -142,7 +142,7 @@ under the License. </dependency> <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-tika</artifactId> + <artifactId>stormcrawler-tika</artifactId> <version>${stormcrawler.version}</version> </dependency> </dependencies> diff --git a/external/elasticsearch/pom.xml b/external/elasticsearch/pom.xml index dc5f0615..15215d73 100644 --- a/external/elasticsearch/pom.xml +++ b/external/elasticsearch/pom.xml @@ -24,7 +24,7 @@ under the License. <parent> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-external</artifactId> + <artifactId>stormcrawler-external</artifactId> <version>2.12-SNAPSHOT</version> <relativePath>../pom.xml</relativePath> </parent> @@ -33,10 +33,10 @@ under the License. <elasticsearch.version>7.17.7</elasticsearch.version> </properties> - <artifactId>storm-crawler-elasticsearch</artifactId> + <artifactId>stormcrawler-elasticsearch</artifactId> <packaging>jar</packaging> - <name>storm-crawler-elasticsearch</name> + <name>stormcrawler-elasticsearch</name> <url>https://github.com/apache/incubator-stormcrawler/tree/master/external/elasticsearch</url> <description>Elasticsearch resources for StormCrawler</description> @@ -83,7 +83,7 @@ under the License. <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-core</artifactId> + <artifactId>stormcrawler-core</artifactId> <version>${project.version}</version> <type>test-jar</type> <scope>test</scope> diff --git a/external/langid/pom.xml b/external/langid/pom.xml index c5c9153e..7d023f81 100644 --- a/external/langid/pom.xml +++ b/external/langid/pom.xml @@ -24,15 +24,15 @@ under the License. <parent> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-external</artifactId> + <artifactId>stormcrawler-external</artifactId> <version>2.12-SNAPSHOT</version> <relativePath>../pom.xml</relativePath> </parent> - <artifactId>storm-crawler-langid</artifactId> + <artifactId>stormcrawler-langid</artifactId> <packaging>jar</packaging> - <name>storm-crawler-langid</name> + <name>stormcrawler-langid</name> <url>https://github.com/apache/incubator-stormcrawler/tree/master/external/langid</url> <description>Language Identification for StormCrawler</description> diff --git a/external/opensearch/README.md b/external/opensearch/README.md index 6b10ce78..1868d2aa 100644 --- a/external/opensearch/README.md +++ b/external/opensearch/README.md @@ -1,4 +1,4 @@ -storm-crawler-opensearch +stormcrawler-opensearch =========================== A collection of resources for [OpenSearch](https://opensearch.org/): @@ -16,7 +16,7 @@ Getting started The easiest way is currently to use the archetype for OpenSearch with: -`mvn archetype:generate -DarchetypeGroupId=org.apache.stormcrawler -DarchetypeArtifactId=storm-crawler-opensearch-archetype -DarchetypeVersion=2.11` +`mvn archetype:generate -DarchetypeGroupId=org.apache.stormcrawler -DarchetypeArtifactId=stormcrawler-opensearch-archetype -DarchetypeVersion=2.11` You'll be asked to enter a groupId (e.g. com.mycompany.crawler), an artefactId (e.g. stormcrawler), a version, a package name and details about the user agent to use. diff --git a/external/opensearch/archetype/pom.xml b/external/opensearch/archetype/pom.xml index 6b53be43..ae141b2d 100644 --- a/external/opensearch/archetype/pom.xml +++ b/external/opensearch/archetype/pom.xml @@ -24,12 +24,12 @@ under the License. <parent> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler</artifactId> + <artifactId>stormcrawler</artifactId> <version>2.12-SNAPSHOT</version> <relativePath>../../../pom.xml</relativePath> </parent> - <artifactId>storm-crawler-opensearch-archetype</artifactId> + <artifactId>stormcrawler-opensearch-archetype</artifactId> <packaging>maven-archetype</packaging> diff --git a/external/opensearch/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml b/external/opensearch/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml index f8b880d1..9abe82dd 100644 --- a/external/opensearch/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml +++ b/external/opensearch/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml @@ -23,7 +23,7 @@ under the License. xmlns="https://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="https://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.1.0 http://maven.apache.org/xsd/archetype-descriptor-1.1.0.xsd" - name="storm-crawler-core"> + name="stormcrawler-core"> <requiredProperties> <requiredProperty key="http-agent-name"> diff --git a/external/opensearch/archetype/src/main/resources/archetype-resources/README.md b/external/opensearch/archetype/src/main/resources/archetype-resources/README.md index 007509d2..526788f4 100644 --- a/external/opensearch/archetype/src/main/resources/archetype-resources/README.md +++ b/external/opensearch/archetype/src/main/resources/archetype-resources/README.md @@ -60,7 +60,7 @@ The file _storm.ndjson_ is used to display some of Storm's internal metrics and -Happy crawling! If you have any questions, please ask on [StackOverflow with the tag stormcrawler](http://stackoverflow.com/questions/tagged/stormcrawler) or the [discussions](https://github.com/DigitalPebble/storm-crawler/discussions) section on GitHub. +Happy crawling! If you have any questions, please ask on [StackOverflow with the tag stormcrawler](http://stackoverflow.com/questions/tagged/stormcrawler) or the [discussions](https://github.com/apache/incubator-stormcrawler/discussions) section on GitHub. diff --git a/external/opensearch/archetype/src/main/resources/archetype-resources/pom.xml b/external/opensearch/archetype/src/main/resources/archetype-resources/pom.xml index 8283f5f3..dc8f55b0 100644 --- a/external/opensearch/archetype/src/main/resources/archetype-resources/pom.xml +++ b/external/opensearch/archetype/src/main/resources/archetype-resources/pom.xml @@ -121,12 +121,12 @@ under the License. <dependencies> <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-core</artifactId> + <artifactId>stormcrawler-core</artifactId> <version>${stormcrawler.version}</version> </dependency> <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-opensearch</artifactId> + <artifactId>stormcrawler-opensearch</artifactId> <version>${stormcrawler.version}</version> </dependency> <dependency> @@ -142,7 +142,7 @@ under the License. </dependency> <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-tika</artifactId> + <artifactId>stormcrawler-tika</artifactId> <version>${stormcrawler.version}</version> </dependency> </dependencies> diff --git a/external/opensearch/pom.xml b/external/opensearch/pom.xml index d80b9327..33ab14f9 100644 --- a/external/opensearch/pom.xml +++ b/external/opensearch/pom.xml @@ -26,7 +26,7 @@ under the License. <parent> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-external</artifactId> + <artifactId>stormcrawler-external</artifactId> <version>2.12-SNAPSHOT</version> <relativePath>../pom.xml</relativePath> </parent> @@ -35,10 +35,10 @@ under the License. <opensearch.version>2.12.0</opensearch.version> </properties> - <artifactId>storm-crawler-opensearch</artifactId> + <artifactId>stormcrawler-opensearch</artifactId> <packaging>jar</packaging> - <name>storm-crawler-opensearch</name> + <name>stormcrawler-opensearch</name> <url> https://github.com/apache/incubator-stormcrawler/tree/master/external/opensearch</url> <description>Opensearch resources for StormCrawler</description> @@ -82,7 +82,7 @@ under the License. <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-core</artifactId> + <artifactId>stormcrawler-core</artifactId> <version>${project.version}</version> <type>test-jar</type> <scope>test</scope> diff --git a/external/pom.xml b/external/pom.xml index 9b75305f..e8122b2a 100644 --- a/external/pom.xml +++ b/external/pom.xml @@ -24,12 +24,12 @@ under the License. <parent> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler</artifactId> + <artifactId>stormcrawler</artifactId> <version>2.12-SNAPSHOT</version> <relativePath>../pom.xml</relativePath> </parent> - <artifactId>storm-crawler-external</artifactId> + <artifactId>stormcrawler-external</artifactId> <packaging>pom</packaging> <profiles> @@ -42,7 +42,7 @@ under the License. <artifactId>maven-source-plugin</artifactId> </plugin> - <!-- no test jar for storm-crawler-external --> + <!-- no test jar for stormcrawler-external --> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-jar-plugin</artifactId> @@ -92,7 +92,7 @@ under the License. <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-core</artifactId> + <artifactId>stormcrawler-core</artifactId> <version>${project.version}</version> </dependency> diff --git a/external/solr/README.md b/external/solr/README.md index b23d5fa8..7eed8d56 100644 --- a/external/solr/README.md +++ b/external/solr/README.md @@ -1,4 +1,4 @@ -storm-crawler-solr +stormcrawler-solr ================== Set of Solr resources for StormCrawler that allows you to create topologies that consume from a Solr collection and store metrics, status or parsed content into Solr. @@ -10,7 +10,7 @@ In your project you can use this by adding the following dependency: ```xml <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-solr</artifactId> + <artifactId>stormcrawler-solr</artifactId> <version>${stormcrawler.version}</version> </dependency> ``` @@ -74,7 +74,7 @@ solr.status.bucket.field: host solr.status.bucket.maxsize: 100 ``` -This feature can be combined with the [partition features](https://github.com/DigitalPebble/storm-crawler/wiki/Configuration#fetching-and-partitioning) provided by storm-crawler to balance the crawling process and not just the URL coverage. +This feature can be combined with the [partition features](https://github.com/apache/incubator-stormcrawler/wiki/Configuration#fetching-and-partitioning) provided by StormCrawler to balance the crawling process and not just the URL coverage. ### Metadata @@ -113,5 +113,5 @@ In the `parse` and `status` cores the `uniqueKey` is defined to be the `url` fie Also keep in mind that depending on your needs you can use the [Schemaless Mode](https://cwiki.apache.org/confluence/display/solr/Schemaless+Mode) available in Solr. -To start SOLR with the preconfigured cores for StormCrawler, you can do `bin/solr start -s storm-crawler/external/solr/cores`, then open the SOLR UI (http://localhost:8983) to check that they have been loaded correctly. Alternatively, create the cores (here `status`) by `bin/solr create -c status -d storm-crawler/external/solr/cores/status/`. +To start SOLR with the preconfigured cores for StormCrawler, you can do `bin/solr start -s stormcrawler/external/solr/cores`, then open the SOLR UI (http://localhost:8983) to check that they have been loaded correctly. Alternatively, create the cores (here `status`) by `bin/solr create -c status -d stormcrawler/external/solr/cores/status/`. diff --git a/external/solr/pom.xml b/external/solr/pom.xml index 5dd9a5b9..5c2a1dfb 100644 --- a/external/solr/pom.xml +++ b/external/solr/pom.xml @@ -24,15 +24,15 @@ under the License. <parent> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-external</artifactId> + <artifactId>stormcrawler-external</artifactId> <version>2.12-SNAPSHOT</version> <relativePath>../pom.xml</relativePath> </parent> - <artifactId>storm-crawler-solr</artifactId> + <artifactId>stormcrawler-solr</artifactId> <packaging>jar</packaging> - <name>storm-crawler-solr</name> + <name>stormcrawler-solr</name> <url> https://github.com/apache/incubator-stormcrawler/tree/master/external/solr</url> <description>Solr resources for StormCrawler</description> @@ -56,7 +56,7 @@ under the License. <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-core</artifactId> + <artifactId>stormcrawler-core</artifactId> <version>${project.version}</version> <type>test-jar</type> <scope>test</scope> diff --git a/external/sql/README.md b/external/sql/README.md index a9d851af..25554a4d 100644 --- a/external/sql/README.md +++ b/external/sql/README.md @@ -2,11 +2,11 @@ Contains a spout implementation as well as a status updater bolt and a MetricsConsumer. -The [tableCreation.script](https://github.com/DigitalPebble/storm-crawler/blob/master/external/sql/tableCreation.script) is based on MySQL and is used for the creation of the tables. +The [tableCreation.script](https://github.com/apache/incubator-stormcrawler/blob/main/external/sql/tableCreation.script) is based on MySQL and is used for the creation of the tables. This [tutorial](https://digitalpebble.blogspot.co.uk/2015/09/index-web-with-aws-cloudsearch.html) uses this module. -Check that you have specified a configuration file such as [sql-conf.yaml](https://github.com/DigitalPebble/storm-crawler/blob/master/external/sql/sql-conf.yaml) and have a Java driver in the dependencies of your POM +Check that you have specified a configuration file such as [sql-conf.yaml](https://github.com/apache/incubator-stormcrawler/blob/master/external/sql/sql-conf.yaml) and have a Java driver in the dependencies of your POM ``` <dependency> diff --git a/external/sql/pom.xml b/external/sql/pom.xml index 6b5dda23..5c90955b 100644 --- a/external/sql/pom.xml +++ b/external/sql/pom.xml @@ -24,15 +24,15 @@ under the License. <parent> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-external</artifactId> + <artifactId>stormcrawler-external</artifactId> <version>2.12-SNAPSHOT</version> <relativePath>../pom.xml</relativePath> </parent> - <artifactId>storm-crawler-sql</artifactId> + <artifactId>stormcrawler-sql</artifactId> <packaging>jar</packaging> - <name>storm-crawler-sql</name> + <name>stormcrawler-sql</name> <url>https://github.com/apache/incubator-stormcrawler/tree/master/external/sql</url> <description>SQL-based resources for StormCrawler</description> diff --git a/external/tika/pom.xml b/external/tika/pom.xml index 351eaf4b..b92841f0 100644 --- a/external/tika/pom.xml +++ b/external/tika/pom.xml @@ -24,15 +24,15 @@ under the License. <parent> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-external</artifactId> + <artifactId>stormcrawler-external</artifactId> <version>2.12-SNAPSHOT</version> <relativePath>../pom.xml</relativePath> </parent> - <artifactId>storm-crawler-tika</artifactId> + <artifactId>stormcrawler-tika</artifactId> <packaging>jar</packaging> - <name>storm-crawler-tika</name> + <name>stormcrawler-tika</name> <url>https://github.com/apache/incubator-stormcrawler/tree/master/external/tika</url> <description>Tika-based parser bolt for StormCrawler</description> @@ -55,7 +55,7 @@ under the License. <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-core</artifactId> + <artifactId>stormcrawler-core</artifactId> <version>${project.version}</version> <type>test-jar</type> <scope>test</scope> diff --git a/external/urlfrontier/README.md b/external/urlfrontier/README.md index ac9b5746..8647fc04 100644 --- a/external/urlfrontier/README.md +++ b/external/urlfrontier/README.md @@ -29,7 +29,7 @@ Your StormCrawler topology requires the following dependency in its pom.xml (jus ``` <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-urlfrontier</artifactId> + <artifactId>stormcrawler-urlfrontier</artifactId> <version>${stormcrawler.version}</version> </dependency> ``` diff --git a/external/urlfrontier/pom.xml b/external/urlfrontier/pom.xml index 2d98ae21..96f94948 100644 --- a/external/urlfrontier/pom.xml +++ b/external/urlfrontier/pom.xml @@ -24,15 +24,15 @@ under the License. <parent> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-external</artifactId> + <artifactId>stormcrawler-external</artifactId> <version>2.12-SNAPSHOT</version> <relativePath>../pom.xml</relativePath> </parent> - <artifactId>storm-crawler-urlfrontier</artifactId> + <artifactId>stormcrawler-urlfrontier</artifactId> <packaging>jar</packaging> - <name>storm-crawler-urlfrontier</name> + <name>stormcrawler-urlfrontier</name> <url>https://github.com/apache/incubator-stormcrawler/tree/master/external/urlfrontier</url> <description>URL Frontier resources for StormCrawler</description> @@ -71,7 +71,7 @@ under the License. <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-core</artifactId> + <artifactId>stormcrawler-core</artifactId> <version>${project.version}</version> <type>test-jar</type> <scope>test</scope> diff --git a/external/warc/README.md b/external/warc/README.md index 9b56bbba..7791a3fe 100644 --- a/external/warc/README.md +++ b/external/warc/README.md @@ -5,8 +5,8 @@ First, you need to add the WARC module to the dependencies of your project. ``` <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-warc</artifactId> - <version>${storm-crawler.version}</version> + <artifactId>stormcrawler-warc</artifactId> + <version>${stormcrawler.version}</version> </dependency> ``` diff --git a/external/warc/pom.xml b/external/warc/pom.xml index c5297242..f9767cdd 100644 --- a/external/warc/pom.xml +++ b/external/warc/pom.xml @@ -24,15 +24,15 @@ under the License. <parent> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-external</artifactId> + <artifactId>stormcrawler-external</artifactId> <version>2.12-SNAPSHOT</version> <relativePath>../pom.xml</relativePath> </parent> - <artifactId>storm-crawler-warc</artifactId> + <artifactId>stormcrawler-warc</artifactId> <packaging>jar</packaging> - <name>storm-crawler-warc</name> + <name>stormcrawler-warc</name> <url>https://github.com/apache/incubator-stormcrawler/tree/master/external/warc</url> <description>WARC resources for StormCrawler</description> @@ -46,7 +46,7 @@ under the License. codec dependencies ?!?!? --> <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-core</artifactId> + <artifactId>stormcrawler-core</artifactId> <version>${project.version}</version> </dependency> @@ -89,7 +89,7 @@ under the License. <dependency> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler-core</artifactId> + <artifactId>stormcrawler-core</artifactId> <version>${project.version}</version> <type>test-jar</type> <scope>test</scope> diff --git a/pom.xml b/pom.xml index 9c6f5a8c..9bee5b14 100644 --- a/pom.xml +++ b/pom.xml @@ -23,11 +23,11 @@ under the License. <modelVersion>4.0.0</modelVersion> <groupId>org.apache.stormcrawler</groupId> - <artifactId>storm-crawler</artifactId> + <artifactId>stormcrawler</artifactId> <version>2.12-SNAPSHOT</version> <packaging>pom</packaging> - <name>storm-crawler</name> + <name>StormCrawler</name> <description>A collection of resources for building low-latency, scalable web crawlers on Apache Storm.</description> <url>http://stormcrawler.net/</url> @@ -46,7 +46,7 @@ under the License. <organization>DigitalPebble</organization> <organizationUrl>http://www.digitalpebble.com</organizationUrl> </developer> - <developer> + <developer> <name>Sebastian Nagel</name> <email>[email protected]</email> <organization>CommonCrawl</organization>
