(incubator-stormcrawler) 01/02: #1401 archetype: remove java topology

rzo1 Wed, 13 Nov 2024 09:32:25 -0800

This is an automated email from the ASF dual-hosted git repository.

rzo1 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-stormcrawler.git


commit d8776b4584fc5a209f94a9937fa96a710d1a59fb
Author: Markos Volikas <[email protected]>
AuthorDate: Wed Nov 13 15:50:06 2024 +0200

    #1401 archetype: remove java topology
---
 .../META-INF/maven/archetype-metadata.xml          |  6 --
 .../src/main/java/CrawlTopology.java               | 75 ----------------------
 2 files changed, 81 deletions(-)

diff --git a/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml b/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml
index 97cc2910..3db738d6 100644
--- a/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml
+++ b/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml
@@ -38,12 +38,6 @@ under the License.
        </requiredProperties>
 
        <fileSets>
-               <fileSet filtered="true" packaged="true" encoding="UTF-8">
-                       <directory>src/main/java</directory>
-                       <includes>
-                               <include>**/*.java</include>
-                       </includes>
-               </fileSet>
                <fileSet filtered="true" encoding="UTF-8">
                        <directory>src/main/resources</directory>
                        <includes>
diff --git a/archetype/src/main/resources/archetype-resources/src/main/java/CrawlTopology.java b/archetype/src/main/resources/archetype-resources/src/main/java/CrawlTopology.java
deleted file mode 100644
index 859e448e..00000000
--- a/archetype/src/main/resources/archetype-resources/src/main/java/CrawlTopology.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to you under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package ${package};
-
-import org.apache.storm.topology.TopologyBuilder;
-import org.apache.storm.tuple.Fields;
-import org.apache.stormcrawler.bolt.FeedParserBolt;
-import org.apache.stormcrawler.bolt.FetcherBolt;
-import org.apache.stormcrawler.bolt.JSoupParserBolt;
-import org.apache.stormcrawler.bolt.SiteMapParserBolt;
-import org.apache.stormcrawler.bolt.URLPartitionerBolt;
-import org.apache.stormcrawler.indexing.StdOutIndexer;
-import org.apache.stormcrawler.tika.ParserBolt;
-import org.apache.stormcrawler.tika.RedirectionBolt;
-import org.apache.stormcrawler.urlfrontier.Spout;
-import org.apache.stormcrawler.urlfrontier.StatusUpdaterBolt;
-
-/**
- * Dummy topology to play with the spouts and bolts
- */
-public class CrawlTopology extends ConfigurableTopology {
-
-       public static void main(String[] args) throws Exception {
-               ConfigurableTopology.start(new CrawlTopology(), args);
-       }
-
-       @Override
-       protected int run(String[] args) {
-               TopologyBuilder builder = new TopologyBuilder();
-
-               builder.setSpout("spout", new Spout());
-
-               builder.setBolt("partitioner", new URLPartitionerBolt()).shuffleGrouping("spout");
-
-               builder.setBolt("fetch", new FetcherBolt()).fieldsGrouping("partitioner", new Fields("key"));
-
-               builder.setBolt("sitemap", new SiteMapParserBolt()).localOrShuffleGrouping("fetch");
-
-               builder.setBolt("feeds", new FeedParserBolt()).localOrShuffleGrouping("sitemap");
-
-               builder.setBolt("parse", new JSoupParserBolt()).localOrShuffleGrouping("feeds");
-
-               builder.setBolt("shunt", new RedirectionBolt()).localOrShuffleGrouping("parse");
-
-               builder.setBolt("tika", new ParserBolt()).localOrShuffleGrouping("shunt", "tika");
-
-               builder.setBolt("index", new StdOutIndexer()).localOrShuffleGrouping("shunt").localOrShuffleGrouping("tika");
-
-               Fields furl = new Fields("url");
-
-               // can also use MemoryStatusUpdater for simple recursive crawls
-               builder.setBolt("status", new StatusUpdaterBolt()).fieldsGrouping("fetch", Constants.StatusStreamName, furl)
-                               .fieldsGrouping("sitemap", Constants.StatusStreamName, furl)
-                               .fieldsGrouping("feeds", Constants.StatusStreamName, furl)
-                               .fieldsGrouping("parse", Constants.StatusStreamName, furl)
-                               .fieldsGrouping("tika", Constants.StatusStreamName, furl)
-                               .fieldsGrouping("index", Constants.StatusStreamName, furl);
-
-               return submit("crawl", conf, builder);
-       }
-}

(incubator-stormcrawler) 01/02: #1401 archetype: remove java topology

Reply via email to