This is an automated email from the ASF dual-hosted git repository. snagel pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
     new 7ebd35d NUTCH-2495: Use -deleteGone instead of clean job in crawl script while indexing
     new c50575a Merge pull request #517 from sebastian-nagel/NUTCH-2495-bin-crawl-delete-while-indexing
7ebd35d is described below

commit 7ebd35dc96b8d40846103a8c343edecec1763595
Author: Sebastian Nagel <sna...@apache.org>
AuthorDate: Mon Apr 27 10:24:18 2020 +0200

    NUTCH-2495: Use -deleteGone instead of clean job in crawl script while indexing
---
 src/bin/crawl | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/bin/crawl b/src/bin/crawl
index 331ee65..9b77ce4 100755
--- a/src/bin/crawl
+++ b/src/bin/crawl
@@ -370,8 +370,8 @@ do
   echo "CrawlDB update"
   __bin_nutch updatedb "${commonOptions[@]}" "$CRAWL_PATH"/crawldb "$CRAWL_PATH"/segments/$SEGMENT
 
-# note that the link inversion - indexing routine can be done within the main loop
-# on a per segment basis
+  # note that the link inversion - indexing routine can be done within the main loop
+  # on a per segment basis
   echo "Link inversion"
   __bin_nutch invertlinks "${commonOptions[@]}" "$CRAWL_PATH"/linkdb "$CRAWL_PATH"/segments/$SEGMENT
 
@@ -380,10 +380,7 @@ do
 
   if $INDEXFLAG; then
     echo "Indexing $SEGMENT to index"
-    __bin_nutch index "${commonOptions[@]}" "$CRAWL_PATH"/crawldb -linkdb "$CRAWL_PATH"/linkdb "$CRAWL_PATH"/segments/$SEGMENT
-
-    echo "Cleaning up index if possible"
-    __bin_nutch clean "${commonOptions[@]}" "$CRAWL_PATH"/crawldb
+    __bin_nutch index "${commonOptions[@]}" "$CRAWL_PATH"/crawldb -linkdb "$CRAWL_PATH"/linkdb "$CRAWL_PATH"/segments/$SEGMENT -deleteGone
   else
     echo "Skipping indexing ..."
   fi