This is an automated email from the ASF dual-hosted git repository.
snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new 83acd501e Update crawl documentation
83acd501e is described below
commit 83acd501e0a873c906fdb542e2c5ee86787a15a2
Author: Jakob Berlin <[email protected]>
AuthorDate: Thu Dec 14 16:23:11 2023 +0100
Update crawl documentation
Show --dedup-group instead of -dedup-group, which has led to
misleading output
---
src/bin/crawl | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/bin/crawl b/src/bin/crawl
index db4221868..409f72799 100755
--- a/src/bin/crawl
+++ b/src/bin/crawl
@@ -48,7 +48,7 @@
# --time-limit-fetch <time_limit_fetch> Number of minutes allocated to the
fetching [default: 180]
# --num-threads <num_threads> Number of threads for fetching /
sitemap processing [default: 50]
#
-# -dedup-group <none|host|domain> Deduplication group method [default:
none]
+# --dedup-group <none|host|domain> Deduplication group method
[default: none]
#
function __to_seconds() {
@@ -109,7 +109,7 @@ function __print_usage {
echo -e " \t\t\t\t\t - never [default]"
echo -e " \t\t\t\t\t - always (processing takes place in every iteration)"
echo -e " \t\t\t\t\t - once (processing only takes place in the first
iteration)"
- echo -e " -dedup-group <none|host|domain>\tDeduplication group method
[default: none]"
+ echo -e " --dedup-group <none|host|domain>\tDeduplication group method
[default: none]"
exit 1
}