This is an automated email from the ASF dual-hosted git repository. markus pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
     new 84cda2abd NUTCH-3029 Host specific max. and min. intervals in adaptive scheduler
84cda2abd is described below

commit 84cda2abd500667222fdb00e503780ee0bdaaab4
Author: Markus Jelsma <mar...@apache.org>
AuthorDate: Wed Mar 13 16:12:21 2024 +0000

    NUTCH-3029 Host specific max. and min. intervals in adaptive scheduler
---
 .../org/apache/nutch/crawl/AdaptiveFetchSchedule.java | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java b/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
index a403d5649..4d4a3af73 100644
--- a/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
+++ b/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
@@ -189,6 +189,9 @@ public class AdaptiveFetchSchedule extends AbstractFetchSchedule {
 
   /**
    * Strip a URL, leaving only the host name.
+   *
+   * @param url url to get hostname for
+   * @return hostname
    */
   public static String getHostName(String url) throws URISyntaxException {
     URI uri = new URI(url);
@@ -198,9 +201,10 @@ public class AdaptiveFetchSchedule extends AbstractFetchSchedule {
 
   /**
    * Returns the max_interval for this URL, which might depend on the host.
-   * @param url the URL to be scheduled
-   * @param defaultMaxInterval the value to which to default
-   * if max_interval has not been configured for this host
+   *
+   * @param url the URL to be scheduled
+   * @param defaultMaxInterval the value to which to default if max_interval has not been configured for this host
+   * @return the configured maximum interval or the default interval
    */
   public float getMaxInterval(Text url, float defaultMaxInterval){
     if (hostSpecificMaxInterval.isEmpty()) {
@@ -220,9 +224,10 @@ public class AdaptiveFetchSchedule extends AbstractFetchSchedule {
 
   /**
    * Returns the min_interval for this URL, which might depend on the host.
-   * @param url the URL to be scheduled
-   * @param defaultMinInterval the value to which to default
-   * if min_interval has not been configured for this host
+   *
+   * @param url the URL to be scheduled
+   * @param defaultMinInterval the value to which to default if min_interval has not been configured for this host
+   * @return the configured minimum interval or the default interval
    */
   public float getMinInterval(Text url, float defaultMinInterval){
     if (hostSpecificMinInterval.isEmpty()) {