This is an automated email from the ASF dual-hosted git repository.
markus pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new 84cda2abd NUTCH-3029 Host specific max. and min. intervals in adaptive scheduler
84cda2abd is described below
commit 84cda2abd500667222fdb00e503780ee0bdaaab4
Author: Markus Jelsma <[email protected]>
AuthorDate: Wed Mar 13 16:12:21 2024 +0000
NUTCH-3029 Host specific max. and min. intervals in adaptive scheduler
---
.../org/apache/nutch/crawl/AdaptiveFetchSchedule.java | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java b/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
index a403d5649..4d4a3af73 100644
--- a/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
+++ b/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
@@ -189,6 +189,9 @@ public class AdaptiveFetchSchedule extends AbstractFetchSchedule {
/**
* Strip a URL, leaving only the host name.
+ *
+ * @param url url to get hostname for
+ * @return hostname
*/
public static String getHostName(String url) throws URISyntaxException {
URI uri = new URI(url);
@@ -198,9 +201,10 @@ public class AdaptiveFetchSchedule extends AbstractFetchSchedule {
/**
* Returns the max_interval for this URL, which might depend on the host.
- * @param url the URL to be scheduled
- * @param defaultMaxInterval the value to which to default
- * if max_interval has not been configured for this host
+ *
+ * @param url the URL to be scheduled
+ * @param defaultMaxInterval the value to which to default if max_interval has not been configured for this host
+ * @return the configured maximum interval or the default interval
*/
public float getMaxInterval(Text url, float defaultMaxInterval){
if (hostSpecificMaxInterval.isEmpty()) {
@@ -220,9 +224,10 @@ public class AdaptiveFetchSchedule extends AbstractFetchSchedule {
/**
* Returns the min_interval for this URL, which might depend on the host.
- * @param url the URL to be scheduled
- * @param defaultMinInterval the value to which to default
- * if min_interval has not been configured for this host
+ *
+ * @param url the URL to be scheduled
+ * @param defaultMinInterval the value to which to default if min_interval has not been configured for this host
+ * @return the configured minimum interval or the default interval
*/
public float getMinInterval(Text url, float defaultMinInterval){
if (hostSpecificMinInterval.isEmpty()) {