This is an automated email from the ASF dual-hosted git repository.

markus pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new 84cda2abd NUTCH-3029 Host specific max. and min. intervals in adaptive scheduler
84cda2abd is described below

commit 84cda2abd500667222fdb00e503780ee0bdaaab4
Author: Markus Jelsma <mar...@apache.org>
AuthorDate: Wed Mar 13 16:12:21 2024 +0000

    NUTCH-3029 Host specific max. and min. intervals in adaptive scheduler
---
 .../org/apache/nutch/crawl/AdaptiveFetchSchedule.java   | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java b/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
index a403d5649..4d4a3af73 100644
--- a/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
+++ b/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
@@ -189,6 +189,9 @@ public class AdaptiveFetchSchedule extends AbstractFetchSchedule {
 
   /**
    * Strip a URL, leaving only the host name.
+   *
+   * @param url url to get hostname for
+   * @return hostname
    */
   public static String getHostName(String url) throws URISyntaxException {
     URI uri = new URI(url);
@@ -198,9 +201,10 @@ public class AdaptiveFetchSchedule extends AbstractFetchSchedule {
 
   /**
    * Returns the max_interval for this URL, which might depend on the host.
-   * @param  url  the URL to be scheduled
-   * @param  defaultMaxInterval  the value to which to default
-   * if max_interval has not been configured for this host
+   *
+   * @param url the URL to be scheduled
+   * @param defaultMaxInterval the value to which to default if max_interval has not been configured for this host
+   * @return the configured maximum interval or the default interval
    */
   public float getMaxInterval(Text url, float defaultMaxInterval){
     if (hostSpecificMaxInterval.isEmpty()) {
@@ -220,9 +224,10 @@ public class AdaptiveFetchSchedule extends AbstractFetchSchedule {
 
   /**
    * Returns the min_interval for this URL, which might depend on the host.
-   * @param  url  the URL to be scheduled
-   * @param  defaultMinInterval  the value to which to default
-   * if min_interval has not been configured for this host
+   *
+   * @param url the URL to be scheduled
+   * @param defaultMinInterval the value to which to default if min_interval has not been configured for this host
+   * @return the configured minimum interval or the default interval
    */
   public float getMinInterval(Text url, float defaultMinInterval){
     if (hostSpecificMinInterval.isEmpty()) {

Reply via email to