This is an automated email from the ASF dual-hosted git repository.

dzamo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new ee874af  DRILL-8092: Add Auto Pagination to HTTP Storage Plugin (#2424)
ee874af is described below

commit ee874af953ed99614c8e7291ea9164b4ddc5ff24
Author: James Turton <[email protected]>
AuthorDate: Wed Jan 19 12:32:38 2022 +0200

    DRILL-8092: Add Auto Pagination to HTTP Storage Plugin (#2424)
    
    * Simplify Pagintor classes.
    
    * Correct Javadoc on JsonLoader#readBatch().
---
 .../drill/exec/store/http/HttpBatchReader.java     |  2 +-
 .../drill/exec/store/http/HttpCSVBatchReader.java  |  3 +-
 .../exec/store/http/HttpScanBatchCreator.java      |  2 +-
 .../drill/exec/store/http/HttpXMLBatchReader.java  |  3 +-
 .../exec/store/http/paginator/OffsetPaginator.java | 47 +++------------
 .../exec/store/http/paginator/PagePaginator.java   | 41 +++----------
 .../drill/exec/store/http/paginator/Paginator.java | 70 ++++------------------
 .../drill/exec/store/http/util/SimpleHttp.java     |  2 +-
 .../drill/exec/store/http/TestPaginator.java       | 53 ++++------------
 .../exec/store/easy/json/loader/JsonLoader.java    |  4 +-
 10 files changed, 45 insertions(+), 182 deletions(-)

diff --git 
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java
 
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java
index 05d8c49..ad056d7 100644
--- 
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java
+++ 
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java
@@ -251,7 +251,7 @@ public class HttpBatchReader implements 
ManagedReader<SchemaNegotiator> {
     if (paginator != null &&
       maxRecords < 0 && (resultSetLoader.totalRowCount()) < 
paginator.getPageSize()) {
       logger.debug("Partially filled page received, ending pagination");
-      paginator.endPagination();
+      paginator.notifyPartialPage();
     }
     return result;
   }
diff --git 
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpCSVBatchReader.java
 
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpCSVBatchReader.java
index 481525d..2341a87 100644
--- 
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpCSVBatchReader.java
+++ 
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpCSVBatchReader.java
@@ -179,8 +179,7 @@ public class HttpCSVBatchReader extends HttpBatchReader {
 
       if (paginator != null &&
         maxRecords < 0 && (resultLoader.totalRowCount()) < 
paginator.getPageSize()) {
-        logger.debug("Ending CSV pagination: Pages too small");
-        paginator.endPagination();
+        paginator.notifyPartialPage();
       }
       return false;
     }
diff --git 
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpScanBatchCreator.java
 
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpScanBatchCreator.java
index e0e139f..a50fdc8 100644
--- 
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpScanBatchCreator.java
+++ 
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpScanBatchCreator.java
@@ -165,7 +165,7 @@ public class HttpScanBatchCreator implements 
BatchCreator<HttpSubScan> {
         * a new batch reader for each URL.  In the future, this could be 
parallelized in
         * the group scan such that the calls could be sent to different 
drillbits.
         */
-        if (!paginator.hasMore()) {
+        if (!paginator.hasNext()) {
           return null;
         }
 
diff --git 
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpXMLBatchReader.java
 
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpXMLBatchReader.java
index afc3dcc..72cb805 100644
--- 
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpXMLBatchReader.java
+++ 
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpXMLBatchReader.java
@@ -133,8 +133,7 @@ public class HttpXMLBatchReader extends HttpBatchReader {
 
     if (paginator != null &&
       maxRecords < 0 && (resultLoader.totalRowCount()) < 
paginator.getPageSize()) {
-      logger.debug("Ending XML pagination: Pages too small");
-      paginator.endPagination();
+      paginator.notifyPartialPage();
     }
 
     return result;
diff --git 
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/OffsetPaginator.java
 
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/OffsetPaginator.java
index 1e2663f..22a0233 100644
--- 
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/OffsetPaginator.java
+++ 
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/OffsetPaginator.java
@@ -18,14 +18,12 @@
 
 package org.apache.drill.exec.store.http.paginator;
 
-import okhttp3.HttpUrl;
 import okhttp3.HttpUrl.Builder;
 import org.apache.drill.common.exceptions.UserException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.ArrayList;
-import java.util.List;
+import java.util.NoSuchElementException;
 
 public class OffsetPaginator extends Paginator {
 
@@ -48,11 +46,10 @@ public class OffsetPaginator extends Paginator {
    * @param offsetParam The field name which corresponds to the offset field 
from the API
    */
   public OffsetPaginator(Builder builder, int limit, int pageSize, String 
limitParam, String offsetParam) {
-    super(builder, paginationMode.OFFSET, pageSize, limit > 0);
+    super(builder, paginationMode.OFFSET, pageSize, limit);
     this.limit = limit;
     this.limitParam = limitParam;
     this.offsetParam = offsetParam;
-    this.paginatedUrls = buildPaginatedURLs();
     this.offset = 0;
 
     // Page size must be greater than zero
@@ -64,21 +61,17 @@ public class OffsetPaginator extends Paginator {
     }
   }
 
-  public int getLimit() {
-    return limit;
+  @Override
+  public boolean hasNext() {
+    return !partialPageReceived && (limit < 0 || offset < limit);
   }
 
   @Override
   public String next() {
-    if (hasLimit) {
-      return super.next();
-    } else {
-      return generateNextUrl();
+    if (!hasNext()) {
+      throw new NoSuchElementException();
     }
-  }
 
-  @Override
-  public String generateNextUrl() {
     builder.removeAllEncodedQueryParameters(offsetParam);
     builder.removeAllEncodedQueryParameters(limitParam);
 
@@ -88,30 +81,4 @@ public class OffsetPaginator extends Paginator {
 
     return builder.build().url().toString();
   }
-
-
-  /**
-   * Build the paginated URLs.  If the parameters are invalid, return a list 
with the original URL.
-   *
-   * @return List of paginated URLs
-   */
-  @Override
-  public List<HttpUrl> buildPaginatedURLs() {
-    paginatedUrls = new ArrayList<>();
-    // If user wants 1000 records, and the page size is 100, we need to send 
10 requests
-    int requestedPages = limit / pageSize;
-
-    for (int i = 0; i < requestedPages; i++) {
-      // Clear out old params
-      builder.removeAllEncodedQueryParameters(offsetParam);
-      builder.removeAllEncodedQueryParameters(limitParam);
-
-      builder.addQueryParameter(offsetParam, String.valueOf(offset));
-      builder.addQueryParameter(limitParam, String.valueOf(pageSize));
-      offset += pageSize;
-      paginatedUrls.add(builder.build());
-    }
-
-    return paginatedUrls;
-  }
 }
diff --git 
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/PagePaginator.java
 
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/PagePaginator.java
index f42a959..16746c1 100644
--- 
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/PagePaginator.java
+++ 
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/PagePaginator.java
@@ -18,14 +18,12 @@
 
 package org.apache.drill.exec.store.http.paginator;
 
-import okhttp3.HttpUrl;
 import okhttp3.HttpUrl.Builder;
 import org.apache.drill.common.exceptions.UserException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.ArrayList;
-import java.util.List;
+import java.util.NoSuchElementException;
 
 public class PagePaginator extends Paginator {
 
@@ -36,7 +34,6 @@ public class PagePaginator extends Paginator {
   private final String pageSizeParam;
   private int currentPage;
 
-
   /**
    * The Page Paginator works similarly to the offset paginator.  It requires 
the user to supply a page number query
    * parameter, a page size variable, and a maximum page size.
@@ -47,11 +44,10 @@ public class PagePaginator extends Paginator {
    * @param pageSizeParam The API Query parameter which specifies how many 
results per page
    */
   public PagePaginator(Builder builder, int limit, int pageSize, String 
pageParam, String pageSizeParam) {
-    super(builder, paginationMode.PAGE, pageSize, limit > 0);
+    super(builder, paginationMode.PAGE, pageSize, limit);
     this.limit = limit;
     this.pageParam = pageParam;
     this.pageSizeParam = pageSizeParam;
-    this.paginatedUrls = buildPaginatedURLs();
     currentPage = 1;
 
     // Page size must be greater than zero
@@ -64,16 +60,16 @@ public class PagePaginator extends Paginator {
   }
 
   @Override
-  public String next() {
-    if (hasLimit) {
-      return super.next();
-    } else {
-      return generateNextUrl();
-    }
+  public boolean hasNext() {
+    return !partialPageReceived && (limit < 0 || (currentPage-1) * pageSize < 
limit);
   }
 
   @Override
-  public String generateNextUrl() {
+  public String next() {
+    if (!hasNext()) {
+      throw new NoSuchElementException();
+    }
+
     builder.removeAllEncodedQueryParameters(pageParam);
     builder.removeAllEncodedQueryParameters(pageSizeParam);
 
@@ -83,23 +79,4 @@ public class PagePaginator extends Paginator {
 
     return builder.build().url().toString();
   }
-
-
-  @Override
-  public List<HttpUrl> buildPaginatedURLs() {
-    this.paginatedUrls = new ArrayList<>();
-    int pageCount = Math.max(1, (limit / pageSize));
-
-    for (int i = 1; i <= pageCount; i++) {
-      // Clear out old params
-      builder.removeAllEncodedQueryParameters(pageParam);
-      builder.removeAllEncodedQueryParameters(pageSizeParam);
-
-      builder.addQueryParameter(pageParam, String.valueOf(i));
-      builder.addQueryParameter(pageSizeParam, String.valueOf(pageSize));
-      paginatedUrls.add(builder.build());
-    }
-
-    return paginatedUrls;
-  }
 }
diff --git 
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/Paginator.java
 
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/Paginator.java
index 1106c8c..eb1f1e7 100644
--- 
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/Paginator.java
+++ 
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/Paginator.java
@@ -18,12 +18,11 @@
 
 package org.apache.drill.exec.store.http.paginator;
 
-import okhttp3.HttpUrl;
 import okhttp3.HttpUrl.Builder;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.List;
+import java.util.Iterator;
 
 /**
  * This class is the abstraction for the Paginator class.  There are
@@ -36,52 +35,34 @@ import java.util.List;
  * generate a list of URLs with the appropriate pagination parameters.  In the 
future
  * this could be parallelized, however in the V1 all these requests are run in 
series.
  */
-public abstract class Paginator {
+public abstract class Paginator implements Iterator<String> {
 
   private static final Logger logger = 
LoggerFactory.getLogger(Paginator.class);
   private static final int MAX_ATTEMPTS = 100;
   protected final int pageSize;
-  private boolean hasMore;
 
   public enum paginationMode {
     OFFSET,
     PAGE
   }
 
-  protected final boolean hasLimit;
   protected final paginationMode MODE;
-  protected int index;
+  protected final int limit;
+  protected boolean partialPageReceived;
   protected Builder builder;
-  protected List<HttpUrl> paginatedUrls;
 
-
-  public Paginator(Builder builder, paginationMode mode, int pageSize, boolean 
hasLimit) {
+  public Paginator(Builder builder, paginationMode mode, int pageSize, int 
limit) {
     this.MODE = mode;
     this.builder = builder;
     this.pageSize = pageSize;
-    this.hasLimit = hasLimit;
-    hasMore = true;
-    index = 0;
+    this.limit = limit;
+    this.partialPageReceived = false;
   }
 
   public void setBuilder(Builder builder) {
     this.builder = builder;
   }
 
-  public abstract List<HttpUrl> buildPaginatedURLs();
-
-  /**
-   * This method is to be used when the user does not include a limit in the 
query
-   * In each paginator, the paginator tracks whether there is more data.  If 
there is
-   * more data, the paginator marks the hasMore variable false.
-   * @return
-   */
-  public abstract String generateNextUrl();
-
-  public List<HttpUrl> getPaginatedUrls() {
-    return this.paginatedUrls;
-  }
-
   /**
    * This method is used in pagination queries when no limit is present.  The 
intended
    * flow is that if no limit is present, if the batch reader encounters a 
page which has
@@ -91,41 +72,10 @@ public abstract class Paginator {
    * In the event that the API simply runs out of data, the reader will return 
false anyway
    * and the pagination will stop.
    */
-  public void endPagination() {
-    hasMore = false;
+  public void notifyPartialPage() {
+    partialPageReceived = true;
+    logger.debug("Ending pagination: partial page received");
   }
 
   public int getPageSize() { return pageSize; }
-
-  public String next() {
-    if (!hasMore()) {
-      return null;
-    }
-    String url = paginatedUrls.get(index).toString();
-    index++;
-    if (index >= paginatedUrls.size()) {
-      hasMore = false;
-    }
-    return url;
-  }
-
-  /**
-   * Returns true if the paginator has more pages, false if not.
-   * @return True if there are more pages to visit, false if not.
-   */
-  public boolean hasMore() {
-    return hasMore;
-  }
-
-  /**
-   * Returns the count of URLs generated.  Only meaningful for OFFSET 
paginator.
-   * @return The count of pages.
-   */
-  public int count() {
-    if (paginatedUrls == null) {
-      return 0;
-    } else {
-      return paginatedUrls.size();
-    }
-  }
 }
diff --git 
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/util/SimpleHttp.java
 
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/util/SimpleHttp.java
index af40e7b..2f9f8b8 100644
--- 
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/util/SimpleHttp.java
+++ 
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/util/SimpleHttp.java
@@ -244,7 +244,7 @@ public class SimpleHttp {
       if (paginator != null && (
         response.code() != 200 || response.body() == null ||
         response.body().contentLength() == 0)) {
-        paginator.endPagination();
+        paginator.notifyPartialPage();
       }
 
       // If the request is unsuccessful, throw a UserException
diff --git 
a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPaginator.java
 
b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPaginator.java
index c1feeb9..b6077b5 100644
--- 
a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPaginator.java
+++ 
b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPaginator.java
@@ -23,11 +23,8 @@ import 
org.apache.drill.exec.store.http.paginator.OffsetPaginator;
 import org.apache.drill.exec.store.http.paginator.PagePaginator;
 import org.junit.Test;
 
-import java.util.ArrayList;
-import java.util.List;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 /**
  * This class tests the functionality of the various paginator classes as it 
pertains
@@ -41,39 +38,11 @@ public class TestPaginator {
     HttpUrl.Builder urlBuilder = HttpUrl.parse(baseUrl).newBuilder();
 
     OffsetPaginator op = new OffsetPaginator(urlBuilder, 25, 5, "limit", 
"offset");
-    List<HttpUrl> urls = op.buildPaginatedURLs();
-    assertEquals(5, urls.size());
-
-    List<HttpUrl> expected = new ArrayList<>();
-    expected.add( HttpUrl.parse("https://myapi.com/?offset=0&limit=5";));
-    expected.add( HttpUrl.parse("https://myapi.com/?offset=5&limit=5";));
-    expected.add( HttpUrl.parse("https://myapi.com/?offset=10&limit=5";));
-    expected.add( HttpUrl.parse("https://myapi.com/?offset=15&limit=5";));
-    expected.add( HttpUrl.parse("https://myapi.com/?offset=20&limit=5";));
-
-    assertEquals(expected, urls);
-  }
-
-  @Test
-  public void TestOffsetPaginatorIterator() {
-    String baseUrl = "https://myapi.com";;
-    HttpUrl.Builder urlBuilder = HttpUrl.parse(baseUrl).newBuilder();
-
-    OffsetPaginator op = new OffsetPaginator(urlBuilder, 25, 5, "limit", 
"offset");
-    assertEquals(5, op.count());
 
-    String next = op.next();
-    assertEquals(next, "https://myapi.com/?offset=0&limit=5";);
-    next = op.next();
-    assertEquals(next, "https://myapi.com/?offset=5&limit=5";);
-    next = op.next();
-    assertEquals(next, "https://myapi.com/?offset=10&limit=5";);
-    next = op.next();
-    assertEquals(next, "https://myapi.com/?offset=15&limit=5";);
-    next = op.next();
-    assertEquals(next, "https://myapi.com/?offset=20&limit=5";);
-    next = op.next();
-    assertNull(next);
+    for (int i = 0; i < 25; i += 5) {
+      assertEquals(String.format("%s/?offset=%d&limit=5", baseUrl, i), 
op.next());
+    }
+    assertFalse(op.hasNext());
   }
 
   @Test
@@ -82,11 +51,13 @@ public class TestPaginator {
     HttpUrl.Builder urlBuilder = HttpUrl.parse(baseUrl).newBuilder();
 
     PagePaginator pp = new PagePaginator(urlBuilder, 10, 2, "page", 
"per_page");
-    List<HttpUrl> urls = pp.getPaginatedUrls();
-    assertEquals(5, urls.size());
+    for (int i = 1; i <= 5; i++) {
+      assertEquals(String.format("%s?page=%d&per_page=%d", baseUrl, i, 2), 
pp.next());
+    }
+    assertFalse(pp.hasNext());
 
     PagePaginator pp2 = new PagePaginator(urlBuilder, 10, 100, "page", 
"per_page");
-    List<HttpUrl> urls2 = pp2.getPaginatedUrls();
-    assertEquals(1, urls2.size());
+    assertEquals(String.format("%s?page=%d&per_page=%d", baseUrl, 1, 100), 
pp2.next());
+    assertFalse(pp.hasNext());
   }
 }
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoader.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoader.java
index e4c5a0f..4ca6dab 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoader.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoader.java
@@ -48,9 +48,9 @@ public interface JsonLoader {
   String JSON_LITERAL_MODE = "json";
 
   /**
-   * Read one record of data.
+   * Read one batch of row data.
    *
-   * @return {@code true} if a record was loaded, {@code false} if EOF.
+   * @return {@code true} if at least one record was loaded, {@code false} if 
EOF.
    * @throws {org.apache.drill.common.exceptions.UserException
    * for most errors
    * @throws RuntimeException for unexpected errors, most often due

Reply via email to