This is an automated email from the ASF dual-hosted git repository.
dzamo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new ee874af DRILL-8092: Add Auto Pagination to HTTP Storage Plugin (#2424)
ee874af is described below
commit ee874af953ed99614c8e7291ea9164b4ddc5ff24
Author: James Turton <[email protected]>
AuthorDate: Wed Jan 19 12:32:38 2022 +0200
DRILL-8092: Add Auto Pagination to HTTP Storage Plugin (#2424)
* Simplify Pagintor classes.
* Correct Javadoc on JsonLoader#readBatch().
---
.../drill/exec/store/http/HttpBatchReader.java | 2 +-
.../drill/exec/store/http/HttpCSVBatchReader.java | 3 +-
.../exec/store/http/HttpScanBatchCreator.java | 2 +-
.../drill/exec/store/http/HttpXMLBatchReader.java | 3 +-
.../exec/store/http/paginator/OffsetPaginator.java | 47 +++------------
.../exec/store/http/paginator/PagePaginator.java | 41 +++----------
.../drill/exec/store/http/paginator/Paginator.java | 70 ++++------------------
.../drill/exec/store/http/util/SimpleHttp.java | 2 +-
.../drill/exec/store/http/TestPaginator.java | 53 ++++------------
.../exec/store/easy/json/loader/JsonLoader.java | 4 +-
10 files changed, 45 insertions(+), 182 deletions(-)
diff --git
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java
index 05d8c49..ad056d7 100644
---
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java
+++
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java
@@ -251,7 +251,7 @@ public class HttpBatchReader implements
ManagedReader<SchemaNegotiator> {
if (paginator != null &&
maxRecords < 0 && (resultSetLoader.totalRowCount()) <
paginator.getPageSize()) {
logger.debug("Partially filled page received, ending pagination");
- paginator.endPagination();
+ paginator.notifyPartialPage();
}
return result;
}
diff --git
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpCSVBatchReader.java
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpCSVBatchReader.java
index 481525d..2341a87 100644
---
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpCSVBatchReader.java
+++
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpCSVBatchReader.java
@@ -179,8 +179,7 @@ public class HttpCSVBatchReader extends HttpBatchReader {
if (paginator != null &&
maxRecords < 0 && (resultLoader.totalRowCount()) <
paginator.getPageSize()) {
- logger.debug("Ending CSV pagination: Pages too small");
- paginator.endPagination();
+ paginator.notifyPartialPage();
}
return false;
}
diff --git
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpScanBatchCreator.java
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpScanBatchCreator.java
index e0e139f..a50fdc8 100644
---
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpScanBatchCreator.java
+++
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpScanBatchCreator.java
@@ -165,7 +165,7 @@ public class HttpScanBatchCreator implements
BatchCreator<HttpSubScan> {
* a new batch reader for each URL. In the future, this could be
parallelized in
* the group scan such that the calls could be sent to different
drillbits.
*/
- if (!paginator.hasMore()) {
+ if (!paginator.hasNext()) {
return null;
}
diff --git
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpXMLBatchReader.java
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpXMLBatchReader.java
index afc3dcc..72cb805 100644
---
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpXMLBatchReader.java
+++
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpXMLBatchReader.java
@@ -133,8 +133,7 @@ public class HttpXMLBatchReader extends HttpBatchReader {
if (paginator != null &&
maxRecords < 0 && (resultLoader.totalRowCount()) <
paginator.getPageSize()) {
- logger.debug("Ending XML pagination: Pages too small");
- paginator.endPagination();
+ paginator.notifyPartialPage();
}
return result;
diff --git
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/OffsetPaginator.java
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/OffsetPaginator.java
index 1e2663f..22a0233 100644
---
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/OffsetPaginator.java
+++
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/OffsetPaginator.java
@@ -18,14 +18,12 @@
package org.apache.drill.exec.store.http.paginator;
-import okhttp3.HttpUrl;
import okhttp3.HttpUrl.Builder;
import org.apache.drill.common.exceptions.UserException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.util.ArrayList;
-import java.util.List;
+import java.util.NoSuchElementException;
public class OffsetPaginator extends Paginator {
@@ -48,11 +46,10 @@ public class OffsetPaginator extends Paginator {
* @param offsetParam The field name which corresponds to the offset field
from the API
*/
public OffsetPaginator(Builder builder, int limit, int pageSize, String
limitParam, String offsetParam) {
- super(builder, paginationMode.OFFSET, pageSize, limit > 0);
+ super(builder, paginationMode.OFFSET, pageSize, limit);
this.limit = limit;
this.limitParam = limitParam;
this.offsetParam = offsetParam;
- this.paginatedUrls = buildPaginatedURLs();
this.offset = 0;
// Page size must be greater than zero
@@ -64,21 +61,17 @@ public class OffsetPaginator extends Paginator {
}
}
- public int getLimit() {
- return limit;
+ @Override
+ public boolean hasNext() {
+ return !partialPageReceived && (limit < 0 || offset < limit);
}
@Override
public String next() {
- if (hasLimit) {
- return super.next();
- } else {
- return generateNextUrl();
+ if (!hasNext()) {
+ throw new NoSuchElementException();
}
- }
- @Override
- public String generateNextUrl() {
builder.removeAllEncodedQueryParameters(offsetParam);
builder.removeAllEncodedQueryParameters(limitParam);
@@ -88,30 +81,4 @@ public class OffsetPaginator extends Paginator {
return builder.build().url().toString();
}
-
-
- /**
- * Build the paginated URLs. If the parameters are invalid, return a list
with the original URL.
- *
- * @return List of paginated URLs
- */
- @Override
- public List<HttpUrl> buildPaginatedURLs() {
- paginatedUrls = new ArrayList<>();
- // If user wants 1000 records, and the page size is 100, we need to send
10 requests
- int requestedPages = limit / pageSize;
-
- for (int i = 0; i < requestedPages; i++) {
- // Clear out old params
- builder.removeAllEncodedQueryParameters(offsetParam);
- builder.removeAllEncodedQueryParameters(limitParam);
-
- builder.addQueryParameter(offsetParam, String.valueOf(offset));
- builder.addQueryParameter(limitParam, String.valueOf(pageSize));
- offset += pageSize;
- paginatedUrls.add(builder.build());
- }
-
- return paginatedUrls;
- }
}
diff --git
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/PagePaginator.java
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/PagePaginator.java
index f42a959..16746c1 100644
---
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/PagePaginator.java
+++
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/PagePaginator.java
@@ -18,14 +18,12 @@
package org.apache.drill.exec.store.http.paginator;
-import okhttp3.HttpUrl;
import okhttp3.HttpUrl.Builder;
import org.apache.drill.common.exceptions.UserException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.util.ArrayList;
-import java.util.List;
+import java.util.NoSuchElementException;
public class PagePaginator extends Paginator {
@@ -36,7 +34,6 @@ public class PagePaginator extends Paginator {
private final String pageSizeParam;
private int currentPage;
-
/**
* The Page Paginator works similarly to the offset paginator. It requires
the user to supply a page number query
* parameter, a page size variable, and a maximum page size.
@@ -47,11 +44,10 @@ public class PagePaginator extends Paginator {
* @param pageSizeParam The API Query parameter which specifies how many
results per page
*/
public PagePaginator(Builder builder, int limit, int pageSize, String
pageParam, String pageSizeParam) {
- super(builder, paginationMode.PAGE, pageSize, limit > 0);
+ super(builder, paginationMode.PAGE, pageSize, limit);
this.limit = limit;
this.pageParam = pageParam;
this.pageSizeParam = pageSizeParam;
- this.paginatedUrls = buildPaginatedURLs();
currentPage = 1;
// Page size must be greater than zero
@@ -64,16 +60,16 @@ public class PagePaginator extends Paginator {
}
@Override
- public String next() {
- if (hasLimit) {
- return super.next();
- } else {
- return generateNextUrl();
- }
+ public boolean hasNext() {
+ return !partialPageReceived && (limit < 0 || (currentPage-1) * pageSize <
limit);
}
@Override
- public String generateNextUrl() {
+ public String next() {
+ if (!hasNext()) {
+ throw new NoSuchElementException();
+ }
+
builder.removeAllEncodedQueryParameters(pageParam);
builder.removeAllEncodedQueryParameters(pageSizeParam);
@@ -83,23 +79,4 @@ public class PagePaginator extends Paginator {
return builder.build().url().toString();
}
-
-
- @Override
- public List<HttpUrl> buildPaginatedURLs() {
- this.paginatedUrls = new ArrayList<>();
- int pageCount = Math.max(1, (limit / pageSize));
-
- for (int i = 1; i <= pageCount; i++) {
- // Clear out old params
- builder.removeAllEncodedQueryParameters(pageParam);
- builder.removeAllEncodedQueryParameters(pageSizeParam);
-
- builder.addQueryParameter(pageParam, String.valueOf(i));
- builder.addQueryParameter(pageSizeParam, String.valueOf(pageSize));
- paginatedUrls.add(builder.build());
- }
-
- return paginatedUrls;
- }
}
diff --git
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/Paginator.java
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/Paginator.java
index 1106c8c..eb1f1e7 100644
---
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/Paginator.java
+++
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/Paginator.java
@@ -18,12 +18,11 @@
package org.apache.drill.exec.store.http.paginator;
-import okhttp3.HttpUrl;
import okhttp3.HttpUrl.Builder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.util.List;
+import java.util.Iterator;
/**
* This class is the abstraction for the Paginator class. There are
@@ -36,52 +35,34 @@ import java.util.List;
* generate a list of URLs with the appropriate pagination parameters. In the
future
* this could be parallelized, however in the V1 all these requests are run in
series.
*/
-public abstract class Paginator {
+public abstract class Paginator implements Iterator<String> {
private static final Logger logger =
LoggerFactory.getLogger(Paginator.class);
private static final int MAX_ATTEMPTS = 100;
protected final int pageSize;
- private boolean hasMore;
public enum paginationMode {
OFFSET,
PAGE
}
- protected final boolean hasLimit;
protected final paginationMode MODE;
- protected int index;
+ protected final int limit;
+ protected boolean partialPageReceived;
protected Builder builder;
- protected List<HttpUrl> paginatedUrls;
-
- public Paginator(Builder builder, paginationMode mode, int pageSize, boolean
hasLimit) {
+ public Paginator(Builder builder, paginationMode mode, int pageSize, int
limit) {
this.MODE = mode;
this.builder = builder;
this.pageSize = pageSize;
- this.hasLimit = hasLimit;
- hasMore = true;
- index = 0;
+ this.limit = limit;
+ this.partialPageReceived = false;
}
public void setBuilder(Builder builder) {
this.builder = builder;
}
- public abstract List<HttpUrl> buildPaginatedURLs();
-
- /**
- * This method is to be used when the user does not include a limit in the
query
- * In each paginator, the paginator tracks whether there is more data. If
there is
- * more data, the paginator marks the hasMore variable false.
- * @return
- */
- public abstract String generateNextUrl();
-
- public List<HttpUrl> getPaginatedUrls() {
- return this.paginatedUrls;
- }
-
/**
* This method is used in pagination queries when no limit is present. The
intended
* flow is that if no limit is present, if the batch reader encounters a
page which has
@@ -91,41 +72,10 @@ public abstract class Paginator {
* In the event that the API simply runs out of data, the reader will return
false anyway
* and the pagination will stop.
*/
- public void endPagination() {
- hasMore = false;
+ public void notifyPartialPage() {
+ partialPageReceived = true;
+ logger.debug("Ending pagination: partial page received");
}
public int getPageSize() { return pageSize; }
-
- public String next() {
- if (!hasMore()) {
- return null;
- }
- String url = paginatedUrls.get(index).toString();
- index++;
- if (index >= paginatedUrls.size()) {
- hasMore = false;
- }
- return url;
- }
-
- /**
- * Returns true if the paginator has more pages, false if not.
- * @return True if there are more pages to visit, false if not.
- */
- public boolean hasMore() {
- return hasMore;
- }
-
- /**
- * Returns the count of URLs generated. Only meaningful for OFFSET
paginator.
- * @return The count of pages.
- */
- public int count() {
- if (paginatedUrls == null) {
- return 0;
- } else {
- return paginatedUrls.size();
- }
- }
}
diff --git
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/util/SimpleHttp.java
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/util/SimpleHttp.java
index af40e7b..2f9f8b8 100644
---
a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/util/SimpleHttp.java
+++
b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/util/SimpleHttp.java
@@ -244,7 +244,7 @@ public class SimpleHttp {
if (paginator != null && (
response.code() != 200 || response.body() == null ||
response.body().contentLength() == 0)) {
- paginator.endPagination();
+ paginator.notifyPartialPage();
}
// If the request is unsuccessful, throw a UserException
diff --git
a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPaginator.java
b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPaginator.java
index c1feeb9..b6077b5 100644
---
a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPaginator.java
+++
b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPaginator.java
@@ -23,11 +23,8 @@ import
org.apache.drill.exec.store.http.paginator.OffsetPaginator;
import org.apache.drill.exec.store.http.paginator.PagePaginator;
import org.junit.Test;
-import java.util.ArrayList;
-import java.util.List;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertEquals;
/**
* This class tests the functionality of the various paginator classes as it
pertains
@@ -41,39 +38,11 @@ public class TestPaginator {
HttpUrl.Builder urlBuilder = HttpUrl.parse(baseUrl).newBuilder();
OffsetPaginator op = new OffsetPaginator(urlBuilder, 25, 5, "limit",
"offset");
- List<HttpUrl> urls = op.buildPaginatedURLs();
- assertEquals(5, urls.size());
-
- List<HttpUrl> expected = new ArrayList<>();
- expected.add( HttpUrl.parse("https://myapi.com/?offset=0&limit=5"));
- expected.add( HttpUrl.parse("https://myapi.com/?offset=5&limit=5"));
- expected.add( HttpUrl.parse("https://myapi.com/?offset=10&limit=5"));
- expected.add( HttpUrl.parse("https://myapi.com/?offset=15&limit=5"));
- expected.add( HttpUrl.parse("https://myapi.com/?offset=20&limit=5"));
-
- assertEquals(expected, urls);
- }
-
- @Test
- public void TestOffsetPaginatorIterator() {
- String baseUrl = "https://myapi.com";
- HttpUrl.Builder urlBuilder = HttpUrl.parse(baseUrl).newBuilder();
-
- OffsetPaginator op = new OffsetPaginator(urlBuilder, 25, 5, "limit",
"offset");
- assertEquals(5, op.count());
- String next = op.next();
- assertEquals(next, "https://myapi.com/?offset=0&limit=5");
- next = op.next();
- assertEquals(next, "https://myapi.com/?offset=5&limit=5");
- next = op.next();
- assertEquals(next, "https://myapi.com/?offset=10&limit=5");
- next = op.next();
- assertEquals(next, "https://myapi.com/?offset=15&limit=5");
- next = op.next();
- assertEquals(next, "https://myapi.com/?offset=20&limit=5");
- next = op.next();
- assertNull(next);
+ for (int i = 0; i < 25; i += 5) {
+ assertEquals(String.format("%s/?offset=%d&limit=5", baseUrl, i),
op.next());
+ }
+ assertFalse(op.hasNext());
}
@Test
@@ -82,11 +51,13 @@ public class TestPaginator {
HttpUrl.Builder urlBuilder = HttpUrl.parse(baseUrl).newBuilder();
PagePaginator pp = new PagePaginator(urlBuilder, 10, 2, "page",
"per_page");
- List<HttpUrl> urls = pp.getPaginatedUrls();
- assertEquals(5, urls.size());
+ for (int i = 1; i <= 5; i++) {
+ assertEquals(String.format("%s?page=%d&per_page=%d", baseUrl, i, 2),
pp.next());
+ }
+ assertFalse(pp.hasNext());
PagePaginator pp2 = new PagePaginator(urlBuilder, 10, 100, "page",
"per_page");
- List<HttpUrl> urls2 = pp2.getPaginatedUrls();
- assertEquals(1, urls2.size());
+ assertEquals(String.format("%s?page=%d&per_page=%d", baseUrl, 1, 100),
pp2.next());
+ assertFalse(pp.hasNext());
}
}
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoader.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoader.java
index e4c5a0f..4ca6dab 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoader.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/loader/JsonLoader.java
@@ -48,9 +48,9 @@ public interface JsonLoader {
String JSON_LITERAL_MODE = "json";
/**
- * Read one record of data.
+ * Read one batch of row data.
*
- * @return {@code true} if a record was loaded, {@code false} if EOF.
+ * @return {@code true} if at least one record was loaded, {@code false} if
EOF.
* @throws {org.apache.drill.common.exceptions.UserException
* for most errors
* @throws RuntimeException for unexpected errors, most often due