This is an automated email from the ASF dual-hosted git repository. cgivre pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push: new 097da744ff [DRILL-8457] Allow configuring csv parser in http storage plugin configuration (#2840) 097da744ff is described below commit 097da744ff416bda7bd6e83df3d7b3f31369c42e Author: Zbigniew Tomanek <136699936+ztomanek...@users.noreply.github.com> AuthorDate: Tue Oct 31 16:30:07 2023 +0100 [DRILL-8457] Allow configuring csv parser in http storage plugin configuration (#2840) --- contrib/storage-http/CSV_Options.md | 139 +++++++++ contrib/storage-http/README.md | 3 + .../drill/exec/store/http/HttpApiConfig.java | 166 ++++++----- .../drill/exec/store/http/HttpCSVBatchReader.java | 58 +++- .../drill/exec/store/http/HttpCSVOptions.java | 313 +++++++++++++++++++++ .../drill/exec/store/http/TestHttpApiConfig.java | 234 +++++++++++++++ .../drill/exec/store/http/TestHttpCSVOptions.java | 150 ++++++++++ .../drill/exec/store/http/TestHttpPlugin.java | 51 ++++ .../src/test/resources/data/csvOptions.json | 1 + .../test/resources/data/exampleHttpApiConfig.json | 1 + .../src/test/resources/data/response.tsv | 3 + 11 files changed, 1039 insertions(+), 80 deletions(-) diff --git a/contrib/storage-http/CSV_Options.md b/contrib/storage-http/CSV_Options.md new file mode 100644 index 0000000000..43ebb79e03 --- /dev/null +++ b/contrib/storage-http/CSV_Options.md @@ -0,0 +1,139 @@ +# CSV options and configuration + +The CSV parser of the HTTP Storage plugin can be configured using `csvOptions`. 
+ +```json +{ + "csvOptions": { + "delimiter": ",", + "quote": "\"", + "quoteEscape": "\"", + "lineSeparator": "\n", + "headerExtractionEnabled": null, + "numberOfRowsToSkip": 0, + "numberOfRecordsToRead": -1, + "lineSeparatorDetectionEnabled": true, + "maxColumns": 512, + "maxCharsPerColumn": 4096, + "skipEmptyLines": true, + "ignoreLeadingWhitespaces": true, + "ignoreTrailingWhitespaces": true, + "nullValue": null + } +} +``` + +## Configuration options + +- **delimiter**: The character used to separate individual values in a CSV record. + Default: `,` + +- **quote**: The character used to enclose fields that may contain special characters (like the + delimiter or line separator). + Default: `"` + +- **quoteEscape**: The character used to escape a quote inside a field enclosed by quotes. + Default: `"` + +- **lineSeparator**: The string that represents a line break in the CSV file. + Default: `\n` + +- **headerExtractionEnabled**: Determines if the first row of the CSV contains the headers (field + names). If set to `true`, the parser will use the first row as headers. + Default: `null` + +- **numberOfRowsToSkip**: Number of rows to skip before starting to read records. Useful for + skipping initial lines that are not records or headers. + Default: `0` + +- **numberOfRecordsToRead**: Specifies the maximum number of records to read from the input. A + negative value (e.g., `-1`) means there's no limit. + Default: `-1` + +- **lineSeparatorDetectionEnabled**: When set to `true`, the parser will automatically detect and + use the line separator present in the input. This is useful when you don't know the line separator + in advance. + Default: `true` + +- **maxColumns**: The maximum number of columns a record can have. Any record with more columns than + this will cause an exception. + Default: `512` + +- **maxCharsPerColumn**: The maximum number of characters a single field can have. Any field with + more characters than this will cause an exception. 
+ Default: `4096` + +- **skipEmptyLines**: When set to `true`, the parser will skip any lines that are empty or only + contain whitespace. + Default: `true` + +- **ignoreLeadingWhitespaces**: When set to `true`, the parser will ignore any whitespaces at the + start of a field. + Default: `true` + +- **ignoreTrailingWhitespaces**: When set to `true`, the parser will ignore any whitespaces at the + end of a field. + Default: `true` + +- **nullValue**: Specifies a string that should be interpreted as a `null` value when reading. If a + field matches this string, it will be returned as `null`. + Default: `null` + +## Example + +### Parse tsv + +To parse `.tsv` files you can use the following `csvOptions` config: + +```json +{ + "csvOptions": { + "delimiter": "\t" + } +} +``` + +Then we can create the following connector plugin which queries a `.tsv` file from GitHub, let's call +it `github`: + +```json +{ + "type": "http", + "connections": { + "test-data": { + "url": "https://raw.githubusercontent.com/semantic-web-company/wic-tsv/master/data/de/Test/test_examples.txt", + "requireTail": false, + "method": "GET", + "authType": "none", + "inputType": "csv", + "xmlDataLevel": 1, + "postParameterLocation": "QUERY_STRING", + "csvOptions": { + "delimiter": "\t", + "quote": "\"", + "quoteEscape": "\"", + "lineSeparator": "\n", + "numberOfRecordsToRead": -1, + "lineSeparatorDetectionEnabled": true, + "maxColumns": 512, + "maxCharsPerColumn": 4096, + "skipEmptyLines": true, + "ignoreLeadingWhitespaces": true, + "ignoreTrailingWhitespaces": true + }, + "verifySSLCert": true + } + }, + "timeout": 5, + "retryDelay": 1000, + "proxyType": "direct", + "authMode": "SHARED_USER", + "enabled": true +} +``` + +And we can query it using the following query: + +```sql +SELECT * from github.`test-data` +``` diff --git a/contrib/storage-http/README.md b/contrib/storage-http/README.md index a19017e9c5..05f41029d6 100644 --- a/contrib/storage-http/README.md +++ b/contrib/storage-http/README.md @@ 
-294,6 +294,9 @@ The REST plugin accepts three different types of input: `json`, `csv` and `xml`. #### JSON Configuration [Read the documentation for configuring json options, including schema provisioning.](JSON_Options.md) +#### CSV Configuration +[Read the documentation for configuring csv options.](CSV_Options.md) + #### XML Configuration [Read the documentation for configuring XML options, including schema provisioning.](XML_Options.md) diff --git a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpApiConfig.java b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpApiConfig.java index 43bc2308bb..6511320e02 100644 --- a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpApiConfig.java +++ b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpApiConfig.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; +import com.google.common.collect.ImmutableList; import okhttp3.HttpUrl; import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; @@ -31,7 +32,6 @@ import org.apache.drill.common.logical.security.CredentialsProvider; import org.apache.drill.exec.store.security.CredentialProviderUtils; import org.apache.drill.exec.store.security.UsernamePasswordCredentials; import org.apache.drill.exec.store.security.UsernamePasswordWithProxyCredentials; -import com.google.common.collect.ImmutableList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,6 +40,7 @@ import java.util.Map; import java.util.Objects; import java.util.Optional; + @JsonInclude(JsonInclude.Include.NON_DEFAULT) @JsonDeserialize(builder = HttpApiConfig.HttpApiConfigBuilder.class) public class HttpApiConfig { @@ -116,6 +117,9 @@ public class HttpApiConfig { 
@JsonProperty private final HttpXmlOptions xmlOptions; + @JsonProperty + private final HttpCSVOptions csvOptions; + @JsonInclude @JsonProperty private final boolean verifySSLCert; @@ -185,10 +189,15 @@ public class HttpApiConfig { public HttpJsonOptions jsonOptions() { return this.jsonOptions; } + public HttpXmlOptions xmlOptions() { return this.xmlOptions; } + public HttpCSVOptions csvOptions() { + return this.csvOptions; + } + public boolean verifySSLCert() { return this.verifySSLCert; } @@ -211,56 +220,59 @@ public class HttpApiConfig { } HttpApiConfig that = (HttpApiConfig) o; return requireTail == that.requireTail - && errorOn400 == that.errorOn400 - && verifySSLCert == that.verifySSLCert - && directCredentials == that.directCredentials - && caseSensitiveFilters == that.caseSensitiveFilters - && Objects.equals(url, that.url) - && Objects.equals(method, that.method) - && Objects.equals(postBody, that.postBody) - && Objects.equals(headers, that.headers) - && Objects.equals(params, that.params) - && Objects.equals(postParameterLocation, that.postParameterLocation) - && Objects.equals(dataPath, that.dataPath) - && Objects.equals(authType, that.authType) - && Objects.equals(inputType, that.inputType) - && Objects.equals(limitQueryParam, that.limitQueryParam) - && Objects.equals(jsonOptions, that.jsonOptions) - && Objects.equals(xmlOptions, that.xmlOptions) - && Objects.equals(credentialsProvider, that.credentialsProvider) - && Objects.equals(paginator, that.paginator); + && errorOn400 == that.errorOn400 + && verifySSLCert == that.verifySSLCert + && directCredentials == that.directCredentials + && caseSensitiveFilters == that.caseSensitiveFilters + && Objects.equals(url, that.url) + && Objects.equals(method, that.method) + && Objects.equals(postBody, that.postBody) + && Objects.equals(headers, that.headers) + && Objects.equals(params, that.params) + && Objects.equals(postParameterLocation, that.postParameterLocation) + && Objects.equals(dataPath, that.dataPath) + && 
Objects.equals(authType, that.authType) + && Objects.equals(inputType, that.inputType) + && Objects.equals(limitQueryParam, that.limitQueryParam) + && Objects.equals(jsonOptions, that.jsonOptions) + && Objects.equals(xmlOptions, that.xmlOptions) + && Objects.equals(credentialsProvider, that.credentialsProvider) + && Objects.equals(paginator, that.paginator) + && Objects.equals(csvOptions, that.csvOptions); } @Override public int hashCode() { return Objects.hash(url, requireTail, method, postBody, headers, params, dataPath, - authType, inputType, limitQueryParam, errorOn400, jsonOptions, xmlOptions, verifySSLCert, - credentialsProvider, paginator, directCredentials, postParameterLocation, caseSensitiveFilters); + authType, inputType, limitQueryParam, errorOn400, jsonOptions, xmlOptions, verifySSLCert, + credentialsProvider, paginator, directCredentials, postParameterLocation, + caseSensitiveFilters, csvOptions); } @Override public String toString() { return new PlanStringBuilder(this) - .field("url", url) - .field("requireTail", requireTail) - .field("method", method) - .field("postBody", postBody) - .field("postParameterLocation", postParameterLocation) - .field("headers", headers) - .field("params", params) - .field("dataPath", dataPath) - .field("caseSensitiveFilters", caseSensitiveFilters) - .field("authType", authType) - .field("inputType", inputType) - .field("limitQueryParam", limitQueryParam) - .field("errorOn400", errorOn400) - .field("jsonOptions", jsonOptions) - .field("xmlOptions", xmlOptions) - .field("verifySSLCert", verifySSLCert) - .field("credentialsProvider", credentialsProvider) - .field("paginator", paginator) - .field("directCredentials", directCredentials) - .toString(); + .field("url", url) + .field("requireTail", requireTail) + .field("method", method) + .field("postBody", postBody) + .field("postParameterLocation", postParameterLocation) + .field("headers", headers) + .field("params", params) + .field("dataPath", dataPath) + 
.field("caseSensitiveFilters", caseSensitiveFilters) + .field("authType", authType) + .field("inputType", inputType) + .field("limitQueryParam", limitQueryParam) + .field("errorOn400", errorOn400) + .field("jsonOptions", jsonOptions) + .field("xmlOptions", xmlOptions) + .field("verifySSLCert", verifySSLCert) + .field("credentialsProvider", credentialsProvider) + .field("paginator", paginator) + .field("directCredentials", directCredentials) + .field("csvOptions", csvOptions) + .toString(); } /** @@ -307,39 +319,50 @@ public class HttpApiConfig { this.url = builder.url; this.jsonOptions = builder.jsonOptions; this.xmlOptions = builder.xmlOptions; + this.csvOptions = builder.csvOptions; + + final HttpMethod httpMethod; + try { + httpMethod = HttpMethod.valueOf(this.method); + } catch (IllegalArgumentException e) { + throw UserException + .validationError() + .message("Invalid HTTP method: %s. Drill supports 'GET' and , 'POST'.", method) + .build(logger); + } - HttpMethod httpMethod = HttpMethod.valueOf(this.method); - // Get the request method. Only accept GET and POST requests. Anything else will default to GET. + // Get the request method. Only accept GET and POST requests. Anything else will default to + // GET. switch (httpMethod) { - case GET: - case POST: - break; - default: - throw UserException + case GET: + case POST: + break; + default: + throw UserException .validationError() .message("Invalid HTTP method: %s. Drill supports 'GET' and , 'POST'.", method) .build(logger); } if (StringUtils.isEmpty(url)) { throw UserException - .validationError() - .message("URL is required for the HTTP storage plugin.") - .build(logger); + .validationError() + .message("URL is required for the HTTP storage plugin.") + .build(logger); } // Default to query string to avoid breaking changes this.postParameterLocation = StringUtils.isEmpty(builder.postParameterLocation) ? 
- PostLocation.QUERY_STRING.toString() : builder.postParameterLocation.trim().toUpperCase(); + PostLocation.QUERY_STRING.toString() : builder.postParameterLocation.trim().toUpperCase(); - // Get the authentication method. Future functionality will include OAUTH2 authentication but for now + // Get the authentication method. Future functionality will include OAUTH2 authentication but + // for now // Accept either basic or none. The default is none. this.authType = StringUtils.defaultIfEmpty(builder.authType, "none"); this.postBody = builder.postBody; - this.params = CollectionUtils.isEmpty(builder.params) ? null : - ImmutableList.copyOf(builder.params); + ImmutableList.copyOf(builder.params); this.dataPath = StringUtils.defaultIfEmpty(builder.dataPath, null); // Default to true for backward compatibility with first PR. @@ -353,7 +376,8 @@ public class HttpApiConfig { this.xmlDataLevel = Math.max(1, builder.xmlDataLevel); this.errorOn400 = builder.errorOn400; this.caseSensitiveFilters = builder.caseSensitiveFilters; - this.credentialsProvider = CredentialProviderUtils.getCredentialsProvider(builder.userName, builder.password, builder.credentialsProvider); + this.credentialsProvider = CredentialProviderUtils.getCredentialsProvider(builder.userName, + builder.password, builder.credentialsProvider); this.directCredentials = builder.credentialsProvider == null; this.limitQueryParam = builder.limitQueryParam; @@ -366,8 +390,8 @@ public class HttpApiConfig { return null; } return getUsernamePasswordCredentials() - .map(UsernamePasswordCredentials::getUsername) - .orElse(null); + .map(UsernamePasswordCredentials::getUsername) + .orElse(null); } @JsonProperty @@ -376,8 +400,8 @@ public class HttpApiConfig { return null; } return getUsernamePasswordCredentials() - .map(UsernamePasswordCredentials::getPassword) - .orElse(null); + .map(UsernamePasswordCredentials::getPassword) + .orElse(null); } @JsonIgnore @@ -398,16 +422,16 @@ public class HttpApiConfig { @JsonIgnore public 
Optional<UsernamePasswordWithProxyCredentials> getUsernamePasswordCredentials() { return new UsernamePasswordWithProxyCredentials.Builder() - .setCredentialsProvider(credentialsProvider) - .build(); + .setCredentialsProvider(credentialsProvider) + .build(); } @JsonIgnore public Optional<UsernamePasswordWithProxyCredentials> getUsernamePasswordCredentials(String username) { return new UsernamePasswordWithProxyCredentials.Builder() - .setCredentialsProvider(credentialsProvider) - .setQueryUser(username) - .build(); + .setCredentialsProvider(credentialsProvider) + .setQueryUser(username) + .build(); } @JsonProperty @@ -455,6 +479,8 @@ public class HttpApiConfig { private HttpJsonOptions jsonOptions; private HttpXmlOptions xmlOptions; + private HttpCSVOptions csvOptions; + private CredentialsProvider credentialsProvider; private HttpPaginatorConfig paginator; @@ -481,6 +507,10 @@ public class HttpApiConfig { return this.verifySSLCert; } + public HttpCSVOptions csvOptions() { + return this.csvOptions; + } + public String inputType() { return this.inputType; } @@ -562,6 +592,7 @@ public class HttpApiConfig { /** * Do not use. Use xmlOptions instead to set XML data level. 
+ * * @param xmlDataLevel * @return */ @@ -586,6 +617,11 @@ public class HttpApiConfig { return this; } + public HttpApiConfigBuilder csvOptions(HttpCSVOptions options) { + this.csvOptions = options; + return this; + } + public HttpApiConfigBuilder credentialsProvider(CredentialsProvider credentialsProvider) { this.credentialsProvider = credentialsProvider; return this; diff --git a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpCSVBatchReader.java b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpCSVBatchReader.java index f12cbfaf5e..1428d96190 100644 --- a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpCSVBatchReader.java +++ b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpCSVBatchReader.java @@ -18,6 +18,7 @@ package org.apache.drill.exec.store.http; +import com.univocity.parsers.csv.CsvFormat; import com.univocity.parsers.csv.CsvParser; import com.univocity.parsers.csv.CsvParserSettings; import okhttp3.HttpUrl; @@ -44,10 +45,10 @@ import java.io.File; import java.io.InputStream; import java.util.ArrayList; import java.util.List; +import java.util.Objects; public class HttpCSVBatchReader extends HttpBatchReader { private final HttpSubScan subScan; - private final CsvParserSettings csvSettings; private final int maxRecords; private CsvParser csvReader; private List<StringColumnWriter> columnWriters; @@ -63,18 +64,43 @@ public class HttpCSVBatchReader extends HttpBatchReader { super(subScan); this.subScan = subScan; this.maxRecords = subScan.maxRecords(); - - this.csvSettings = new CsvParserSettings(); - csvSettings.setLineSeparatorDetectionEnabled(true); } public HttpCSVBatchReader(HttpSubScan subScan, Paginator paginator) { super(subScan, paginator); this.subScan = subScan; this.maxRecords = subScan.maxRecords(); + } + + private CsvParserSettings buildCsvSettings() { + CsvParserSettings settings = new CsvParserSettings(); + CsvFormat format = settings.getFormat(); 
+ HttpCSVOptions csvOptions = subScan.tableSpec().connectionConfig().csvOptions(); + + if (Objects.isNull(csvOptions)) { + settings.setLineSeparatorDetectionEnabled(true); + return settings; + } - this.csvSettings = new CsvParserSettings(); - csvSettings.setLineSeparatorDetectionEnabled(true); + format.setDelimiter(csvOptions.getDelimiter()); + format.setQuote(csvOptions.getQuote()); + format.setQuoteEscape(csvOptions.getQuoteEscape()); + format.setLineSeparator(csvOptions.getLineSeparator()); + + settings.setLineSeparatorDetectionEnabled(csvOptions.isLineSeparatorDetectionEnabled()); + if (!Objects.isNull(csvOptions.getHeaderExtractionEnabled())) { + settings.setHeaderExtractionEnabled(csvOptions.getHeaderExtractionEnabled()); + } + settings.setNullValue(csvOptions.getNullValue()); + settings.setNumberOfRowsToSkip(csvOptions.getNumberOfRowsToSkip()); + settings.setNumberOfRecordsToRead(csvOptions.getNumberOfRecordsToRead()); + settings.setMaxColumns(csvOptions.getMaxColumns()); + settings.setMaxCharsPerColumn(csvOptions.getMaxCharsPerColumn()); + settings.setSkipEmptyLines(csvOptions.isSkipEmptyLines()); + settings.setIgnoreLeadingWhitespaces(csvOptions.isIgnoreLeadingWhitespaces()); + settings.setIgnoreTrailingWhitespaces(csvOptions.isIgnoreTrailingWhitespaces()); + + return settings; } @Override @@ -96,17 +122,18 @@ public class HttpCSVBatchReader extends HttpBatchReader { // Http client setup SimpleHttp http = SimpleHttp.builder() - .scanDefn(subScan) - .url(url) - .tempDir(new File(tempDirPath)) - .paginator(paginator) - .proxyConfig(proxySettings(negotiator.drillConfig(), url)) - .errorContext(errorContext) - .build(); + .scanDefn(subScan) + .url(url) + .tempDir(new File(tempDirPath)) + .paginator(paginator) + .proxyConfig(proxySettings(negotiator.drillConfig(), url)) + .errorContext(errorContext) + .build(); // CSV loader setup inStream = http.getInputStream(); + CsvParserSettings csvSettings = buildCsvSettings(); this.csvReader = new CsvParser(csvSettings); 
csvReader.beginParsing(inStream); @@ -181,7 +208,7 @@ public class HttpCSVBatchReader extends HttpBatchReader { if (nextRow == null) { if (paginator != null && - maxRecords < 0 && (resultLoader.totalRowCount()) < paginator.getPageSize()) { + maxRecords < 0 && (resultLoader.totalRowCount()) < paginator.getPageSize()) { paginator.notifyPartialPage(); } return false; @@ -211,7 +238,8 @@ public class HttpCSVBatchReader extends HttpBatchReader { this.columnIndex = columnIndex; } - public void load(String[] record) {} + public void load(String[] record) { + } } public static class StringColumnWriter extends ColumnWriter { diff --git a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpCSVOptions.java b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpCSVOptions.java new file mode 100644 index 0000000000..c8d5da1271 --- /dev/null +++ b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpCSVOptions.java @@ -0,0 +1,313 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.drill.exec.store.http; + + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; +import org.apache.drill.common.PlanStringBuilder; + +import java.util.Objects; + +@JsonInclude(JsonInclude.Include.NON_DEFAULT) +@JsonDeserialize(builder = HttpCSVOptions.HttpCSVOptionsBuilder.class) +public class HttpCSVOptions { + + + @JsonProperty + private final String delimiter; + + @JsonProperty + private final char quote; + + @JsonProperty + private final char quoteEscape; + + @JsonProperty + private final String lineSeparator; + + @JsonProperty + private final Boolean headerExtractionEnabled; + + @JsonProperty + private final long numberOfRowsToSkip; + + @JsonProperty + private final long numberOfRecordsToRead; + + @JsonProperty + private final boolean lineSeparatorDetectionEnabled; + + @JsonProperty + private final int maxColumns; + + @JsonProperty + private final int maxCharsPerColumn; + + @JsonProperty + private final boolean skipEmptyLines; + + @JsonProperty + private final boolean ignoreLeadingWhitespaces; + + @JsonProperty + private final boolean ignoreTrailingWhitespaces; + + @JsonProperty + private final String nullValue; + + HttpCSVOptions(HttpCSVOptionsBuilder builder) { + this.delimiter = builder.delimiter; + this.quote = builder.quote; + this.quoteEscape = builder.quoteEscape; + this.lineSeparator = builder.lineSeparator; + this.headerExtractionEnabled = builder.headerExtractionEnabled; + this.numberOfRowsToSkip = builder.numberOfRowsToSkip; + this.numberOfRecordsToRead = builder.numberOfRecordsToRead; + this.lineSeparatorDetectionEnabled = builder.lineSeparatorDetectionEnabled; + this.maxColumns = builder.maxColumns; + this.maxCharsPerColumn = builder.maxCharsPerColumn; + this.skipEmptyLines = builder.skipEmptyLines; + this.ignoreLeadingWhitespaces = 
builder.ignoreLeadingWhitespaces; + this.ignoreTrailingWhitespaces = builder.ignoreTrailingWhitespaces; + this.nullValue = builder.nullValue; + } + + public static HttpCSVOptionsBuilder builder() { + return new HttpCSVOptionsBuilder(); + } + + public String getDelimiter() { + return delimiter; + } + + public char getQuote() { + return quote; + } + + public char getQuoteEscape() { + return quoteEscape; + } + + public String getLineSeparator() { + return lineSeparator; + } + + public Boolean getHeaderExtractionEnabled() { + return headerExtractionEnabled; + } + + public long getNumberOfRowsToSkip() { + return numberOfRowsToSkip; + } + + public long getNumberOfRecordsToRead() { + return numberOfRecordsToRead; + } + + public boolean isLineSeparatorDetectionEnabled() { + return lineSeparatorDetectionEnabled; + } + + public int getMaxColumns() { + return maxColumns; + } + + public int getMaxCharsPerColumn() { + return maxCharsPerColumn; + } + + public boolean isSkipEmptyLines() { + return skipEmptyLines; + } + + public boolean isIgnoreLeadingWhitespaces() { + return ignoreLeadingWhitespaces; + } + + public boolean isIgnoreTrailingWhitespaces() { + return ignoreTrailingWhitespaces; + } + + public String getNullValue() { + return nullValue; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + HttpCSVOptions that = (HttpCSVOptions) o; + return quote == that.quote + && quoteEscape == that.quoteEscape + && numberOfRowsToSkip == that.numberOfRowsToSkip + && numberOfRecordsToRead == that.numberOfRecordsToRead + && lineSeparatorDetectionEnabled == that.lineSeparatorDetectionEnabled + && maxColumns == that.maxColumns && maxCharsPerColumn == that.maxCharsPerColumn + && skipEmptyLines == that.skipEmptyLines + && ignoreLeadingWhitespaces == that.ignoreLeadingWhitespaces + && ignoreTrailingWhitespaces == that.ignoreTrailingWhitespaces + && delimiter.equals(that.delimiter) + 
&& lineSeparator.equals(that.lineSeparator) + && Objects.equals(headerExtractionEnabled, that.headerExtractionEnabled) + && nullValue.equals(that.nullValue); + } + + @Override + public int hashCode() { + return Objects.hash(delimiter, quote, quoteEscape, lineSeparator, headerExtractionEnabled, + numberOfRowsToSkip, numberOfRecordsToRead, lineSeparatorDetectionEnabled, maxColumns, + maxCharsPerColumn, skipEmptyLines, ignoreLeadingWhitespaces, ignoreTrailingWhitespaces, + nullValue); + } + + @Override + public String toString() { + return new PlanStringBuilder(this) + .field("delimiter", delimiter) + .field("quote", quote) + .field("quoteEscape", quoteEscape) + .field("lineSeparator", lineSeparator) + .field("headerExtractionEnabled", headerExtractionEnabled) + .field("numberOfRowsToSkip", numberOfRowsToSkip) + .field("numberOfRecordsToRead", numberOfRecordsToRead) + .field("lineSeparatorDetectionEnabled", lineSeparatorDetectionEnabled) + .field("maxColumns", maxColumns) + .field("maxCharsPerColumn", maxCharsPerColumn) + .field("skipEmptyLines", skipEmptyLines) + .field("ignoreLeadingWhitespaces", ignoreLeadingWhitespaces) + .field("ignoreTrailingWhitespaces", ignoreTrailingWhitespaces) + .field("nullValue", nullValue) + .toString(); + } + + + @JsonPOJOBuilder(withPrefix = "") + public static class HttpCSVOptionsBuilder { + private String delimiter = ","; + private char quote = '"'; + + private char quoteEscape = '"'; + + private String lineSeparator = "\n"; + + private Boolean headerExtractionEnabled = null; + + private long numberOfRowsToSkip = 0; + + private long numberOfRecordsToRead = -1; + + private boolean lineSeparatorDetectionEnabled = true; + + private int maxColumns = 512; + + private int maxCharsPerColumn = 4096; + + private boolean skipEmptyLines = true; + + private boolean ignoreLeadingWhitespaces = true; + + private boolean ignoreTrailingWhitespaces = true; + + private String nullValue = null; + + + public HttpCSVOptionsBuilder delimiter(String 
delimiter) { + this.delimiter = delimiter; + return this; + } + + public HttpCSVOptionsBuilder quote(char quote) { + this.quote = quote; + return this; + } + + public HttpCSVOptionsBuilder quoteEscape(char quoteEscape) { + this.quoteEscape = quoteEscape; + return this; + } + + public HttpCSVOptionsBuilder lineSeparator(String lineSeparator) { + this.lineSeparator = lineSeparator; + return this; + } + + public HttpCSVOptionsBuilder headerExtractionEnabled(Boolean headerExtractionEnabled) { + this.headerExtractionEnabled = headerExtractionEnabled; + return this; + } + + public HttpCSVOptionsBuilder numberOfRowsToSkip(long numberOfRowsToSkip) { + this.numberOfRowsToSkip = numberOfRowsToSkip; + return this; + } + + public HttpCSVOptionsBuilder numberOfRecordsToRead(long numberOfRecordsToRead) { + this.numberOfRecordsToRead = numberOfRecordsToRead; + return this; + } + + public HttpCSVOptionsBuilder lineSeparatorDetectionEnabled(boolean lineSeparatorDetectionEnabled) { + this.lineSeparatorDetectionEnabled = lineSeparatorDetectionEnabled; + return this; + } + + public HttpCSVOptionsBuilder maxColumns(int maxColumns) { + this.maxColumns = maxColumns; + return this; + } + + public HttpCSVOptionsBuilder maxCharsPerColumn(int maxCharsPerColumn) { + this.maxCharsPerColumn = maxCharsPerColumn; + return this; + } + + public HttpCSVOptionsBuilder skipEmptyLines(boolean skipEmptyLines) { + this.skipEmptyLines = skipEmptyLines; + return this; + } + + public HttpCSVOptionsBuilder ignoreLeadingWhitespaces(boolean ignoreLeadingWhitespaces) { + this.ignoreLeadingWhitespaces = ignoreLeadingWhitespaces; + return this; + } + + public HttpCSVOptionsBuilder ignoreTrailingWhitespaces(boolean ignoreTrailingWhitespaces) { + this.ignoreTrailingWhitespaces = ignoreTrailingWhitespaces; + return this; + } + + public HttpCSVOptionsBuilder nullValue(String nullValue) { + this.nullValue = nullValue; + return this; + } + + + public HttpCSVOptions build() { + + return new HttpCSVOptions(this); + } + } 
+} diff --git a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpApiConfig.java b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpApiConfig.java new file mode 100644 index 0000000000..35e5f3bfa0 --- /dev/null +++ b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpApiConfig.java @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.drill.exec.store.http; + + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Charsets; +import com.google.common.io.Files; +import org.apache.drill.common.exceptions.UserException; +import org.apache.drill.common.util.DrillFileUtils; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class TestHttpApiConfig { + + private final ObjectMapper objectMapper = new ObjectMapper(); + + private static String EXAMPLE_HTTP_API_CONFIG_JSON; + + private static Map<String, String> EXAMPLE_HEADERS; + + @BeforeAll + public static void setup() throws Exception { + EXAMPLE_HTTP_API_CONFIG_JSON = Files.asCharSource( + DrillFileUtils.getResourceAsFile("/data/exampleHttpApiConfig.json"), Charsets.UTF_8 + ).read().trim(); + + EXAMPLE_HEADERS = new HashMap<>(); + EXAMPLE_HEADERS.put("Authorization", "Bearer token"); + } + + @Test + public void testBuilderDefaults() { + HttpApiConfig config = HttpApiConfig.builder().url("http://example.com").build(); + + assertEquals("http://example.com", config.url()); + assertEquals("GET", config.method()); + assertTrue(config.verifySSLCert()); + assertTrue(config.requireTail()); + assertEquals(HttpApiConfig.DEFAULT_INPUT_FORMAT, config.inputType()); + assertEquals("QUERY_STRING", config.getPostParameterLocation()); + assertEquals("none", config.authType()); + + assertNull(config.postBody()); + assertNull(config.headers()); + assertNull(config.params()); + assertNull(config.dataPath()); + assertNull(config.jsonOptions()); + assertNull(config.xmlOptions()); + 
assertNull(config.csvOptions()); + assertNull(config.paginator()); + assertNull(config.userName()); + assertNull(config.password()); + assertNull(config.credentialsProvider()); + } + + @Test + public void testBuilder() { + Map<String, String> headers = new HashMap<>(); + headers.put("Authorization", "Bearer token"); + + HttpApiConfig.HttpApiConfigBuilder builder = HttpApiConfig.builder() + .url("http://example.com") + .method("GET") + .postBody("testBody") + .postParameterLocation(HttpApiConfig.POST_BODY_POST_LOCATION) + .headers(headers) + .params(Arrays.asList("param1", "param2")) + .dataPath("/data/path") + .authType("none") + .inputType("json") + .limitQueryParam("limit") + .errorOn400(true) + .jsonOptions(null) + .xmlOptions(null) + .csvOptions(null) + .paginator(null) + .requireTail(true) + .verifySSLCert(true) + .caseSensitiveFilters(true); + + HttpApiConfig config = builder.build(); + + assertEquals("http://example.com", config.url()); + assertEquals("GET", config.method()); + assertEquals("testBody", config.postBody()); + assertEquals("POST_BODY", config.getPostParameterLocation()); + assertEquals(headers, config.headers()); + assertEquals(Arrays.asList("param1", "param2"), config.params()); + assertEquals("/data/path", config.dataPath()); + assertEquals("none", config.authType()); + assertEquals("json", config.inputType()); + assertEquals("limit", config.limitQueryParam()); + assertTrue(config.errorOn400()); + assertNull(config.jsonOptions()); + assertNull(config.xmlOptions()); + assertNull(config.csvOptions()); + assertNull(config.paginator()); + assertTrue(config.verifySSLCert()); + assertTrue(config.requireTail()); + assertTrue(config.caseSensitiveFilters()); + } + + @Test + public void testUserExceptionOnNoURL() { + HttpApiConfig config = HttpApiConfig.builder().url("http://example.com").build(); + + assertEquals("http://example.com", config.url()); + assertEquals("GET", config.method()); + assertTrue(config.verifySSLCert()); + 
assertTrue(config.requireTail()); + assertEquals(HttpApiConfig.DEFAULT_INPUT_FORMAT, config.inputType()); + assertEquals("QUERY_STRING", config.getPostParameterLocation()); + assertEquals("none", config.authType()); + + assertNull(config.postBody()); + assertNull(config.headers()); + assertNull(config.params()); + assertNull(config.dataPath()); + assertNull(config.jsonOptions()); + assertNull(config.xmlOptions()); + assertNull(config.csvOptions()); + assertNull(config.paginator()); + assertNull(config.userName()); + assertNull(config.password()); + assertNull(config.credentialsProvider()); + } + + @Test + public void testInvalidHttpMethod() { + String invalidMethod = "INVALID"; + + assertThrows(UserException.class, () -> { + HttpApiConfig.builder() + .method(invalidMethod) + .build(); + }); + } + + @Test + public void testErrorOnEmptyURL() { + + assertThrows(UserException.class, () -> { + HttpApiConfig.builder() + .url(null) + .build(); + }); + + assertThrows(UserException.class, () -> { + HttpApiConfig.builder() + .url("") + .build(); + }); + } + + @Test + public void testJSONSerialization() throws JsonProcessingException { + HttpApiConfig.HttpApiConfigBuilder builder = HttpApiConfig.builder() + .url("http://example.com") + .method("GET") + .postBody("testBody") + .postParameterLocation(HttpApiConfig.POST_BODY_POST_LOCATION) + .headers(EXAMPLE_HEADERS) + .params(Arrays.asList("param1", "param2")) + .dataPath("/data/path") + .authType("none") + .inputType("json") + .limitQueryParam("limit") + .errorOn400(true) + .jsonOptions(null) + .xmlOptions(null) + .csvOptions(null) + .paginator(null) + .requireTail(true) + .verifySSLCert(true) + .caseSensitiveFilters(true); + + HttpApiConfig config = builder.build(); + String json = objectMapper.writeValueAsString(config); + + assertEquals(EXAMPLE_HTTP_API_CONFIG_JSON, json); + } + + @Test + public void testJSONDeserialization() throws JsonProcessingException { + HttpApiConfig config = 
objectMapper.readValue(EXAMPLE_HTTP_API_CONFIG_JSON, + HttpApiConfig.class); + + assertEquals("http://example.com", config.url()); + assertEquals("GET", config.method()); + assertEquals("testBody", config.postBody()); + assertEquals("POST_BODY", config.getPostParameterLocation()); + assertEquals(EXAMPLE_HEADERS, config.headers()); + assertEquals(Arrays.asList("param1", "param2"), config.params()); + assertEquals("/data/path", config.dataPath()); + assertEquals("none", config.authType()); + assertEquals("json", config.inputType()); + assertEquals("limit", config.limitQueryParam()); + assertTrue(config.errorOn400()); + assertNull(config.jsonOptions()); + assertNull(config.xmlOptions()); + assertNull(config.csvOptions()); + assertNull(config.paginator()); + assertTrue(config.verifySSLCert()); + assertTrue(config.requireTail()); + assertTrue(config.caseSensitiveFilters()); + } +} diff --git a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpCSVOptions.java b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpCSVOptions.java new file mode 100644 index 0000000000..383bb0dacd --- /dev/null +++ b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpCSVOptions.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.drill.exec.store.http; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Charsets; +import com.google.common.io.Files; +import org.apache.drill.common.util.DrillFileUtils; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +public class TestHttpCSVOptions { + + private final ObjectMapper objectMapper = new ObjectMapper(); + + private static String CSV_OPTIONS_JSON; + + @BeforeAll + public static void setup() throws Exception { + CSV_OPTIONS_JSON = Files.asCharSource( + DrillFileUtils.getResourceAsFile("/data/csvOptions.json"), Charsets.UTF_8 + ).read().trim(); + } + + + @Test + void testBuilderDefaults() { + HttpCSVOptions options = HttpCSVOptions.builder().build(); + + assertEquals(",", options.getDelimiter()); + assertEquals('"', options.getQuote()); + assertEquals('"', options.getQuoteEscape()); + assertEquals("\n", options.getLineSeparator()); + assertNull(options.getHeaderExtractionEnabled()); + assertEquals(0, options.getNumberOfRowsToSkip()); + assertEquals(-1, options.getNumberOfRecordsToRead()); + assertTrue(options.isLineSeparatorDetectionEnabled()); + assertEquals(512, options.getMaxColumns()); + assertEquals(4096, options.getMaxCharsPerColumn()); + assertTrue(options.isSkipEmptyLines()); + assertTrue(options.isIgnoreLeadingWhitespaces()); + assertTrue(options.isIgnoreTrailingWhitespaces()); + assertNull(options.getNullValue()); + } + + @Test + void testBuilderOverride() { + HttpCSVOptions options = 
HttpCSVOptions.builder() + .delimiter(";") + .quote('\'') + .quoteEscape('\\') + .lineSeparator("\r\n") + .headerExtractionEnabled(false) + .numberOfRowsToSkip(5) + .numberOfRecordsToRead(10) + .lineSeparatorDetectionEnabled(false) + .maxColumns(1024) + .maxCharsPerColumn(8192) + .skipEmptyLines(false) + .ignoreLeadingWhitespaces(false) + .ignoreTrailingWhitespaces(false) + .nullValue("NULL") + .build(); + + assertEquals(";", options.getDelimiter()); + assertEquals('\'', options.getQuote()); + assertEquals('\\', options.getQuoteEscape()); + assertEquals("\r\n", options.getLineSeparator()); + assertFalse(options.getHeaderExtractionEnabled()); + assertEquals(5, options.getNumberOfRowsToSkip()); + assertEquals(10, options.getNumberOfRecordsToRead()); + assertFalse(options.isLineSeparatorDetectionEnabled()); + assertEquals(1024, options.getMaxColumns()); + assertEquals(8192, options.getMaxCharsPerColumn()); + assertFalse(options.isSkipEmptyLines()); + assertFalse(options.isIgnoreLeadingWhitespaces()); + assertFalse(options.isIgnoreTrailingWhitespaces()); + assertEquals("NULL", options.getNullValue()); + } + + @Test + void testJSONSerialization() throws Exception { + HttpCSVOptions options = HttpCSVOptions.builder() + .delimiter(";") + .quote('\'') + .quoteEscape('\\') + .lineSeparator("\r\n") + .headerExtractionEnabled(false) + .numberOfRowsToSkip(5) + .numberOfRecordsToRead(10) + .lineSeparatorDetectionEnabled(false) + .maxColumns(1024) + .maxCharsPerColumn(8192) + .skipEmptyLines(false) + .ignoreLeadingWhitespaces(false) + .ignoreTrailingWhitespaces(false) + .nullValue("NULL") + .build(); + + String json = objectMapper.writeValueAsString(options); + + assertNotNull(json); + assertEquals(CSV_OPTIONS_JSON, json); + } + + @Test + public void testJSONDeserialization() throws JsonProcessingException { + HttpCSVOptions options = objectMapper.readValue(CSV_OPTIONS_JSON, HttpCSVOptions.class); + + assertEquals(";", options.getDelimiter()); + assertEquals('\'', 
options.getQuote()); + assertEquals('\\', options.getQuoteEscape()); + assertEquals("\r\n", options.getLineSeparator()); + assertNull(options.getHeaderExtractionEnabled()); + assertEquals(5, options.getNumberOfRowsToSkip()); + assertEquals(10, options.getNumberOfRecordsToRead()); + assertTrue(options.isLineSeparatorDetectionEnabled()); + assertEquals(1024, options.getMaxColumns()); + assertEquals(8192, options.getMaxCharsPerColumn()); + assertTrue(options.isSkipEmptyLines()); + assertTrue(options.isIgnoreLeadingWhitespaces()); + assertTrue(options.isIgnoreTrailingWhitespaces()); + assertEquals("NULL", options.getNullValue()); + + } + +} diff --git a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpPlugin.java b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpPlugin.java index 4ef5e45ee2..9c66855fb7 100644 --- a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpPlugin.java +++ b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpPlugin.java @@ -79,6 +79,8 @@ public class TestHttpPlugin extends ClusterTest { private static String TEST_XML_RESPONSE; private static String TEST_JSON_RESPONSE_WITH_DATATYPES; + private static String TEST_TSV_RESPONSE; + public static String makeUrl(String url) { return String.format(url, MOCK_SERVER_PORT); } @@ -92,6 +94,7 @@ public class TestHttpPlugin extends ClusterTest { TEST_CSV_RESPONSE = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/response.csv"), Charsets.UTF_8).read(); TEST_XML_RESPONSE = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/response.xml"), Charsets.UTF_8).read(); TEST_JSON_RESPONSE_WITH_DATATYPES = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/response2.json"), Charsets.UTF_8).read(); + TEST_TSV_RESPONSE = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/response.tsv"), Charsets.UTF_8).read(); dirTestWatcher.copyResourceToRoot(Paths.get("data/")); 
makeEnhancedLiveConfig(); @@ -611,6 +614,22 @@ public class TestHttpPlugin extends ClusterTest { .inputType("csv") .build(); + HttpCSVOptions tsvOptions = HttpCSVOptions.builder() + .delimiter("\t") + .quote('"') + .build(); + HttpApiConfig mockTsvConfig = HttpApiConfig.builder() + .url(makeUrl("http://localhost:%d/csv")) + .method("GET") + .headers(headers) + .authType("basic") + .userName("user") + .password("pass") + .dataPath("results") + .inputType("csv") + .csvOptions(tsvOptions) + .build(); + HttpApiConfig mockCsvConfigWithPaginator = HttpApiConfig.builder() .url(makeUrl("http://localhost:%d/csv")) .method("get") @@ -715,6 +734,7 @@ public class TestHttpPlugin extends ClusterTest { configs.put("mockJsonNullBodyPost", mockJsonNullBodyPost); configs.put("mockPostPushdown", mockPostPushdown); configs.put("mockPostPushdownWithStaticParams", mockPostPushdownWithStaticParams); + configs.put("mocktsv", mockTsvConfig); configs.put("mockcsv", mockCsvConfig); configs.put("mockxml", mockXmlConfig); configs.put("mockxml_with_schema", mockXmlConfigWithSchema); @@ -756,6 +776,7 @@ public class TestHttpPlugin extends ClusterTest { .addRow("local", "http") .addRow("local.mockcsv", "http") .addRow("local.mockpost", "http") + .addRow("local.mocktsv", "http") .addRow("local.mockxml", "http") .addRow("local.mockxml_with_schema", "http") .addRow("local.nullpost", "http") @@ -1676,6 +1697,36 @@ public class TestHttpPlugin extends ClusterTest { } } + @Test + public void testTsvResponse() throws Exception { + String sql = "SELECT * FROM local.mocktsv.`tsv?arg1=4`"; + try (MockWebServer server = startServer()) { + + server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_TSV_RESPONSE)); + + RowSet results = client.queryBuilder().sql(sql).rowSet(); + + TupleMetadata expectedSchema = new SchemaBuilder() + .add("col1", TypeProtos.MinorType.VARCHAR, TypeProtos.DataMode.OPTIONAL) + .add("col2", TypeProtos.MinorType.VARCHAR, TypeProtos.DataMode.OPTIONAL) + .add("col3", 
TypeProtos.MinorType.VARCHAR, TypeProtos.DataMode.OPTIONAL) + .build(); + + RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema) + .addRow("1", "2", "3") + .addRow("4", "5", "6") + .build(); + + RowSetUtilities.verify(expected, results); + + // Verify correct username/password from endpoint configuration + RecordedRequest recordedRequest = server.takeRequest(); + assertNotNull(recordedRequest.getHeader("Authorization")); + assertEquals("Basic dXNlcjpwYXNz", recordedRequest.getHeader("Authorization")); + } + } + + @Test public void testCsvResponseWithEnhancedMode() throws Exception { String sql = "SELECT * FROM local2.mockcsv.`csv?arg1=4`"; diff --git a/contrib/storage-http/src/test/resources/data/csvOptions.json b/contrib/storage-http/src/test/resources/data/csvOptions.json new file mode 100644 index 0000000000..6fe17c1f22 --- /dev/null +++ b/contrib/storage-http/src/test/resources/data/csvOptions.json @@ -0,0 +1 @@ +{"delimiter":";","quote":"'","quoteEscape":"\\","lineSeparator":"\r\n","numberOfRowsToSkip":5,"numberOfRecordsToRead":10,"maxColumns":1024,"maxCharsPerColumn":8192,"nullValue":"NULL"} diff --git a/contrib/storage-http/src/test/resources/data/exampleHttpApiConfig.json b/contrib/storage-http/src/test/resources/data/exampleHttpApiConfig.json new file mode 100644 index 0000000000..a72c9cc937 --- /dev/null +++ b/contrib/storage-http/src/test/resources/data/exampleHttpApiConfig.json @@ -0,0 +1 @@ +{"url":"http://example.com","requireTail":true,"method":"GET","postBody":"testBody","headers":{"Authorization":"Bearer token"},"params":["param1","param2"],"dataPath":"/data/path","authType":"none","inputType":"json","xmlDataLevel":1,"limitQueryParam":"limit","postParameterLocation":"POST_BODY","errorOn400":true,"caseSensitiveFilters":true,"verifySSLCert":true} diff --git a/contrib/storage-http/src/test/resources/data/response.tsv b/contrib/storage-http/src/test/resources/data/response.tsv new file mode 100644 index 0000000000..bf29da1dd8 --- 
/dev/null +++ b/contrib/storage-http/src/test/resources/data/response.tsv @@ -0,0 +1,3 @@ +"col1" "col2" "col3" +"1" "2" "3" +"4" "5" "6"