This is an automated email from the ASF dual-hosted git repository.
cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new cc6715e37f DRILL-8327: GoogleSheets not Reporting Schemata to
Info_Schema (#2667)
cc6715e37f is described below
commit cc6715e37f766548a13c79e2f82ba81b43a1bc61
Author: Charles S. Givre <[email protected]>
AuthorDate: Thu Oct 6 07:47:57 2022 -0400
DRILL-8327: GoogleSheets not Reporting Schemata to Info_Schema (#2667)
---
contrib/storage-googlesheets/.gitignore | 1 +
contrib/storage-googlesheets/README.md | 17 +++++
contrib/storage-googlesheets/pom.xml | 13 +++-
.../exec/store/googlesheets/DrillDataStore.java | 10 ++-
.../googlesheets/GoogleSheetsBatchReader.java | 6 +-
.../googlesheets/GoogleSheetsStoragePlugin.java | 32 ++++++++
.../schema/GoogleSheetsDrillSchema.java | 76 ++++++++++---------
.../schema/GoogleSheetsRootSchema.java | 42 ++++-------
.../schema/GoogleSheetsSchemaFactory.java | 3 +-
.../googlesheets/utils/GoogleSheetsUtils.java | 86 +++++++++++++++++++++-
.../main/resources/bootstrap-storage-plugins.json | 4 +-
.../googlesheets/TestGoogleSheetsQueries.java | 29 ++++++++
12 files changed, 240 insertions(+), 79 deletions(-)
diff --git a/contrib/storage-googlesheets/.gitignore
b/contrib/storage-googlesheets/.gitignore
index 9f011a5b9e..af2dd8e869 100644
--- a/contrib/storage-googlesheets/.gitignore
+++ b/contrib/storage-googlesheets/.gitignore
@@ -1,2 +1,3 @@
# Directory to store oauth tokens for testing Googlesheets Storage plugin
src/test/resources/tokens/*
+/src/test/resources/logback-test.xml
diff --git a/contrib/storage-googlesheets/README.md
b/contrib/storage-googlesheets/README.md
index 14a367ebe6..4bba995140 100644
--- a/contrib/storage-googlesheets/README.md
+++ b/contrib/storage-googlesheets/README.md
@@ -92,6 +92,23 @@ FROM <plugin name>.<sheet ID>.<tab name>
```
Note that you must specify the tab name to successfully query Google Sheets.
+### Metadata
+You can obtain a list of available sheets by querying the `INFORMATION_SCHEMA`
as shown below. Assuming that you have a connection to Google Sheets called
`googlesheets`:
+
+```sql
+SELECT *
+FROM `INFORMATION_SCHEMA`.`SCHEMATA`
+WHERE SCHEMA_NAME LIKE 'googlesheets%'
+```
+
+Due to rate limits from Google, the tabs are not reported to the
`INFORMATION_SCHEMA`. However, it is possible to obtain a list of all
available tabs with the following query:
+
+```sql
+SELECT _sheets
+FROM googlesheets.`<token>`.`<sheet>`
+LIMIT 1
+```
+
### Using Aliases
Since the sheet IDs from Google are not human readable, one way to make your
life easier is to use Drill's aliasing features to provide a better name for
the actual sheet name.
diff --git a/contrib/storage-googlesheets/pom.xml
b/contrib/storage-googlesheets/pom.xml
index 1d7f8809ef..bc6ce511f1 100644
--- a/contrib/storage-googlesheets/pom.xml
+++ b/contrib/storage-googlesheets/pom.xml
@@ -43,7 +43,7 @@
<dependency>
<groupId>com.google.api-client</groupId>
<artifactId>google-api-client</artifactId>
- <version>1.35.2</version>
+ <version>2.0.0</version>
<exclusions>
<exclusion>
<groupId>commons-logging</groupId>
@@ -54,7 +54,7 @@
<dependency>
<groupId>com.google.api-client</groupId>
<artifactId>google-api-client-jackson2</artifactId>
- <version>1.35.2</version>
+ <version>2.0.0</version>
</dependency>
<dependency>
<groupId>com.google.oauth-client</groupId>
@@ -64,12 +64,17 @@
<dependency>
<groupId>com.google.auth</groupId>
<artifactId>google-auth-library-oauth2-http</artifactId>
- <version>1.8.0</version>
+ <version>1.11.0</version>
</dependency>
<dependency>
<groupId>com.google.apis</groupId>
<artifactId>google-api-services-sheets</artifactId>
- <version>v4-rev20220606-1.32.1</version>
+ <version>v4-rev20220620-2.0.0</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.apis</groupId>
+ <artifactId>google-api-services-drive</artifactId>
+ <version>v3-rev20220815-2.0.0</version>
</dependency>
<dependency>
diff --git
a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/DrillDataStore.java
b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/DrillDataStore.java
index ec147de0b4..5ef932e494 100644
---
a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/DrillDataStore.java
+++
b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/DrillDataStore.java
@@ -30,7 +30,6 @@ import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
-import java.util.Arrays;
public class DrillDataStore<V extends Serializable> extends
AbstractMemoryDataStore<V> {
@@ -47,7 +46,6 @@ public class DrillDataStore<V extends Serializable> extends
AbstractMemoryDataSt
if (hasValidTokens(tokenTable)) {
keyValueMap.put(tokenTable.ACCESS_TOKEN_KEY,
tokenTable.getAccessToken().getBytes(StandardCharsets.UTF_8));
keyValueMap.put(tokenTable.REFRESH_TOKEN_KEY,
tokenTable.getRefreshToken().getBytes(StandardCharsets.UTF_8));
-
if (tokenTable.getExpiresIn() != null) {
keyValueMap.put(tokenTable.EXPIRES_IN_KEY,
tokenTable.getExpiresIn().getBytes(StandardCharsets.UTF_8));
}
@@ -60,8 +58,12 @@ public class DrillDataStore<V extends Serializable> extends
AbstractMemoryDataSt
@Override
public void save() {
logger.debug("Saving credentials to token table");
-
tokenTable.setAccessToken(Arrays.toString(keyValueMap.get(tokenTable.ACCESS_TOKEN_KEY)));
-
tokenTable.setRefreshToken(Arrays.toString(keyValueMap.get(tokenTable.REFRESH_TOKEN_KEY)));
+ String accessToken = new
String(keyValueMap.get(tokenTable.ACCESS_TOKEN_KEY), StandardCharsets.UTF_8);
+ String refreshToken = new
String(keyValueMap.get(tokenTable.REFRESH_TOKEN_KEY), StandardCharsets.UTF_8);
+ String expiresIn = new String(keyValueMap.get(tokenTable.EXPIRES_IN_KEY),
StandardCharsets.UTF_8);
+ tokenTable.setAccessToken(accessToken);
+ tokenTable.setRefreshToken(refreshToken);
+ tokenTable.setExpiresIn(expiresIn);
}
/**
diff --git
a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsBatchReader.java
b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsBatchReader.java
index 367c32eadf..48020f355d 100644
---
a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsBatchReader.java
+++
b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsBatchReader.java
@@ -90,8 +90,8 @@ public class GoogleSheetsBatchReader implements
ManagedReader<SchemaNegotiator>
this.sheetID = subScan.getScanSpec().getSheetID();
this.sheetNames = new ArrayList<>();
try {
- List<Sheet> sheetList = GoogleSheetsUtils.getSheetList(service, sheetID);
- this.sheet = sheetList.get(subScan.getScanSpec().getTabIndex());
+ List<Sheet> tabList = GoogleSheetsUtils.getTabList(service, sheetID);
+ this.sheet = tabList.get(subScan.getScanSpec().getTabIndex());
} catch (IOException e) {
throw UserException.connectionError(e)
.message("Could not find tab with index " +
subScan.getScanSpec().getTabIndex())
@@ -124,7 +124,7 @@ public class GoogleSheetsBatchReader implements
ManagedReader<SchemaNegotiator>
columnMap =
GoogleSheetsUtils.getColumnMap(GoogleSheetsUtils.getFirstRows(service,
pluginName, tableName), projectedColumns, config.allTextMode());
// Get sheet list for metadata.
- List<Sheet> sheetList = GoogleSheetsUtils.getSheetList(service,
pluginName);
+ List<Sheet> sheetList = GoogleSheetsUtils.getTabList(service,
pluginName);
for (Sheet sheet : sheetList) {
sheetNames.add(sheet.getProperties().getTitle());
}
diff --git
a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsStoragePlugin.java
b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsStoragePlugin.java
index 379f610b16..b8670d7b04 100644
---
a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsStoragePlugin.java
+++
b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsStoragePlugin.java
@@ -21,6 +21,7 @@ package org.apache.drill.exec.store.googlesheets;
import com.fasterxml.jackson.core.type.TypeReference;
import com.google.api.client.auth.oauth2.StoredCredential;
import com.google.api.client.util.store.DataStore;
+import com.google.api.services.drive.Drive;
import com.google.api.services.sheets.v4.Sheets;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.schema.SchemaPlus;
@@ -61,6 +62,7 @@ public class GoogleSheetsStoragePlugin extends
AbstractStoragePlugin {
private final OAuthTokenProvider tokenProvider;
private DataStore<StoredCredential> dataStore;
private Sheets service;
+ private Drive driveService;
private TokenRegistry tokenRegistry;
private String username;
@@ -216,4 +218,34 @@ public class GoogleSheetsStoragePlugin extends
AbstractStoragePlugin {
}
}
}
+
+ /**
+ * This method gets (and caches) the Google Drive Service needed for mapping
Google Sheet names
+ * to file tokens.
+ * @param queryUser A {@link String} of the current query user.
+ * @return A validated and authenticated {@link Drive} instance.
+ */
+ public Drive getDriveService(String queryUser) {
+ if (driveService != null && dataStore != null) {
+ return driveService;
+ } else {
+ // Check if datastore is null and initialize if so.
+ if (dataStore == null) {
+ this.dataStore = getDataStore(queryUser);
+ }
+
+ try {
+ if (config.getAuthMode() == AuthMode.USER_TRANSLATION) {
+ driveService = GoogleSheetsUtils.getDriveService(config, dataStore,
queryUser);
+ } else {
+ driveService = GoogleSheetsUtils.getDriveService(config, dataStore,
SHARED_USERNAME);
+ }
+ return driveService;
+ } catch (IOException | GeneralSecurityException e) {
+ throw UserException.connectionError(e)
+ .message("Error connecting to Google Drive Service: " +
e.getMessage())
+ .build(logger);
+ }
+ }
+ }
}
diff --git
a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsDrillSchema.java
b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsDrillSchema.java
index 531337ff87..d84132f654 100644
---
a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsDrillSchema.java
+++
b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsDrillSchema.java
@@ -18,8 +18,8 @@
package org.apache.drill.exec.store.googlesheets.schema;
+import com.google.api.services.sheets.v4.Sheets;
import com.google.api.services.sheets.v4.model.Sheet;
-import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.schema.Table;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.map.CaseInsensitiveMap;
@@ -34,9 +34,11 @@ import
org.apache.drill.exec.store.googlesheets.GoogleSheetsScanSpec;
import org.apache.drill.exec.store.googlesheets.GoogleSheetsStoragePlugin;
import
org.apache.drill.exec.store.googlesheets.GoogleSheetsStoragePluginConfig;
import org.apache.drill.exec.store.googlesheets.GoogleSheetsWriter;
+import org.apache.drill.exec.store.googlesheets.utils.GoogleSheetsUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
@@ -50,37 +52,21 @@ public class GoogleSheetsDrillSchema extends AbstractSchema
{
private final Map<String, DynamicDrillTable> activeTables =
CaseInsensitiveMap.newHashMap();
private final GoogleSheetsStoragePlugin plugin;
-
+ private final Sheets sheetsService;
private final SchemaConfig schemaConfig;
+ private final GoogleSheetsRootSchema parent;
+ private final String fileToken;
- public GoogleSheetsDrillSchema(AbstractSchema parent, String name,
+ public GoogleSheetsDrillSchema(AbstractSchema parent, String fileToken,
GoogleSheetsStoragePlugin plugin,
- List<Sheet> subSchemas, SchemaConfig
schemaConfig) {
- super(parent.getSchemaPath(), name);
+ SchemaConfig schemaConfig,
+ Sheets sheetsService) {
+ super(parent.getSchemaPath(), fileToken);
this.plugin = plugin;
this.schemaConfig = schemaConfig;
-
- // Add sub schemas to list, then create tables
- for (Sheet sheet : subSchemas) {
- registerTable(sheet.getProperties().getTitle(),
- new DynamicDrillTable(plugin, plugin.getName(),
- new GoogleSheetsScanSpec(
- name,
- (GoogleSheetsStoragePluginConfig) plugin.getConfig(),
- sheet.getProperties().getTitle(),
- plugin.getName(),
- subSchemas.indexOf(sheet))
- )
- );
- }
- }
-
- public void setHolder(SchemaPlus plusOfThis) {
- for (String s : getSubSchemaNames()) {
- GoogleSheetsDrillSchema inner = getSubSchema(s);
- SchemaPlus holder = plusOfThis.add(s, inner);
- inner.setHolder(holder);
- }
+ this.fileToken = fileToken;
+ this.parent = (GoogleSheetsRootSchema) parent;
+ this.sheetsService = sheetsService;
}
@Override
@@ -90,6 +76,11 @@ public class GoogleSheetsDrillSchema extends AbstractSchema {
@Override
public Table getTable(String tableName) {
+ // If the tables map is empty, populate it
+ if (activeTables.isEmpty() &&
GoogleSheetsUtils.isProbableFileToken(fileToken)) {
+ populateActiveTables();
+ }
+
logger.debug("Getting table: {}", tableName);
DynamicDrillTable table = activeTables.computeIfAbsent(tableName,
this::getDrillTable);
if (table != null) {
@@ -108,12 +99,7 @@ public class GoogleSheetsDrillSchema extends AbstractSchema
{
@Override
public Set<String> getTableNames() {
- return activeTables.keySet();
- }
-
- @Override
- public GoogleSheetsDrillSchema getSubSchema(String name) {
- return null;
+ return Collections.emptySet();
}
@Override
@@ -121,6 +107,30 @@ public class GoogleSheetsDrillSchema extends
AbstractSchema {
return plugin.supportsWrite();
}
+ private void populateActiveTables() {
+ List<Sheet> tabList;
+ try {
+ tabList = GoogleSheetsUtils.getTabList(sheetsService, fileToken);
+ } catch (IOException e) {
+ throw UserException.connectionError(e)
+ .message("Unable to obtain tab list for Google Sheet document " +
fileToken)
+ .build(logger);
+ }
+ // Add sub schemas to list, then create tables
+ for (Sheet sheet : tabList) {
+ registerTable(sheet.getProperties().getTitle(),
+ new DynamicDrillTable(plugin, plugin.getName(),
+ new GoogleSheetsScanSpec(this.fileToken,
+ (GoogleSheetsStoragePluginConfig) plugin.getConfig(),
+ sheet.getProperties().getTitle(),
+ plugin.getName(),
+ tabList.indexOf(sheet)
+ )
+ )
+ );
+ }
+ }
+
@Override
public CreateTableEntry createNewTable(String tableName,
List<String> partitionColumns,
diff --git
a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsRootSchema.java
b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsRootSchema.java
index 494a08c455..01b5cfa45d 100644
---
a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsRootSchema.java
+++
b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsRootSchema.java
@@ -18,10 +18,10 @@
package org.apache.drill.exec.store.googlesheets.schema;
+import com.google.api.services.drive.Drive;
import com.google.api.services.sheets.v4.Sheets;
-import com.google.api.services.sheets.v4.model.Sheet;
-import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.schema.Table;
+import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.exec.planner.logical.DynamicDrillTable;
import org.apache.drill.exec.store.AbstractSchema;
import org.apache.drill.exec.store.SchemaConfig;
@@ -32,10 +32,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
-import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
-import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -44,53 +42,39 @@ public class GoogleSheetsRootSchema extends AbstractSchema {
private final Map<String, DynamicDrillTable> activeTables = new HashMap<>();
private final Map<String, GoogleSheetsDrillSchema> schemas = new HashMap<>();
-
- private List<Sheet> sheetList = new ArrayList<>();
private final GoogleSheetsStoragePlugin plugin;
private final SchemaConfig schemaConfig;
-
+ private final Map<String, String> tokenMap;
public GoogleSheetsRootSchema(GoogleSheetsStoragePlugin plugin, SchemaConfig
schemaConfig) {
super(Collections.emptyList(), plugin.getName());
this.schemaConfig = schemaConfig;
this.plugin = plugin;
- }
-
- void setHolder(SchemaPlus plusOfThis) {
- for (String s : getSubSchemaNames()) {
- GoogleSheetsDrillSchema inner = getSubSchema(s);
- SchemaPlus holder = plusOfThis.add(s, inner);
- inner.setHolder(holder);
+ Drive driveService = plugin.getDriveService(schemaConfig.getUserName());
+ try {
+ this.tokenMap = GoogleSheetsUtils.getTokenToNameMap(driveService);
+ } catch (IOException e) {
+ throw UserException.internalError()
+ .message("Could not connect to Drive service: " + e.getMessage())
+ .build(logger);
}
}
@Override
public Set<String> getSubSchemaNames() {
- return schemas.keySet();
+ return tokenMap.keySet();
}
@Override
- public GoogleSheetsDrillSchema getSubSchema(String name) {
+ public AbstractSchema getSubSchema(String name) {
GoogleSheetsDrillSchema schema = schemas.get(name);
// This level here represents the actual Google document. Attempt to
validate that it exists, and
// if so, add it to the schema list. If not, throw an exception.
- //
- // TODO In the future, we could add a check here to see whether the user
has the DRIVE permission, and if so,
- // retrieve the actual "file" name to use in the query instead of the
non-readable ID.
if (schema == null) {
Sheets service = plugin.getSheetsService(schemaConfig.getUserName());
- try {
- // This is needed for stored credentials. In theory while we aren't
impersonating the user
- // we are storing separate access tokens for each user.
- logger.debug("Accessing credentials for {}",
schemaConfig.getUserName());
-
- sheetList = GoogleSheetsUtils.getSheetList(service, name);
- } catch (IOException e) {
- // Do nothing
- }
// At this point we know we have a valid sheet because we obtained the
Sheet list, so we need to
// add the schema to the schemas list and return it.
- schema = new GoogleSheetsDrillSchema(this, name, plugin, sheetList,
schemaConfig);
+ schema = new GoogleSheetsDrillSchema(this, name, plugin, schemaConfig,
service);
schemas.put(name, schema);
}
return schema;
diff --git
a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsSchemaFactory.java
b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsSchemaFactory.java
index 8835fcbdf7..bf4450e173 100644
---
a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsSchemaFactory.java
+++
b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsSchemaFactory.java
@@ -35,7 +35,6 @@ public class GoogleSheetsSchemaFactory extends
AbstractSchemaFactory {
@Override
public void registerSchemas(SchemaConfig schemaConfig, SchemaPlus parent) {
GoogleSheetsRootSchema schema = new GoogleSheetsRootSchema(plugin,
schemaConfig);
- SchemaPlus holder = parent.add(getName(), schema);
- schema.setHolder(holder);
+ parent.add(getName(), schema);
}
}
diff --git
a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/utils/GoogleSheetsUtils.java
b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/utils/GoogleSheetsUtils.java
index a121568f9f..971c149c73 100644
---
a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/utils/GoogleSheetsUtils.java
+++
b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/utils/GoogleSheetsUtils.java
@@ -28,6 +28,9 @@ import
com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.gson.GsonFactory;
import com.google.api.client.util.store.DataStore;
+import com.google.api.services.drive.Drive;
+import com.google.api.services.drive.model.File;
+import com.google.api.services.drive.model.FileList;
import com.google.api.services.sheets.v4.Sheets;
import com.google.api.services.sheets.v4.Sheets.Spreadsheets.Values.BatchGet;
import com.google.api.services.sheets.v4.SheetsScopes;
@@ -63,6 +66,7 @@ import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.regex.Pattern;
import static com.google.api.client.util.Strings.isNullOrEmpty;
@@ -73,6 +77,7 @@ public class GoogleSheetsUtils {
private static final int SAMPLE_SIZE = 5;
private static final JsonFactory JSON_FACTORY =
GsonFactory.getDefaultInstance();
private static final String UNKNOWN_HEADER = "field_";
+ private static final String APPLICATION_NAME = "Drill";
/**
* Represents the possible data types found in a GoogleSheets document
@@ -199,10 +204,86 @@ public class GoogleSheetsUtils {
Credential credential = GoogleSheetsUtils.authorize(config, dataStore,
queryUser);
return new Sheets.Builder(
GoogleNetHttpTransport.newTrustedTransport(),
GsonFactory.getDefaultInstance(), credential)
- .setApplicationName("Drill")
+ .setApplicationName(APPLICATION_NAME)
.build();
}
+ /** Returns an authenticated {@link Drive} service.
+ * @param config The {@link GoogleSheetsStoragePluginConfig} for the plugin
+ * @param dataStore A {@link DrillDataStore} for the stored credentials
+ * @param queryUser A {@link String} containing the current query user.
+ * @return An authenticated {@link Drive} service.
+ * @throws IOException If anything goes wrong throw an IOException
+ * @throws GeneralSecurityException If the creds are invalid or
+ */
+ public static Drive getDriveService(GoogleSheetsStoragePluginConfig config,
+ DataStore<StoredCredential> dataStore,
+ String queryUser) throws IOException,
GeneralSecurityException {
+ Credential credential = GoogleSheetsUtils.authorize(config, dataStore,
queryUser);
+ return new Drive.Builder(
+ GoogleNetHttpTransport.newTrustedTransport(),
GsonFactory.getDefaultInstance(), credential)
+ .setApplicationName(APPLICATION_NAME)
+ .build();
+ }
+
+ /**
+ * In GoogleSheets, the file is uniquely identified by a non-human readable
token. The Sheets SDK does
+ * not provide a means to map tokens to file names. To do this, we need to
use the Google Drive SDK.
+ *
+ * Google Drive's concept of folders is more similar to that of S3 which
doesn't really have folders or
+ * directories. So the user is not able to include any file paths in the
query string.
+ *
+ * More importantly however, is that Google Drive allows duplicate file
names, even within the same directory.
+ * Thus, it is entirely possible to have an entire directory of files with
the same name. The tokens for these files
+ * will be different, but the human-readable names can be the same. This
creates an obvious problem for Drill as we
+ * need unique file names to include in a query.
+ *
+ * @param driveService An authenticated {@link Drive} service.
+ * @return A {@link HashMap} containing the tokens as keys and the file
names as values.
+ * @throws IOException If anything goes wrong, throw an IOException.
+ */
+ public static Map<String,String> getTokenToNameMap(Drive driveService)
throws IOException {
+ Map<String, String> sheetMapping = new HashMap<>();
+ String pageToken = null;
+
+ do {
+ FileList result = driveService.files().list()
+ .setQ("mimeType='application/vnd.google-apps.spreadsheet'")
+ .setSpaces("drive")
+ .setPageToken(pageToken)
+ .execute();
+
+ for (File file : result.getFiles()) {
+ sheetMapping.put(file.getId(), file.getName());
+ }
+
+ pageToken = result.getNextPageToken();
+ } while (pageToken != null);
+
+ return sheetMapping;
+ }
+
+ /**
+ * Google Sheets tokens are strings of length 44 that contain upper and
lower case letters, numbers and underscores.
+ * This function will attempt to identify file tokens.
+ *
+ * Given that Google's spec for file IDs is not officially published, and
can change at any time, we will keep the
+ * validation as light as possible to prevent future issues, in the event
Google changes their file Id structure.
+ * @param id A {@link String} containing an unknown identifier
+ * @return True if the string is a file probable file token, false if not.
+ */
+ public static boolean isProbableFileToken(String id) {
+ logger.debug("Checking token {}", id);
+ if (StringUtils.isEmpty(id)) {
+ return false;
+ } else if (id.length() != 44) {
+ return false;
+ } else {
+ Pattern pattern = Pattern.compile("[0-9][a-zA-Z0-9_-]{43}");
+ return pattern.matcher(id).find();
+ }
+ }
+
/**
* Returns a list of the titles of the available spreadsheets within a given
Google sheet.
* @param service The Google Sheets service
@@ -210,7 +291,8 @@ public class GoogleSheetsUtils {
* @return A list of spreadsheet names within a given Google Sheet
* @throws IOException If the Google sheet is unreachable or invalid.
*/
- public static List<Sheet> getSheetList(Sheets service, String sheetID)
throws IOException {
+ public static List<Sheet> getTabList(Sheets service, String sheetID) throws
IOException {
+ logger.debug("Getting tabs for: {}", sheetID);
Spreadsheet spreadsheet = service.spreadsheets().get(sheetID).execute();
return spreadsheet.getSheets();
}
diff --git
a/contrib/storage-googlesheets/src/main/resources/bootstrap-storage-plugins.json
b/contrib/storage-googlesheets/src/main/resources/bootstrap-storage-plugins.json
index 92d9eb4703..73569bd7ad 100644
---
a/contrib/storage-googlesheets/src/main/resources/bootstrap-storage-plugins.json
+++
b/contrib/storage-googlesheets/src/main/resources/bootstrap-storage-plugins.json
@@ -9,7 +9,7 @@
"authorizationURL": "https://accounts.google.com/o/oauth2/auth",
"authorizationParams": {
"response_type": "code",
- "scope": "https://www.googleapis.com/auth/spreadsheets"
+ "scope": "https://www.googleapis.com/auth/spreadsheets
https://www.googleapis.com/auth/drive.readonly
https://www.googleapis.com/auth/drive.metadata.readonly"
}
},
"credentialsProvider": {
@@ -17,7 +17,7 @@
"credentials": {
"clientID": "<YOUR CLIENT ID>",
"clientSecret": "<YOUR CLIENT SECRET>",
- "tokenURI": "https://oauth2.googleapis.com/token
https://www.googleapis.com/auth/drive.readonly"
+ "tokenURI": "https://oauth2.googleapis.com/token"
},
"userCredentials": {}
},
diff --git
a/contrib/storage-googlesheets/src/test/java/org/apache/drill/exec/store/googlesheets/TestGoogleSheetsQueries.java
b/contrib/storage-googlesheets/src/test/java/org/apache/drill/exec/store/googlesheets/TestGoogleSheetsQueries.java
index bfc529a106..1f1d8be142 100644
---
a/contrib/storage-googlesheets/src/test/java/org/apache/drill/exec/store/googlesheets/TestGoogleSheetsQueries.java
+++
b/contrib/storage-googlesheets/src/test/java/org/apache/drill/exec/store/googlesheets/TestGoogleSheetsQueries.java
@@ -46,6 +46,7 @@ import java.util.Map;
import static org.apache.drill.test.rowSet.RowSetUtilities.strArray;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
/**
@@ -133,6 +134,34 @@ public class TestGoogleSheetsQueries extends ClusterTest {
new RowSetComparison(expected).verifyAndClearAll(results);
}
+ @Test
+ public void testSchemataInformationSchema() throws Exception {
+ try {
+ initializeTokens("googlesheets");
+ } catch (PluginException e) {
+ fail(e.getMessage());
+ }
+ // Makes sure that the root level plugin shows up in the information schema
+ String sql = "SELECT * FROM `INFORMATION_SCHEMA`.`SCHEMATA` WHERE
SCHEMA_NAME LIKE 'googlesheets.%'";
+ RowSet results = queryBuilder().sql(sql).rowSet();
+ assertTrue(results.rowCount() > 1);
+ results.clear();
+ }
+
+ @Test
+ public void testTablesInfoSchema() throws Exception {
+ try {
+ initializeTokens("googlesheets");
+ } catch (PluginException e) {
+ fail(e.getMessage());
+ }
+
+ String sql = "SELECT * FROM `INFORMATION_SCHEMA`.`TABLES` WHERE
TABLE_SCHEMA LIKE 'googlesheets.%'";
+ RowSet results = queryBuilder().sql(sql).rowSet();
+ assertEquals(0, results.rowCount());
+ results.clear();
+ }
+
@Test
public void testImplicitFields() throws Exception {
// Tests special case of only implicit metadata fields being projected.