This is an automated email from the ASF dual-hosted git repository.
ndipiazza pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 1315494fc TIKA-4605: Add Google Drive fetcher plugin (#2504)
1315494fc is described below
commit 1315494fc670d1a9db97ac59e8ae3c9c39ce8224
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Mon Dec 29 09:57:38 2025 -0600
TIKA-4605: Add Google Drive fetcher plugin (#2504)
* TIKA-4605: Add Google Drive fetcher plugin
- Created plugin structure (pom.xml, plugin.properties, assembly.xml)
- Implemented GoogleDrivePipesPlugin and GoogleDriveFetcherFactory
- Refactored GoogleDriveFetcherConfig to Apache Tika pattern
- Refactored GoogleDriveFetcher to extend AbstractTikaExtension
- Added static build() method
- Changed fetch() signature to use Metadata and ParseContext
- Replaced Map usage with Metadata
- Added initialize() method
- Extracted createDriveService() helper method
- Fixed dependency convergence issues with Google libraries
- Updated parent pom.xml to include new module
Build tested: mvn clean install -DskipTests
* TIKA-4605: Bump Google API dependencies to latest versions
- Updated google-api-client: 1.33.0 -> 2.8.1
- Updated google-auth-library-oauth2-http: 1.30.0 -> 1.41.0
- Updated google-api-services-drive: v3-rev20241027-2.0.0 ->
v3-rev20251210-2.0.0
- Added google-http-client: 2.0.0
- Added dependency management for:
- google-http-client and google-http-client-gson
- google-api-client
- google-auth-library-oauth2-http and google-auth-library-credentials
- io.grpc:grpc-context 1.69.0
- com.google.j2objc:j2objc-annotations 3.0.0
Build tested: mvn clean install -DskipTests
---
tika-pipes/tika-pipes-plugins/pom.xml | 1 +
.../tika-pipes-google-drive/pom.xml | 178 +++++++++++++++++++++
.../src/main/assembly/assembly.xml | 55 +++++++
.../fetcher/googledrive/GoogleDriveFetcher.java | 158 ++++++++++++++++++
.../googledrive/GoogleDriveFetcherFactory.java | 57 +++++++
.../config/GoogleDriveFetcherConfig.java | 101 ++++++++++++
.../plugin/googledrive/GoogleDrivePipesPlugin.java | 48 ++++++
.../src/main/resources/plugin.properties | 21 +++
8 files changed, 619 insertions(+)
diff --git a/tika-pipes/tika-pipes-plugins/pom.xml
b/tika-pipes/tika-pipes-plugins/pom.xml
index 4d45f0281..2f4d76229 100644
--- a/tika-pipes/tika-pipes-plugins/pom.xml
+++ b/tika-pipes/tika-pipes-plugins/pom.xml
@@ -37,6 +37,7 @@
<module>tika-pipes-csv</module>
<module>tika-pipes-file-system</module>
<module>tika-pipes-gcs</module>
+ <module>tika-pipes-google-drive</module>
<module>tika-pipes-http</module>
<module>tika-pipes-jdbc</module>
<module>tika-pipes-json</module>
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/pom.xml
b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/pom.xml
new file mode 100644
index 000000000..eb0e5e452
--- /dev/null
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/pom.xml
@@ -0,0 +1,178 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
https://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>tika-pipes-plugins</artifactId>
+ <groupId>org.apache.tika</groupId>
+ <version>4.0.0-SNAPSHOT</version>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <artifactId>tika-pipes-google-drive</artifactId>
+ <name>Apache Tika Pipes Google Drive</name>
+ <properties>
+
<plugin.excluded.artifactIds>tika-core,tika-pipes-api,tika-serialization,tika-plugins-core</plugin.excluded.artifactIds>
+
<plugin.excluded.groupIds>org.apache.logging.log4j,org.slf4j</plugin.excluded.groupIds>
+
<google-api-services-drive.version>v3-rev20251210-2.0.0</google-api-services-drive.version>
+
<google-auth-library-oauth2-http.version>1.41.0</google-auth-library-oauth2-http.version>
+ <google-api-client.version>2.8.1</google-api-client.version>
+ <google-http-client.version>2.0.0</google-http-client.version>
+ </properties>
+
+ <dependencyManagement>
+ <dependencies>
+ <dependency>
+ <groupId>com.google.http-client</groupId>
+ <artifactId>google-http-client</artifactId>
+ <version>${google-http-client.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.http-client</groupId>
+ <artifactId>google-http-client-gson</artifactId>
+ <version>${google-http-client.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.api-client</groupId>
+ <artifactId>google-api-client</artifactId>
+ <version>${google-api-client.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.auth</groupId>
+ <artifactId>google-auth-library-oauth2-http</artifactId>
+ <version>${google-auth-library-oauth2-http.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.auth</groupId>
+ <artifactId>google-auth-library-credentials</artifactId>
+ <version>${google-auth-library-oauth2-http.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>io.grpc</groupId>
+ <artifactId>grpc-context</artifactId>
+ <version>1.69.0</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.j2objc</groupId>
+ <artifactId>j2objc-annotations</artifactId>
+ <version>3.0.0</version>
+ </dependency>
+ </dependencies>
+ </dependencyManagement>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-slf4j2-impl</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-pipes-api</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.api-client</groupId>
+ <artifactId>google-api-client</artifactId>
+ <version>${google-api-client.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.auth</groupId>
+ <artifactId>google-auth-library-oauth2-http</artifactId>
+ <version>${google-auth-library-oauth2-http.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.apis</groupId>
+ <artifactId>google-api-services-drive</artifactId>
+ <version>${google-api-services-drive.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-core</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-pipes-core</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>copy-dependencies</id>
+ <phase>package</phase>
+ <goals>
+ <goal>copy-dependencies</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>${project.build.directory}/lib</outputDirectory>
+ <includeScope>compile</includeScope>
+
<excludeArtifactIds>${plugin.excluded.artifactIds}</excludeArtifactIds>
+ <excludeGroupIds>${plugin.excluded.groupIds}</excludeGroupIds>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <configuration>
+ <descriptors>
+ <descriptor>src/main/assembly/assembly.xml</descriptor>
+ </descriptors>
+ <appendAssemblyId>false</appendAssemblyId>
+ </configuration>
+ <executions>
+ <execution>
+ <id>make-assembly</id>
+ <phase>package</phase>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/main/assembly/assembly.xml
b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/main/assembly/assembly.xml
new file mode 100644
index 000000000..ea0f8b4a1
--- /dev/null
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/main/assembly/assembly.xml
@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<assembly xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns="http://maven.apache.org/ASSEMBLY/2.0.0"
+ xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.0.0
+ http://maven.apache.org/xsd/assembly-2.0.0.xsd">
+ <id>dependencies-zip</id>
+ <formats>
+ <format>zip</format>
+ </formats>
+ <includeBaseDirectory>false</includeBaseDirectory>
+ <fileSets>
+ <fileSet>
+ <directory>${project.build.directory}/lib</directory>
+ <outputDirectory>/lib</outputDirectory>
+ </fileSet>
+ <fileSet>
+ <directory>${project.build.directory}</directory>
+ <outputDirectory>/lib</outputDirectory>
+ <includes>
+ <include>${project.artifactId}-${project.version}.jar</include>
+ </includes>
+ </fileSet>
+ <fileSet>
+ <directory>${project.build.directory}</directory>
+ <outputDirectory>/</outputDirectory>
+ <includes>
+ <include>classes/META-INF/extensions.idx</include>
+ <include>classes/META-INF/MANIFEST.MF</include>
+ </includes>
+ </fileSet>
+ <fileSet>
+ <directory>${project.basedir}/src/main/resources</directory>
+ <outputDirectory>/</outputDirectory>
+ <includes>
+ <include>plugin.properties</include>
+ </includes>
+ </fileSet>
+ </fileSets>
+</assembly>
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/main/java/org/apache/tika/pipes/fetcher/googledrive/GoogleDriveFetcher.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/main/java/org/apache/tika/pipes/fetcher/googledrive/GoogleDriveFetcher.java
new file mode 100644
index 000000000..57b340a32
--- /dev/null
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/main/java/org/apache/tika/pipes/fetcher/googledrive/GoogleDriveFetcher.java
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetcher.googledrive;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Path;
+import java.util.Base64;
+import java.util.List;
+
+import com.google.api.client.http.HttpRequestInitializer;
+import com.google.api.client.http.javanet.NetHttpTransport;
+import com.google.api.client.json.JsonFactory;
+import com.google.api.client.json.gson.GsonFactory;
+import com.google.api.services.drive.Drive;
+import com.google.api.services.drive.DriveScopes;
+import com.google.auth.http.HttpCredentialsAdapter;
+import com.google.auth.oauth2.GoogleCredentials;
+import org.apache.commons.io.FileUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.pipes.api.fetcher.Fetcher;
+import
org.apache.tika.pipes.fetcher.googledrive.config.GoogleDriveFetcherConfig;
+import org.apache.tika.plugins.AbstractTikaExtension;
+import org.apache.tika.plugins.ExtensionConfig;
+
+/**
+ * Fetcher that downloads file content from Google Drive.
+ *
+ * <p>Each fetch authenticates with the service-account key held in the
+ * {@link GoogleDriveFetcherConfig} and delegates to the user named in the fetch key.
+ * Fetch keys have the form {@code <fileId>,<subjectUser>}.
+ *
+ * <p>The number of attempts equals the number of entries in
+ * {@code throttleSeconds} (a single attempt when the list is null or empty);
+ * entry {@code i} is the pause, in seconds, taken after failed attempt {@code i}
+ * when another attempt follows.
+ */
+public class GoogleDriveFetcher extends AbstractTikaExtension implements Fetcher {
+
+    private static final Logger LOG = LoggerFactory.getLogger(GoogleDriveFetcher.class);
+    private static final JsonFactory JSON_FACTORY = GsonFactory.getDefaultInstance();
+    // One shared transport instead of a new instance on every fetch.
+    private static final NetHttpTransport HTTP_TRANSPORT = new NetHttpTransport();
+
+    /**
+     * Parses the plugin's JSON configuration, creates the fetcher and initializes it.
+     *
+     * @param pluginConfig extension configuration whose JSON is mapped onto
+     *                     {@link GoogleDriveFetcherConfig}
+     * @return an initialized fetcher
+     * @throws TikaConfigException if the JSON cannot be parsed or the configuration is unusable
+     * @throws IOException declared by {@link #initialize()}
+     */
+    public static GoogleDriveFetcher build(ExtensionConfig pluginConfig)
+            throws TikaConfigException, IOException {
+        GoogleDriveFetcherConfig config = GoogleDriveFetcherConfig.load(pluginConfig.json());
+        GoogleDriveFetcher fetcher = new GoogleDriveFetcher(pluginConfig, config);
+        fetcher.initialize();
+        return fetcher;
+    }
+
+    private final GoogleDriveFetcherConfig config;
+
+    public GoogleDriveFetcher(ExtensionConfig pluginConfig, GoogleDriveFetcherConfig config) {
+        super(pluginConfig);
+        this.config = config;
+    }
+
+    /**
+     * Validates the configuration so that a missing service-account key is reported
+     * once, at build time, instead of as a wrapped NullPointerException on every fetch.
+     *
+     * @throws TikaConfigException if no configuration or no service-account key is present
+     * @throws IOException never thrown here; kept for interface compatibility
+     */
+    public void initialize() throws IOException, TikaConfigException {
+        if (config == null) {
+            throw new TikaConfigException("GoogleDriveFetcher requires a configuration");
+        }
+        String key = config.getServiceAccountKeyBase64();
+        if (key == null || key.trim().isEmpty()) {
+            throw new TikaConfigException(
+                    "GoogleDriveFetcher requires 'serviceAccountKeyBase64' to be configured");
+        }
+    }
+
+    /**
+     * Fetches a Drive file as a stream, retrying according to {@code throttleSeconds}.
+     *
+     * @param fetchKey {@code <fileId>,<subjectUser>}
+     * @param metadata passed to the temp-file creation when spooling is enabled
+     * @param parseContext not used by this fetcher
+     * @return the file content
+     * @throws TikaException if the fetch key is malformed, or if the last failure was a
+     *                       TikaException or a non-IO exception (wrapped)
+     * @throws IOException if the last failure was an IOException
+     */
+    @Override
+    public TikaInputStream fetch(String fetchKey, Metadata metadata, ParseContext parseContext)
+            throws IOException, TikaException {
+        // A malformed key can never succeed, so reject it once, before any retry or back-off.
+        String[] fetchKeySplit = fetchKey == null ? new String[0] : fetchKey.split(",");
+        if (fetchKeySplit.length != 2) {
+            throw new TikaException(
+                    "Invalid fetch key, expected format ${fileId},${subjectUser}: " + fetchKey);
+        }
+        String fileId = fetchKeySplit[0];
+        String subjectUser = fetchKeySplit[1];
+
+        List<Long> throttleSeconds = config.getThrottleSeconds();
+        int maxTries = (throttleSeconds != null && !throttleSeconds.isEmpty())
+                ? throttleSeconds.size() : 1;
+
+        Exception ex = null;
+        for (int tries = 0; tries < maxTries; tries++) {
+            long start = System.currentTimeMillis();
+            try {
+                return fetchOnce(fileId, subjectUser, fetchKey, metadata);
+            } catch (Exception e) {
+                LOG.warn("Exception fetching on retry={}", tries, e);
+                ex = e;
+            } finally {
+                long elapsed = System.currentTimeMillis() - start;
+                LOG.debug("Total to fetch {}", elapsed);
+            }
+
+            if (tries + 1 >= maxTries) {
+                // Last attempt failed: sleeping now would only delay the error.
+                break;
+            }
+            // maxTries > 1 implies throttleSeconds is non-null and has an entry at 'tries'.
+            Long configured = throttleSeconds.get(tries);
+            long seconds = configured == null ? 0L : Math.max(0L, configured);
+            LOG.warn("Sleeping for {} seconds before retry", seconds);
+            try {
+                Thread.sleep(seconds * 1000);
+            } catch (InterruptedException e) {
+                // Restore the flag and stop retrying; the last failure is reported below.
+                Thread.currentThread().interrupt();
+                break;
+            }
+        }
+
+        if (ex instanceof TikaException) {
+            throw (TikaException) ex;
+        } else if (ex instanceof IOException) {
+            throw (IOException) ex;
+        }
+        throw new TikaException("Could not fetch " + fetchKey, ex);
+    }
+
+    /** Performs a single download attempt for the given file on behalf of the given user. */
+    private TikaInputStream fetchOnce(String fileId, String subjectUser, String fetchKey,
+                                      Metadata metadata) throws IOException, TikaException {
+        Drive driveService = createDriveService(subjectUser);
+        Drive.Files.Get get = driveService.files().get(fileId);
+        InputStream is = get.executeMediaAsInputStream();
+        if (is == null) {
+            throw new IOException("Empty input stream when we tried to parse " + fetchKey);
+        }
+
+        if (config.isSpoolToTemp()) {
+            // NOTE(review): 'tmp' is never closed and is not handed to the returned stream, so
+            // nothing visible here deletes the temp file - confirm who owns its cleanup.
+            TemporaryResources tmp = new TemporaryResources();
+            Path tmpPath = tmp.createTempFile(metadata);
+            FileUtils.copyInputStreamToFile(is, tmpPath.toFile());
+            return TikaInputStream.get(tmpPath);
+        }
+        return TikaInputStream.get(is);
+    }
+
+    /**
+     * Builds a Drive client whose credentials are the configured service account,
+     * scoped to the configured scopes (read-only Drive access when none are set)
+     * and delegated to {@code subjectUser}.
+     */
+    private Drive createDriveService(String subjectUser) throws IOException {
+        List<String> scopes = config.getScopes();
+        if (scopes == null || scopes.isEmpty()) {
+            scopes = List.of(DriveScopes.DRIVE_READONLY);
+        }
+
+        byte[] keyBytes = Base64.getDecoder().decode(config.getServiceAccountKeyBase64());
+        GoogleCredentials baseCredentials =
+                GoogleCredentials.fromStream(new ByteArrayInputStream(keyBytes))
+                        .createScoped(scopes);
+
+        GoogleCredentials delegatedCredentials = baseCredentials.createDelegated(subjectUser);
+        final HttpRequestInitializer requestInitializer =
+                new HttpCredentialsAdapter(delegatedCredentials);
+
+        return new Drive.Builder(HTTP_TRANSPORT, JSON_FACTORY, requestInitializer)
+                .setApplicationName(config.getApplicationName())
+                .build();
+    }
+}
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/main/java/org/apache/tika/pipes/fetcher/googledrive/GoogleDriveFetcherFactory.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/main/java/org/apache/tika/pipes/fetcher/googledrive/GoogleDriveFetcherFactory.java
new file mode 100644
index 000000000..c1cc483ca
--- /dev/null
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/main/java/org/apache/tika/pipes/fetcher/googledrive/GoogleDriveFetcherFactory.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetcher.googledrive;
+
+import java.io.IOException;
+
+import org.pf4j.Extension;
+
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.pipes.api.fetcher.Fetcher;
+import org.apache.tika.pipes.api.fetcher.FetcherFactory;
+import org.apache.tika.plugins.ExtensionConfig;
+
+/**
+ * Factory for creating Google Drive fetchers.
+ *
+ * <p>Example JSON configuration. The property names of the innermost object
+ * must match the fields of {@code GoogleDriveFetcherConfig}, which is what
+ * {@link GoogleDriveFetcher#build(ExtensionConfig)} maps the JSON onto:
+ * <pre>
+ * "fetchers": {
+ *   "google-drive-fetcher": {
+ *     "my-drive-fetcher": {
+ *       "serviceAccountKeyBase64": "BASE64_ENCODED_SERVICE_ACCOUNT_JSON_KEY",
+ *       "applicationName": "tika-pipes",
+ *       "spoolToTemp": false,
+ *       "throttleSeconds": [5, 30]
+ *     }
+ *   }
+ * }
+ * </pre>
+ *
+ * <p>The user to impersonate is not taken from the configuration: it is
+ * supplied per request in the fetch key, which has the form
+ * {@code <fileId>,<subjectUser>}.
+ */
+@Extension
+public class GoogleDriveFetcherFactory implements FetcherFactory {
+ private static final String NAME = "google-drive-fetcher";
+
+ @Override
+ public String getName() {
+ return NAME;
+ }
+
+ @Override
+ public Fetcher buildExtension(ExtensionConfig extensionConfig)
+ throws IOException, TikaConfigException {
+ return GoogleDriveFetcher.build(extensionConfig);
+ }
+}
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/main/java/org/apache/tika/pipes/fetcher/googledrive/config/GoogleDriveFetcherConfig.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/main/java/org/apache/tika/pipes/fetcher/googledrive/config/GoogleDriveFetcherConfig.java
new file mode 100644
index 000000000..1375dda11
--- /dev/null
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/main/java/org/apache/tika/pipes/fetcher/googledrive/config/GoogleDriveFetcherConfig.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetcher.googledrive.config;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import org.apache.tika.exception.TikaConfigException;
+
+/**
+ * JSON-bound configuration for the Google Drive fetcher.
+ *
+ * <p>Setters return {@code this} so that instances can be configured fluently.
+ */
+public class GoogleDriveFetcherConfig {
+
+    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+    /**
+     * Deserializes a configuration from JSON.
+     *
+     * @param json JSON object whose property names match this class's fields
+     * @return the parsed configuration, never {@code null}
+     * @throws TikaConfigException if {@code json} is null, blank, the JSON literal
+     *                             {@code null}, or cannot be mapped onto this class
+     */
+    public static GoogleDriveFetcherConfig load(final String json)
+            throws TikaConfigException {
+        // readValue rejects a null String with an unchecked IllegalArgumentException;
+        // report it as a configuration error like every other bad input.
+        if (json == null || json.trim().isEmpty()) {
+            throw new TikaConfigException(
+                    "GoogleDriveFetcherConfig JSON must not be null or empty");
+        }
+        final GoogleDriveFetcherConfig parsed;
+        try {
+            parsed = OBJECT_MAPPER.readValue(json, GoogleDriveFetcherConfig.class);
+        } catch (JsonProcessingException e) {
+            throw new TikaConfigException(
+                    "Failed to parse GoogleDriveFetcherConfig from JSON", e);
+        }
+        if (parsed == null) {
+            // The JSON document was the literal "null".
+            throw new TikaConfigException(
+                    "GoogleDriveFetcherConfig JSON must be an object, not null");
+        }
+        return parsed;
+    }
+
+    // One entry per fetch attempt; each value is a pause in seconds between attempts.
+    private List<Long> throttleSeconds;
+    // When true, the fetcher copies the download to a temp file before returning it.
+    private boolean spoolToTemp;
+    // Base64-encoded service-account key; the fetcher decodes it to build credentials.
+    private String serviceAccountKeyBase64;
+    // NOTE(review): the fetcher takes the subject user from the fetch key, not from
+    // this field - confirm whether this is meant to be a default.
+    private String subjectUser;
+    private String applicationName = "tika-pipes";
+    // Empty by default; the fetcher falls back to read-only Drive scope when empty or null.
+    private List<String> scopes = new ArrayList<>();
+
+    public List<Long> getThrottleSeconds() {
+        return throttleSeconds;
+    }
+
+    public GoogleDriveFetcherConfig setThrottleSeconds(List<Long> throttleSeconds) {
+        this.throttleSeconds = throttleSeconds;
+        return this;
+    }
+
+    public boolean isSpoolToTemp() {
+        return spoolToTemp;
+    }
+
+    public GoogleDriveFetcherConfig setSpoolToTemp(boolean spoolToTemp) {
+        this.spoolToTemp = spoolToTemp;
+        return this;
+    }
+
+    public String getServiceAccountKeyBase64() {
+        return serviceAccountKeyBase64;
+    }
+
+    public GoogleDriveFetcherConfig setServiceAccountKeyBase64(String serviceAccountKeyBase64) {
+        this.serviceAccountKeyBase64 = serviceAccountKeyBase64;
+        return this;
+    }
+
+    public String getSubjectUser() {
+        return subjectUser;
+    }
+
+    public GoogleDriveFetcherConfig setSubjectUser(String subjectUser) {
+        this.subjectUser = subjectUser;
+        return this;
+    }
+
+    public String getApplicationName() {
+        return applicationName;
+    }
+
+    public GoogleDriveFetcherConfig setApplicationName(String applicationName) {
+        this.applicationName = applicationName;
+        return this;
+    }
+
+    public List<String> getScopes() {
+        return scopes;
+    }
+
+    public GoogleDriveFetcherConfig setScopes(List<String> scopes) {
+        this.scopes = scopes;
+        return this;
+    }
+}
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/main/java/org/apache/tika/pipes/plugin/googledrive/GoogleDrivePipesPlugin.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/main/java/org/apache/tika/pipes/plugin/googledrive/GoogleDrivePipesPlugin.java
new file mode 100644
index 000000000..70d446aea
--- /dev/null
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/main/java/org/apache/tika/pipes/plugin/googledrive/GoogleDrivePipesPlugin.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.plugin.googledrive;
+
+import org.pf4j.Plugin;
+import org.pf4j.PluginWrapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Plugin entry point for the Google Drive pipes plugin.
+ *
+ * <p>Each lifecycle callback logs the transition at INFO level and then
+ * delegates to the corresponding {@link Plugin} method.
+ */
+public class GoogleDrivePipesPlugin extends Plugin {
+
+    private static final Logger LOGGER =
+            LoggerFactory.getLogger(GoogleDrivePipesPlugin.class);
+
+    /**
+     * @param wrapper plugin wrapper passed through to {@link Plugin}
+     */
+    public GoogleDrivePipesPlugin(PluginWrapper wrapper) {
+        super(wrapper);
+    }
+
+    /** Logs the start of the plugin, then runs the inherited start logic. */
+    @Override
+    public void start() {
+        LOGGER.info("Starting Google Drive Pipes Plugin");
+        super.start();
+    }
+
+    /** Logs the stop of the plugin, then runs the inherited stop logic. */
+    @Override
+    public void stop() {
+        LOGGER.info("Stopping Google Drive Pipes Plugin");
+        super.stop();
+    }
+
+    /** Logs the deletion of the plugin, then runs the inherited delete logic. */
+    @Override
+    public void delete() {
+        LOGGER.info("Deleting Google Drive Pipes Plugin");
+        super.delete();
+    }
+}
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/main/resources/plugin.properties
b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/main/resources/plugin.properties
new file mode 100644
index 000000000..cdefcc4d1
--- /dev/null
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/main/resources/plugin.properties
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+plugin.id=tika-pipes-google-drive-plugin
+plugin.class=org.apache.tika.pipes.plugin.googledrive.GoogleDrivePipesPlugin
+plugin.version=4.0.0-SNAPSHOT
+plugin.provider=Apache Tika
+plugin.description=Pipes for Google Drive with OAuth2 authentication