This is an automated email from the ASF dual-hosted git repository.
ndipiazza pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 88e435087 TIKA-4604: Add Atlassian JWT fetcher plugin (#2502)
88e435087 is described below
commit 88e43508734eed43f23351959c35a09a5ce0eb57
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Mon Dec 29 07:47:45 2025 -0600
TIKA-4604: Add Atlassian JWT fetcher plugin (#2502)
* TIKA-4604: WIP - Add Atlassian JWT fetcher plugin structure
- Created plugin directory structure and build files
- Added pom.xml with dependencies
- Created plugin.properties and assembly.xml
- Added AtlassianJwtPipesPlugin and AtlassianJwtFetcherFactory
- Refactored AtlassianJwtFetcherConfig to Apache Tika pattern
- Copied AtlassianJwtFetcher and AtlassianJwtGenerator (needs refactoring)
- Updated parent pom.xml to include new module
TODO: Complete refactoring of AtlassianJwtFetcher class:
- Extend AbstractTikaExtension
- Add static build() method
- Change fetch() signature to use Metadata instead of Maps
- Update all method signatures
- Test compilation
* TIKA-4604: Complete refactoring of AtlassianJwtFetcher to Apache Tika
pattern
- Extended AbstractTikaExtension instead of implementing Fetcher directly
- Added static build() method and constructor
- Changed fetch() signature to use Metadata instead of Maps
- Updated all method signatures throughout
- Replaced Map operations with Metadata.set() and Metadata.add()
- Added initialize() method for HTTP client and JWT generator setup
- Removed old initIfNeeded and checkInitialization methods
- Fixed all imports and added missing ones (List, URL,
MalformedURLException)
- Replaced Lombok log with slf4j LOG
- Code now compiles successfully
Build tested: mvn clean compile -DskipTests
* TIKA-4604: Fix forbidden API violation - use Locale.ROOT for toUpperCase()
- Added Locale import to AtlassianJwtGenerator
- Changed httpMethod.toUpperCase() to httpMethod.toUpperCase(Locale.ROOT)
- Fixes forbiddenapis check violation
Build now passes all checks including forbiddenapis
---
tika-pipes/tika-pipes-plugins/pom.xml | 1 +
.../tika-pipes-atlassian-jwt/pom.xml | 130 ++++++++
.../src/main/assembly/assembly.xml | 55 ++++
.../fetcher/atlassianjwt/AtlassianJwtFetcher.java | 331 +++++++++++++++++++
.../atlassianjwt/AtlassianJwtFetcher.java.backup | 351 +++++++++++++++++++++
.../atlassianjwt/AtlassianJwtFetcherFactory.java | 58 ++++
.../atlassianjwt/AtlassianJwtFetcherPlugin.java | 43 +++
.../atlassianjwt/AtlassianJwtGenerator.java | 190 +++++++++++
.../config/AtlassianJwtFetcherConfig.java | 205 ++++++++++++
.../atlassianjwt/AtlassianJwtPipesPlugin.java | 48 +++
.../src/main/resources/plugin.properties | 21 ++
11 files changed, 1433 insertions(+)
diff --git a/tika-pipes/tika-pipes-plugins/pom.xml
b/tika-pipes/tika-pipes-plugins/pom.xml
index d33378351..16561b5d0 100644
--- a/tika-pipes/tika-pipes-plugins/pom.xml
+++ b/tika-pipes/tika-pipes-plugins/pom.xml
@@ -32,6 +32,7 @@
<packaging>pom</packaging>
<modules>
+ <module>tika-pipes-atlassian-jwt</module>
<module>tika-pipes-az-blob</module>
<module>tika-pipes-csv</module>
<module>tika-pipes-file-system</module>
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/pom.xml
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/pom.xml
new file mode 100644
index 000000000..da622ec8d
--- /dev/null
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/pom.xml
@@ -0,0 +1,130 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
https://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>tika-pipes-plugins</artifactId>
+ <groupId>org.apache.tika</groupId>
+ <version>4.0.0-SNAPSHOT</version>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <artifactId>tika-pipes-atlassian-jwt</artifactId>
+ <name>Apache Tika Pipes Atlassian JWT</name>
+ <properties>
+ <!-- Never include the core artifacts in your plugin lib directory. If you
do, it will cause the classloading
+ to get messed up when finding your plugins. -->
+
<plugin.excluded.artifactIds>tika-core,tika-pipes-api,tika-serialization,tika-plugins-core</plugin.excluded.artifactIds>
+
<plugin.excluded.groupIds>org.apache.logging.log4j,org.slf4j</plugin.excluded.groupIds>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-slf4j2-impl</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-pipes-api</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-httpclient-commons</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.nimbusds</groupId>
+ <artifactId>nimbus-jose-jwt</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-core</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-pipes-core</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>copy-dependencies</id>
+ <phase>package</phase>
+ <goals>
+ <goal>copy-dependencies</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>${project.build.directory}/lib</outputDirectory>
+ <includeScope>compile</includeScope>
+
<excludeArtifactIds>${plugin.excluded.artifactIds}</excludeArtifactIds>
+ <excludeGroupIds>${plugin.excluded.groupIds}</excludeGroupIds>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <configuration>
+ <descriptors>
+ <descriptor>src/main/assembly/assembly.xml</descriptor>
+ </descriptors>
+ <appendAssemblyId>false</appendAssemblyId>
+ </configuration>
+ <executions>
+ <execution>
+ <id>make-assembly</id>
+ <phase>package</phase>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/assembly/assembly.xml
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/assembly/assembly.xml
new file mode 100644
index 000000000..ea0f8b4a1
--- /dev/null
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/assembly/assembly.xml
@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<assembly xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns="http://maven.apache.org/ASSEMBLY/2.0.0"
+ xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.0.0
+ http://maven.apache.org/xsd/assembly-2.0.0.xsd">
+ <id>dependencies-zip</id>
+ <formats>
+ <format>zip</format>
+ </formats>
+ <includeBaseDirectory>false</includeBaseDirectory>
+ <fileSets>
+ <fileSet>
+ <directory>${project.build.directory}/lib</directory>
+ <outputDirectory>/lib</outputDirectory>
+ </fileSet>
+ <fileSet>
+ <directory>${project.build.directory}</directory>
+ <outputDirectory>/lib</outputDirectory>
+ <includes>
+ <include>${project.artifactId}-${project.version}.jar</include>
+ </includes>
+ </fileSet>
+ <fileSet>
+ <directory>${project.build.directory}</directory>
+ <outputDirectory>/</outputDirectory>
+ <includes>
+ <include>classes/META-INF/extensions.idx</include>
+ <include>classes/META-INF/MANIFEST.MF</include>
+ </includes>
+ </fileSet>
+ <fileSet>
+ <directory>${project.basedir}/src/main/resources</directory>
+ <outputDirectory>/</outputDirectory>
+ <includes>
+ <include>plugin.properties</include>
+ </includes>
+ </fileSet>
+ </fileSets>
+</assembly>
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java
new file mode 100644
index 000000000..e1594650b
--- /dev/null
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java
@@ -0,0 +1,331 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetcher.atlassianjwt;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.InetAddress;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.security.NoSuchAlgorithmException;
+import java.util.List;
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import com.nimbusds.jose.JOSEException;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
+import org.apache.http.ConnectionClosedException;
+import org.apache.http.Header;
+import org.apache.http.HttpConnection;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpInetConnection;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.protocol.HttpClientContext;
+import org.apache.http.impl.conn.ConnectionShutdownException;
+import org.apache.http.util.EntityUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.tika.client.HttpClientFactory;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.TikaTimeoutException;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.pipes.api.fetcher.Fetcher;
+import
org.apache.tika.pipes.fetcher.atlassianjwt.config.AtlassianJwtFetcherConfig;
+import org.apache.tika.plugins.AbstractTikaExtension;
+import org.apache.tika.plugins.ExtensionConfig;
+import org.apache.tika.utils.StringUtils;
+
+public class AtlassianJwtFetcher extends AbstractTikaExtension implements
Fetcher {
+
+ private static final Logger LOG =
LoggerFactory.getLogger(AtlassianJwtFetcher.class);
+
+    /**
+     * Creates a ready-to-use fetcher from the supplied plugin configuration.
+     *
+     * @param pluginConfig extension configuration; its JSON payload is parsed into an
+     *                     {@link AtlassianJwtFetcherConfig}
+     * @return a fetcher on which {@link #initialize()} has already been called
+     * @throws TikaConfigException declared by the signature; propagated from config loading
+     *                             or initialization
+     * @throws IOException         declared by the signature; propagated from config loading
+     *                             or initialization
+     */
+    public static AtlassianJwtFetcher build(ExtensionConfig pluginConfig)
+            throws TikaConfigException, IOException {
+        AtlassianJwtFetcher result = new AtlassianJwtFetcher(pluginConfig,
+                AtlassianJwtFetcherConfig.load(pluginConfig.json()));
+        result.initialize();
+        return result;
+    }
+
+    /** Factory for both HTTP clients; configured and used in {@link #initialize()}. */
+    private final HttpClientFactory httpClientFactory = new HttpClientFactory();
+
+    /** Prefix of metadata keys that carry HTTP response header values. */
+    public static final String HTTP_HEADER_PREFIX = "http-header:";
+    /** Prefix of metadata keys that describe the fetch/connection itself. */
+    public static final String HTTP_FETCH_PREFIX = "http-connection:";
+
+    // Metadata properties written by updateMetadata() and spool(). They are constants and
+    // are declared final so no caller can reassign them at runtime.
+    public static final Property HTTP_STATUS_CODE =
+            Property.externalInteger(HTTP_HEADER_PREFIX + "status-code");
+    public static final Property HTTP_NUM_REDIRECTS =
+            Property.externalInteger(HTTP_FETCH_PREFIX + "num-redirects");
+    public static final Property HTTP_TARGET_URL =
+            Property.externalText(HTTP_FETCH_PREFIX + "target-url");
+    public static final Property HTTP_TARGET_IP_ADDRESS =
+            Property.externalText(HTTP_FETCH_PREFIX + "target-ip-address");
+    public static final Property HTTP_FETCH_TRUNCATED =
+            Property.externalBoolean(HTTP_FETCH_PREFIX + "fetch-truncated");
+    public static final Property HTTP_CONTENT_ENCODING =
+            Property.externalText(HTTP_HEADER_PREFIX + "content-encoding");
+    public static final Property HTTP_CONTENT_TYPE =
+            Property.externalText(HTTP_HEADER_PREFIX + "content-type");
+
+    private static final String USER_AGENT = "User-Agent";
+
+    /** Fetcher settings; assigned once in the constructor. */
+    private final AtlassianJwtFetcherConfig config;
+    /** Default client, built in {@link #initialize()}. */
+    private HttpClient httpClient;
+    /** Client with content compression disabled; used for the retry in execute(). */
+    private HttpClient noCompressHttpClient;
+    /** Stays {@code null} when no shared secret is configured; requests are then unsigned. */
+    private AtlassianJwtGenerator jwtGenerator;
+
+    /**
+     * Creates an uninitialized fetcher; {@link #initialize()} must be called before fetching.
+     *
+     * @param pluginConfig extension configuration passed to {@link AbstractTikaExtension}
+     * @param config       fetcher settings; must not be {@code null}
+     * @throws NullPointerException if {@code config} is {@code null}
+     */
+    public AtlassianJwtFetcher(ExtensionConfig pluginConfig,
+                               AtlassianJwtFetcherConfig config) {
+        super(pluginConfig);
+        // Fail fast with a named argument instead of an anonymous NPE later in
+        // initialize() or fetch().
+        this.config = java.util.Objects.requireNonNull(config, "config");
+    }
+
+    /**
+     * Prepares the fetcher for use. Copies every non-null timeout and connection limit from
+     * the config onto the HTTP client factory, builds the default client plus a second
+     * client with content compression disabled, and creates the JWT generator when a
+     * non-blank shared secret is configured.
+     *
+     * @throws IOException         declared by the signature
+     * @throws TikaConfigException declared by the signature
+     */
+    public void initialize() throws IOException, TikaConfigException {
+        // Only override factory defaults for settings that were explicitly configured.
+        if (config.getSocketTimeout() != null) {
+            httpClientFactory.setSocketTimeout(config.getSocketTimeout());
+        }
+        if (config.getRequestTimeout() != null) {
+            httpClientFactory.setRequestTimeout(config.getRequestTimeout());
+        }
+        if (config.getConnectTimeout() != null) {
+            httpClientFactory.setConnectTimeout(config.getConnectTimeout());
+        }
+        if (config.getMaxConnections() != null) {
+            httpClientFactory.setMaxConnections(config.getMaxConnections());
+        }
+        if (config.getMaxConnectionsPerRoute() != null) {
+            httpClientFactory.setMaxConnectionsPerRoute(config.getMaxConnectionsPerRoute());
+        }
+
+        // The second client is built from a copy of the factory so that it shares the
+        // settings above and differs only in having content compression disabled.
+        httpClient = httpClientFactory.build();
+        HttpClientFactory noCompressFactory = httpClientFactory.copy();
+        noCompressFactory.setDisableContentCompression(true);
+        noCompressHttpClient = noCompressFactory.build();
+
+        String sharedSecret = config.getSharedSecret();
+        if (StringUtils.isBlank(sharedSecret)) {
+            // No secret: leave jwtGenerator null, requests will carry no JWT.
+            return;
+        }
+        jwtGenerator = new AtlassianJwtGenerator(sharedSecret,
+                config.getIssuer(), config.getSubject(),
+                config.getJwtExpiresInSeconds());
+    }
+
+ @Override
+ public TikaInputStream fetch(String fetchKey, Metadata metadata,
ParseContext parseContext)
+ throws IOException, TikaException {
+ HttpGet get = new HttpGet(fetchKey);
+ RequestConfig requestConfig = RequestConfig.custom()
+ .setMaxRedirects(config.getMaxRedirects())
+ .setRedirectsEnabled(config.getMaxRedirects() > 0).build();
+ get.setConfig(requestConfig);
+ putAdditionalHeadersOnRequest(get, fetchKey);
+ return execute(get, metadata, httpClient, true);
+ }
+
+ private void putAdditionalHeadersOnRequest(HttpGet httpGet, String url)
+ throws TikaException {
+ if (!StringUtils.isBlank(config.getUserAgent())) {
+ httpGet.setHeader(USER_AGENT, config.getUserAgent());
+ }
+ if (config.getHttpRequestHeaders() != null) {
+ config.getHttpRequestHeaders().forEach((header, values) -> {
+ for (String value : values) {
+ httpGet.addHeader(header, value);
+ }
+ });
+ }
+ if (jwtGenerator != null) {
+ try {
+ String jwt = jwtGenerator.generateJwt("GET", url);
+ httpGet.setHeader("Authorization", "JWT " + jwt);
+ } catch (JOSEException | URISyntaxException |
NoSuchAlgorithmException e) {
+ throw new TikaException("Failed to generate JWT token", e);
+ }
+ } else {
+ LOG.warn("No JWT generator available - authorization header not
set");
+ }
+ }
+
+    /**
+     * Executes the request, records response details in {@code metadata} and spools the
+     * body of a 2xx response to a temporary file.
+     *
+     * <p>When an overall timeout is configured, a timer aborts the request once it elapses
+     * and the resulting failure is reported as a {@link TikaTimeoutException}. If the
+     * connection closes with a "Premature end of Content-Length delimited message" error,
+     * the request is retried once with the compression-disabled client.
+     *
+     * @param get              the request to execute
+     * @param metadata         receives response metadata
+     * @param client           the HTTP client to use for this attempt
+     * @param retryOnBadLength whether the premature-end retry is still allowed
+     * @return the spooled response body
+     * @throws IOException   on transport errors, a non-2xx status or a missing response body
+     * @throws TikaException declared by the signature
+     */
+    private TikaInputStream execute(HttpGet get, Metadata metadata, HttpClient client,
+                                    boolean retryOnBadLength)
+            throws IOException, TikaException {
+        HttpClientContext context = HttpClientContext.create();
+        HttpResponse response = null;
+        final AtomicBoolean timeout = new AtomicBoolean(false);
+        Timer timer = null;
+        long overallTimeout = config.getOverallTimeout() == null ? -1 : config.getOverallTimeout();
+        try {
+            if (overallTimeout > -1) {
+                TimerTask task = new TimerTask() {
+                    @Override
+                    public void run() {
+                        // Set the flag before aborting so the exception raised by the
+                        // abort can be recognised as a timeout by the catch blocks.
+                        timeout.set(true);
+                        get.abort();
+                    }
+                };
+                timer = new Timer(false);
+                timer.schedule(task, overallTimeout);
+            }
+            response = client.execute(get, context);
+
+            updateMetadata(get.getURI().toString(), response, context, metadata);
+
+            int code = response.getStatusLine().getStatusCode();
+            LOG.info("Fetch id {} status code {}", get.getURI(), code);
+            if (code < 200 || code > 299) {
+                throw new IOException("bad status code: " + code + " :: " +
+                        responseToString(response));
+            }
+            HttpEntity entity = response.getEntity();
+            if (entity == null) {
+                // A 2xx response without a body would otherwise surface as a bare NPE.
+                throw new IOException("no response body for " + get.getURI() +
+                        " (status code " + code + ")");
+            }
+            try (InputStream is = entity.getContent()) {
+                return spool(is, metadata);
+            }
+        } catch (ConnectionClosedException e) {
+            if (timeout.get()) {
+                // The overall timeout already aborted this request; do not retry it.
+                throw new TikaTimeoutException("Overall timeout after " + overallTimeout + "ms");
+            }
+            if (retryOnBadLength && e.getMessage() != null &&
+                    e.getMessage().contains("Premature end of Content-Length delimited message")) {
+                LOG.warn("premature end of content-length delimited message; retrying with content compression disabled for {}", get.getURI());
+                if (timer != null) {
+                    // The retry schedules its own timer. Leaving this one running could
+                    // abort the retry without the retry's timeout flag being set.
+                    timer.cancel();
+                }
+                return execute(get, metadata, noCompressHttpClient, false);
+            }
+            throw e;
+        } catch (IOException e) {
+            if (timeout.get()) {
+                throw new TikaTimeoutException("Overall timeout after " + overallTimeout + "ms");
+            } else {
+                throw e;
+            }
+        } finally {
+            if (timer != null) {
+                timer.cancel();
+                timer.purge();
+            }
+            if (response != null) {
+                EntityUtils.consumeQuietly(response.getEntity());
+            }
+            if (response instanceof CloseableHttpResponse) {
+                ((CloseableHttpResponse) response).close();
+            }
+        }
+    }
+
+    /**
+     * Copies the response body to a temporary file and returns a stream over that file.
+     *
+     * <p>A negative max spool size means the whole body is copied. Otherwise at most that
+     * many bytes are copied, and {@link #HTTP_FETCH_TRUNCATED} is set when more data
+     * remained in the stream.
+     *
+     * @param content  the response body; not closed by this method
+     * @param metadata passed to temp-file creation and updated on truncation
+     * @return a stream backed by the temporary file
+     * @throws IOException if the body cannot be copied
+     */
+    private TikaInputStream spool(InputStream content, Metadata metadata) throws IOException {
+        long start = System.currentTimeMillis();
+        long maxSpoolSize = config.getMaxSpoolSize();
+        TemporaryResources tmp = new TemporaryResources();
+        Path tmpFile = tmp.createTempFile(metadata);
+        try {
+            if (maxSpoolSize < 0) {
+                // The temp file already exists at this point, and Files.copy refuses to
+                // overwrite an existing target unless REPLACE_EXISTING is passed.
+                Files.copy(content, tmpFile, java.nio.file.StandardCopyOption.REPLACE_EXISTING);
+            } else {
+                try (OutputStream os = Files.newOutputStream(tmpFile)) {
+                    long totalRead = IOUtils.copyLarge(content, os, 0, maxSpoolSize);
+                    // Hitting the limit exactly is only a truncation if more bytes follow.
+                    if (totalRead == maxSpoolSize && content.read() != -1) {
+                        metadata.set(HTTP_FETCH_TRUNCATED, true);
+                    }
+                }
+            }
+        } catch (IOException | RuntimeException e) {
+            // Do not leave a partially written temp file behind when the copy fails.
+            try {
+                tmp.close();
+            } catch (IOException closeFailure) {
+                e.addSuppressed(closeFailure);
+            }
+            throw e;
+        }
+        long elapsed = System.currentTimeMillis() - start;
+        LOG.debug("took {} ms to copy to local tmp file", elapsed);
+        // NOTE(review): tmp is not handed to the returned stream, so it is unclear who
+        // deletes the temp file after a successful fetch -- confirm ownership.
+        return TikaInputStream.get(tmpFile);
+    }
+
+    /**
+     * Records details of the HTTP exchange in {@code metadata}: status code, content
+     * encoding and type, the response headers listed in the config, the number of
+     * redirects, the final target URL and the remote IP address. Does nothing when
+     * {@code response} is {@code null}.
+     *
+     * @param url      the originally requested URL; used as target URL when no redirect
+     *                 occurred
+     * @param response the HTTP response, may be {@code null}
+     * @param context  the client context holding redirect locations and the connection
+     * @param metadata the metadata to update
+     */
+    private void updateMetadata(String url, HttpResponse response, HttpClientContext context,
+                                Metadata metadata) {
+        if (response == null) {
+            return;
+        }
+
+        if (response.getStatusLine() != null) {
+            metadata.set(HTTP_STATUS_CODE, response.getStatusLine().getStatusCode());
+        }
+
+        HttpEntity entity = response.getEntity();
+        if (entity != null && entity.getContentEncoding() != null) {
+            metadata.set(HTTP_CONTENT_ENCODING, entity.getContentEncoding().getValue());
+        }
+        if (entity != null && entity.getContentType() != null) {
+            metadata.set(HTTP_CONTENT_TYPE, entity.getContentType().getValue());
+        }
+
+        if (config.getHttpHeaders() != null) {
+            for (String h : config.getHttpHeaders()) {
+                Header[] headers = response.getHeaders(h);
+                if (headers == null) {
+                    continue;
+                }
+                for (Header header : headers) {
+                    metadata.add(HTTP_HEADER_PREFIX + h, header.getValue());
+                }
+            }
+        }
+
+        List<URI> uriList = context.getRedirectLocations();
+        if (uriList == null || uriList.isEmpty()) {
+            // An empty list is treated like "no redirects" so that the lookup of the
+            // last element below cannot fail.
+            metadata.set(HTTP_NUM_REDIRECTS, 0);
+            metadata.set(HTTP_TARGET_URL, url);
+        } else {
+            metadata.set(HTTP_NUM_REDIRECTS, uriList.size());
+            URI uri = uriList.get(uriList.size() - 1);
+            if (uri != null) {
+                try {
+                    URL u = uri.toURL();
+                    metadata.set(HTTP_TARGET_URL, u.toString());
+                    metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, u.getFile());
+                } catch (MalformedURLException | IllegalArgumentException e) {
+                    // Best effort: the target URL stays unset, but leave a trace.
+                    LOG.debug("could not convert final redirect location {} to a URL", uri, e);
+                }
+            }
+        }
+
+        HttpConnection connection = context.getConnection();
+        if (connection instanceof HttpInetConnection) {
+            try {
+                InetAddress inetAddress = ((HttpInetConnection) connection).getRemoteAddress();
+                if (inetAddress != null) {
+                    metadata.set(HTTP_TARGET_IP_ADDRESS, inetAddress.getHostAddress());
+                }
+            } catch (ConnectionShutdownException e) {
+                LOG.warn("connection shutdown while trying to get target URL: {}", url);
+            }
+        }
+    }
+
+    /**
+     * Reads up to the configured maximum error-message size from the response body and
+     * decodes it as UTF-8, for use in error messages. The entity is always consumed.
+     *
+     * @param response the HTTP response whose body should be read
+     * @return the (possibly truncated) body, or an empty string when there is no body or
+     *         it cannot be read
+     */
+    private String responseToString(HttpResponse response) {
+        HttpEntity entity = response.getEntity();
+        if (entity == null) {
+            return "";
+        }
+        try (InputStream is = entity.getContent()) {
+            if (is == null) {
+                // Explicit check instead of catching NullPointerException.
+                return "";
+            }
+            UnsynchronizedByteArrayOutputStream bos =
+                    UnsynchronizedByteArrayOutputStream.builder().get();
+            IOUtils.copyLarge(is, bos, 0, config.getMaxErrMsgSize());
+            return bos.toString(StandardCharsets.UTF_8);
+        } catch (IOException e) {
+            LOG.warn("IOException trying to read error message", e);
+            return "";
+        } finally {
+            EntityUtils.consumeQuietly(entity);
+        }
+    }
+}
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java.backup
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java.backup
new file mode 100644
index 000000000..d459b0093
--- /dev/null
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java.backup
@@ -0,0 +1,351 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetcher.atlassianjwt;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.InetAddress;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import com.nimbusds.jose.JOSEException;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
+import org.apache.http.ConnectionClosedException;
+import org.apache.http.Header;
+import org.apache.http.HttpConnection;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpInetConnection;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.protocol.HttpClientContext;
+import org.apache.http.impl.conn.ConnectionShutdownException;
+import org.apache.http.util.EntityUtils;
+import org.pf4j.Extension;
+
+import org.apache.tika.client.HttpClientFactory;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.TikaTimeoutException;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.pipes.api.fetcher.Fetcher;
+import org.apache.tika.pipes.api.fetcher.FetcherConfig;
+import
org.apache.tika.pipes.fetcher.atlassianjwt.config.AtlassianJwtFetcherConfig;
+import org.apache.tika.utils.StringUtils;
+
+@Extension
+@Slf4j
+public class AtlassianJwtFetcher implements Fetcher {
+ private final HttpClientFactory httpClientFactory = new
HttpClientFactory();
+ public static String HTTP_HEADER_PREFIX = "http-header:";
+ public static String HTTP_FETCH_PREFIX = "http-connection:";
+
+ public static Property HTTP_STATUS_CODE =
Property.externalInteger(HTTP_HEADER_PREFIX + "status-code");
+ public static Property HTTP_NUM_REDIRECTS =
Property.externalInteger(HTTP_FETCH_PREFIX + "num-redirects");
+ public static Property HTTP_TARGET_URL =
Property.externalText(HTTP_FETCH_PREFIX + "target-url");
+ public static Property HTTP_TARGET_IP_ADDRESS =
Property.externalText(HTTP_FETCH_PREFIX + "target-ip-address");
+ public static Property HTTP_FETCH_TRUNCATED =
Property.externalBoolean(HTTP_FETCH_PREFIX + "fetch-truncated");
+ public static Property HTTP_CONTENT_ENCODING =
Property.externalText(HTTP_HEADER_PREFIX + "content-encoding");
+ public static Property HTTP_CONTENT_TYPE =
Property.externalText(HTTP_HEADER_PREFIX + "content-type");
+
+ private static final String USER_AGENT = "User-Agent";
+
+ private HttpClient httpClient;
+ private HttpClient noCompressHttpClient;
+ private AtlassianJwtGenerator jwtGenerator;
+ private boolean isInit = false;
+
+ @Override
+ public InputStream fetch(FetcherConfig fetcherConfig, String fetchKey,
Map<String, Object> fetchMetadata, Map<String, Object> responseMetadata) {
+ try {
+ AtlassianJwtFetcherConfig atlassianJwtFetcherConfig =
(AtlassianJwtFetcherConfig) fetcherConfig;
+ initIfNeeded(atlassianJwtFetcherConfig);
+ HttpGet get = new HttpGet(fetchKey);
+ RequestConfig requestConfig = RequestConfig
+ .custom()
+
.setMaxRedirects(atlassianJwtFetcherConfig.getMaxRedirects())
+
.setRedirectsEnabled(atlassianJwtFetcherConfig.getMaxRedirects() > 0)
+ .build();
+ get.setConfig(requestConfig);
+ putAdditionalHeadersOnRequest(atlassianJwtFetcherConfig, get,
fetchKey);
+ return execute(get, atlassianJwtFetcherConfig, fetchMetadata,
httpClient, true);
+ } catch (TikaException | IOException | JOSEException |
URISyntaxException | NoSuchAlgorithmException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private void putAdditionalHeadersOnRequest(AtlassianJwtFetcherConfig
atlassianJwtFetcherConfig, HttpGet httpGet, String url)
+ throws TikaException, JOSEException, URISyntaxException,
NoSuchAlgorithmException {
+
+ if (!StringUtils.isBlank(atlassianJwtFetcherConfig.getUserAgent())) {
+ httpGet.setHeader(USER_AGENT,
atlassianJwtFetcherConfig.getUserAgent());
+ }
+ if (atlassianJwtFetcherConfig.getHttpRequestHeaders() != null) {
+ atlassianJwtFetcherConfig.getHttpRequestHeaders()
+ .forEach((header, values) -> {
+ for (String value : values) {
+ httpGet.addHeader(header, value);
+ }
+ });
+ }
+ if (jwtGenerator != null) {
+ String jwt = jwtGenerator.generateJwt("GET", url);
+ httpGet.setHeader("Authorization", "JWT " + jwt);
+ } else {
+ log.warn("No JWT generator available - authorization header not
set");
+ }
+ }
+
+ private InputStream execute(HttpGet get, AtlassianJwtFetcherConfig
atlassianJwtFetcherConfig,
+ Map<String, Object> fetchMetadata, HttpClient
client,
+ boolean retryOnBadLength) throws IOException {
+ HttpClientContext context = HttpClientContext.create();
+ HttpResponse response = null;
+ final AtomicBoolean timeout = new AtomicBoolean(false);
+ Timer timer = null;
+ long overallTimeout = atlassianJwtFetcherConfig.getOverallTimeout() ==
null ? -1 : atlassianJwtFetcherConfig.getOverallTimeout();
+ try {
+ if (overallTimeout > -1) {
+ TimerTask task = new TimerTask() {
+ @Override
+ public void run() {
+ timeout.set(true);
+ if (get != null) {
+ get.abort();
+ }
+ }
+ };
+ timer = new Timer(false);
+ timer.schedule(task, overallTimeout);
+ }
+ response = client.execute(get, context);
+
+ updateMetadata(get.getURI().toString(), response, context,
fetchMetadata, atlassianJwtFetcherConfig);
+
+ int code = response.getStatusLine().getStatusCode();
+ log.info("Fetch id {} status code {}", get.getURI(), code);
+ if (code < 200 || code > 299) {
+ throw new IOException("bad status code: " + code + " :: " +
responseToString(atlassianJwtFetcherConfig, response));
+ }
+ try (InputStream is = response.getEntity().getContent()) {
+ return spool(atlassianJwtFetcherConfig, is, fetchMetadata);
+ }
+ } catch (ConnectionClosedException e) {
+ if (retryOnBadLength && e.getMessage() != null &&
e.getMessage().contains("Premature end of Content-Length delimited message")) {
+ log.warn("premature end of content-length delimited message;
retrying with content compression disabled for {}", get.getURI());
+ return execute(get, atlassianJwtFetcherConfig, fetchMetadata,
noCompressHttpClient, false);
+ }
+ throw e;
+ } catch (IOException e) {
+ if (timeout.get()) {
+ throw new TikaTimeoutException("Overall timeout after " +
overallTimeout + "ms");
+ } else {
+ throw e;
+ }
+ } finally {
+ if (timer != null) {
+ timer.cancel();
+ timer.purge();
+ }
+ if (response != null) {
+ EntityUtils.consumeQuietly(response.getEntity());
+ }
+ if (response instanceof CloseableHttpResponse) {
+ ((CloseableHttpResponse) response).close();
+ }
+ }
+ }
+
+    /**
+     * Copies the response body into a local temporary file and returns a stream over that file.
+     *
+     * <p>When the configured max spool size is negative (or not set) the whole body is copied.
+     * Otherwise at most that many bytes are copied and, if more data was still available,
+     * {@code HTTP_FETCH_TRUNCATED} is recorded as {@code "true"} in {@code fetchMetadata}.
+     *
+     * @param atlassianJwtFetcherConfig configuration supplying the max spool size
+     * @param content response body to copy; it is not closed by this method
+     * @param fetchMetadata metadata map that receives the truncation flag
+     * @return a {@link TikaInputStream} backed by the temporary file
+     * @throws IOException if the temporary file cannot be created or written
+     */
+    private InputStream spool(AtlassianJwtFetcherConfig atlassianJwtFetcherConfig,
+                              InputStream content, Map<String, Object> fetchMetadata)
+            throws IOException {
+        long start = System.currentTimeMillis();
+        TemporaryResources tmp = new TemporaryResources();
+        Path tmpFile = tmp.createTempFile();
+        try {
+            Long maxSpoolSize = atlassianJwtFetcherConfig.getMaxSpoolSize();
+            if (maxSpoolSize == null || maxSpoolSize < 0) {
+                Files.copy(content, tmpFile, StandardCopyOption.REPLACE_EXISTING);
+            } else {
+                long limit = maxSpoolSize;
+                try (OutputStream os = Files.newOutputStream(tmpFile)) {
+                    long totalRead = IOUtils.copyLarge(content, os, 0, limit);
+                    // One extra read tells us whether the body was longer than the limit.
+                    if (totalRead == limit && content.read() != -1) {
+                        fetchMetadata.put(HTTP_FETCH_TRUNCATED.getName(), "true");
+                    }
+                }
+            }
+        } catch (IOException | RuntimeException e) {
+            // Do not leave a partially written temp file behind when the copy fails.
+            try {
+                tmp.close();
+            } catch (IOException closeFailure) {
+                e.addSuppressed(closeFailure);
+            }
+            throw e;
+        }
+        long elapsed = System.currentTimeMillis() - start;
+        log.debug("took {} ms to copy to local tmp file", elapsed);
+        // NOTE(review): the TemporaryResources handle is not attached to the returned stream,
+        // so presumably the temp file outlives the stream on the success path -- confirm who
+        // is expected to delete it.
+        return TikaInputStream.get(tmpFile);
+    }
+
+    /**
+     * Records details of the HTTP exchange in {@code fetchMetadata}: status code, content
+     * encoding and type, the response headers named in the configuration, the number of
+     * redirects, the final target URL and, when available, the remote IP address.
+     *
+     * @param url the URL that was requested
+     * @param response the HTTP response; nothing is recorded when this is {@code null}
+     * @param context client context used to read redirect locations and the connection
+     * @param fetchMetadata metadata map to populate
+     * @param atlassianJwtFetcherConfig configuration listing the response headers to capture
+     */
+    private void updateMetadata(String url, HttpResponse response, HttpClientContext context,
+                                Map<String, Object> fetchMetadata,
+                                AtlassianJwtFetcherConfig atlassianJwtFetcherConfig) {
+        if (response == null) {
+            return;
+        }
+
+        if (response.getStatusLine() != null) {
+            fetchMetadata.put(HTTP_STATUS_CODE.getName(),
+                    response.getStatusLine().getStatusCode());
+        }
+
+        HttpEntity entity = response.getEntity();
+        if (entity != null) {
+            if (entity.getContentEncoding() != null) {
+                fetchMetadata.put(HTTP_CONTENT_ENCODING.getName(),
+                        entity.getContentEncoding().getValue());
+            }
+            if (entity.getContentType() != null) {
+                fetchMetadata.put(HTTP_CONTENT_TYPE.getName(),
+                        entity.getContentType().getValue());
+            }
+        }
+
+        if (atlassianJwtFetcherConfig.getHttpHeaders() != null) {
+            for (String h : atlassianJwtFetcherConfig.getHttpHeaders()) {
+                Header[] headers = response.getHeaders(h);
+                if (headers != null && headers.length > 0) {
+                    String name = HTTP_HEADER_PREFIX + h;
+                    List<String> headerList = new ArrayList<>(headers.length);
+                    for (Header header : headers) {
+                        headerList.add(header.getValue());
+                    }
+                    fetchMetadata.put(name, headerList);
+                }
+            }
+        }
+
+        List<URI> uriList = context.getRedirectLocations();
+        if (uriList == null || uriList.isEmpty()) {
+            // No redirects were followed, so the requested URL is the target.
+            fetchMetadata.put(HTTP_NUM_REDIRECTS.getName(), 0);
+            fetchMetadata.put(HTTP_TARGET_URL.getName(), url);
+        } else {
+            fetchMetadata.put(HTTP_NUM_REDIRECTS.getName(), uriList.size());
+            // The last redirect location is where the content was actually fetched from.
+            URI uri = uriList.get(uriList.size() - 1);
+            try {
+                if (uri != null) {
+                    URL u = uri.toURL();
+                    fetchMetadata.put(HTTP_TARGET_URL.getName(), u.toString());
+                    fetchMetadata.put(TikaCoreProperties.RESOURCE_NAME_KEY, u.getFile());
+                }
+            } catch (MalformedURLException e) {
+                // Best effort: the target URL is simply left out of the metadata.
+                log.debug("could not convert redirect location to a URL: {}", uri, e);
+            }
+        }
+
+        HttpConnection connection = context.getConnection();
+        if (connection instanceof HttpInetConnection) {
+            try {
+                InetAddress inetAddress = ((HttpInetConnection) connection).getRemoteAddress();
+                if (inetAddress != null) {
+                    fetchMetadata.put(HTTP_TARGET_IP_ADDRESS.getName(),
+                            inetAddress.getHostAddress());
+                }
+            } catch (ConnectionShutdownException e) {
+                log.warn("connection shutdown while trying to get target URL: {}", url);
+            }
+        }
+    }
+
+    /**
+     * Reads up to the configured max error message size from the response body and returns
+     * it as a UTF-8 string. Used to enrich the exception message for non-2xx responses.
+     *
+     * <p>This is best effort: an empty string is returned when there is no entity, no
+     * content stream, no configured size, or when reading fails. The entity is always
+     * consumed before returning.
+     *
+     * @param atlassianJwtFetcherConfig configuration supplying the max error message size
+     * @param response the HTTP response whose body should be read
+     * @return the (possibly truncated) body text, or an empty string
+     */
+    private String responseToString(AtlassianJwtFetcherConfig atlassianJwtFetcherConfig,
+                                    HttpResponse response) {
+        HttpEntity entity = response.getEntity();
+        if (entity == null) {
+            return "";
+        }
+        try (InputStream is = entity.getContent()) {
+            Integer maxErrMsgSize = atlassianJwtFetcherConfig.getMaxErrMsgSize();
+            // Explicit null checks replace the former catch of NullPointerException.
+            if (is == null || maxErrMsgSize == null) {
+                return "";
+            }
+            UnsynchronizedByteArrayOutputStream bos =
+                    UnsynchronizedByteArrayOutputStream.builder().get();
+            IOUtils.copyLarge(is, bos, 0, maxErrMsgSize);
+            return bos.toString(StandardCharsets.UTF_8);
+        } catch (IOException e) {
+            log.warn("IOException trying to read error message", e);
+            return "";
+        } finally {
+            EntityUtils.consumeQuietly(entity);
+        }
+    }
+
+    /**
+     * Builds the HTTP clients and the JWT generator from the supplied configuration the
+     * first time it is called; later calls return immediately.
+     *
+     * <p>Two clients are built from the same factory settings: the regular one and a copy
+     * with content compression disabled.
+     *
+     * @param atlassianJwtFetcherConfig configuration to apply
+     * @throws TikaConfigException if the shared secret or the issuer is missing
+     */
+    public void initIfNeeded(AtlassianJwtFetcherConfig atlassianJwtFetcherConfig)
+            throws TikaConfigException {
+        if (isInit) {
+            return;
+        }
+
+        log.info("AtlassianJwtFetcher initialization:");
+        // Only the presence of the secret is logged, never its value.
+        log.info("Shared Secret: {}",
+                atlassianJwtFetcherConfig.getSharedSecret() != null ? "[PRESENT]" : "[MISSING]");
+        log.info("Issuer: {}", atlassianJwtFetcherConfig.getIssuer());
+        log.info("Subject: {}", atlassianJwtFetcherConfig.getSubject());
+        log.info("JWT Expires In Seconds: {}",
+                atlassianJwtFetcherConfig.getJwtExpiresInSeconds());
+
+        checkInitialization(atlassianJwtFetcherConfig);
+
+        if (atlassianJwtFetcherConfig.getSocketTimeout() != null) {
+            httpClientFactory.setSocketTimeout(atlassianJwtFetcherConfig.getSocketTimeout());
+        }
+        if (atlassianJwtFetcherConfig.getRequestTimeout() != null) {
+            httpClientFactory.setRequestTimeout(atlassianJwtFetcherConfig.getRequestTimeout());
+        }
+        if (atlassianJwtFetcherConfig.getConnectTimeout() != null) {
+            // Previously this called setSocketTimeout, which overwrote the socket timeout
+            // and left the connect timeout at the factory default.
+            httpClientFactory.setConnectTimeout(atlassianJwtFetcherConfig.getConnectTimeout());
+        }
+        if (atlassianJwtFetcherConfig.getMaxConnections() != null) {
+            httpClientFactory.setMaxConnections(atlassianJwtFetcherConfig.getMaxConnections());
+        }
+        if (atlassianJwtFetcherConfig.getMaxConnectionsPerRoute() != null) {
+            httpClientFactory.setMaxConnectionsPerRoute(
+                    atlassianJwtFetcherConfig.getMaxConnectionsPerRoute());
+        }
+
+        httpClient = httpClientFactory.build();
+        HttpClientFactory cp = httpClientFactory.copy();
+        cp.setDisableContentCompression(true);
+        noCompressHttpClient = cp.build();
+
+        if (!StringUtils.isBlank(atlassianJwtFetcherConfig.getSharedSecret()) &&
+                !StringUtils.isBlank(atlassianJwtFetcherConfig.getIssuer())) {
+            jwtGenerator = new AtlassianJwtGenerator(
+                    atlassianJwtFetcherConfig.getSharedSecret(),
+                    atlassianJwtFetcherConfig.getIssuer(),
+                    atlassianJwtFetcherConfig.getSubject(),
+                    atlassianJwtFetcherConfig.getJwtExpiresInSeconds()
+            );
+        } else {
+            log.warn("JWT generator not created. missing required configuration");
+        }
+
+        isInit = true;
+    }
+
+ public void checkInitialization(AtlassianJwtFetcherConfig
atlassianJwtFetcherConfig) throws TikaConfigException {
+ if (StringUtils.isBlank(atlassianJwtFetcherConfig.getSharedSecret())) {
+ throw new TikaConfigException("Atlassian JWT Fetcher requires a
shared secret");
+ }
+ if (StringUtils.isBlank(atlassianJwtFetcherConfig.getIssuer())) {
+ throw new TikaConfigException("Atlassian JWT Fetcher requires an
issuer");
+ }
+ }
+}
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcherFactory.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcherFactory.java
new file mode 100644
index 000000000..728cb7b6f
--- /dev/null
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcherFactory.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetcher.atlassianjwt;
+
+import java.io.IOException;
+
+import org.pf4j.Extension;
+
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.pipes.api.fetcher.Fetcher;
+import org.apache.tika.pipes.api.fetcher.FetcherFactory;
+import org.apache.tika.plugins.ExtensionConfig;
+
+/**
+ * Factory for creating Atlassian JWT fetchers.
+ *
+ * <p>Example JSON configuration:
+ * <pre>
+ * "fetchers": {
+ * "atlassian-jwt-fetcher": {
+ * "my-atlassian-fetcher": {
+ * "sharedSecret": "your-shared-secret",
+ * "issuer": "your-app-key",
+ * "connectTimeout": 30000,
+ * "socketTimeout": 120000
+ * }
+ * }
+ * }
+ * </pre>
+ */
+@Extension
+public class AtlassianJwtFetcherFactory implements FetcherFactory {
+ private static final String NAME = "atlassian-jwt-fetcher";
+
+ @Override
+ public String getName() {
+ return NAME;
+ }
+
+ @Override
+ public Fetcher buildExtension(ExtensionConfig extensionConfig) throws
IOException, TikaConfigException {
+ return AtlassianJwtFetcher.build(extensionConfig);
+ }
+}
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcherPlugin.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcherPlugin.java
new file mode 100644
index 000000000..849436d93
--- /dev/null
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcherPlugin.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetcher.atlassianjwt;
+
+import org.pf4j.Plugin;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * pf4j plugin class for the Atlassian JWT fetcher. Each lifecycle callback logs an
+ * informational message and then delegates to the {@link Plugin} implementation.
+ */
+public class AtlassianJwtFetcherPlugin extends Plugin {
+
+    private static final Logger LOGGER =
+            LoggerFactory.getLogger(AtlassianJwtFetcherPlugin.class);
+
+    /** Logs the start event, then runs the inherited start logic. */
+    @Override
+    public void start() {
+        LOGGER.info("Starting Atlassian JWT Fetcher Plugin");
+        super.start();
+    }
+
+    /** Logs the stop event, then runs the inherited stop logic. */
+    @Override
+    public void stop() {
+        LOGGER.info("Stopping Atlassian JWT Fetcher Plugin");
+        super.stop();
+    }
+
+    /** Logs the delete event, then runs the inherited delete logic. */
+    @Override
+    public void delete() {
+        LOGGER.info("Deleting Atlassian JWT Fetcher Plugin");
+        super.delete();
+    }
+}
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtGenerator.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtGenerator.java
new file mode 100644
index 000000000..bacd905e1
--- /dev/null
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtGenerator.java
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetcher.atlassianjwt;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URLDecoder;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.time.Instant;
+import java.time.temporal.ChronoUnit;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import com.nimbusds.jose.JOSEException;
+import com.nimbusds.jose.JOSEObjectType;
+import com.nimbusds.jose.JWSAlgorithm;
+import com.nimbusds.jose.JWSHeader;
+import com.nimbusds.jose.JWSSigner;
+import com.nimbusds.jose.crypto.MACSigner;
+import com.nimbusds.jwt.JWTClaimsSet;
+import com.nimbusds.jwt.SignedJWT;
+
+/**
+ * Creates HS256-signed JWTs for calling Atlassian REST APIs. Each token carries a
+ * {@code qsh} (query string hash) claim: the SHA-256 hex digest of the canonical request
+ * {@code METHOD&path&canonical-query}.
+ */
+public class AtlassianJwtGenerator {
+
+    /** Path prefix of Confluence URLs; it is left out of the canonical request path. */
+    private static final String CONFLUENCE_CONTEXT_PATH = "/wiki";
+
+    private final String sharedSecret;
+    private final String issuer;
+    private final String subject;
+    private final int expiresInSeconds;
+
+    /**
+     * @param sharedSecret secret used as the HMAC key (UTF-8 bytes)
+     * @param issuer value of the {@code iss} claim
+     * @param subject value of the {@code sub} claim; omitted from the token when null or blank
+     * @param expiresInSeconds lifetime of each generated token in seconds
+     */
+    public AtlassianJwtGenerator(String sharedSecret, String issuer, String subject,
+                                 int expiresInSeconds) {
+        this.sharedSecret = sharedSecret;
+        this.issuer = issuer;
+        this.subject = subject;
+        this.expiresInSeconds = expiresInSeconds;
+    }
+
+    /**
+     * Generates a signed JWT bound to the given request.
+     *
+     * @param method HTTP method of the request, in any case
+     * @param url full URL of the request, including any query string
+     * @return the serialized, signed token
+     * @throws JOSEException if the token cannot be signed
+     * @throws URISyntaxException if {@code url} is not a valid URI
+     * @throws NoSuchAlgorithmException if SHA-256 is not available
+     */
+    public String generateJwt(String method, String url)
+            throws JOSEException, URISyntaxException, NoSuchAlgorithmException {
+        String qsh = generateQueryStringHash(method, url);
+
+        // Sample the clock once so that iat and exp differ by exactly expiresInSeconds.
+        Instant now = Instant.now();
+        JWTClaimsSet.Builder claimsBuilder = new JWTClaimsSet.Builder()
+                .issuer(issuer)
+                .issueTime(Date.from(now))
+                .expirationTime(Date.from(now.plus(expiresInSeconds, ChronoUnit.SECONDS)))
+                .claim("qsh", qsh);
+
+        // Only add subject if it's not null or empty
+        if (subject != null && !subject.trim().isEmpty()) {
+            claimsBuilder.subject(subject);
+        }
+
+        JWSSigner signer = new MACSigner(sharedSecret.getBytes(StandardCharsets.UTF_8));
+        JWSHeader header = new JWSHeader.Builder(JWSAlgorithm.HS256)
+                .type(JOSEObjectType.JWT)
+                .build();
+        SignedJWT signedJWT = new SignedJWT(header, claimsBuilder.build());
+        signedJWT.sign(signer);
+
+        return signedJWT.serialize();
+    }
+
+    /** Returns the lower-case hex SHA-256 digest of the canonical request for the URL. */
+    private String generateQueryStringHash(String method, String url)
+            throws URISyntaxException, NoSuchAlgorithmException {
+        URI uri = new URI(url);
+        String canonicalRequest = createCanonicalRequestString(method, uri);
+
+        MessageDigest digest = MessageDigest.getInstance("SHA-256");
+        byte[] hash = digest.digest(canonicalRequest.getBytes(StandardCharsets.UTF_8));
+
+        StringBuilder hexString = new StringBuilder(hash.length * 2);
+        for (byte b : hash) {
+            String hex = Integer.toHexString(0xff & b);
+            if (hex.length() == 1) {
+                hexString.append('0');
+            }
+            hexString.append(hex);
+        }
+        return hexString.toString();
+    }
+
+    /** Builds {@code METHOD&path&canonical-query} for the given request. */
+    private String createCanonicalRequestString(String httpMethod, URI url) {
+        String rawPath = url.getRawPath();
+        if (rawPath == null) {
+            rawPath = "";
+        }
+
+        // Build path: exactly one leading slash, no trailing slashes, '&' encoded as %26
+        // because '&' is the separator of the canonical request.
+        String path = "/" + rawPath.replaceAll("^/+", "").replaceAll("/+$", "")
+                .replace("&", "%26");
+
+        // Confluence paths are prefixed with "/wiki" however that prefix should not be used
+        // for calculating the canonical request. Match it as a whole path segment so that
+        // paths such as "/wikipedia" are left untouched.
+        if (path.equals(CONFLUENCE_CONTEXT_PATH)) {
+            path = "/";
+        } else if (path.startsWith(CONFLUENCE_CONTEXT_PATH + "/")) {
+            path = path.substring(CONFLUENCE_CONTEXT_PATH.length());
+        }
+
+        // The raw query is used so that encoded '&' and '=' inside names or values do not
+        // get confused with the separators.
+        String canonicalQueryString = generateCanonicalQueryString(url.getRawQuery());
+        return httpMethod.toUpperCase(Locale.ROOT) + "&" + path + "&" + canonicalQueryString;
+    }
+
+    /**
+     * Builds the canonical query string: the {@code jwt} parameter is dropped, names and
+     * values are percent-encoded, values of a repeated name are sorted and comma-joined,
+     * and parameters are sorted by encoded name and joined with {@code &}.
+     *
+     * @param rawQuery the still-encoded query component of the URL, may be null
+     */
+    private String generateCanonicalQueryString(String rawQuery) {
+        if (rawQuery == null || rawQuery.isEmpty()) {
+            return "";
+        }
+
+        // Encoded name -> encoded values, so that repeated names are merged.
+        Map<String, List<String>> encodedParams = new HashMap<>();
+        for (String param : rawQuery.split("&")) {
+            if (param.isEmpty()) {
+                // e.g. "a=1&&b=2": an empty segment carries no parameter
+                continue;
+            }
+            String[] keyValue = param.split("=", 2);
+            String key = URLDecoder.decode(keyValue[0], StandardCharsets.UTF_8);
+            String value = keyValue.length > 1
+                    ? URLDecoder.decode(keyValue[1], StandardCharsets.UTF_8) : "";
+
+            // Skip jwt param, unneeded but present
+            if ("jwt".equals(key)) {
+                continue;
+            }
+
+            encodedParams.computeIfAbsent(percentEncode(key), k -> new ArrayList<>())
+                    .add(percentEncode(value));
+        }
+
+        // Query parameter values need to be sorted in alphabetical order
+        for (List<String> values : encodedParams.values()) {
+            Collections.sort(values);
+        }
+
+        // Parameters are ordered by their encoded name, not by the joined "name=value"
+        // text, because '=' sorts after characters such as '-', '.' and digits.
+        List<String> sortedNames = new ArrayList<>(encodedParams.keySet());
+        Collections.sort(sortedNames);
+
+        // Individual parameter values are comma separated; parameters are joined with '&'.
+        return sortedNames.stream()
+                .map(name -> name + "=" + String.join(",", encodedParams.get(name)))
+                .collect(Collectors.joining("&"));
+    }
+
+    /**
+     * Percent-encodes per RFC 3986: starts from form encoding and corrects the three
+     * places where it differs (space as {@code %20}, {@code *} as {@code %2A}, and
+     * {@code ~} left unencoded).
+     */
+    private static String percentEncode(String value) {
+        return URLEncoder.encode(value, StandardCharsets.UTF_8)
+                .replace("+", "%20")
+                .replace("*", "%2A")
+                .replace("%7E", "~");
+    }
+}
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/config/AtlassianJwtFetcherConfig.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/config/AtlassianJwtFetcherConfig.java
new file mode 100644
index 000000000..d8f969263
--- /dev/null
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/config/AtlassianJwtFetcherConfig.java
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.fetcher.atlassianjwt.config;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import org.apache.tika.exception.TikaConfigException;
+
+public class AtlassianJwtFetcherConfig {
+
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ public static AtlassianJwtFetcherConfig load(final String json)
+ throws TikaConfigException {
+ try {
+ return OBJECT_MAPPER.readValue(json,
AtlassianJwtFetcherConfig.class);
+ } catch (JsonProcessingException e) {
+ throw new TikaConfigException(
+ "Failed to parse AtlassianJwtFetcherConfig from JSON", e);
+ }
+ }
+
+ private Integer maxConnectionsPerRoute = 1000;
+ private Integer maxConnections = 2000;
+ private Integer requestTimeout = 120000;
+ private Integer connectTimeout = 120000;
+ private Integer socketTimeout = 120000;
+ private Long maxSpoolSize = -1L;
+ private Integer maxRedirects = 0;
+ private List<String> httpHeaders = new ArrayList<>();
+ private Map<String, List<String>> httpRequestHeaders = new
LinkedHashMap<>();
+ private Long overallTimeout = 120000L;
+ private Integer maxErrMsgSize = 10000000;
+ private String userAgent;
+
+ private String sharedSecret;
+ private String issuer;
+ private String subject;
+ private Integer jwtExpiresInSeconds = 3600;
+
+ public Integer getMaxConnectionsPerRoute() {
+ return maxConnectionsPerRoute;
+ }
+
+ public AtlassianJwtFetcherConfig setMaxConnectionsPerRoute(Integer
maxConnectionsPerRoute) {
+ this.maxConnectionsPerRoute = maxConnectionsPerRoute;
+ return this;
+ }
+
+ public Integer getMaxConnections() {
+ return maxConnections;
+ }
+
+ public AtlassianJwtFetcherConfig setMaxConnections(Integer maxConnections)
{
+ this.maxConnections = maxConnections;
+ return this;
+ }
+
+ public Integer getRequestTimeout() {
+ return requestTimeout;
+ }
+
+ public AtlassianJwtFetcherConfig setRequestTimeout(Integer requestTimeout)
{
+ this.requestTimeout = requestTimeout;
+ return this;
+ }
+
+ public Integer getConnectTimeout() {
+ return connectTimeout;
+ }
+
+ public AtlassianJwtFetcherConfig setConnectTimeout(Integer connectTimeout)
{
+ this.connectTimeout = connectTimeout;
+ return this;
+ }
+
+ public Integer getSocketTimeout() {
+ return socketTimeout;
+ }
+
+ public AtlassianJwtFetcherConfig setSocketTimeout(Integer socketTimeout) {
+ this.socketTimeout = socketTimeout;
+ return this;
+ }
+
+ public Long getMaxSpoolSize() {
+ return maxSpoolSize;
+ }
+
+ public AtlassianJwtFetcherConfig setMaxSpoolSize(Long maxSpoolSize) {
+ this.maxSpoolSize = maxSpoolSize;
+ return this;
+ }
+
+ public Integer getMaxRedirects() {
+ return maxRedirects;
+ }
+
+ public AtlassianJwtFetcherConfig setMaxRedirects(Integer maxRedirects) {
+ this.maxRedirects = maxRedirects;
+ return this;
+ }
+
+ public List<String> getHttpHeaders() {
+ return httpHeaders;
+ }
+
+ public AtlassianJwtFetcherConfig setHttpHeaders(List<String> httpHeaders) {
+ this.httpHeaders = httpHeaders;
+ return this;
+ }
+
+ public Map<String, List<String>> getHttpRequestHeaders() {
+ return httpRequestHeaders;
+ }
+
+ public AtlassianJwtFetcherConfig setHttpRequestHeaders(
+ Map<String, List<String>> httpRequestHeaders) {
+ this.httpRequestHeaders = httpRequestHeaders;
+ return this;
+ }
+
+ public Long getOverallTimeout() {
+ return overallTimeout;
+ }
+
+ public AtlassianJwtFetcherConfig setOverallTimeout(Long overallTimeout) {
+ this.overallTimeout = overallTimeout;
+ return this;
+ }
+
+ public Integer getMaxErrMsgSize() {
+ return maxErrMsgSize;
+ }
+
+ public AtlassianJwtFetcherConfig setMaxErrMsgSize(Integer maxErrMsgSize) {
+ this.maxErrMsgSize = maxErrMsgSize;
+ return this;
+ }
+
+ public String getUserAgent() {
+ return userAgent;
+ }
+
+ public AtlassianJwtFetcherConfig setUserAgent(String userAgent) {
+ this.userAgent = userAgent;
+ return this;
+ }
+
+ public String getSharedSecret() {
+ return sharedSecret;
+ }
+
+ public AtlassianJwtFetcherConfig setSharedSecret(String sharedSecret) {
+ this.sharedSecret = sharedSecret;
+ return this;
+ }
+
+ public String getIssuer() {
+ return issuer;
+ }
+
+ public AtlassianJwtFetcherConfig setIssuer(String issuer) {
+ this.issuer = issuer;
+ return this;
+ }
+
+ public String getSubject() {
+ return subject;
+ }
+
+ public AtlassianJwtFetcherConfig setSubject(String subject) {
+ this.subject = subject;
+ return this;
+ }
+
+ public Integer getJwtExpiresInSeconds() {
+ return jwtExpiresInSeconds;
+ }
+
+ public AtlassianJwtFetcherConfig setJwtExpiresInSeconds(Integer
jwtExpiresInSeconds) {
+ this.jwtExpiresInSeconds = jwtExpiresInSeconds;
+ return this;
+ }
+}
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/plugin/atlassianjwt/AtlassianJwtPipesPlugin.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/plugin/atlassianjwt/AtlassianJwtPipesPlugin.java
new file mode 100644
index 000000000..f5914c726
--- /dev/null
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/plugin/atlassianjwt/AtlassianJwtPipesPlugin.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.plugin.atlassianjwt;
+
+import org.pf4j.Plugin;
+import org.pf4j.PluginWrapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * pf4j plugin class for the Atlassian JWT pipes plugin. Each lifecycle callback logs an
+ * informational message and then delegates to the {@link Plugin} implementation.
+ */
+public class AtlassianJwtPipesPlugin extends Plugin {
+
+    private static final Logger LOGGER =
+            LoggerFactory.getLogger(AtlassianJwtPipesPlugin.class);
+
+    /**
+     * @param wrapper the pf4j wrapper handed to the {@link Plugin} constructor
+     */
+    public AtlassianJwtPipesPlugin(PluginWrapper wrapper) {
+        super(wrapper);
+    }
+
+    /** Logs the start event, then runs the inherited start logic. */
+    @Override
+    public void start() {
+        LOGGER.info("Starting Atlassian JWT Pipes Plugin");
+        super.start();
+    }
+
+    /** Logs the stop event, then runs the inherited stop logic. */
+    @Override
+    public void stop() {
+        LOGGER.info("Stopping Atlassian JWT Pipes Plugin");
+        super.stop();
+    }
+
+    /** Logs the delete event, then runs the inherited delete logic. */
+    @Override
+    public void delete() {
+        LOGGER.info("Deleting Atlassian JWT Pipes Plugin");
+        super.delete();
+    }
+}
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/resources/plugin.properties
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/resources/plugin.properties
new file mode 100644
index 000000000..fd674daec
--- /dev/null
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/resources/plugin.properties
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+plugin.id=tika-pipes-atlassian-jwt-plugin
+plugin.class=org.apache.tika.pipes.plugin.atlassianjwt.AtlassianJwtPipesPlugin
+plugin.version=4.0.0-SNAPSHOT
+plugin.provider=Apache Tika
+plugin.description=Pipes for Atlassian products with JWT authentication