This is an automated email from the ASF dual-hosted git repository. ndipiazza pushed a commit to branch TIKA-4604-atlassian-fetcher in repository https://gitbox.apache.org/repos/asf/tika.git
commit f982d82ad53768fcd8c9a14d733d86ade8449ec2 Author: Nicholas DiPiazza <[email protected]> AuthorDate: Mon Dec 29 05:26:24 2025 -0600 TIKA-4604: WIP - Add Atlassian JWT fetcher plugin structure - Created plugin directory structure and build files - Added pom.xml with dependencies - Created plugin.properties and assembly.xml - Added AtlassianJwtPipesPlugin and AtlassianJwtFetcherFactory - Refactored AtlassianJwtFetcherConfig to Apache Tika pattern - Copied AtlassianJwtFetcher and AtlassianJwtGenerator (needs refactoring) - Updated parent pom.xml to include new module TODO: Complete refactoring of AtlassianJwtFetcher class: - Extend AbstractTikaExtension - Add static build() method - Change fetch() signature to use Metadata instead of Maps - Update all method signatures - Test compilation --- tika-pipes/tika-pipes-plugins/pom.xml | 1 + .../tika-pipes-atlassian-jwt/pom.xml | 130 ++++++++ .../src/main/assembly/assembly.xml | 55 ++++ .../fetcher/atlassianjwt/AtlassianJwtFetcher.java | 351 +++++++++++++++++++++ .../atlassianjwt/AtlassianJwtFetcherFactory.java | 58 ++++ .../atlassianjwt/AtlassianJwtFetcherPlugin.java | 43 +++ .../atlassianjwt/AtlassianJwtGenerator.java | 189 +++++++++++ .../config/AtlassianJwtFetcherConfig.java | 205 ++++++++++++ .../atlassianjwt/AtlassianJwtPipesPlugin.java | 48 +++ .../src/main/resources/plugin.properties | 21 ++ 10 files changed, 1101 insertions(+) diff --git a/tika-pipes/tika-pipes-plugins/pom.xml b/tika-pipes/tika-pipes-plugins/pom.xml index d33378351..16561b5d0 100644 --- a/tika-pipes/tika-pipes-plugins/pom.xml +++ b/tika-pipes/tika-pipes-plugins/pom.xml @@ -32,6 +32,7 @@ <packaging>pom</packaging> <modules> + <module>tika-pipes-atlassian-jwt</module> <module>tika-pipes-az-blob</module> <module>tika-pipes-csv</module> <module>tika-pipes-file-system</module> diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/pom.xml b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/pom.xml new file mode 100644 index 000000000..da622ec8d --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/pom.xml @@ -0,0 +1,130 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>tika-pipes-plugins</artifactId> + <groupId>org.apache.tika</groupId> + <version>4.0.0-SNAPSHOT</version> + </parent> + <modelVersion>4.0.0</modelVersion> + + <artifactId>tika-pipes-atlassian-jwt</artifactId> + <name>Apache Tika Pipes Atlassian JWT</name> + <properties> + <!-- Never include the core artifacts in your plugin lib directory. If you do, it will cause the classloading + to get messed up when finding your plugins. --> + <plugin.excluded.artifactIds>tika-core,tika-pipes-api,tika-serialization,tika-plugins-core</plugin.excluded.artifactIds> + <plugin.excluded.groupIds>org.apache.logging.log4j,org.slf4j</plugin.excluded.groupIds> + </properties> + + <dependencies> + <dependency> + <groupId>org.apache.logging.log4j</groupId> + <artifactId>log4j-slf4j2-impl</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>${project.groupId}</groupId> + <artifactId>tika-pipes-api</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>${project.groupId}</groupId> + <artifactId>tika-httpclient-commons</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>com.nimbusds</groupId> + <artifactId>nimbus-jose-jwt</artifactId> + </dependency> + <dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + </dependency> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-databind</artifactId> + </dependency> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-annotations</artifactId> + </dependency> + <dependency> + <groupId>org.mockito</groupId> + <artifactId>mockito-core</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>${project.groupId}</groupId> + <artifactId>tika-pipes-core</artifactId> + <version>${project.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.junit.jupiter</groupId> + <artifactId>junit-jupiter</artifactId> + <scope>test</scope> + </dependency> + </dependencies> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-dependency-plugin</artifactId> + <executions> + <execution> + <id>copy-dependencies</id> + <phase>package</phase> + <goals> + <goal>copy-dependencies</goal> + </goals> + <configuration> + <outputDirectory>${project.build.directory}/lib</outputDirectory> + <includeScope>compile</includeScope> + <excludeArtifactIds>${plugin.excluded.artifactIds}</excludeArtifactIds> + <excludeGroupIds>${plugin.excluded.groupIds}</excludeGroupIds> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <artifactId>maven-assembly-plugin</artifactId> + <configuration> + <descriptors> + <descriptor>src/main/assembly/assembly.xml</descriptor> + </descriptors> + <appendAssemblyId>false</appendAssemblyId> + </configuration> + <executions> + <execution> + <id>make-assembly</id> + <phase>package</phase> + <goals> + <goal>single</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> +</project> diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/assembly/assembly.xml b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/assembly/assembly.xml new file mode 100644 index 000000000..ea0f8b4a1 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/assembly/assembly.xml @@ -0,0 +1,55 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<assembly xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xmlns="http://maven.apache.org/ASSEMBLY/2.0.0" + xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.0.0 + http://maven.apache.org/xsd/assembly-2.0.0.xsd"> + <id>dependencies-zip</id> + <formats> + <format>zip</format> + </formats> + <includeBaseDirectory>false</includeBaseDirectory> + <fileSets> + <fileSet> + <directory>${project.build.directory}/lib</directory> + <outputDirectory>/lib</outputDirectory> + </fileSet> + <fileSet> + <directory>${project.build.directory}</directory> + <outputDirectory>/lib</outputDirectory> + <includes> + <include>${project.artifactId}-${project.version}.jar</include> + </includes> + </fileSet> + <fileSet> + <directory>${project.build.directory}</directory> + <outputDirectory>/</outputDirectory> + <includes> + <include>classes/META-INF/extensions.idx</include> + <include>classes/META-INF/MANIFEST.MF</include> + </includes> + </fileSet> + <fileSet> + <directory>${project.basedir}/src/main/resources</directory> + <outputDirectory>/</outputDirectory> + <includes> + <include>plugin.properties</include> + </includes> + </fileSet> + </fileSets> +</assembly> diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java new file mode 100644 index 000000000..d459b0093 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java @@ -0,0 +1,351 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.fetcher.atlassianjwt; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.InetAddress; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Timer; +import java.util.TimerTask; +import java.util.concurrent.atomic.AtomicBoolean; + +import com.nimbusds.jose.JOSEException; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.io.IOUtils; +import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream; +import org.apache.http.ConnectionClosedException; +import org.apache.http.Header; +import org.apache.http.HttpConnection; +import org.apache.http.HttpEntity; +import org.apache.http.HttpInetConnection; +import org.apache.http.HttpResponse; +import org.apache.http.client.HttpClient; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.impl.conn.ConnectionShutdownException; +import org.apache.http.util.EntityUtils; +import org.pf4j.Extension; + +import org.apache.tika.client.HttpClientFactory; +import org.apache.tika.exception.TikaConfigException; +import org.apache.tika.exception.TikaException; +import org.apache.tika.exception.TikaTimeoutException; +import org.apache.tika.io.TemporaryResources; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Property; +import org.apache.tika.metadata.TikaCoreProperties; +import org.apache.tika.pipes.api.fetcher.Fetcher; +import org.apache.tika.pipes.api.fetcher.FetcherConfig; +import org.apache.tika.pipes.fetcher.atlassianjwt.config.AtlassianJwtFetcherConfig; +import org.apache.tika.utils.StringUtils; + +@Extension +@Slf4j +public class AtlassianJwtFetcher implements Fetcher { + private final HttpClientFactory httpClientFactory = new HttpClientFactory(); + public static String HTTP_HEADER_PREFIX = "http-header:"; + public static String HTTP_FETCH_PREFIX = "http-connection:"; + + public static Property HTTP_STATUS_CODE = Property.externalInteger(HTTP_HEADER_PREFIX + "status-code"); + public static Property HTTP_NUM_REDIRECTS = Property.externalInteger(HTTP_FETCH_PREFIX + "num-redirects"); + public static Property HTTP_TARGET_URL = Property.externalText(HTTP_FETCH_PREFIX + "target-url"); + public static Property HTTP_TARGET_IP_ADDRESS = Property.externalText(HTTP_FETCH_PREFIX + "target-ip-address"); + public static Property HTTP_FETCH_TRUNCATED = Property.externalBoolean(HTTP_FETCH_PREFIX + "fetch-truncated"); + public static Property HTTP_CONTENT_ENCODING = Property.externalText(HTTP_HEADER_PREFIX + "content-encoding"); + public static Property HTTP_CONTENT_TYPE = Property.externalText(HTTP_HEADER_PREFIX + "content-type"); + + private static final String USER_AGENT = "User-Agent"; + + private HttpClient httpClient; + private HttpClient noCompressHttpClient; + private AtlassianJwtGenerator jwtGenerator; + private boolean isInit = false; + + @Override + public InputStream fetch(FetcherConfig fetcherConfig, String fetchKey, Map<String, Object> fetchMetadata, Map<String, Object> responseMetadata) { + try { + AtlassianJwtFetcherConfig atlassianJwtFetcherConfig = (AtlassianJwtFetcherConfig) fetcherConfig; + initIfNeeded(atlassianJwtFetcherConfig); + HttpGet get = new HttpGet(fetchKey); + RequestConfig requestConfig = RequestConfig + .custom() + .setMaxRedirects(atlassianJwtFetcherConfig.getMaxRedirects()) + .setRedirectsEnabled(atlassianJwtFetcherConfig.getMaxRedirects() > 0) + .build(); + get.setConfig(requestConfig); + putAdditionalHeadersOnRequest(atlassianJwtFetcherConfig, get, fetchKey); + return execute(get, atlassianJwtFetcherConfig, fetchMetadata, httpClient, true); + } catch (TikaException | IOException | JOSEException | URISyntaxException | NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } + } + + private void putAdditionalHeadersOnRequest(AtlassianJwtFetcherConfig atlassianJwtFetcherConfig, HttpGet httpGet, String url) + throws TikaException, JOSEException, URISyntaxException, NoSuchAlgorithmException { + + if (!StringUtils.isBlank(atlassianJwtFetcherConfig.getUserAgent())) { + httpGet.setHeader(USER_AGENT, atlassianJwtFetcherConfig.getUserAgent()); + } + if (atlassianJwtFetcherConfig.getHttpRequestHeaders() != null) { + atlassianJwtFetcherConfig.getHttpRequestHeaders() + .forEach((header, values) -> { + for (String value : values) { + httpGet.addHeader(header, value); + } + }); + } + if (jwtGenerator != null) { + String jwt = jwtGenerator.generateJwt("GET", url); + httpGet.setHeader("Authorization", "JWT " + jwt); + } else { + log.warn("No JWT generator available - authorization header not set"); + } + } + + private InputStream execute(HttpGet get, AtlassianJwtFetcherConfig atlassianJwtFetcherConfig, + Map<String, Object> fetchMetadata, HttpClient client, + boolean retryOnBadLength) throws IOException { + HttpClientContext context = HttpClientContext.create(); + HttpResponse response = null; + final AtomicBoolean timeout = new AtomicBoolean(false); + Timer timer = null; + long overallTimeout = atlassianJwtFetcherConfig.getOverallTimeout() == null ? -1 : atlassianJwtFetcherConfig.getOverallTimeout(); + try { + if (overallTimeout > -1) { + TimerTask task = new TimerTask() { + @Override + public void run() { + timeout.set(true); + if (get != null) { + get.abort(); + } + } + }; + timer = new Timer(false); + timer.schedule(task, overallTimeout); + } + response = client.execute(get, context); + + updateMetadata(get.getURI().toString(), response, context, fetchMetadata, atlassianJwtFetcherConfig); + + int code = response.getStatusLine().getStatusCode(); + log.info("Fetch id {} status code {}", get.getURI(), code); + if (code < 200 || code > 299) { + throw new IOException("bad status code: " + code + " :: " + responseToString(atlassianJwtFetcherConfig, response)); + } + try (InputStream is = response.getEntity().getContent()) { + return spool(atlassianJwtFetcherConfig, is, fetchMetadata); + } + } catch (ConnectionClosedException e) { + if (retryOnBadLength && e.getMessage() != null && e.getMessage().contains("Premature end of Content-Length delimited message")) { + log.warn("premature end of content-length delimited message; retrying with content compression disabled for {}", get.getURI()); + return execute(get, atlassianJwtFetcherConfig, fetchMetadata, noCompressHttpClient, false); + } + throw e; + } catch (IOException e) { + if (timeout.get()) { + throw new TikaTimeoutException("Overall timeout after " + overallTimeout + "ms"); + } else { + throw e; + } + } finally { + if (timer != null) { + timer.cancel(); + timer.purge(); + } + if (response != null) { + EntityUtils.consumeQuietly(response.getEntity()); + } + if (response instanceof CloseableHttpResponse) { + ((CloseableHttpResponse) response).close(); + } + } + } + + private InputStream spool(AtlassianJwtFetcherConfig atlassianJwtFetcherConfig, InputStream content, Map<String, Object> fetchMetadata) throws IOException { + long start = System.currentTimeMillis(); + TemporaryResources tmp = new TemporaryResources(); + Path tmpFile = tmp.createTempFile(); + if (atlassianJwtFetcherConfig.getMaxSpoolSize() < 0) { + Files.copy(content, tmpFile, StandardCopyOption.REPLACE_EXISTING); + } else { + try (OutputStream os = Files.newOutputStream(tmpFile)) { + long totalRead = IOUtils.copyLarge(content, os, 0, atlassianJwtFetcherConfig.getMaxSpoolSize()); + if (totalRead == atlassianJwtFetcherConfig.getMaxSpoolSize() && content.read() != -1) { + fetchMetadata.put(HTTP_FETCH_TRUNCATED.getName(), "true"); + } + } + } + long elapsed = System.currentTimeMillis() - start; + log.debug("took {} ms to copy to local tmp file", elapsed); + return TikaInputStream.get(tmpFile); + } + + private void updateMetadata(String url, HttpResponse response, HttpClientContext context, + Map<String, Object> fetchMetadata, + AtlassianJwtFetcherConfig atlassianJwtFetcherConfig) { + if (response == null) { + return; + } + + if (response.getStatusLine() != null) { + fetchMetadata.put(HTTP_STATUS_CODE.getName(), response.getStatusLine().getStatusCode()); + } + + HttpEntity entity = response.getEntity(); + if (entity != null && entity.getContentEncoding() != null) { + fetchMetadata.put(HTTP_CONTENT_ENCODING.getName(), entity.getContentEncoding().getValue()); + } + if (entity != null && entity.getContentType() != null) { + fetchMetadata.put(HTTP_CONTENT_TYPE.getName(), entity.getContentType().getValue()); + } + + if (atlassianJwtFetcherConfig.getHttpHeaders() != null) { + for (String h : atlassianJwtFetcherConfig.getHttpHeaders()) { + Header[] headers = response.getHeaders(h); + if (headers != null && headers.length > 0) { + String name = HTTP_HEADER_PREFIX + h; + List<String> headerList = new ArrayList<>(); + fetchMetadata.put(name, headerList); + for (Header header : headers) { + headerList.add(header.getValue()); + } + fetchMetadata.put(name, headerList); + } + } + } + List<URI> uriList = context.getRedirectLocations(); + if (uriList == null) { + fetchMetadata.put(HTTP_NUM_REDIRECTS.getName(), 0); + fetchMetadata.put(HTTP_TARGET_URL.getName(), url); + } else { + fetchMetadata.put(HTTP_NUM_REDIRECTS.getName(), uriList.size()); + try { + URI uri = uriList.get(uriList.size() - 1); + if (uri != null) { + URL u = uri.toURL(); + fetchMetadata.put(HTTP_TARGET_URL.getName(), u.toString()); + fetchMetadata.put(TikaCoreProperties.RESOURCE_NAME_KEY, u.getFile()); + } + } catch (MalformedURLException e) { + // swallow + } + } + HttpConnection connection = context.getConnection(); + if (connection instanceof HttpInetConnection) { + try { + InetAddress inetAddress = ((HttpInetConnection) connection).getRemoteAddress(); + if (inetAddress != null) { + fetchMetadata.put(HTTP_TARGET_IP_ADDRESS.getName(), inetAddress.getHostAddress()); + } + } catch (ConnectionShutdownException e) { + log.warn("connection shutdown while trying to get target URL: " + url); + } + } + } + + private String responseToString(AtlassianJwtFetcherConfig atlassianJwtFetcherConfig, HttpResponse response) { + if (response.getEntity() == null) { + return ""; + } + try (InputStream is = response.getEntity().getContent()) { + UnsynchronizedByteArrayOutputStream bos = UnsynchronizedByteArrayOutputStream.builder().get(); + IOUtils.copyLarge(is, bos, 0, atlassianJwtFetcherConfig.getMaxErrMsgSize()); + return bos.toString(StandardCharsets.UTF_8); + } catch (IOException e) { + log.warn("IOException trying to read error message", e); + return ""; + } catch (NullPointerException e) { + return ""; + } finally { + EntityUtils.consumeQuietly(response.getEntity()); + } + } + + public void initIfNeeded(AtlassianJwtFetcherConfig atlassianJwtFetcherConfig) throws TikaConfigException { + if (isInit) { + return; + } + + log.info("AtlassianJwtFetcher initialization:"); + log.info("Shared Secret: {}", atlassianJwtFetcherConfig.getSharedSecret() != null ? "[PRESENT]" : "[MISSING]"); + log.info("Issuer: {}", atlassianJwtFetcherConfig.getIssuer()); + log.info("Subject: {}", atlassianJwtFetcherConfig.getSubject()); + log.info("JWT Expires In Seconds: {}", atlassianJwtFetcherConfig.getJwtExpiresInSeconds()); + + checkInitialization(atlassianJwtFetcherConfig); + + if (atlassianJwtFetcherConfig.getSocketTimeout() != null) { + httpClientFactory.setSocketTimeout(atlassianJwtFetcherConfig.getSocketTimeout()); + } + if (atlassianJwtFetcherConfig.getRequestTimeout() != null) { + httpClientFactory.setRequestTimeout(atlassianJwtFetcherConfig.getRequestTimeout()); + } + if (atlassianJwtFetcherConfig.getConnectTimeout() != null) { + httpClientFactory.setSocketTimeout(atlassianJwtFetcherConfig.getConnectTimeout()); + } + if (atlassianJwtFetcherConfig.getMaxConnections() != null) { + httpClientFactory.setMaxConnections(atlassianJwtFetcherConfig.getMaxConnections()); + } + if (atlassianJwtFetcherConfig.getMaxConnectionsPerRoute() != null) { + httpClientFactory.setMaxConnectionsPerRoute(atlassianJwtFetcherConfig.getMaxConnectionsPerRoute()); + } + + httpClient = httpClientFactory.build(); + HttpClientFactory cp = httpClientFactory.copy(); + cp.setDisableContentCompression(true); + noCompressHttpClient = cp.build(); + + if (!StringUtils.isBlank(atlassianJwtFetcherConfig.getSharedSecret()) && + !StringUtils.isBlank(atlassianJwtFetcherConfig.getIssuer())) { + jwtGenerator = new AtlassianJwtGenerator( + atlassianJwtFetcherConfig.getSharedSecret(), + atlassianJwtFetcherConfig.getIssuer(), + atlassianJwtFetcherConfig.getSubject(), + atlassianJwtFetcherConfig.getJwtExpiresInSeconds() + ); + } else { + log.warn("JWT generator not created. missing required configuration"); + } + + isInit = true; + } + + public void checkInitialization(AtlassianJwtFetcherConfig atlassianJwtFetcherConfig) throws TikaConfigException { + if (StringUtils.isBlank(atlassianJwtFetcherConfig.getSharedSecret())) { + throw new TikaConfigException("Atlassian JWT Fetcher requires a shared secret"); + } + if (StringUtils.isBlank(atlassianJwtFetcherConfig.getIssuer())) { + throw new TikaConfigException("Atlassian JWT Fetcher requires an issuer"); + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcherFactory.java b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcherFactory.java new file mode 100644 index 000000000..728cb7b6f --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcherFactory.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.fetcher.atlassianjwt; + +import java.io.IOException; + +import org.pf4j.Extension; + +import org.apache.tika.exception.TikaConfigException; +import org.apache.tika.pipes.api.fetcher.Fetcher; +import org.apache.tika.pipes.api.fetcher.FetcherFactory; +import org.apache.tika.plugins.ExtensionConfig; + +/** + * Factory for creating Atlassian JWT fetchers. + * + * <p>Example JSON configuration: + * <pre> + * "fetchers": { + * "atlassian-jwt-fetcher": { + * "my-atlassian-fetcher": { + * "sharedSecret": "your-shared-secret", + * "issuer": "your-app-key", + * "connectTimeout": 30000, + * "socketTimeout": 120000 + * } + * } + * } + * </pre> + */ +@Extension +public class AtlassianJwtFetcherFactory implements FetcherFactory { + private static final String NAME = "atlassian-jwt-fetcher"; + + @Override + public String getName() { + return NAME; + } + + @Override + public Fetcher buildExtension(ExtensionConfig extensionConfig) throws IOException, TikaConfigException { + return AtlassianJwtFetcher.build(extensionConfig); + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcherPlugin.java b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcherPlugin.java new file mode 100644 index 000000000..849436d93 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcherPlugin.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.fetcher.atlassianjwt; + +import org.pf4j.Plugin; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class AtlassianJwtFetcherPlugin extends Plugin { + private static final Logger LOG = LoggerFactory.getLogger(AtlassianJwtFetcherPlugin.class); + + @Override + public void start() { + LOG.info("Starting Atlassian JWT Fetcher Plugin"); + super.start(); + } + + @Override + public void stop() { + LOG.info("Stopping Atlassian JWT Fetcher Plugin"); + super.stop(); + } + + @Override + public void delete() { + LOG.info("Deleting Atlassian JWT Fetcher Plugin"); + super.delete(); + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtGenerator.java b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtGenerator.java new file mode 100644 index 000000000..d0fa77a62 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtGenerator.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.fetcher.atlassianjwt; + +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import com.nimbusds.jose.JOSEException; +import com.nimbusds.jose.JOSEObjectType; +import com.nimbusds.jose.JWSAlgorithm; +import com.nimbusds.jose.JWSHeader; +import com.nimbusds.jose.JWSSigner; +import com.nimbusds.jose.crypto.MACSigner; +import com.nimbusds.jwt.JWTClaimsSet; +import com.nimbusds.jwt.SignedJWT; + +public class AtlassianJwtGenerator { + private final String sharedSecret; + private final String issuer; + private final String subject; + private final int expiresInSeconds; + + public AtlassianJwtGenerator(String sharedSecret, String issuer, String subject, int expiresInSeconds) { + this.sharedSecret = sharedSecret; + this.issuer = issuer; + this.subject = subject; + this.expiresInSeconds = expiresInSeconds; + } + + public String generateJwt(String method, String url) throws JOSEException, URISyntaxException, NoSuchAlgorithmException { + String qsh = generateQueryStringHash(method, url); + + JWTClaimsSet.Builder claimsBuilder = new JWTClaimsSet.Builder() + .issuer(issuer) + .issueTime(Date.from(Instant.now())) + .expirationTime(Date.from(Instant.now().plus(expiresInSeconds, ChronoUnit.SECONDS))) + .claim("qsh", qsh); + + // Only add subject if it's not null or empty + if (subject != null && !subject.trim().isEmpty()) { + claimsBuilder.subject(subject); + } + + JWTClaimsSet claimsSet = claimsBuilder.build(); + + JWSSigner signer = new MACSigner(sharedSecret.getBytes(StandardCharsets.UTF_8)); + JWSHeader header = new JWSHeader.Builder(JWSAlgorithm.HS256) + .type(JOSEObjectType.JWT) + .build(); + SignedJWT signedJWT = new SignedJWT(header, claimsSet); + signedJWT.sign(signer); + + String jwt = signedJWT.serialize(); + + return jwt; + } + + private String generateQueryStringHash(String method, String url) throws URISyntaxException, NoSuchAlgorithmException { + URI uri = new URI(url); + String canonicalRequest = createCanonicalRequestString(method, uri); + + MessageDigest digest = MessageDigest.getInstance("SHA-256"); + byte[] hash = digest.digest(canonicalRequest.getBytes(StandardCharsets.UTF_8)); + + StringBuilder hexString = new StringBuilder(); + for (byte b : hash) { + String hex = Integer.toHexString(0xff & b); + if (hex.length() == 1) { + hexString.append('0'); + } + hexString.append(hex); + } + + String qsh = hexString.toString(); + + return qsh; + } + + private String createCanonicalRequestString(String httpMethod, URI url) { + String urlPath = url.getRawPath(); + if (urlPath == null) { + urlPath = "/"; + } + + // Split on "?" and take first part + String[] pathParts = urlPath.split("\\?"); + urlPath = pathParts[0]; + + // Build path: ensure leading slash, trim trailing slashes, decode & with %26 + String path = "/" + urlPath.replaceAll("^/+", "").replaceAll("/+$", "").replace("&", "%26"); + if (path.equals("//")) { + path = "/"; + } + + // Confluence paths are prefixed with "/wiki" however that prefix should not be used + // for calculating the canonical so we strip it off in case it is present + if (path.startsWith("/wiki")) { + path = path.substring(5); // Remove "/wiki" + if (path.isEmpty()) { + path = "/"; + } + } + + String canonicalQueryString = generateCanonicalQueryString(url.getQuery()); + String canonicalRequest = httpMethod.toUpperCase() + "&" + path + "&" + canonicalQueryString; + + return canonicalRequest; + } + + private String generateCanonicalQueryString(String query) { + if (query == null || query.isEmpty()) { + return ""; + } + + // Query parameters go into a map for uniqueness + further iteration + Map<String, List<String>> queryParams = new HashMap<>(); + String[] params = query.split("&"); + + for (String param : params) { + String[] keyValue = param.split("=", 2); + String key = keyValue[0]; + String value = keyValue.length > 1 ? keyValue[1] : ""; + + // Skip jwt param, unneeded but present + if ("jwt".equals(key)) { + continue; + } + + queryParams.computeIfAbsent(key, k -> new ArrayList<>()).add(value); + } + + List<String> canonicalParams = new ArrayList<>(); + + for (Map.Entry<String, List<String>> entry : queryParams.entrySet()) { + String key = URLEncoder.encode(entry.getKey(), StandardCharsets.UTF_8); + List<String> values = entry.getValue(); + + // URL encode all values + List<String> encodedValues = values.stream() + .map(v -> URLEncoder.encode(v, StandardCharsets.UTF_8)) + .collect(Collectors.toList()); + + // Query parameter values need to be sorted in alphabetical order + Collections.sort(encodedValues); + + // Individual parameter values are comma separated + String joinedValues = String.join(",", encodedValues); + String pair = key + "=" + joinedValues; + // Decode + -> %20 + pair = pair.replace("+", "%20"); + + canonicalParams.add(pair); + } + + // And the whole collection must be sorted + // (https://developer.atlassian.com/cloud/bitbucket/query-string-hash/#sort-query-parameter-value-lists) + Collections.sort(canonicalParams); + + // And finally rejoined to create the canonical query string + return String.join("&", canonicalParams); + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/config/AtlassianJwtFetcherConfig.java b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/config/AtlassianJwtFetcherConfig.java new file mode 100644 index 000000000..d8f969263 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/config/AtlassianJwtFetcherConfig.java @@ -0,0 +1,205 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.fetcher.atlassianjwt.config; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import org.apache.tika.exception.TikaConfigException; + +public class AtlassianJwtFetcherConfig { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static AtlassianJwtFetcherConfig load(final String json) + throws TikaConfigException { + try { + return OBJECT_MAPPER.readValue(json, AtlassianJwtFetcherConfig.class); + } catch (JsonProcessingException e) { + throw new TikaConfigException( + "Failed to parse AtlassianJwtFetcherConfig from JSON", e); + } + } + + private Integer maxConnectionsPerRoute = 1000; + private Integer maxConnections = 2000; + private Integer requestTimeout = 120000; + private Integer connectTimeout = 120000; + private Integer socketTimeout = 120000; + private Long maxSpoolSize = -1L; + private Integer maxRedirects = 0; + private List<String> httpHeaders = new ArrayList<>(); + private Map<String, List<String>> httpRequestHeaders = new LinkedHashMap<>(); + private Long overallTimeout = 120000L; + private Integer maxErrMsgSize = 10000000; + private String userAgent; + + private String sharedSecret; + private String issuer; + private String subject; + private Integer jwtExpiresInSeconds = 3600; + + public Integer getMaxConnectionsPerRoute() { + return maxConnectionsPerRoute; + } + + public AtlassianJwtFetcherConfig setMaxConnectionsPerRoute(Integer maxConnectionsPerRoute) { + this.maxConnectionsPerRoute = maxConnectionsPerRoute; + return this; + } + + public Integer getMaxConnections() { + return maxConnections; + } + + public AtlassianJwtFetcherConfig setMaxConnections(Integer maxConnections) { + this.maxConnections = maxConnections; + return this; + } + + public Integer getRequestTimeout() { + return requestTimeout; + } + + public AtlassianJwtFetcherConfig setRequestTimeout(Integer requestTimeout) { + this.requestTimeout = requestTimeout; + return this; + } + + public Integer getConnectTimeout() { + return connectTimeout; + } + + public AtlassianJwtFetcherConfig setConnectTimeout(Integer connectTimeout) { + this.connectTimeout = connectTimeout; + return this; + } + + public Integer getSocketTimeout() { + return socketTimeout; + } + + public AtlassianJwtFetcherConfig setSocketTimeout(Integer socketTimeout) { + this.socketTimeout = socketTimeout; + return this; + } + + public Long getMaxSpoolSize() { + return maxSpoolSize; + } + + public AtlassianJwtFetcherConfig setMaxSpoolSize(Long maxSpoolSize) { + this.maxSpoolSize = maxSpoolSize; + return this; + } + + public Integer getMaxRedirects() { + return maxRedirects; + } + + public AtlassianJwtFetcherConfig setMaxRedirects(Integer maxRedirects) { + this.maxRedirects = maxRedirects; + return this; + } + + public List<String> getHttpHeaders() { + return httpHeaders; + } + + public AtlassianJwtFetcherConfig setHttpHeaders(List<String> httpHeaders) { + this.httpHeaders = httpHeaders; + return this; + } + + public Map<String, List<String>> getHttpRequestHeaders() { + return httpRequestHeaders; + } + + public AtlassianJwtFetcherConfig setHttpRequestHeaders( + Map<String, List<String>> httpRequestHeaders) { + this.httpRequestHeaders = httpRequestHeaders; + return this; + } + + public Long getOverallTimeout() { + return overallTimeout; + } + + public AtlassianJwtFetcherConfig setOverallTimeout(Long overallTimeout) { + this.overallTimeout = overallTimeout; + return this; + } + + public Integer getMaxErrMsgSize() { + return maxErrMsgSize; + } + + public AtlassianJwtFetcherConfig setMaxErrMsgSize(Integer maxErrMsgSize) { + this.maxErrMsgSize = maxErrMsgSize; + return this; + } + + public String getUserAgent() { + return userAgent; + } + + public AtlassianJwtFetcherConfig setUserAgent(String userAgent) { + this.userAgent = userAgent; + return this; + } + + public String getSharedSecret() { + return sharedSecret; + } + + public AtlassianJwtFetcherConfig setSharedSecret(String sharedSecret) { + this.sharedSecret = sharedSecret; + return this; + } + + public String getIssuer() { + return issuer; + } + + public AtlassianJwtFetcherConfig setIssuer(String issuer) { + this.issuer = issuer; + return this; + } + + public String getSubject() { + return subject; + } + + public AtlassianJwtFetcherConfig setSubject(String subject) { + this.subject = subject; + return this; + } + + public Integer getJwtExpiresInSeconds() { + return jwtExpiresInSeconds; + } + + public AtlassianJwtFetcherConfig setJwtExpiresInSeconds(Integer jwtExpiresInSeconds) { + this.jwtExpiresInSeconds = jwtExpiresInSeconds; + return this; + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/plugin/atlassianjwt/AtlassianJwtPipesPlugin.java b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/plugin/atlassianjwt/AtlassianJwtPipesPlugin.java new file mode 100644 index 000000000..f5914c726 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/plugin/atlassianjwt/AtlassianJwtPipesPlugin.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.plugin.atlassianjwt; + +import org.pf4j.Plugin; +import org.pf4j.PluginWrapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class AtlassianJwtPipesPlugin extends Plugin { + private static final Logger LOG = LoggerFactory.getLogger(AtlassianJwtPipesPlugin.class); + + public AtlassianJwtPipesPlugin(PluginWrapper wrapper) { + super(wrapper); + } + + @Override + public void start() { + LOG.info("Starting Atlassian JWT Pipes Plugin"); + super.start(); + } + + @Override + public void stop() { + LOG.info("Stopping Atlassian JWT Pipes Plugin"); + super.stop(); + } + + @Override + public void delete() { + LOG.info("Deleting Atlassian JWT Pipes Plugin"); + super.delete(); + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/resources/plugin.properties b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/resources/plugin.properties new file mode 100644 index 000000000..fd674daec --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/resources/plugin.properties @@ -0,0 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +plugin.id=tika-pipes-atlassian-jwt-plugin +plugin.class=org.apache.tika.pipes.plugin.atlassianjwt.AtlassianJwtPipesPlugin +plugin.version=4.0.0-SNAPSHOT +plugin.provider=Apache Tika +plugin.description=Pipes for Atlassian products with JWT authentication
