This is an automated email from the ASF dual-hosted git repository.
ndipiazza pushed a commit to branch TIKA-4604-atlassian-fetcher
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/TIKA-4604-atlassian-fetcher by
this push:
new de0fd4b4f TIKA-4604: Complete refactoring of AtlassianJwtFetcher to
Apache Tika pattern
de0fd4b4f is described below
commit de0fd4b4f18d44827c940d7496d58b1432ba008e
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Mon Dec 29 05:43:00 2025 -0600
TIKA-4604: Complete refactoring of AtlassianJwtFetcher to Apache Tika
pattern
- Now extends AbstractTikaExtension while still implementing Fetcher
(the pf4j @Extension annotation was removed)
- Added static build() method and constructor
- Changed fetch() signature to use Metadata instead of Maps
- Updated all method signatures throughout
- Replaced Map operations with Metadata.set() and Metadata.add()
- Added initialize() method for HTTP client and JWT generator setup
- Removed old initIfNeeded and checkInitialization methods
- Fixed all imports and added missing ones (List, URL,
MalformedURLException)
- Replaced Lombok log with slf4j LOG
- Code now compiles successfully
Build verified (compile only; tests were skipped): mvn clean compile -DskipTests
---
.../fetcher/atlassianjwt/AtlassianJwtFetcher.java | 256 ++++++++++-----------
...etcher.java => AtlassianJwtFetcher.java.backup} | 0
2 files changed, 118 insertions(+), 138 deletions(-)
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java
index d459b0093..e1594650b 100644
---
a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java
@@ -27,17 +27,13 @@ import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.nio.file.StandardCopyOption;
import java.security.NoSuchAlgorithmException;
-import java.util.ArrayList;
import java.util.List;
-import java.util.Map;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.atomic.AtomicBoolean;
import com.nimbusds.jose.JOSEException;
-import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
import org.apache.http.ConnectionClosedException;
@@ -53,7 +49,8 @@ import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.impl.conn.ConnectionShutdownException;
import org.apache.http.util.EntityUtils;
-import org.pf4j.Extension;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import org.apache.tika.client.HttpClientFactory;
import org.apache.tika.exception.TikaConfigException;
@@ -61,16 +58,29 @@ import org.apache.tika.exception.TikaException;
import org.apache.tika.exception.TikaTimeoutException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.ParseContext;
import org.apache.tika.pipes.api.fetcher.Fetcher;
-import org.apache.tika.pipes.api.fetcher.FetcherConfig;
import
org.apache.tika.pipes.fetcher.atlassianjwt.config.AtlassianJwtFetcherConfig;
+import org.apache.tika.plugins.AbstractTikaExtension;
+import org.apache.tika.plugins.ExtensionConfig;
import org.apache.tika.utils.StringUtils;
-@Extension
-@Slf4j
-public class AtlassianJwtFetcher implements Fetcher {
+public class AtlassianJwtFetcher extends AbstractTikaExtension implements
Fetcher {
+
+ private static final Logger LOG =
LoggerFactory.getLogger(AtlassianJwtFetcher.class);
+
+ public static AtlassianJwtFetcher build(ExtensionConfig pluginConfig)
+ throws TikaConfigException, IOException {
+ AtlassianJwtFetcherConfig config =
+ AtlassianJwtFetcherConfig.load(pluginConfig.json());
+ AtlassianJwtFetcher fetcher = new AtlassianJwtFetcher(pluginConfig,
config);
+ fetcher.initialize();
+ return fetcher;
+ }
+
private final HttpClientFactory httpClientFactory = new
HttpClientFactory();
public static String HTTP_HEADER_PREFIX = "http-header:";
public static String HTTP_FETCH_PREFIX = "http-connection:";
@@ -85,60 +95,93 @@ public class AtlassianJwtFetcher implements Fetcher {
private static final String USER_AGENT = "User-Agent";
+ private AtlassianJwtFetcherConfig config;
private HttpClient httpClient;
private HttpClient noCompressHttpClient;
private AtlassianJwtGenerator jwtGenerator;
- private boolean isInit = false;
- @Override
- public InputStream fetch(FetcherConfig fetcherConfig, String fetchKey,
Map<String, Object> fetchMetadata, Map<String, Object> responseMetadata) {
- try {
- AtlassianJwtFetcherConfig atlassianJwtFetcherConfig =
(AtlassianJwtFetcherConfig) fetcherConfig;
- initIfNeeded(atlassianJwtFetcherConfig);
- HttpGet get = new HttpGet(fetchKey);
- RequestConfig requestConfig = RequestConfig
- .custom()
-
.setMaxRedirects(atlassianJwtFetcherConfig.getMaxRedirects())
-
.setRedirectsEnabled(atlassianJwtFetcherConfig.getMaxRedirects() > 0)
- .build();
- get.setConfig(requestConfig);
- putAdditionalHeadersOnRequest(atlassianJwtFetcherConfig, get,
fetchKey);
- return execute(get, atlassianJwtFetcherConfig, fetchMetadata,
httpClient, true);
- } catch (TikaException | IOException | JOSEException |
URISyntaxException | NoSuchAlgorithmException e) {
- throw new RuntimeException(e);
+ public AtlassianJwtFetcher(ExtensionConfig pluginConfig,
+ AtlassianJwtFetcherConfig config) {
+ super(pluginConfig);
+ this.config = config;
+ }
+
+ public void initialize() throws IOException, TikaConfigException {
+ // Configure HTTP client factory
+ if (config.getSocketTimeout() != null) {
+ httpClientFactory.setSocketTimeout(config.getSocketTimeout());
+ }
+ if (config.getRequestTimeout() != null) {
+ httpClientFactory.setRequestTimeout(config.getRequestTimeout());
+ }
+ if (config.getConnectTimeout() != null) {
+ httpClientFactory.setConnectTimeout(config.getConnectTimeout());
+ }
+ if (config.getMaxConnections() != null) {
+ httpClientFactory.setMaxConnections(config.getMaxConnections());
+ }
+ if (config.getMaxConnectionsPerRoute() != null) {
+
httpClientFactory.setMaxConnectionsPerRoute(config.getMaxConnectionsPerRoute());
+ }
+
+ // Initialize HTTP client
+ httpClient = httpClientFactory.build();
+ HttpClientFactory cp = httpClientFactory.copy();
+ cp.setDisableContentCompression(true);
+ noCompressHttpClient = cp.build();
+
+ // Initialize JWT generator if configured
+ if (!StringUtils.isBlank(config.getSharedSecret())) {
+ jwtGenerator = new AtlassianJwtGenerator(config.getSharedSecret(),
+ config.getIssuer(), config.getSubject(),
+ config.getJwtExpiresInSeconds());
}
}
- private void putAdditionalHeadersOnRequest(AtlassianJwtFetcherConfig
atlassianJwtFetcherConfig, HttpGet httpGet, String url)
- throws TikaException, JOSEException, URISyntaxException,
NoSuchAlgorithmException {
+ @Override
+ public TikaInputStream fetch(String fetchKey, Metadata metadata,
ParseContext parseContext)
+ throws IOException, TikaException {
+ HttpGet get = new HttpGet(fetchKey);
+ RequestConfig requestConfig = RequestConfig.custom()
+ .setMaxRedirects(config.getMaxRedirects())
+ .setRedirectsEnabled(config.getMaxRedirects() > 0).build();
+ get.setConfig(requestConfig);
+ putAdditionalHeadersOnRequest(get, fetchKey);
+ return execute(get, metadata, httpClient, true);
+ }
- if (!StringUtils.isBlank(atlassianJwtFetcherConfig.getUserAgent())) {
- httpGet.setHeader(USER_AGENT,
atlassianJwtFetcherConfig.getUserAgent());
+ private void putAdditionalHeadersOnRequest(HttpGet httpGet, String url)
+ throws TikaException {
+ if (!StringUtils.isBlank(config.getUserAgent())) {
+ httpGet.setHeader(USER_AGENT, config.getUserAgent());
}
- if (atlassianJwtFetcherConfig.getHttpRequestHeaders() != null) {
- atlassianJwtFetcherConfig.getHttpRequestHeaders()
- .forEach((header, values) -> {
- for (String value : values) {
- httpGet.addHeader(header, value);
- }
- });
+ if (config.getHttpRequestHeaders() != null) {
+ config.getHttpRequestHeaders().forEach((header, values) -> {
+ for (String value : values) {
+ httpGet.addHeader(header, value);
+ }
+ });
}
if (jwtGenerator != null) {
- String jwt = jwtGenerator.generateJwt("GET", url);
- httpGet.setHeader("Authorization", "JWT " + jwt);
+ try {
+ String jwt = jwtGenerator.generateJwt("GET", url);
+ httpGet.setHeader("Authorization", "JWT " + jwt);
+ } catch (JOSEException | URISyntaxException |
NoSuchAlgorithmException e) {
+ throw new TikaException("Failed to generate JWT token", e);
+ }
} else {
- log.warn("No JWT generator available - authorization header not
set");
+ LOG.warn("No JWT generator available - authorization header not
set");
}
}
- private InputStream execute(HttpGet get, AtlassianJwtFetcherConfig
atlassianJwtFetcherConfig,
- Map<String, Object> fetchMetadata, HttpClient
client,
- boolean retryOnBadLength) throws IOException {
+ private TikaInputStream execute(HttpGet get, Metadata metadata, HttpClient
client,
+ boolean retryOnBadLength)
+ throws IOException, TikaException {
HttpClientContext context = HttpClientContext.create();
HttpResponse response = null;
final AtomicBoolean timeout = new AtomicBoolean(false);
Timer timer = null;
- long overallTimeout = atlassianJwtFetcherConfig.getOverallTimeout() ==
null ? -1 : atlassianJwtFetcherConfig.getOverallTimeout();
+ long overallTimeout = config.getOverallTimeout() == null ? -1 :
config.getOverallTimeout();
try {
if (overallTimeout > -1) {
TimerTask task = new TimerTask() {
@@ -155,20 +198,20 @@ public class AtlassianJwtFetcher implements Fetcher {
}
response = client.execute(get, context);
- updateMetadata(get.getURI().toString(), response, context,
fetchMetadata, atlassianJwtFetcherConfig);
+ updateMetadata(get.getURI().toString(), response, context,
metadata);
int code = response.getStatusLine().getStatusCode();
- log.info("Fetch id {} status code {}", get.getURI(), code);
+ LOG.info("Fetch id {} status code {}", get.getURI(), code);
if (code < 200 || code > 299) {
- throw new IOException("bad status code: " + code + " :: " +
responseToString(atlassianJwtFetcherConfig, response));
+ throw new IOException("bad status code: " + code + " :: " +
responseToString(response));
}
try (InputStream is = response.getEntity().getContent()) {
- return spool(atlassianJwtFetcherConfig, is, fetchMetadata);
+ return spool(is, metadata);
}
} catch (ConnectionClosedException e) {
if (retryOnBadLength && e.getMessage() != null &&
e.getMessage().contains("Premature end of Content-Length delimited message")) {
- log.warn("premature end of content-length delimited message;
retrying with content compression disabled for {}", get.getURI());
- return execute(get, atlassianJwtFetcherConfig, fetchMetadata,
noCompressHttpClient, false);
+ LOG.warn("premature end of content-length delimited message;
retrying with content compression disabled for {}", get.getURI());
+ return execute(get, metadata, noCompressHttpClient, false);
}
throw e;
} catch (IOException e) {
@@ -191,70 +234,65 @@ public class AtlassianJwtFetcher implements Fetcher {
}
}
- private InputStream spool(AtlassianJwtFetcherConfig
atlassianJwtFetcherConfig, InputStream content, Map<String, Object>
fetchMetadata) throws IOException {
+ private TikaInputStream spool(InputStream content, Metadata metadata)
throws IOException {
long start = System.currentTimeMillis();
TemporaryResources tmp = new TemporaryResources();
- Path tmpFile = tmp.createTempFile();
- if (atlassianJwtFetcherConfig.getMaxSpoolSize() < 0) {
- Files.copy(content, tmpFile, StandardCopyOption.REPLACE_EXISTING);
+ Path tmpFile = tmp.createTempFile(metadata);
+ if (config.getMaxSpoolSize() < 0) {
+ Files.copy(content, tmpFile);
} else {
try (OutputStream os = Files.newOutputStream(tmpFile)) {
- long totalRead = IOUtils.copyLarge(content, os, 0,
atlassianJwtFetcherConfig.getMaxSpoolSize());
- if (totalRead == atlassianJwtFetcherConfig.getMaxSpoolSize()
&& content.read() != -1) {
- fetchMetadata.put(HTTP_FETCH_TRUNCATED.getName(), "true");
+ long totalRead = IOUtils.copyLarge(content, os, 0,
config.getMaxSpoolSize());
+ if (totalRead == config.getMaxSpoolSize() && content.read() !=
-1) {
+ metadata.set(HTTP_FETCH_TRUNCATED, true);
}
}
}
long elapsed = System.currentTimeMillis() - start;
- log.debug("took {} ms to copy to local tmp file", elapsed);
+ LOG.debug("took {} ms to copy to local tmp file", elapsed);
return TikaInputStream.get(tmpFile);
}
private void updateMetadata(String url, HttpResponse response,
HttpClientContext context,
- Map<String, Object> fetchMetadata,
- AtlassianJwtFetcherConfig
atlassianJwtFetcherConfig) {
+ Metadata metadata) {
if (response == null) {
return;
}
if (response.getStatusLine() != null) {
- fetchMetadata.put(HTTP_STATUS_CODE.getName(),
response.getStatusLine().getStatusCode());
+ metadata.set(HTTP_STATUS_CODE,
response.getStatusLine().getStatusCode());
}
HttpEntity entity = response.getEntity();
if (entity != null && entity.getContentEncoding() != null) {
- fetchMetadata.put(HTTP_CONTENT_ENCODING.getName(),
entity.getContentEncoding().getValue());
+ metadata.set(HTTP_CONTENT_ENCODING,
entity.getContentEncoding().getValue());
}
if (entity != null && entity.getContentType() != null) {
- fetchMetadata.put(HTTP_CONTENT_TYPE.getName(),
entity.getContentType().getValue());
+ metadata.set(HTTP_CONTENT_TYPE,
entity.getContentType().getValue());
}
- if (atlassianJwtFetcherConfig.getHttpHeaders() != null) {
- for (String h : atlassianJwtFetcherConfig.getHttpHeaders()) {
+ if (config.getHttpHeaders() != null) {
+ for (String h : config.getHttpHeaders()) {
Header[] headers = response.getHeaders(h);
if (headers != null && headers.length > 0) {
- String name = HTTP_HEADER_PREFIX + h;
- List<String> headerList = new ArrayList<>();
- fetchMetadata.put(name, headerList);
for (Header header : headers) {
- headerList.add(header.getValue());
+ metadata.add(HTTP_HEADER_PREFIX + h,
header.getValue());
}
- fetchMetadata.put(name, headerList);
}
}
}
List<URI> uriList = context.getRedirectLocations();
if (uriList == null) {
- fetchMetadata.put(HTTP_NUM_REDIRECTS.getName(), 0);
- fetchMetadata.put(HTTP_TARGET_URL.getName(), url);
+ metadata.set(HTTP_NUM_REDIRECTS, 0);
+ metadata.set(HTTP_TARGET_URL, url);
} else {
- fetchMetadata.put(HTTP_NUM_REDIRECTS.getName(), uriList.size());
+ metadata.set(HTTP_NUM_REDIRECTS, uriList.size());
try {
URI uri = uriList.get(uriList.size() - 1);
if (uri != null) {
URL u = uri.toURL();
- fetchMetadata.put(HTTP_TARGET_URL.getName(), u.toString());
- fetchMetadata.put(TikaCoreProperties.RESOURCE_NAME_KEY,
u.getFile());
+ metadata.set(HTTP_TARGET_URL, u.toString());
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY,
u.getFile());
}
} catch (MalformedURLException e) {
// swallow
@@ -265,24 +303,24 @@ public class AtlassianJwtFetcher implements Fetcher {
try {
InetAddress inetAddress = ((HttpInetConnection)
connection).getRemoteAddress();
if (inetAddress != null) {
- fetchMetadata.put(HTTP_TARGET_IP_ADDRESS.getName(),
inetAddress.getHostAddress());
+ metadata.set(HTTP_TARGET_IP_ADDRESS,
inetAddress.getHostAddress());
}
} catch (ConnectionShutdownException e) {
- log.warn("connection shutdown while trying to get target URL:
" + url);
+ LOG.warn("connection shutdown while trying to get target URL:
" + url);
}
}
}
- private String responseToString(AtlassianJwtFetcherConfig
atlassianJwtFetcherConfig, HttpResponse response) {
+ private String responseToString(HttpResponse response) {
if (response.getEntity() == null) {
return "";
}
try (InputStream is = response.getEntity().getContent()) {
UnsynchronizedByteArrayOutputStream bos =
UnsynchronizedByteArrayOutputStream.builder().get();
- IOUtils.copyLarge(is, bos, 0,
atlassianJwtFetcherConfig.getMaxErrMsgSize());
+ IOUtils.copyLarge(is, bos, 0, config.getMaxErrMsgSize());
return bos.toString(StandardCharsets.UTF_8);
} catch (IOException e) {
- log.warn("IOException trying to read error message", e);
+ LOG.warn("IOException trying to read error message", e);
return "";
} catch (NullPointerException e) {
return "";
@@ -290,62 +328,4 @@ public class AtlassianJwtFetcher implements Fetcher {
EntityUtils.consumeQuietly(response.getEntity());
}
}
-
- public void initIfNeeded(AtlassianJwtFetcherConfig
atlassianJwtFetcherConfig) throws TikaConfigException {
- if (isInit) {
- return;
- }
-
- log.info("AtlassianJwtFetcher initialization:");
- log.info("Shared Secret: {}",
atlassianJwtFetcherConfig.getSharedSecret() != null ? "[PRESENT]" :
"[MISSING]");
- log.info("Issuer: {}", atlassianJwtFetcherConfig.getIssuer());
- log.info("Subject: {}", atlassianJwtFetcherConfig.getSubject());
- log.info("JWT Expires In Seconds: {}",
atlassianJwtFetcherConfig.getJwtExpiresInSeconds());
-
- checkInitialization(atlassianJwtFetcherConfig);
-
- if (atlassianJwtFetcherConfig.getSocketTimeout() != null) {
-
httpClientFactory.setSocketTimeout(atlassianJwtFetcherConfig.getSocketTimeout());
- }
- if (atlassianJwtFetcherConfig.getRequestTimeout() != null) {
-
httpClientFactory.setRequestTimeout(atlassianJwtFetcherConfig.getRequestTimeout());
- }
- if (atlassianJwtFetcherConfig.getConnectTimeout() != null) {
-
httpClientFactory.setSocketTimeout(atlassianJwtFetcherConfig.getConnectTimeout());
- }
- if (atlassianJwtFetcherConfig.getMaxConnections() != null) {
-
httpClientFactory.setMaxConnections(atlassianJwtFetcherConfig.getMaxConnections());
- }
- if (atlassianJwtFetcherConfig.getMaxConnectionsPerRoute() != null) {
-
httpClientFactory.setMaxConnectionsPerRoute(atlassianJwtFetcherConfig.getMaxConnectionsPerRoute());
- }
-
- httpClient = httpClientFactory.build();
- HttpClientFactory cp = httpClientFactory.copy();
- cp.setDisableContentCompression(true);
- noCompressHttpClient = cp.build();
-
- if (!StringUtils.isBlank(atlassianJwtFetcherConfig.getSharedSecret())
&&
- !StringUtils.isBlank(atlassianJwtFetcherConfig.getIssuer())) {
- jwtGenerator = new AtlassianJwtGenerator(
- atlassianJwtFetcherConfig.getSharedSecret(),
- atlassianJwtFetcherConfig.getIssuer(),
- atlassianJwtFetcherConfig.getSubject(),
- atlassianJwtFetcherConfig.getJwtExpiresInSeconds()
- );
- } else {
- log.warn("JWT generator not created. missing required
configuration");
- }
-
- isInit = true;
- }
-
- public void checkInitialization(AtlassianJwtFetcherConfig
atlassianJwtFetcherConfig) throws TikaConfigException {
- if (StringUtils.isBlank(atlassianJwtFetcherConfig.getSharedSecret())) {
- throw new TikaConfigException("Atlassian JWT Fetcher requires a
shared secret");
- }
- if (StringUtils.isBlank(atlassianJwtFetcherConfig.getIssuer())) {
- throw new TikaConfigException("Atlassian JWT Fetcher requires an
issuer");
- }
- }
}
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java.backup
similarity index 100%
copy from
tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java
copy to
tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java.backup