This is an automated email from the ASF dual-hosted git repository.

ndipiazza pushed a commit to branch TIKA-4604-atlassian-fetcher
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/TIKA-4604-atlassian-fetcher by this push:
     new de0fd4b4f TIKA-4604: Complete refactoring of AtlassianJwtFetcher to Apache Tika pattern
de0fd4b4f is described below

commit de0fd4b4f18d44827c940d7496d58b1432ba008e
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Mon Dec 29 05:43:00 2025 -0600

    TIKA-4604: Complete refactoring of AtlassianJwtFetcher to Apache Tika pattern
    
    - Extended AbstractTikaExtension instead of implementing Fetcher directly
    - Added static build() method and constructor
    - Changed fetch() signature to use Metadata instead of Maps
    - Updated all method signatures throughout
    - Replaced Map operations with Metadata.set() and Metadata.add()
    - Added initialize() method for HTTP client and JWT generator setup
    - Removed old initIfNeeded and checkInitialization methods
    - Fixed all imports and added missing ones (List, URL, MalformedURLException)
    - Replaced Lombok log with slf4j LOG
    - Code now compiles successfully
    
    Build tested: mvn clean compile -DskipTests
---
 .../fetcher/atlassianjwt/AtlassianJwtFetcher.java  | 256 ++++++++++-----------
 ...etcher.java => AtlassianJwtFetcher.java.backup} |   0
 2 files changed, 118 insertions(+), 138 deletions(-)

diff --git 
a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java
 
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java
index d459b0093..e1594650b 100644
--- 
a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java
+++ 
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java
@@ -27,17 +27,13 @@ import java.net.URL;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.nio.file.StandardCopyOption;
 import java.security.NoSuchAlgorithmException;
-import java.util.ArrayList;
 import java.util.List;
-import java.util.Map;
 import java.util.Timer;
 import java.util.TimerTask;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import com.nimbusds.jose.JOSEException;
-import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
 import org.apache.http.ConnectionClosedException;
@@ -53,7 +49,8 @@ import org.apache.http.client.methods.HttpGet;
 import org.apache.http.client.protocol.HttpClientContext;
 import org.apache.http.impl.conn.ConnectionShutdownException;
 import org.apache.http.util.EntityUtils;
-import org.pf4j.Extension;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import org.apache.tika.client.HttpClientFactory;
 import org.apache.tika.exception.TikaConfigException;
@@ -61,16 +58,29 @@ import org.apache.tika.exception.TikaException;
 import org.apache.tika.exception.TikaTimeoutException;
 import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.Property;
 import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.ParseContext;
 import org.apache.tika.pipes.api.fetcher.Fetcher;
-import org.apache.tika.pipes.api.fetcher.FetcherConfig;
 import 
org.apache.tika.pipes.fetcher.atlassianjwt.config.AtlassianJwtFetcherConfig;
+import org.apache.tika.plugins.AbstractTikaExtension;
+import org.apache.tika.plugins.ExtensionConfig;
 import org.apache.tika.utils.StringUtils;
 
-@Extension
-@Slf4j
-public class AtlassianJwtFetcher implements Fetcher {
+public class AtlassianJwtFetcher extends AbstractTikaExtension implements 
Fetcher {
+
+    private static final Logger LOG = 
LoggerFactory.getLogger(AtlassianJwtFetcher.class);
+
+    public static AtlassianJwtFetcher build(ExtensionConfig pluginConfig)
+            throws TikaConfigException, IOException {
+        AtlassianJwtFetcherConfig config =
+                AtlassianJwtFetcherConfig.load(pluginConfig.json());
+        AtlassianJwtFetcher fetcher = new AtlassianJwtFetcher(pluginConfig, 
config);
+        fetcher.initialize();
+        return fetcher;
+    }
+
     private final HttpClientFactory httpClientFactory = new 
HttpClientFactory();
     public static String HTTP_HEADER_PREFIX = "http-header:";
     public static String HTTP_FETCH_PREFIX = "http-connection:";
@@ -85,60 +95,93 @@ public class AtlassianJwtFetcher implements Fetcher {
 
     private static final String USER_AGENT = "User-Agent";
 
+    private AtlassianJwtFetcherConfig config;
     private HttpClient httpClient;
     private HttpClient noCompressHttpClient;
     private AtlassianJwtGenerator jwtGenerator;
-    private boolean isInit = false;
 
-    @Override
-    public InputStream fetch(FetcherConfig fetcherConfig, String fetchKey, 
Map<String, Object> fetchMetadata, Map<String, Object> responseMetadata) {
-        try {
-            AtlassianJwtFetcherConfig atlassianJwtFetcherConfig = 
(AtlassianJwtFetcherConfig) fetcherConfig;
-            initIfNeeded(atlassianJwtFetcherConfig);
-            HttpGet get = new HttpGet(fetchKey);
-            RequestConfig requestConfig = RequestConfig
-                    .custom()
-                    
.setMaxRedirects(atlassianJwtFetcherConfig.getMaxRedirects())
-                    
.setRedirectsEnabled(atlassianJwtFetcherConfig.getMaxRedirects() > 0)
-                    .build();
-            get.setConfig(requestConfig);
-            putAdditionalHeadersOnRequest(atlassianJwtFetcherConfig, get, 
fetchKey);
-            return execute(get, atlassianJwtFetcherConfig, fetchMetadata, 
httpClient, true);
-        } catch (TikaException | IOException | JOSEException | 
URISyntaxException | NoSuchAlgorithmException e) {
-            throw new RuntimeException(e);
+    public AtlassianJwtFetcher(ExtensionConfig pluginConfig,
+                              AtlassianJwtFetcherConfig config) {
+        super(pluginConfig);
+        this.config = config;
+    }
+
+    public void initialize() throws IOException, TikaConfigException {
+        // Configure HTTP client factory
+        if (config.getSocketTimeout() != null) {
+            httpClientFactory.setSocketTimeout(config.getSocketTimeout());
+        }
+        if (config.getRequestTimeout() != null) {
+            httpClientFactory.setRequestTimeout(config.getRequestTimeout());
+        }
+        if (config.getConnectTimeout() != null) {
+            httpClientFactory.setConnectTimeout(config.getConnectTimeout());
+        }
+        if (config.getMaxConnections() != null) {
+            httpClientFactory.setMaxConnections(config.getMaxConnections());
+        }
+        if (config.getMaxConnectionsPerRoute() != null) {
+            
httpClientFactory.setMaxConnectionsPerRoute(config.getMaxConnectionsPerRoute());
+        }
+
+        // Initialize HTTP client
+        httpClient = httpClientFactory.build();
+        HttpClientFactory cp = httpClientFactory.copy();
+        cp.setDisableContentCompression(true);
+        noCompressHttpClient = cp.build();
+
+        // Initialize JWT generator if configured
+        if (!StringUtils.isBlank(config.getSharedSecret())) {
+            jwtGenerator = new AtlassianJwtGenerator(config.getSharedSecret(),
+                    config.getIssuer(), config.getSubject(),
+                    config.getJwtExpiresInSeconds());
         }
     }
 
-    private void putAdditionalHeadersOnRequest(AtlassianJwtFetcherConfig 
atlassianJwtFetcherConfig, HttpGet httpGet, String url) 
-            throws TikaException, JOSEException, URISyntaxException, 
NoSuchAlgorithmException {
+    @Override
+    public TikaInputStream fetch(String fetchKey, Metadata metadata, 
ParseContext parseContext)
+            throws IOException, TikaException {
+        HttpGet get = new HttpGet(fetchKey);
+        RequestConfig requestConfig = RequestConfig.custom()
+                .setMaxRedirects(config.getMaxRedirects())
+                .setRedirectsEnabled(config.getMaxRedirects() > 0).build();
+        get.setConfig(requestConfig);
+        putAdditionalHeadersOnRequest(get, fetchKey);
+        return execute(get, metadata, httpClient, true);
+    }
 
-        if (!StringUtils.isBlank(atlassianJwtFetcherConfig.getUserAgent())) {
-            httpGet.setHeader(USER_AGENT, 
atlassianJwtFetcherConfig.getUserAgent());
+    private void putAdditionalHeadersOnRequest(HttpGet httpGet, String url)
+            throws TikaException {
+        if (!StringUtils.isBlank(config.getUserAgent())) {
+            httpGet.setHeader(USER_AGENT, config.getUserAgent());
         }
-        if (atlassianJwtFetcherConfig.getHttpRequestHeaders() != null) {
-            atlassianJwtFetcherConfig.getHttpRequestHeaders()
-                    .forEach((header, values) -> {
-                        for (String value : values) {
-                            httpGet.addHeader(header, value);
-                        }
-                    });
+        if (config.getHttpRequestHeaders() != null) {
+            config.getHttpRequestHeaders().forEach((header, values) -> {
+                for (String value : values) {
+                    httpGet.addHeader(header, value);
+                }
+            });
         }
         if (jwtGenerator != null) {
-            String jwt = jwtGenerator.generateJwt("GET", url);
-            httpGet.setHeader("Authorization", "JWT " + jwt);
+            try {
+                String jwt = jwtGenerator.generateJwt("GET", url);
+                httpGet.setHeader("Authorization", "JWT " + jwt);
+            } catch (JOSEException | URISyntaxException | 
NoSuchAlgorithmException e) {
+                throw new TikaException("Failed to generate JWT token", e);
+            }
         } else {
-            log.warn("No JWT generator available - authorization header not 
set");
+            LOG.warn("No JWT generator available - authorization header not 
set");
         }
     }
 
-    private InputStream execute(HttpGet get, AtlassianJwtFetcherConfig 
atlassianJwtFetcherConfig,
-                                Map<String, Object> fetchMetadata, HttpClient 
client,
-                                boolean retryOnBadLength) throws IOException {
+    private TikaInputStream execute(HttpGet get, Metadata metadata, HttpClient 
client,
+                                    boolean retryOnBadLength)
+            throws IOException, TikaException {
         HttpClientContext context = HttpClientContext.create();
         HttpResponse response = null;
         final AtomicBoolean timeout = new AtomicBoolean(false);
         Timer timer = null;
-        long overallTimeout = atlassianJwtFetcherConfig.getOverallTimeout() == 
null ? -1 : atlassianJwtFetcherConfig.getOverallTimeout();
+        long overallTimeout = config.getOverallTimeout() == null ? -1 : 
config.getOverallTimeout();
         try {
             if (overallTimeout > -1) {
                 TimerTask task = new TimerTask() {
@@ -155,20 +198,20 @@ public class AtlassianJwtFetcher implements Fetcher {
             }
             response = client.execute(get, context);
 
-            updateMetadata(get.getURI().toString(), response, context, 
fetchMetadata, atlassianJwtFetcherConfig);
+            updateMetadata(get.getURI().toString(), response, context, 
metadata);
 
             int code = response.getStatusLine().getStatusCode();
-            log.info("Fetch id {} status code {}", get.getURI(), code);
+            LOG.info("Fetch id {} status code {}", get.getURI(), code);
             if (code < 200 || code > 299) {
-                throw new IOException("bad status code: " + code + " :: " + 
responseToString(atlassianJwtFetcherConfig, response));
+                throw new IOException("bad status code: " + code + " :: " + 
responseToString(response));
             }
             try (InputStream is = response.getEntity().getContent()) {
-                return spool(atlassianJwtFetcherConfig, is, fetchMetadata);
+                return spool(is, metadata);
             }
         } catch (ConnectionClosedException e) {
             if (retryOnBadLength && e.getMessage() != null && 
e.getMessage().contains("Premature end of Content-Length delimited message")) {
-                log.warn("premature end of content-length delimited message; 
retrying with content compression disabled for {}", get.getURI());
-                return execute(get, atlassianJwtFetcherConfig, fetchMetadata, 
noCompressHttpClient, false);
+                LOG.warn("premature end of content-length delimited message; 
retrying with content compression disabled for {}", get.getURI());
+                return execute(get, metadata, noCompressHttpClient, false);
             }
             throw e;
         } catch (IOException e) {
@@ -191,70 +234,65 @@ public class AtlassianJwtFetcher implements Fetcher {
         }
     }
 
-    private InputStream spool(AtlassianJwtFetcherConfig 
atlassianJwtFetcherConfig, InputStream content, Map<String, Object> 
fetchMetadata) throws IOException {
+    private TikaInputStream spool(InputStream content, Metadata metadata) 
throws IOException {
         long start = System.currentTimeMillis();
         TemporaryResources tmp = new TemporaryResources();
-        Path tmpFile = tmp.createTempFile();
-        if (atlassianJwtFetcherConfig.getMaxSpoolSize() < 0) {
-            Files.copy(content, tmpFile, StandardCopyOption.REPLACE_EXISTING);
+        Path tmpFile = tmp.createTempFile(metadata);
+        if (config.getMaxSpoolSize() < 0) {
+            Files.copy(content, tmpFile);
         } else {
             try (OutputStream os = Files.newOutputStream(tmpFile)) {
-                long totalRead = IOUtils.copyLarge(content, os, 0, 
atlassianJwtFetcherConfig.getMaxSpoolSize());
-                if (totalRead == atlassianJwtFetcherConfig.getMaxSpoolSize() 
&& content.read() != -1) {
-                    fetchMetadata.put(HTTP_FETCH_TRUNCATED.getName(), "true");
+                long totalRead = IOUtils.copyLarge(content, os, 0, 
config.getMaxSpoolSize());
+                if (totalRead == config.getMaxSpoolSize() && content.read() != 
-1) {
+                    metadata.set(HTTP_FETCH_TRUNCATED, true);
                 }
             }
         }
         long elapsed = System.currentTimeMillis() - start;
-        log.debug("took {} ms to copy to local tmp file", elapsed);
+        LOG.debug("took {} ms to copy to local tmp file", elapsed);
         return TikaInputStream.get(tmpFile);
     }
 
     private void updateMetadata(String url, HttpResponse response, 
HttpClientContext context,
-                               Map<String, Object> fetchMetadata,
-                               AtlassianJwtFetcherConfig 
atlassianJwtFetcherConfig) {
+                               Metadata metadata) {
         if (response == null) {
             return;
         }
 
         if (response.getStatusLine() != null) {
-            fetchMetadata.put(HTTP_STATUS_CODE.getName(), 
response.getStatusLine().getStatusCode());
+            metadata.set(HTTP_STATUS_CODE, 
response.getStatusLine().getStatusCode());
         }
 
         HttpEntity entity = response.getEntity();
         if (entity != null && entity.getContentEncoding() != null) {
-            fetchMetadata.put(HTTP_CONTENT_ENCODING.getName(), 
entity.getContentEncoding().getValue());
+            metadata.set(HTTP_CONTENT_ENCODING, 
entity.getContentEncoding().getValue());
         }
         if (entity != null && entity.getContentType() != null) {
-            fetchMetadata.put(HTTP_CONTENT_TYPE.getName(), 
entity.getContentType().getValue());
+            metadata.set(HTTP_CONTENT_TYPE, 
entity.getContentType().getValue());
         }
 
-        if (atlassianJwtFetcherConfig.getHttpHeaders() != null) {
-            for (String h : atlassianJwtFetcherConfig.getHttpHeaders()) {
+        if (config.getHttpHeaders() != null) {
+            for (String h : config.getHttpHeaders()) {
                 Header[] headers = response.getHeaders(h);
                 if (headers != null && headers.length > 0) {
-                    String name = HTTP_HEADER_PREFIX + h;
-                    List<String> headerList = new ArrayList<>();
-                    fetchMetadata.put(name, headerList);
                     for (Header header : headers) {
-                        headerList.add(header.getValue());
+                        metadata.add(HTTP_HEADER_PREFIX + h, 
header.getValue());
                     }
-                    fetchMetadata.put(name, headerList);
                 }
             }
         }
         List<URI> uriList = context.getRedirectLocations();
         if (uriList == null) {
-            fetchMetadata.put(HTTP_NUM_REDIRECTS.getName(), 0);
-            fetchMetadata.put(HTTP_TARGET_URL.getName(), url);
+            metadata.set(HTTP_NUM_REDIRECTS, 0);
+            metadata.set(HTTP_TARGET_URL, url);
         } else {
-            fetchMetadata.put(HTTP_NUM_REDIRECTS.getName(), uriList.size());
+            metadata.set(HTTP_NUM_REDIRECTS, uriList.size());
             try {
                 URI uri = uriList.get(uriList.size() - 1);
                 if (uri != null) {
                     URL u = uri.toURL();
-                    fetchMetadata.put(HTTP_TARGET_URL.getName(), u.toString());
-                    fetchMetadata.put(TikaCoreProperties.RESOURCE_NAME_KEY, 
u.getFile());
+                    metadata.set(HTTP_TARGET_URL, u.toString());
+                    metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, 
u.getFile());
                 }
             } catch (MalformedURLException e) {
                 // swallow
@@ -265,24 +303,24 @@ public class AtlassianJwtFetcher implements Fetcher {
             try {
                 InetAddress inetAddress = ((HttpInetConnection) 
connection).getRemoteAddress();
                 if (inetAddress != null) {
-                    fetchMetadata.put(HTTP_TARGET_IP_ADDRESS.getName(), 
inetAddress.getHostAddress());
+                    metadata.set(HTTP_TARGET_IP_ADDRESS, 
inetAddress.getHostAddress());
                 }
             } catch (ConnectionShutdownException e) {
-                log.warn("connection shutdown while trying to get target URL: 
" + url);
+                LOG.warn("connection shutdown while trying to get target URL: 
" + url);
             }
         }
     }
 
-    private String responseToString(AtlassianJwtFetcherConfig 
atlassianJwtFetcherConfig, HttpResponse response) {
+    private String responseToString(HttpResponse response) {
         if (response.getEntity() == null) {
             return "";
         }
         try (InputStream is = response.getEntity().getContent()) {
             UnsynchronizedByteArrayOutputStream bos = 
UnsynchronizedByteArrayOutputStream.builder().get();
-            IOUtils.copyLarge(is, bos, 0, 
atlassianJwtFetcherConfig.getMaxErrMsgSize());
+            IOUtils.copyLarge(is, bos, 0, config.getMaxErrMsgSize());
             return bos.toString(StandardCharsets.UTF_8);
         } catch (IOException e) {
-            log.warn("IOException trying to read error message", e);
+            LOG.warn("IOException trying to read error message", e);
             return "";
         } catch (NullPointerException e) {
             return "";
@@ -290,62 +328,4 @@ public class AtlassianJwtFetcher implements Fetcher {
             EntityUtils.consumeQuietly(response.getEntity());
         }
     }
-
-    public void initIfNeeded(AtlassianJwtFetcherConfig 
atlassianJwtFetcherConfig) throws TikaConfigException {
-        if (isInit) {
-            return;
-        }
-
-        log.info("AtlassianJwtFetcher initialization:");
-        log.info("Shared Secret: {}", 
atlassianJwtFetcherConfig.getSharedSecret() != null ? "[PRESENT]" : 
"[MISSING]");
-        log.info("Issuer: {}", atlassianJwtFetcherConfig.getIssuer());
-        log.info("Subject: {}", atlassianJwtFetcherConfig.getSubject());
-        log.info("JWT Expires In Seconds: {}", 
atlassianJwtFetcherConfig.getJwtExpiresInSeconds());
-
-        checkInitialization(atlassianJwtFetcherConfig);
-
-        if (atlassianJwtFetcherConfig.getSocketTimeout() != null) {
-            
httpClientFactory.setSocketTimeout(atlassianJwtFetcherConfig.getSocketTimeout());
-        }
-        if (atlassianJwtFetcherConfig.getRequestTimeout() != null) {
-            
httpClientFactory.setRequestTimeout(atlassianJwtFetcherConfig.getRequestTimeout());
-        }
-        if (atlassianJwtFetcherConfig.getConnectTimeout() != null) {
-            
httpClientFactory.setSocketTimeout(atlassianJwtFetcherConfig.getConnectTimeout());
-        }
-        if (atlassianJwtFetcherConfig.getMaxConnections() != null) {
-            
httpClientFactory.setMaxConnections(atlassianJwtFetcherConfig.getMaxConnections());
-        }
-        if (atlassianJwtFetcherConfig.getMaxConnectionsPerRoute() != null) {
-            
httpClientFactory.setMaxConnectionsPerRoute(atlassianJwtFetcherConfig.getMaxConnectionsPerRoute());
-        }
-
-        httpClient = httpClientFactory.build();
-        HttpClientFactory cp = httpClientFactory.copy();
-        cp.setDisableContentCompression(true);
-        noCompressHttpClient = cp.build();
-
-        if (!StringUtils.isBlank(atlassianJwtFetcherConfig.getSharedSecret()) 
&& 
-            !StringUtils.isBlank(atlassianJwtFetcherConfig.getIssuer())) {
-            jwtGenerator = new AtlassianJwtGenerator(
-                atlassianJwtFetcherConfig.getSharedSecret(),
-                atlassianJwtFetcherConfig.getIssuer(),
-                atlassianJwtFetcherConfig.getSubject(),
-                atlassianJwtFetcherConfig.getJwtExpiresInSeconds()
-            );
-        } else {
-            log.warn("JWT generator not created. missing required 
configuration");
-        }
-
-        isInit = true;
-    }
-
-    public void checkInitialization(AtlassianJwtFetcherConfig 
atlassianJwtFetcherConfig) throws TikaConfigException {
-        if (StringUtils.isBlank(atlassianJwtFetcherConfig.getSharedSecret())) {
-            throw new TikaConfigException("Atlassian JWT Fetcher requires a 
shared secret");
-        }
-        if (StringUtils.isBlank(atlassianJwtFetcherConfig.getIssuer())) {
-            throw new TikaConfigException("Atlassian JWT Fetcher requires an 
issuer");
-        }
-    }
 }
diff --git 
a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java
 
b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java.backup
similarity index 100%
copy from 
tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java
copy to 
tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java.backup

Reply via email to