This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4535
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 73b2583a5163498ee91116452f6f036f5c93e81e
Author: tallison <[email protected]>
AuthorDate: Thu Oct 30 14:11:13 2025 -0400

    TIKA-4535 -- limit use of TikaConfig.getDefaultConfig to where we need the full config.
---
 .../java/org/apache/tika/io/FilenameUtils.java     |  2 +-
 .../tika/eval/app/ExtractComparerRunner.java       |  3 ++-
 .../apache/tika/eval/app/ExtractProfileRunner.java |  3 ++-
 .../org/apache/tika/eval/app/db/MimeBuffer.java    | 15 ++++++-------
 .../org/apache/tika/eval/app/io/ExtractReader.java |  7 +++---
 .../org/apache/tika/parser/isatab/ISATabUtils.java | 25 ++++++++++++----------
 6 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java 
b/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
index d4230d441..3cab9abb3 100644
--- a/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
@@ -31,7 +31,7 @@ import org.apache.tika.utils.StringUtils;
 
 public class FilenameUtils {
 
-    private static final MimeTypes MIME_TYPES = 
TikaConfig.getDefaultConfig().getMimeRepository();
+    private static final MimeTypes MIME_TYPES = 
MimeTypes.getDefaultMimeTypes();
     private static final Pattern PROTOCOL_PATTERN = 
Pattern.compile("[A-Za-z0-9]{1,10}://+");
     /**
      * Reserved characters
diff --git 
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractComparerRunner.java
 
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractComparerRunner.java
index 8f86ab81e..1fde33370 100644
--- 
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractComparerRunner.java
+++ 
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractComparerRunner.java
@@ -55,6 +55,7 @@ import org.apache.tika.eval.app.io.DBWriter;
 import org.apache.tika.eval.app.io.ExtractReader;
 import org.apache.tika.eval.app.io.ExtractReaderException;
 import org.apache.tika.eval.app.io.IDBWriter;
+import org.apache.tika.mime.MimeTypes;
 import org.apache.tika.pipes.core.FetchEmitTuple;
 import org.apache.tika.pipes.core.pipesiterator.CallablePipesIterator;
 import org.apache.tika.pipes.core.pipesiterator.PipesIterator;
@@ -191,7 +192,7 @@ public class ExtractComparerRunner {
         jdbcUtil.createTables(builder.getRefTableInfos(), 
JDBCUtil.CREATE_TABLE.THROW_EX_IF_EXISTS);
 
         //step 2. create mime buffer
-        return new MimeBuffer(jdbcUtil.getConnection(), 
builder.getMimeTable(), TikaConfig.getDefaultConfig());
+        return new MimeBuffer(jdbcUtil.getConnection(), 
builder.getMimeTable(), MimeTypes.getDefaultMimeTypes());
     }
 
     private static void USAGE() throws IOException {
diff --git 
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractProfileRunner.java
 
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractProfileRunner.java
index a73a2f579..92e7c240f 100644
--- 
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractProfileRunner.java
+++ 
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractProfileRunner.java
@@ -55,6 +55,7 @@ import org.apache.tika.eval.app.io.DBWriter;
 import org.apache.tika.eval.app.io.ExtractReader;
 import org.apache.tika.eval.app.io.ExtractReaderException;
 import org.apache.tika.eval.app.io.IDBWriter;
+import org.apache.tika.mime.MimeTypes;
 import org.apache.tika.pipes.core.FetchEmitTuple;
 import org.apache.tika.pipes.core.pipesiterator.CallablePipesIterator;
 import org.apache.tika.pipes.core.pipesiterator.PipesIterator;
@@ -185,7 +186,7 @@ public class ExtractProfileRunner {
         jdbcUtil.createTables(builder.getRefTableInfos(), 
JDBCUtil.CREATE_TABLE.THROW_EX_IF_EXISTS);
 
         //step 2. create mime buffer
-        return new MimeBuffer(jdbcUtil.getConnection(), 
builder.getMimeTable(), TikaConfig.getDefaultConfig());
+        return new MimeBuffer(jdbcUtil.getConnection(), 
builder.getMimeTable(), MimeTypes.getDefaultMimeTypes());
     }
 
     private static void USAGE() throws IOException {
diff --git 
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/db/MimeBuffer.java
 
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/db/MimeBuffer.java
index 34cf18061..e3002ff3d 100644
--- 
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/db/MimeBuffer.java
+++ 
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/db/MimeBuffer.java
@@ -31,14 +31,14 @@ import org.apache.tika.mime.MimeTypes;
 public class MimeBuffer extends AbstractDBBuffer {
 
     private final PreparedStatement st;
-    private final TikaConfig config;
+    private final MimeTypes mimeTypes;
     private final Connection connection;
 
 
-    public MimeBuffer(Connection connection, TableInfo mimeTable, TikaConfig 
config) throws SQLException {
+    public MimeBuffer(Connection connection, TableInfo mimeTable, MimeTypes 
mimeTypes) throws SQLException {
         st = connection.prepareStatement(
                 "insert into " + mimeTable.getName() + "( " + 
Cols.MIME_ID.name() + ", " + Cols.MIME_STRING.name() + ", " + 
Cols.FILE_EXTENSION.name() + ") values (?,?,?)");
-        this.config = config;
+        this.mimeTypes = mimeTypes;
         this.connection = connection;
     }
 
@@ -49,7 +49,7 @@ public class MimeBuffer extends AbstractDBBuffer {
             st.setInt(1, id);
             st.setString(2, value);
             try {
-                String ext = MimeUtil.getExtension(value, config);
+                String ext = MimeUtil.getExtension(value, mimeTypes);
                 if (ext == null || ext.isEmpty()) {
                     st.setNull(3, Types.VARCHAR);
                 } else {
@@ -92,13 +92,12 @@ public class MimeBuffer extends AbstractDBBuffer {
          * don't currently return anything for {@link MimeType#getExtension};
          *
          * @param contentType string representing a content type, for example: 
"application/pdf"
-         * @param config      config from which to get MimeRepository
+         * @param mimeTypes MimeRepository
          * @return extension or empty string
          * @throws MimeTypeException thrown if MimeTypes can't parse the 
contentType
          */
-        public static String getExtension(String contentType, TikaConfig 
config) throws MimeTypeException {
-            MimeTypes types = config.getMimeRepository();
-            MimeType mime = types.forName(contentType);
+        public static String getExtension(String contentType, MimeTypes 
mimeTypes) throws MimeTypeException {
+            MimeType mime = mimeTypes.forName(contentType);
             return getExtension(mime);
         }
 
diff --git 
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/ExtractReader.java
 
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/ExtractReader.java
index bfab8b253..9acdd1c8a 100644
--- 
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/ExtractReader.java
+++ 
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/ExtractReader.java
@@ -41,6 +41,7 @@ import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeTypes;
 import org.apache.tika.sax.ToTextContentHandler;
 import org.apache.tika.sax.ToXMLContentHandler;
 import org.apache.tika.serialization.JsonMetadataList;
@@ -52,7 +53,7 @@ public class ExtractReader {
     private final ALTER_METADATA_LIST alterMetadataList;
     private final long minExtractLength;
     private final long maxExtractLength;
-    private TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
+    private final MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes();
 
     /**
      * Reads full extract, no modification of metadata list, no min or max 
extract length checking
@@ -204,9 +205,7 @@ public class ExtractReader {
         //but better than nothing.
         m.set(TikaCoreProperties.RESOURCE_NAME_KEY, 
fileSuffixes.originalFileName);
 
-        MediaType mimeType = tikaConfig
-                .getMimeRepository()
-                .detect(null, m);
+        MediaType mimeType = mimeTypes.detect(null, m);
         if (mimeType != null) {
             m.set(Metadata.CONTENT_TYPE, mimeType.toString());
         }
diff --git 
a/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java
 
b/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java
index 507d2da4e..7577b3217 100644
--- 
a/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java
+++ 
b/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java
@@ -33,6 +33,8 @@ import org.xml.sax.SAXException;
 
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.detect.AutoDetectReader;
+import org.apache.tika.detect.DefaultEncodingDetector;
+import org.apache.tika.detect.EncodingDetector;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
@@ -79,12 +81,9 @@ public class ISATabUtils {
             throws IOException, TikaException, SAXException {
         TikaInputStream tis = TikaInputStream.get(stream);
         // Automatically detect the character encoding
-        TikaConfig tikaConfig = context.get(TikaConfig.class);
-        if (tikaConfig == null) {
-            tikaConfig = TikaConfig.getDefaultConfig();
-        }
+        EncodingDetector encodingDetector = getEncodingDetector(context);
         try (AutoDetectReader reader = new 
AutoDetectReader(CloseShieldInputStream.wrap(tis),
-                metadata, tikaConfig.getEncodingDetector());
+                metadata, encodingDetector);
                 CSVParser csvParser = 
CSVParser.builder().setReader(reader).setFormat(CSVFormat.TDF).get()) {
             Iterator<CSVRecord> iterator = csvParser.iterator();
 
@@ -118,19 +117,23 @@ public class ISATabUtils {
         }
     }
 
+    private static EncodingDetector getEncodingDetector(ParseContext context) {
+        TikaConfig tikaConfig = context.get(TikaConfig.class);
+        if (tikaConfig != null) {
+            return tikaConfig.getEncodingDetector();
+        }
+        return new DefaultEncodingDetector();
+    }
+
     public static void parseAssay(InputStream stream, XHTMLContentHandler 
xhtml, Metadata metadata,
                                   ParseContext context)
             throws IOException, TikaException, SAXException {
         TikaInputStream tis = TikaInputStream.get(stream);
 
         // Automatically detect the character encoding
-
-        TikaConfig tikaConfig = context.get(TikaConfig.class);
-        if (tikaConfig == null) {
-            tikaConfig = TikaConfig.getDefaultConfig();
-        }
+        EncodingDetector encodingDetector = getEncodingDetector(context);
         try (AutoDetectReader reader = new 
AutoDetectReader(CloseShieldInputStream.wrap(tis),
-                metadata, tikaConfig.getEncodingDetector());
+                metadata, encodingDetector);
                 CSVParser csvParser = 
CSVParser.builder().setReader(reader).setFormat(CSVFormat.TDF).get()) {
             xhtml.startElement("table");
 

Reply via email to