This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 79938eabf TIKA-4535 -- limit use of TikaConfig.getDefaultConfig to
where we need the full config (#2381)
79938eabf is described below
commit 79938eabfa0138e9acedc91b6efff829386a2f11
Author: Tim Allison <[email protected]>
AuthorDate: Thu Oct 30 17:14:29 2025 -0400
TIKA-4535 -- limit use of TikaConfig.getDefaultConfig to where we need
the full config (#2381)
* TIKA-4535 -- limit use of TikaConfig.getDefaultConfig to where we need
the full config.
---
.../java/org/apache/tika/io/FilenameUtils.java | 3 +--
.../tika/eval/app/ExtractComparerRunner.java | 4 ++--
.../apache/tika/eval/app/ExtractProfileRunner.java | 4 ++--
.../org/apache/tika/eval/app/db/MimeBuffer.java | 16 ++++++--------
.../org/apache/tika/eval/app/io/ExtractReader.java | 8 +++----
.../org/apache/tika/parser/isatab/ISATabUtils.java | 25 ++++++++++++----------
6 files changed, 29 insertions(+), 31 deletions(-)
diff --git a/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
b/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
index d4230d441..234347c25 100644
--- a/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
@@ -21,7 +21,6 @@ import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MimeTypeException;
@@ -31,7 +30,7 @@ import org.apache.tika.utils.StringUtils;
public class FilenameUtils {
- private static final MimeTypes MIME_TYPES =
TikaConfig.getDefaultConfig().getMimeRepository();
+ private static final MimeTypes MIME_TYPES =
MimeTypes.getDefaultMimeTypes();
private static final Pattern PROTOCOL_PATTERN =
Pattern.compile("[A-Za-z0-9]{1,10}://+");
/**
* Reserved characters
diff --git
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractComparerRunner.java
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractComparerRunner.java
index 8f86ab81e..0ab120c81 100644
---
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractComparerRunner.java
+++
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractComparerRunner.java
@@ -46,7 +46,6 @@ import org.apache.commons.cli.help.HelpFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.tika.config.TikaConfig;
import org.apache.tika.eval.app.db.Cols;
import org.apache.tika.eval.app.db.JDBCUtil;
import org.apache.tika.eval.app.db.MimeBuffer;
@@ -55,6 +54,7 @@ import org.apache.tika.eval.app.io.DBWriter;
import org.apache.tika.eval.app.io.ExtractReader;
import org.apache.tika.eval.app.io.ExtractReaderException;
import org.apache.tika.eval.app.io.IDBWriter;
+import org.apache.tika.mime.MimeTypes;
import org.apache.tika.pipes.core.FetchEmitTuple;
import org.apache.tika.pipes.core.pipesiterator.CallablePipesIterator;
import org.apache.tika.pipes.core.pipesiterator.PipesIterator;
@@ -191,7 +191,7 @@ public class ExtractComparerRunner {
jdbcUtil.createTables(builder.getRefTableInfos(),
JDBCUtil.CREATE_TABLE.THROW_EX_IF_EXISTS);
//step 2. create mime buffer
- return new MimeBuffer(jdbcUtil.getConnection(),
builder.getMimeTable(), TikaConfig.getDefaultConfig());
+ return new MimeBuffer(jdbcUtil.getConnection(),
builder.getMimeTable(), MimeTypes.getDefaultMimeTypes());
}
private static void USAGE() throws IOException {
diff --git
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractProfileRunner.java
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractProfileRunner.java
index a73a2f579..b618bf0af 100644
---
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractProfileRunner.java
+++
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ExtractProfileRunner.java
@@ -46,7 +46,6 @@ import org.apache.commons.cli.help.HelpFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.tika.config.TikaConfig;
import org.apache.tika.eval.app.db.Cols;
import org.apache.tika.eval.app.db.JDBCUtil;
import org.apache.tika.eval.app.db.MimeBuffer;
@@ -55,6 +54,7 @@ import org.apache.tika.eval.app.io.DBWriter;
import org.apache.tika.eval.app.io.ExtractReader;
import org.apache.tika.eval.app.io.ExtractReaderException;
import org.apache.tika.eval.app.io.IDBWriter;
+import org.apache.tika.mime.MimeTypes;
import org.apache.tika.pipes.core.FetchEmitTuple;
import org.apache.tika.pipes.core.pipesiterator.CallablePipesIterator;
import org.apache.tika.pipes.core.pipesiterator.PipesIterator;
@@ -185,7 +185,7 @@ public class ExtractProfileRunner {
jdbcUtil.createTables(builder.getRefTableInfos(),
JDBCUtil.CREATE_TABLE.THROW_EX_IF_EXISTS);
//step 2. create mime buffer
- return new MimeBuffer(jdbcUtil.getConnection(),
builder.getMimeTable(), TikaConfig.getDefaultConfig());
+ return new MimeBuffer(jdbcUtil.getConnection(),
builder.getMimeTable(), MimeTypes.getDefaultMimeTypes());
}
private static void USAGE() throws IOException {
diff --git
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/db/MimeBuffer.java
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/db/MimeBuffer.java
index 34cf18061..e03a63a3e 100644
---
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/db/MimeBuffer.java
+++
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/db/MimeBuffer.java
@@ -21,7 +21,6 @@ import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Types;
-import org.apache.tika.config.TikaConfig;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MimeType;
import org.apache.tika.mime.MimeTypeException;
@@ -31,14 +30,14 @@ import org.apache.tika.mime.MimeTypes;
public class MimeBuffer extends AbstractDBBuffer {
private final PreparedStatement st;
- private final TikaConfig config;
+ private final MimeTypes mimeTypes;
private final Connection connection;
- public MimeBuffer(Connection connection, TableInfo mimeTable, TikaConfig
config) throws SQLException {
+ public MimeBuffer(Connection connection, TableInfo mimeTable, MimeTypes
mimeTypes) throws SQLException {
st = connection.prepareStatement(
"insert into " + mimeTable.getName() + "( " +
Cols.MIME_ID.name() + ", " + Cols.MIME_STRING.name() + ", " +
Cols.FILE_EXTENSION.name() + ") values (?,?,?)");
- this.config = config;
+ this.mimeTypes = mimeTypes;
this.connection = connection;
}
@@ -49,7 +48,7 @@ public class MimeBuffer extends AbstractDBBuffer {
st.setInt(1, id);
st.setString(2, value);
try {
- String ext = MimeUtil.getExtension(value, config);
+ String ext = MimeUtil.getExtension(value, mimeTypes);
if (ext == null || ext.isEmpty()) {
st.setNull(3, Types.VARCHAR);
} else {
@@ -92,13 +91,12 @@ public class MimeBuffer extends AbstractDBBuffer {
* don't currently return anything for {@link MimeType#getExtension};
*
* @param contentType string representing a content type, for example:
"application/pdf"
- * @param config config from which to get MimeRepository
+ * @param mimeTypes MimeRepository
* @return extension or empty string
* @throws MimeTypeException thrown if MimeTypes can't parse the
contentType
*/
- public static String getExtension(String contentType, TikaConfig
config) throws MimeTypeException {
- MimeTypes types = config.getMimeRepository();
- MimeType mime = types.forName(contentType);
+ public static String getExtension(String contentType, MimeTypes
mimeTypes) throws MimeTypeException {
+ MimeType mime = mimeTypes.forName(contentType);
return getExtension(mime);
}
diff --git
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/ExtractReader.java
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/ExtractReader.java
index bfab8b253..3bac1a849 100644
---
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/ExtractReader.java
+++
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/ExtractReader.java
@@ -37,10 +37,10 @@ import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeTypes;
import org.apache.tika.sax.ToTextContentHandler;
import org.apache.tika.sax.ToXMLContentHandler;
import org.apache.tika.serialization.JsonMetadataList;
@@ -52,7 +52,7 @@ public class ExtractReader {
private final ALTER_METADATA_LIST alterMetadataList;
private final long minExtractLength;
private final long maxExtractLength;
- private TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
+ private final MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes();
/**
* Reads full extract, no modification of metadata list, no min or max
extract length checking
@@ -204,9 +204,7 @@ public class ExtractReader {
//but better than nothing.
m.set(TikaCoreProperties.RESOURCE_NAME_KEY,
fileSuffixes.originalFileName);
- MediaType mimeType = tikaConfig
- .getMimeRepository()
- .detect(null, m);
+ MediaType mimeType = mimeTypes.detect(null, m);
if (mimeType != null) {
m.set(Metadata.CONTENT_TYPE, mimeType.toString());
}
diff --git
a/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java
b/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java
index 507d2da4e..7577b3217 100644
---
a/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java
+++
b/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java
@@ -33,6 +33,8 @@ import org.xml.sax.SAXException;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.AutoDetectReader;
+import org.apache.tika.detect.DefaultEncodingDetector;
+import org.apache.tika.detect.EncodingDetector;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
@@ -79,12 +81,9 @@ public class ISATabUtils {
throws IOException, TikaException, SAXException {
TikaInputStream tis = TikaInputStream.get(stream);
// Automatically detect the character encoding
- TikaConfig tikaConfig = context.get(TikaConfig.class);
- if (tikaConfig == null) {
- tikaConfig = TikaConfig.getDefaultConfig();
- }
+ EncodingDetector encodingDetector = getEncodingDetector(context);
try (AutoDetectReader reader = new
AutoDetectReader(CloseShieldInputStream.wrap(tis),
- metadata, tikaConfig.getEncodingDetector());
+ metadata, encodingDetector);
CSVParser csvParser =
CSVParser.builder().setReader(reader).setFormat(CSVFormat.TDF).get()) {
Iterator<CSVRecord> iterator = csvParser.iterator();
@@ -118,19 +117,23 @@ public class ISATabUtils {
}
}
+ private static EncodingDetector getEncodingDetector(ParseContext context) {
+ TikaConfig tikaConfig = context.get(TikaConfig.class);
+ if (tikaConfig != null) {
+ return tikaConfig.getEncodingDetector();
+ }
+ return new DefaultEncodingDetector();
+ }
+
public static void parseAssay(InputStream stream, XHTMLContentHandler
xhtml, Metadata metadata,
ParseContext context)
throws IOException, TikaException, SAXException {
TikaInputStream tis = TikaInputStream.get(stream);
// Automatically detect the character encoding
-
- TikaConfig tikaConfig = context.get(TikaConfig.class);
- if (tikaConfig == null) {
- tikaConfig = TikaConfig.getDefaultConfig();
- }
+ EncodingDetector encodingDetector = getEncodingDetector(context);
try (AutoDetectReader reader = new
AutoDetectReader(CloseShieldInputStream.wrap(tis),
- metadata, tikaConfig.getEncodingDetector());
+ metadata, encodingDetector);
CSVParser csvParser =
CSVParser.builder().setReader(reader).setFormat(CSVFormat.TDF).get()) {
xhtml.startElement("table");