This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 79750a2 TIKA-3351 -- prevent multiple "parsed by" entries
79750a2 is described below
commit 79750a2f3a603b604be1cc11b61dcb1994eef363
Author: tballison <[email protected]>
AuthorDate: Tue Apr 13 12:24:30 2021 -0400
TIKA-3351 -- prevent multiple "parsed by" entries
---
tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
index dac1b02..10e9243 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
@@ -20,6 +20,7 @@ import static org.apache.tika.metadata.TikaCoreProperties.EMBEDDED_EXCEPTION;
import java.io.IOException;
import java.io.InputStream;
+import java.util.Arrays;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
@@ -75,7 +76,14 @@ public class ParserUtils {
* or used.
*/
public static void recordParserDetails(Parser parser, Metadata metadata) {
- metadata.add(TikaCoreProperties.TIKA_PARSED_BY, getParserClassname(parser));
+ String className = getParserClassname(parser);
+ String[] parsedBys = metadata.getValues(TikaCoreProperties.TIKA_PARSED_BY);
+ if (parsedBys == null || parsedBys.length == 0) {
+ metadata.add(TikaCoreProperties.TIKA_PARSED_BY, className);
+ } else if (! Arrays.stream(parsedBys).anyMatch(className::equals)) {
+ //only add parser once
+ metadata.add(TikaCoreProperties.TIKA_PARSED_BY, className);
+ }
}
/**