This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 79750a2  TIKA-3351 -- prevent multiple "parsed by" entries
79750a2 is described below

commit 79750a2f3a603b604be1cc11b61dcb1994eef363
Author: tballison <[email protected]>
AuthorDate: Tue Apr 13 12:24:30 2021 -0400

    TIKA-3351 -- prevent multiple "parsed by" entries
---
 tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
index dac1b02..10e9243 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/ParserUtils.java
@@ -20,6 +20,7 @@ import static org.apache.tika.metadata.TikaCoreProperties.EMBEDDED_EXCEPTION;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.Arrays;
 
 import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
@@ -75,7 +76,14 @@ public class ParserUtils {
      * or used.
      */
     public static void recordParserDetails(Parser parser, Metadata metadata) {
-        metadata.add(TikaCoreProperties.TIKA_PARSED_BY, getParserClassname(parser));
+        String className = getParserClassname(parser);
+        String[] parsedBys = metadata.getValues(TikaCoreProperties.TIKA_PARSED_BY);
+        if (parsedBys == null || parsedBys.length == 0) {
+            metadata.add(TikaCoreProperties.TIKA_PARSED_BY, className);
+        } else if (! Arrays.stream(parsedBys).anyMatch(className::equals)) {
+            //only add parser once
+            metadata.add(TikaCoreProperties.TIKA_PARSED_BY, className);
+        }
     }
 
     /**

Reply via email to