This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4532
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 7a407b88a114a48d13c4da1e1e54a62d5742efcb
Author: tallison <[email protected]>
AuthorDate: Wed Oct 29 14:19:18 2025 -0400

    TIKA-4532 -- remove lang3
---
 tika-bundles/tika-bundle-standard/pom.xml          |  1 -
 .../java/org/apache/tika/utils/StringUtils.java    | 17 +++++++++
 .../org/apache/tika/eval/app/ProfilerBase.java     | 22 ++++++------
 tika-eval/tika-eval-core/pom.xml                   |  4 ---
 .../eval/core/metadata/TikaEvalMetadataFilter.java |  5 ++-
 .../tika/eval/core/textstats/CommonTokens.java     | 13 +++----
 .../core/textstats/CommonTokensBhattacharyya.java  |  7 ++--
 .../eval/core/textstats/CommonTokensCosine.java    |  9 +++--
 .../eval/core/textstats/CommonTokensHellinger.java |  7 ++--
 .../eval/core/textstats/CommonTokensKLDNormed.java |  9 +++--
 .../core/textstats/CommonTokensKLDivergence.java   |  9 +++--
 .../{TokenEntropy.java => LangModelPair.java}      | 22 ++----------
 .../eval/core/textstats/TextProfileSignature.java  |  2 +-
 .../tika/eval/core/textstats/TokenEntropy.java     |  2 +-
 .../tika/eval/core/textstats/TokenLengths.java     |  2 +-
 .../tika/eval/core/textstats/TopNTokens.java       |  2 +-
 .../eval/core/textstats/UnicodeBlockCounter.java   |  3 +-
 .../eval/core/tokens/CommonTokenCountManager.java  |  7 ++--
 .../tika/eval/core/tokens/TokenContraster.java     |  3 +-
 .../apache/tika/eval/core/tokens/TokenCounter.java |  2 +-
 .../apache/tika/eval/core/tokens/TokenCounts.java  |  2 +-
 .../tika/eval/core/util/EvalExceptionUtils.java    |  3 +-
 .../TokenEntropy.java => util/MutableInt.java}     | 41 +++++++++++++---------
 .../tika/eval/core/tokens/TokenCounterTest.java    |  3 +-
 .../apache/tika/pipes/grpc/TikaGrpcServerImpl.java |  4 +--
 tika-parent/pom.xml                                |  6 ----
 .../org/apache/tika/parser/dwg/DWGReadParser.java  |  6 ++--
 .../tika-parser-code-module/pom.xml                |  5 ---
 .../executable/UniversalExecutableParser.java      | 16 +++++----
 .../tika-parser-microsoft-module/pom.xml           |  4 ---
 .../parser/microsoft/onenote/OneNoteDocument.java  | 10 +++---
 .../parser/microsoft/onenote/OneNoteParser.java    |  3 +-
 .../microsoft/onenote/OneNoteTreeWalker.java       | 11 +++---
 .../tika/parser/microsoft/onenote/RoleGuid.java    |  4 +++
 .../streamobj/chunking/ZipFilesChunking.java       |  3 +-
 .../microsoft/onenote/OneNoteParserTest.java       |  4 +--
 .../tika-parser-miscoffice-module/pom.xml          |  4 ---
 .../org/apache/tika/parser/mif/MIFExtractor.java   |  7 ++--
 .../tika-parser-ocr-module/pom.xml                 |  4 ---
 tika-server/tika-server-core/pom.xml               |  4 ---
 tika-server/tika-server-standard/pom.xml           |  1 -
 41 files changed, 128 insertions(+), 165 deletions(-)

diff --git a/tika-bundles/tika-bundle-standard/pom.xml 
b/tika-bundles/tika-bundle-standard/pom.xml
index c6522f276..20f38620d 100644
--- a/tika-bundles/tika-bundle-standard/pom.xml
+++ b/tika-bundles/tika-bundle-standard/pom.xml
@@ -173,7 +173,6 @@
               xmlbeans|
               jackcess|
               jackcess-encrypt|
-              commons-lang3|
               jsoup|
               asm|
               juniversalchardet|
diff --git a/tika-core/src/main/java/org/apache/tika/utils/StringUtils.java 
b/tika-core/src/main/java/org/apache/tika/utils/StringUtils.java
index b09963d46..f88f0eef0 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/StringUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/StringUtils.java
@@ -44,6 +44,23 @@ public class StringUtils {
         return s == null || s.isBlank();
     }
 
+    public static boolean isNotBlank(final String s) {
+        return s != null && !s.isBlank();
+    }
+
+    /**
+     * Case-insensitive test of whether {@code str} begins with {@code prefix}.
+     * Two {@code null}s match; a single {@code null} never does.
+     */
+    public static boolean startsWithIgnoreCase(String str, String prefix) {
+        if (str == null || prefix == null) {
+            return str == null && prefix == null;
+        }
+        // regionMatches itself returns false when prefix is longer than str
+        return str.regionMatches(true, 0, prefix, 0, prefix.length());
+    }
+
+
     /**
      * <p>Left pad a String with a specified String.</p>
      *
diff --git 
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ProfilerBase.java
 
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ProfilerBase.java
index 18e30a5e4..904fadb23 100644
--- 
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ProfilerBase.java
+++ 
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/ProfilerBase.java
@@ -34,8 +34,6 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.commons.io.FilenameUtils;
-import org.apache.commons.lang3.mutable.MutableInt;
-import org.apache.commons.lang3.tuple.Pair;
 import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -64,6 +62,7 @@ import org.apache.tika.eval.core.tokens.TokenIntPair;
 import org.apache.tika.eval.core.util.ContentTagParser;
 import org.apache.tika.eval.core.util.ContentTags;
 import org.apache.tika.eval.core.util.EvalExceptionUtils;
+import org.apache.tika.eval.core.util.MutableInt;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.language.detect.LanguageResult;
 import org.apache.tika.metadata.Metadata;
@@ -616,15 +615,15 @@ public abstract class ProfilerBase {
     void unicodeBlocks(Map<Class, Object> tokenStats, Map<Cols, String> data) {
 
         Map<String, MutableInt> blocks = (Map<String, MutableInt>) 
tokenStats.get(UnicodeBlockCounter.class);
-        List<Pair<String, Integer>> pairs = new ArrayList<>();
+        List<FeatureCount> pairs = new ArrayList<>();
         for (Map.Entry<String, MutableInt> e : blocks.entrySet()) {
-            pairs.add(Pair.of(e.getKey(), e
+            pairs.add(new FeatureCount(e.getKey(), e
                     .getValue()
                     .intValue()));
         }
-        pairs.sort((o1, o2) -> o2
-                .getValue()
-                .compareTo(o1.getValue()));
+        pairs.sort((o1, o2) ->
+                Integer.compare(o2.count, o1.count)
+        );
         StringBuilder sb = new StringBuilder();
 
         for (int i = 0; i < 20 && i < pairs.size(); i++) {
@@ -633,12 +632,10 @@ public abstract class ProfilerBase {
             }
             sb
                     .append(pairs
-                            .get(i)
-                            .getKey())
+                            .get(i).feature)
                     .append(": ")
                     .append(pairs
-                            .get(i)
-                            .getValue());
+                            .get(i).count);
         }
         data.put(Cols.UNICODE_CHAR_BLOCKS, sb.toString());
     }
@@ -810,6 +807,9 @@ public abstract class ProfilerBase {
         OOM, TIMEOUT
     }
 
+    /** Pairs a label with its count; used for sorting unicode block tallies. */
+    private record FeatureCount(String feature, int count) {
+    }
 
 }
 
diff --git a/tika-eval/tika-eval-core/pom.xml b/tika-eval/tika-eval-core/pom.xml
index 140442ef3..16cb4f778 100644
--- a/tika-eval/tika-eval-core/pom.xml
+++ b/tika-eval/tika-eval-core/pom.xml
@@ -64,10 +64,6 @@
       <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-analysis-icu</artifactId>
     </dependency>
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-lang3</artifactId>
-    </dependency>
     <dependency>
       <groupId>org.jsoup</groupId>
       <artifactId>jsoup</artifactId>
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
index 811958af4..99b26228e 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
@@ -20,8 +20,6 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.commons.lang3.StringUtils;
-
 import org.apache.tika.eval.core.langid.LanguageIDWrapper;
 import org.apache.tika.eval.core.textstats.BasicTokenCountStatsCalculator;
 import org.apache.tika.eval.core.textstats.CommonTokens;
@@ -35,6 +33,7 @@ import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.Property;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.metadata.filter.MetadataFilter;
+import org.apache.tika.utils.StringUtils;
 
 public class TikaEvalMetadataFilter extends MetadataFilter {
 
@@ -75,7 +74,7 @@ public class TikaEvalMetadataFilter extends MetadataFilter {
     @Override
     public void filter(Metadata metadata) throws TikaException {
         String content = metadata.get(TikaCoreProperties.TIKA_CONTENT);
-        if (StringUtils.isAllBlank(content)) {
+        if (StringUtils.isBlank(content)) {
             return;
         }
         calcStats(content, metadata);
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokens.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokens.java
index dbdd4a67d..d9e39761b 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokens.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokens.java
@@ -20,14 +20,11 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.commons.lang3.mutable.MutableInt;
-import org.apache.commons.lang3.tuple.Pair;
-
 import org.apache.tika.eval.core.tokens.AlphaIdeographFilterFactory;
 import org.apache.tika.eval.core.tokens.CommonTokenCountManager;
 import org.apache.tika.eval.core.tokens.CommonTokenResult;
-import org.apache.tika.eval.core.tokens.LangModel;
 import org.apache.tika.eval.core.tokens.TokenCounts;
+import org.apache.tika.eval.core.util.MutableInt;
 import org.apache.tika.language.detect.LanguageResult;
 
 public class CommonTokens implements 
LanguageAwareTokenCountStats<CommonTokenResult> {
@@ -44,10 +41,10 @@ public class CommonTokens implements 
LanguageAwareTokenCountStats<CommonTokenRes
 
     @Override
     public CommonTokenResult calculate(List<LanguageResult> languages, 
TokenCounts tokenCounts) {
-        Pair<String, LangModel> pair =
+        LangModelPair pair =
                 
commonTokenCountManager.getLangTokens(languages.get(0).getLanguage());
-        String actualLangCode = pair.getKey();
-        Set<String> commonTokens = pair.getValue().getTokens();
+
+        Set<String> commonTokens = pair.langModel().getTokens();
         int numUniqueCommonTokens = 0;
         int numCommonTokens = 0;
         int numUniqueAlphabeticTokens = 0;
@@ -65,7 +62,7 @@ public class CommonTokens implements 
LanguageAwareTokenCountStats<CommonTokenRes
             }
 
         }
-        return new CommonTokenResult(actualLangCode, numUniqueCommonTokens, 
numCommonTokens,
+        return new CommonTokenResult(pair.lang(), numUniqueCommonTokens, 
numCommonTokens,
                 numUniqueAlphabeticTokens, numAlphabeticTokens);
     }
 }
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensBhattacharyya.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensBhattacharyya.java
index f23c17252..9fe075857 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensBhattacharyya.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensBhattacharyya.java
@@ -19,13 +19,12 @@ package org.apache.tika.eval.core.textstats;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.commons.lang3.mutable.MutableInt;
-import org.apache.commons.lang3.tuple.Pair;
 import org.apache.commons.math3.util.FastMath;
 
 import org.apache.tika.eval.core.tokens.CommonTokenCountManager;
 import org.apache.tika.eval.core.tokens.LangModel;
 import org.apache.tika.eval.core.tokens.TokenCounts;
+import org.apache.tika.eval.core.util.MutableInt;
 import org.apache.tika.language.detect.LanguageResult;
 
 public class CommonTokensBhattacharyya implements 
LanguageAwareTokenCountStats<Double> {
@@ -38,9 +37,9 @@ public class CommonTokensBhattacharyya implements 
LanguageAwareTokenCountStats<D
 
     @Override
     public Double calculate(List<LanguageResult> languages, TokenCounts 
tokenCounts) {
-        Pair<String, LangModel> pair =
+        LangModelPair pair =
                 
commonTokenCountManager.getLangTokens(languages.get(0).getLanguage());
-        LangModel model = pair.getValue();
+        LangModel model = pair.langModel();
         double sum = 0.0;
         if (tokenCounts.getTokens().entrySet().size() == 0) {
             return 0.0;
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensCosine.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensCosine.java
index d0b275249..b0d7c3f9a 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensCosine.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensCosine.java
@@ -20,13 +20,12 @@ import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.commons.lang3.mutable.MutableInt;
-import org.apache.commons.lang3.tuple.Pair;
 import org.apache.commons.math3.util.FastMath;
 
 import org.apache.tika.eval.core.tokens.CommonTokenCountManager;
 import org.apache.tika.eval.core.tokens.LangModel;
 import org.apache.tika.eval.core.tokens.TokenCounts;
+import org.apache.tika.eval.core.util.MutableInt;
 import org.apache.tika.language.detect.LanguageResult;
 
 public class CommonTokensCosine implements 
LanguageAwareTokenCountStats<Double> {
@@ -39,11 +38,11 @@ public class CommonTokensCosine implements 
LanguageAwareTokenCountStats<Double>
 
     @Override
     public Double calculate(List<LanguageResult> languages, TokenCounts 
tokenCounts) {
-        Pair<String, LangModel> pair =
+        LangModelPair pair =
                 
commonTokenCountManager.getLangTokens(languages.get(0).getLanguage());
-        LangModel model = pair.getValue();
+        LangModel model = pair.langModel();
         double kl = 0.0;
-        if (tokenCounts.getTokens().entrySet().size() == 0) {
+        if (tokenCounts.getTokens().isEmpty()) {
             return 1.0;
         }
         double numerator = 0.0;
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensHellinger.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensHellinger.java
index cbbcacc46..fb221aacc 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensHellinger.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensHellinger.java
@@ -19,13 +19,12 @@ package org.apache.tika.eval.core.textstats;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.commons.lang3.mutable.MutableInt;
-import org.apache.commons.lang3.tuple.Pair;
 import org.apache.commons.math3.util.FastMath;
 
 import org.apache.tika.eval.core.tokens.CommonTokenCountManager;
 import org.apache.tika.eval.core.tokens.LangModel;
 import org.apache.tika.eval.core.tokens.TokenCounts;
+import org.apache.tika.eval.core.util.MutableInt;
 import org.apache.tika.language.detect.LanguageResult;
 
 public class CommonTokensHellinger implements 
LanguageAwareTokenCountStats<Double> {
@@ -38,9 +37,9 @@ public class CommonTokensHellinger implements 
LanguageAwareTokenCountStats<Doubl
 
     @Override
     public Double calculate(List<LanguageResult> languages, TokenCounts 
tokenCounts) {
-        Pair<String, LangModel> pair =
+        LangModelPair pair =
                 
commonTokenCountManager.getLangTokens(languages.get(0).getLanguage());
-        LangModel model = pair.getValue();
+        LangModel model = pair.langModel();
         double sum = 0.0;
         if (tokenCounts.getTokens().entrySet().size() == 0) {
             return 0.0;
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensKLDNormed.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensKLDNormed.java
index ec0a99ba7..1922f6603 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensKLDNormed.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensKLDNormed.java
@@ -19,13 +19,12 @@ package org.apache.tika.eval.core.textstats;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.commons.lang3.mutable.MutableInt;
-import org.apache.commons.lang3.tuple.Pair;
 import org.apache.commons.math3.util.FastMath;
 
 import org.apache.tika.eval.core.tokens.CommonTokenCountManager;
 import org.apache.tika.eval.core.tokens.LangModel;
 import org.apache.tika.eval.core.tokens.TokenCounts;
+import org.apache.tika.eval.core.util.MutableInt;
 import org.apache.tika.language.detect.LanguageResult;
 
 public class CommonTokensKLDNormed implements 
LanguageAwareTokenCountStats<Double> {
@@ -38,11 +37,11 @@ public class CommonTokensKLDNormed implements 
LanguageAwareTokenCountStats<Doubl
 
     @Override
     public Double calculate(List<LanguageResult> languages, TokenCounts 
tokenCounts) {
-        Pair<String, LangModel> pair =
+        LangModelPair pair =
                 
commonTokenCountManager.getLangTokens(languages.get(0).getLanguage());
-        LangModel model = pair.getValue();
+        LangModel model = pair.langModel();
         double kl = 0.0;
-        if (tokenCounts.getTokens().entrySet().size() == 0) {
+        if (tokenCounts.getTokens().isEmpty()) {
             return 1.0;
         }
         double worstCase = 0.0;
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensKLDivergence.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensKLDivergence.java
index 59d31aeb9..8bfb3a034 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensKLDivergence.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/CommonTokensKLDivergence.java
@@ -19,13 +19,12 @@ package org.apache.tika.eval.core.textstats;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.commons.lang3.mutable.MutableInt;
-import org.apache.commons.lang3.tuple.Pair;
 import org.apache.commons.math3.util.FastMath;
 
 import org.apache.tika.eval.core.tokens.CommonTokenCountManager;
 import org.apache.tika.eval.core.tokens.LangModel;
 import org.apache.tika.eval.core.tokens.TokenCounts;
+import org.apache.tika.eval.core.util.MutableInt;
 import org.apache.tika.language.detect.LanguageResult;
 
 public class CommonTokensKLDivergence implements 
LanguageAwareTokenCountStats<Double> {
@@ -38,11 +37,11 @@ public class CommonTokensKLDivergence implements 
LanguageAwareTokenCountStats<Do
 
     @Override
     public Double calculate(List<LanguageResult> languages, TokenCounts 
tokenCounts) {
-        Pair<String, LangModel> pair =
+        LangModelPair pair =
                 
commonTokenCountManager.getLangTokens(languages.get(0).getLanguage());
-        LangModel model = pair.getValue();
+        LangModel model = pair.langModel();
         double kl = 0.0;
-        if (tokenCounts.getTokens().entrySet().size() == 0) {
+        if (tokenCounts.getTokens().isEmpty()) {
             return 1.0;
         }
         for (Map.Entry<String, MutableInt> e : 
tokenCounts.getTokens().entrySet()) {
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TokenEntropy.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/LangModelPair.java
similarity index 55%
copy from 
tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TokenEntropy.java
copy to 
tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/LangModelPair.java
index ebb2d0a5f..1f7591634 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TokenEntropy.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/LangModelPair.java
@@ -16,25 +16,7 @@
  */
 package org.apache.tika.eval.core.textstats;
 
-import org.apache.commons.lang3.mutable.MutableInt;
-import org.apache.commons.math3.util.FastMath;
+import org.apache.tika.eval.core.tokens.LangModel;
 
-import org.apache.tika.eval.core.tokens.TokenCounts;
-
-public class TokenEntropy implements TokenCountStatsCalculator<Double> {
-
-    @Override
-    public Double calculate(TokenCounts tokenCounts) {
-        double ent = 0.0d;
-        double p = 0.0d;
-        double base = 2.0;
-        double totalTokens = (double) tokenCounts.getTotalTokens();
-        for (MutableInt i : tokenCounts.getTokens().values()) {
-            int termFreq = i.intValue();
-
-            p = (double) termFreq / totalTokens;
-            ent += p * FastMath.log(base, p);
-        }
-        return -1.0 * ent;
-    }
+public record LangModelPair(String lang, LangModel langModel) {
 }
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TextProfileSignature.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TextProfileSignature.java
index 9f726c93a..60ceff5fb 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TextProfileSignature.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TextProfileSignature.java
@@ -23,9 +23,9 @@ import java.util.Map;
 
 import org.apache.commons.codec.binary.Base32;
 import org.apache.commons.codec.digest.DigestUtils;
-import org.apache.commons.lang3.mutable.MutableInt;
 
 import org.apache.tika.eval.core.tokens.TokenCounts;
+import org.apache.tika.eval.core.util.MutableInt;
 
 /**
  * Copied nearly directly from Apache Nutch:
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TokenEntropy.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TokenEntropy.java
index ebb2d0a5f..fd7b32225 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TokenEntropy.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TokenEntropy.java
@@ -16,10 +16,10 @@
  */
 package org.apache.tika.eval.core.textstats;
 
-import org.apache.commons.lang3.mutable.MutableInt;
 import org.apache.commons.math3.util.FastMath;
 
 import org.apache.tika.eval.core.tokens.TokenCounts;
+import org.apache.tika.eval.core.util.MutableInt;
 
 public class TokenEntropy implements TokenCountStatsCalculator<Double> {
 
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TokenLengths.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TokenLengths.java
index c98eb06d7..7884e5530 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TokenLengths.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TokenLengths.java
@@ -18,10 +18,10 @@ package org.apache.tika.eval.core.textstats;
 
 import java.util.Map;
 
-import org.apache.commons.lang3.mutable.MutableInt;
 import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
 
 import org.apache.tika.eval.core.tokens.TokenCounts;
+import org.apache.tika.eval.core.util.MutableInt;
 
 public class TokenLengths implements 
TokenCountStatsCalculator<SummaryStatistics> {
 
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TopNTokens.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TopNTokens.java
index faf74dea2..e27d41d69 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TopNTokens.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TopNTokens.java
@@ -18,11 +18,11 @@ package org.apache.tika.eval.core.textstats;
 
 import java.util.Map;
 
-import org.apache.commons.lang3.mutable.MutableInt;
 import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
 
 import org.apache.tika.eval.core.tokens.TokenCounts;
 import org.apache.tika.eval.core.tokens.TokenIntPair;
+import org.apache.tika.eval.core.util.MutableInt;
 
 public class TopNTokens implements TokenCountStatsCalculator<TokenIntPair[]> {
 
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/UnicodeBlockCounter.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/UnicodeBlockCounter.java
index c02852e88..7cd4b99d8 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/UnicodeBlockCounter.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/UnicodeBlockCounter.java
@@ -23,10 +23,11 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 
-import org.apache.commons.lang3.mutable.MutableInt;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.tika.eval.core.util.MutableInt;
+
 public class UnicodeBlockCounter implements StringStatsCalculator<Map<String, 
MutableInt>> {
 
     private static final Logger LOG = 
LoggerFactory.getLogger(UnicodeBlockCounter.class);
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/tokens/CommonTokenCountManager.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/tokens/CommonTokenCountManager.java
index 696890d04..624af9c3d 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/tokens/CommonTokenCountManager.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/tokens/CommonTokenCountManager.java
@@ -34,10 +34,11 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.tuple.Pair;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.tika.eval.core.textstats.LangModelPair;
+
 public class CommonTokenCountManager {
     private static final Logger LOG = 
LoggerFactory.getLogger(CommonTokenCountManager.class);
 
@@ -90,9 +91,9 @@ public class CommonTokenCountManager {
      * @return pair of actual language code used and a set of common
      * tokens for that language
      */
-    public Pair<String, LangModel> getLangTokens(String lang) {
+    public LangModelPair getLangTokens(String lang) {
         String actualLangCode = getActualLangCode(lang);
-        return Pair.of(actualLangCode, commonTokenMap.get(actualLangCode));
+        return new LangModelPair(actualLangCode, 
commonTokenMap.get(actualLangCode));
     }
 
     //return langcode for lang that you are actually using
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/tokens/TokenContraster.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/tokens/TokenContraster.java
index b61862511..cca36d712 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/tokens/TokenContraster.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/tokens/TokenContraster.java
@@ -19,9 +19,10 @@ package org.apache.tika.eval.core.tokens;
 
 import java.util.Map;
 
-import org.apache.commons.lang3.mutable.MutableInt;
 import org.apache.lucene.util.PriorityQueue;
 
+import org.apache.tika.eval.core.util.MutableInt;
+
 /**
  * Computes some corpus contrast statistics.
  * <p>
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/tokens/TokenCounter.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/tokens/TokenCounter.java
index bd9cf9c3a..08b752989 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/tokens/TokenCounter.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/tokens/TokenCounter.java
@@ -21,7 +21,6 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 
-import org.apache.commons.lang3.mutable.MutableInt;
 import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
 import org.apache.commons.math3.util.FastMath;
 import org.apache.lucene.analysis.Analyzer;
@@ -33,6 +32,7 @@ import 
org.apache.tika.eval.core.textstats.TokenCountPriorityQueue;
 import org.apache.tika.eval.core.textstats.TokenEntropy;
 import org.apache.tika.eval.core.textstats.TokenLengths;
 import org.apache.tika.eval.core.textstats.TopNTokens;
+import org.apache.tika.eval.core.util.MutableInt;
 
 /**
  * @deprecated use {@link CompositeTextStatsCalculator}
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/tokens/TokenCounts.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/tokens/TokenCounts.java
index 8b420696c..53b7f4e1a 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/tokens/TokenCounts.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/tokens/TokenCounts.java
@@ -19,7 +19,7 @@ package org.apache.tika.eval.core.tokens;
 import java.util.HashMap;
 import java.util.Map;
 
-import org.apache.commons.lang3.mutable.MutableInt;
+import org.apache.tika.eval.core.util.MutableInt;
 
 public class TokenCounts {
 
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/util/EvalExceptionUtils.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/util/EvalExceptionUtils.java
index 3b9454718..f89179d39 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/util/EvalExceptionUtils.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/util/EvalExceptionUtils.java
@@ -19,9 +19,8 @@ package org.apache.tika.eval.core.util;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.apache.commons.lang3.StringUtils;
-
 import org.apache.tika.utils.ExceptionUtils;
+import org.apache.tika.utils.StringUtils;
 
 public class EvalExceptionUtils {
 
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TokenEntropy.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/util/MutableInt.java
similarity index 54%
copy from 
tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TokenEntropy.java
copy to 
tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/util/MutableInt.java
index ebb2d0a5f..ed18ba9f3 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/textstats/TokenEntropy.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/util/MutableInt.java
@@ -14,27 +14,36 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.eval.core.textstats;
+package org.apache.tika.eval.core.util;
 
-import org.apache.commons.lang3.mutable.MutableInt;
-import org.apache.commons.math3.util.FastMath;
+/**
+ * non-thread safe mutable int
+ */
+public class MutableInt {
 
-import org.apache.tika.eval.core.tokens.TokenCounts;
+    private int val = 0;
+    public MutableInt(int i) {
+        this.val = i;
+    }
 
-public class TokenEntropy implements TokenCountStatsCalculator<Double> {
+    public void increment() {
+        val++;
+    }
+    public int intValue() {
+        return val;
+    }
 
     @Override
-    public Double calculate(TokenCounts tokenCounts) {
-        double ent = 0.0d;
-        double p = 0.0d;
-        double base = 2.0;
-        double totalTokens = (double) tokenCounts.getTotalTokens();
-        for (MutableInt i : tokenCounts.getTokens().values()) {
-            int termFreq = i.intValue();
-
-            p = (double) termFreq / totalTokens;
-            ent += p * FastMath.log(base, p);
+    public final boolean equals(Object o) {
+        if (!(o instanceof MutableInt that)) {
+            return false;
         }
-        return -1.0 * ent;
+
+        return val == that.val;
+    }
+
+    @Override
+    public int hashCode() {
+        return val;
     }
 }
diff --git 
a/tika-eval/tika-eval-core/src/test/java/org/apache/tika/eval/core/tokens/TokenCounterTest.java
 
b/tika-eval/tika-eval-core/src/test/java/org/apache/tika/eval/core/tokens/TokenCounterTest.java
index dc687a2b2..87c1608f7 100644
--- 
a/tika-eval/tika-eval-core/src/test/java/org/apache/tika/eval/core/tokens/TokenCounterTest.java
+++ 
b/tika-eval/tika-eval-core/src/test/java/org/apache/tika/eval/core/tokens/TokenCounterTest.java
@@ -25,13 +25,14 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.Random;
 
-import org.apache.commons.lang3.mutable.MutableInt;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 
+import org.apache.tika.eval.core.util.MutableInt;
+
 public class TokenCounterTest {
     private final static String FIELD = "f";
     private static AnalyzerManager analyzerManager;
diff --git 
a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java 
b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java
index 7f06cb681..c3b312676 100644
--- a/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java
+++ b/tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java
@@ -44,7 +44,6 @@ import com.google.rpc.Status;
 import io.grpc.protobuf.StatusProto;
 import io.grpc.stub.StreamObserver;
 import org.apache.commons.io.FileUtils;
-import org.apache.commons.lang3.StringUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
@@ -79,6 +78,7 @@ import org.apache.tika.pipes.core.fetcher.AbstractFetcher;
 import org.apache.tika.pipes.core.fetcher.FetchKey;
 import org.apache.tika.pipes.core.fetcher.config.AbstractConfig;
 import org.apache.tika.pipes.core.fetcher.config.FetcherConfigContainer;
+import org.apache.tika.utils.StringUtils;
 import org.apache.tika.utils.XMLReaderUtils;
 
 class TikaGrpcServerImpl extends TikaGrpc.TikaImplBase {
@@ -225,7 +225,7 @@ class TikaGrpcServerImpl extends TikaGrpc.TikaImplBase {
         try {
             ParseContext parseContext = new ParseContext();
             String additionalFetchConfigJson = 
request.getAdditionalFetchConfigJson();
-            if (StringUtils.isNotBlank(additionalFetchConfigJson)) {
+            if (! StringUtils.isBlank(additionalFetchConfigJson)) {
                 // The fetch and parse has the option to specify additional 
configuration
                 AbstractConfig abstractConfig = expiringFetcherStore
                         .getFetcherConfigs()
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 3f00072f7..235adf019 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -339,7 +339,6 @@
     <commons.exec.version>1.5.0</commons.exec.version>
     <commons.fileupload.version>1.6.0</commons.fileupload.version>
     <commons.io.version>2.20.0</commons.io.version>
-    <commons.lang3.version>3.19.0</commons.lang3.version>
     <commons.logging.version>1.3.5</commons.logging.version>
     <commons.math3.version>3.6.1</commons.math3.version>
     <commons.net.version>3.12.0</commons.net.version>
@@ -883,11 +882,6 @@
         <artifactId>commons-exec</artifactId>
         <version>${commons.exec.version}</version>
       </dependency>
-      <dependency>
-        <groupId>org.apache.commons</groupId>
-        <artifactId>commons-lang3</artifactId>
-        <version>${commons.lang3.version}</version>
-      </dependency>
       <dependency>
         <groupId>org.apache.commons</groupId>
         <artifactId>commons-math3</artifactId>
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-cad-module/src/main/java/org/apache/tika/parser/dwg/DWGReadParser.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-cad-module/src/main/java/org/apache/tika/parser/dwg/DWGReadParser.java
index 241aa738e..34b6bace2 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-cad-module/src/main/java/org/apache/tika/parser/dwg/DWGReadParser.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-cad-module/src/main/java/org/apache/tika/parser/dwg/DWGReadParser.java
@@ -41,8 +41,6 @@ import com.fasterxml.jackson.core.JsonParser;
 import com.fasterxml.jackson.core.JsonToken;
 import com.fasterxml.jackson.core.json.JsonReadFeature;
 import org.apache.commons.io.FileUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.commons.lang3.Strings;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.xml.sax.ContentHandler;
@@ -57,6 +55,7 @@ import org.apache.tika.sax.XHTMLContentHandler;
 import org.apache.tika.utils.ExceptionUtils;
 import org.apache.tika.utils.FileProcessResult;
 import org.apache.tika.utils.ProcessUtils;
+import org.apache.tika.utils.StringUtils;
 
 
 
@@ -245,7 +244,6 @@ public class DWGReadParser extends AbstractDWGParser {
                     if ("text".equals(nextFieldName)) {
                         String textVal = jsonParser.getText();
                         if (StringUtils.isNotBlank(textVal)) {
-
                             textConsumer.accept(textVal);
                         }
                     } else if ("text_value".equals(nextFieldName)) {
@@ -312,7 +310,7 @@ public class DWGReadParser extends AbstractDWGParser {
                             metadata.set(TikaCoreProperties.TITLE, textVal);
                         } else if ("LASTSAVEDBY".equals(nextFieldName)) {
                             metadata.set(TikaCoreProperties.MODIFIER, textVal);
-                        } else if (!Strings.CI.startsWith(nextFieldName, 
"unknown")) {
+                        } else if (! 
StringUtils.startsWithIgnoreCase(nextFieldName, "unknown")) {
                             metadata.set(nextFieldName, textVal);
                         }
                     }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-code-module/pom.xml
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-code-module/pom.xml
index 8bc0f9667..107504dd7 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-code-module/pom.xml
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-code-module/pom.xml
@@ -49,11 +49,6 @@
       <artifactId>asm</artifactId>
       <version>${asm.version}</version>
     </dependency>
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-lang3</artifactId>
-    </dependency>
-
     <dependency>
       <groupId>com.epam</groupId>
       <artifactId>parso</artifactId>
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/executable/UniversalExecutableParser.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/executable/UniversalExecutableParser.java
index 20e12a564..595d77b6d 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/executable/UniversalExecutableParser.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/executable/UniversalExecutableParser.java
@@ -24,7 +24,6 @@ import java.util.Comparator;
 import java.util.Set;
 
 import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.tuple.Pair;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
@@ -111,7 +110,7 @@ public class UniversalExecutableParser implements Parser {
         long archsSize = (long) archsCount * archStructSize;
 
         var unsortedOffsets = false;
-        var offsetAndSizePerArch = new Pair[archsCount];
+        var offsetAndSizePerArch = new OffsetSize[archsCount];
         for (int archIndex = 0; archIndex < archsCount; archIndex++) {
             IOUtils.skipFully(stream, 8);
 
@@ -121,7 +120,7 @@ public class UniversalExecutableParser implements Parser {
             if (offset < 4 + 4 + archsSize) {
                 throw new TikaException("Invalid offset: " + offset);
             }
-            if (!unsortedOffsets && archIndex > 0 && offset < (long) 
offsetAndSizePerArch[archIndex - 1].getLeft()) {
+            if (!unsortedOffsets && archIndex > 0 && offset < (long) 
offsetAndSizePerArch[archIndex - 1].offset) {
                 unsortedOffsets = true;
             }
             long size = is64
@@ -131,7 +130,7 @@ public class UniversalExecutableParser implements Parser {
             if (size < 0 || size > MAX_ARCH_SIZE) {
                 throw new TikaException("Arch size=" + size + " must be > 0 
and < " + MAX_ARCH_SIZE);
             }
-            offsetAndSizePerArch[archIndex] = Pair.of(offset, size);
+            offsetAndSizePerArch[archIndex] = new OffsetSize(offset, size);
 
             if (is64) {
                 IOUtils.skipFully(stream, 8);
@@ -142,14 +141,14 @@ public class UniversalExecutableParser implements Parser {
             currentOffset += archStructSize;
         }
         if (unsortedOffsets) {
-            Arrays.sort(offsetAndSizePerArch, Comparator.comparingLong(entry 
-> (long) entry.getLeft()));
+            Arrays.sort(offsetAndSizePerArch, Comparator.comparingLong(entry 
-> (long) entry.offset));
         }
 
         for (int archIndex = 0; archIndex < archsCount; archIndex++) {
-            long skipUntilStart = 
(long)offsetAndSizePerArch[archIndex].getLeft() - currentOffset;
+            long skipUntilStart = (long)offsetAndSizePerArch[archIndex].offset 
- currentOffset;
             IOUtils.skipFully(stream, skipUntilStart);
             currentOffset += skipUntilStart;
-            long sz = (long)offsetAndSizePerArch[archIndex].getRight();
+            long sz = (long)offsetAndSizePerArch[archIndex].size;
             //we bounds checked this above.
             byte[] perArchMachO = new byte[(int)sz];
             IOUtils.readFully(stream, perArchMachO);
@@ -163,4 +162,7 @@ public class UniversalExecutableParser implements Parser {
         }
     }
 
+    private record OffsetSize(long offset, long size) {
+
+    }
 }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/pom.xml
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/pom.xml
index 97a19a9a7..db04aed4b 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/pom.xml
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/pom.xml
@@ -71,10 +71,6 @@
       <groupId>commons-codec</groupId>
       <artifactId>commons-codec</artifactId>
     </dependency>
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-lang3</artifactId>
-    </dependency>
     <dependency>
       <groupId>org.apache.poi</groupId>
       <artifactId>poi</artifactId>
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteDocument.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteDocument.java
index a949b0762..26e2f32ef 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteDocument.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteDocument.java
@@ -22,8 +22,6 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.commons.lang3.tuple.Pair;
-
 class OneNoteDocument {
     OneNoteHeader header;
     List<ExtendedGUID> revisionListOrder = new ArrayList<>();
@@ -32,7 +30,7 @@ class OneNoteDocument {
     Map<ExtendedGUID, FileChunkReference> guidToRef = new HashMap<>();
     Map<ExtendedGUID, FileNodePtr> guidToObject = new HashMap<>();
 
-    Map<ExtendedGUID, Pair<Long, ExtendedGUID>> revisionRoleMap = new 
HashMap<>();
+    Map<ExtendedGUID, RoleGuid> revisionRoleMap = new HashMap<>();
     ExtendedGUID currentRevision = ExtendedGUID.nil();
     FileNodeList root = new FileNodeList();
 
@@ -63,7 +61,7 @@ class OneNoteDocument {
 
     public void registerAdditionalRevisionRole(ExtendedGUID gosid, long 
revisionRole,
                                                ExtendedGUID gctxid) {
-        revisionRoleMap.put(gosid, Pair.of(revisionRole, gctxid));
+        revisionRoleMap.put(gosid, new RoleGuid(revisionRole, gctxid));
     }
 
     public List<ExtendedGUID> getRevisionListOrder() {
@@ -112,12 +110,12 @@ class OneNoteDocument {
         return this;
     }
 
-    public Map<ExtendedGUID, Pair<Long, ExtendedGUID>> getRevisionRoleMap() {
+    public Map<ExtendedGUID, RoleGuid> getRevisionRoleMap() {
         return revisionRoleMap;
     }
 
     public OneNoteDocument setRevisionRoleMap(
-            Map<ExtendedGUID, Pair<Long, ExtendedGUID>> revisionRoleMap) {
+            Map<ExtendedGUID, RoleGuid> revisionRoleMap) {
         this.revisionRoleMap = revisionRoleMap;
         return this;
     }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java
index 55913515f..658d4983e 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java
@@ -27,7 +27,6 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.tuple.Pair;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
@@ -127,7 +126,7 @@ public class OneNoteParser implements Parser {
                 metadata.set(ONE_NOTE_PREFIX + "rgbPlaceholder",
                         "0x" + 
Long.toHexString(oneNoteDocument.header.rgbPlaceholder));
 
-                Pair<Long, ExtendedGUID> roleAndContext = Pair.of(1L, 
ExtendedGUID.nil());
+                RoleGuid roleAndContext = new RoleGuid(1L, ExtendedGUID.nil());
                 OneNoteTreeWalker oneNoteTreeWalker =
                         new OneNoteTreeWalker(options, oneNoteDocument, 
oneNoteDirectFileResource,
                                 xhtml, metadata, context, roleAndContext);
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
index 82297e11d..9d4f31043 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java
@@ -35,7 +35,6 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.tuple.Pair;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.AttributesImpl;
 
@@ -92,7 +91,7 @@ class OneNoteTreeWalker {
     private final OneNoteDocument oneNoteDocument;
     private final OneNoteDirectFileResource dif;
     private final XHTMLContentHandler xhtml;
-    private final Pair<Long, ExtendedGUID> roleAndContext;
+    private final RoleGuid roleAndContext;
     private Instant lastModifiedTimestamp = Instant.MIN;
     private long creationTimestamp = Long.MAX_VALUE;
     private long lastModified = Long.MIN_VALUE;
@@ -102,7 +101,7 @@ class OneNoteTreeWalker {
     /**
      * Contains pairs of {Offset,Length} that we have added to the text stream 
already.
      */
-    private final Set<Pair<Long, Integer>> textAlreadyFetched = new 
HashSet<>();
+    private final Set<RoleGuid> textAlreadyFetched = new HashSet<>();
 
     /**
      * Create a one tree walker.
@@ -119,7 +118,7 @@ class OneNoteTreeWalker {
     public OneNoteTreeWalker(OneNoteTreeWalkerOptions options, OneNoteDocument 
oneNoteDocument,
                              OneNoteDirectFileResource dif, 
XHTMLContentHandler xhtml,
                              Metadata parentMetadata, ParseContext 
parseContext,
-                             Pair<Long, ExtendedGUID> roleAndContext) {
+                             RoleGuid roleAndContext) {
         this.options = options;
         this.oneNoteDocument = oneNoteDocument;
         this.dif = dif;
@@ -176,8 +175,8 @@ class OneNoteTreeWalker {
      * @param revisionRole The revision role Long,GUID pair.
      * @return True if exists, false if not.
      */
-    private boolean hasRevisionRole(ExtendedGUID rid, Pair<Long, ExtendedGUID> 
revisionRole) {
-        Pair<Long, ExtendedGUID> where = 
oneNoteDocument.revisionRoleMap.get(rid);
+    private boolean hasRevisionRole(ExtendedGUID rid, RoleGuid revisionRole) {
+        RoleGuid where = oneNoteDocument.revisionRoleMap.get(rid);
         return where != null && where.equals(revisionRole);
     }
 
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/RoleGuid.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/RoleGuid.java
new file mode 100644
index 000000000..9afae58c2
--- /dev/null
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/RoleGuid.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft.onenote;
+
+/**
+ * Pairs a revision role with the {@link ExtendedGUID} of the context it applies to.
+ */
+public record RoleGuid(Long role, ExtendedGUID extendedGUID) {
+}
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/chunking/ZipFilesChunking.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/chunking/ZipFilesChunking.java
index 19eb2a06d..560cee6b6 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/chunking/ZipFilesChunking.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/streamobj/chunking/ZipFilesChunking.java
@@ -24,7 +24,6 @@ import java.util.List;
 import java.util.concurrent.atomic.AtomicReference;
 
 import org.apache.commons.codec.digest.DigestUtils;
-import org.apache.commons.lang3.NotImplementedException;
 
 import org.apache.tika.exception.TikaException;
 import 
org.apache.tika.parser.microsoft.onenote.fsshttpb.streamobj.LeafNodeObject;
@@ -218,6 +217,6 @@ public class ZipFilesChunking extends AbstractChunking {
      */
     private SignatureObject getSubChunkSignature() {
         // In current, it has no idea about how to compute the signature for 
sub chunk.
-        throw new NotImplementedException("The Get sub chunk signature method 
is not implemented.");
+        throw new UnsupportedOperationException("The Get sub chunk signature 
method is not implemented.");
     }
 }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
index bab0a6fbc..f9c887316 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
@@ -25,7 +25,6 @@ import java.time.Instant;
 import java.util.Arrays;
 import java.util.List;
 
-import org.apache.commons.lang3.StringUtils;
 import org.junit.jupiter.api.Test;
 
 import org.apache.tika.TikaTest;
@@ -285,8 +284,7 @@ public class OneNoteParserTest extends TikaTest {
     public void testDupeText() throws Exception {
         Metadata metadata = new Metadata();
         String txt = getText("test-tika-3970-dupetext.one", metadata);
-
-        assertEquals(1, StringUtils.countMatches(txt, "Sunday morning"));
+        assertContainsCount("Sunday morning", txt, 1);
     }
 
     /**
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/pom.xml
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/pom.xml
index a79c83abd..f1a254c96 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/pom.xml
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/pom.xml
@@ -45,10 +45,6 @@
       <artifactId>tika-parser-xml-module</artifactId>
       <version>${project.version}</version>
     </dependency>
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-lang3</artifactId>
-    </dependency>
     <dependency>
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-collections4</artifactId>
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/mif/MIFExtractor.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/mif/MIFExtractor.java
index 457895277..b7d40c511 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/mif/MIFExtractor.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/mif/MIFExtractor.java
@@ -24,15 +24,13 @@ import java.util.Stack;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.apache.commons.lang3.StringEscapeUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.commons.lang3.Strings;
 import org.xml.sax.Attributes;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.AttributesImpl;
 
 import org.apache.tika.detect.AutoDetectReader;
+import org.apache.tika.utils.StringUtils;
 
 /**
  * Helper Class to Parse and Extract Adobe MIF Files.
@@ -124,8 +122,7 @@ public class MIFExtractor {
         handler.startElement(StringUtils.EMPTY, tag.getName(), tag.getName(), 
attrs);
         String value = Strings.CS.removeStart(tag.getValue(), START_TAG_VALUE);
         value = Strings.CS.removeEnd(value, END_TAG_VALUE);
-        String content = StringEscapeUtils.escapeXml(value);
-        handler.characters(content.toCharArray(), 0, content.length());
+        handler.characters(value.toCharArray(), 0, value.length());
         handler.endElement(StringUtils.EMPTY, tag.getName(), tag.getName());
     }
 
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/pom.xml
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/pom.xml
index 709d3a8ac..7af98521b 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/pom.xml
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/pom.xml
@@ -30,10 +30,6 @@
   <name>Apache Tika OCR parser module</name>
 
   <dependencies>
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-lang3</artifactId>
-    </dependency>
     <dependency>
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-exec</artifactId>
diff --git a/tika-server/tika-server-core/pom.xml 
b/tika-server/tika-server-core/pom.xml
index 8dc3c5a82..18a68a6d9 100644
--- a/tika-server/tika-server-core/pom.xml
+++ b/tika-server/tika-server-core/pom.xml
@@ -95,10 +95,6 @@
       <groupId>org.apache.cxf</groupId>
       <artifactId>cxf-rt-rs-client</artifactId>
     </dependency>
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-lang3</artifactId>
-    </dependency>
     <!-- logging -->
     <dependency>
       <groupId>org.slf4j</groupId>
diff --git a/tika-server/tika-server-standard/pom.xml 
b/tika-server/tika-server-standard/pom.xml
index 0edd7d1aa..4d877dfe8 100644
--- a/tika-server/tika-server-standard/pom.xml
+++ b/tika-server/tika-server-standard/pom.xml
@@ -168,7 +168,6 @@
                   <exclude>org.slf4j:slf4j-api:jar:</exclude>
                   <exclude>commons-logging:commons-logging:jar:</exclude>
                   <exclude>org.apache.cxf:cxf-rt-rs-client:jar:</exclude>
-                  <exclude>org.apache.commons:commons-lang3:jar:</exclude>
                   <exclude>commons-cli:commons-cli:jar:</exclude>
                   
<exclude>org.apache.cxf:cxf-rt-rs-security-cors:jar:</exclude>
                   <exclude>org.eclipse.jetty:jetty-io:jar:</exclude>

Reply via email to