This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4193 in repository https://gitbox.apache.org/repos/asf/tika.git
commit d07fb16b132294ced01a9ce64ae7f8263149f3d8 Author: tallison <[email protected]> AuthorDate: Thu Feb 8 14:38:30 2024 -0500 TIKA-4193 -- add num common tokens to TikaEvalMetadataFilter --- .../org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java | 4 ++++ .../apache/tika/eval/core/metadata/TikaEvalMetadataFilterTest.java | 1 + 2 files changed, 5 insertions(+) diff --git a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java index 0ac65d240..811958af4 100644 --- a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java +++ b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java @@ -48,6 +48,9 @@ public class TikaEvalMetadataFilter extends MetadataFilter { public static Property NUM_ALPHA_TOKENS = Property.externalInteger(TIKA_EVAL_NS + "numAlphaTokens"); + public static Property NUM_COMMON_TOKENS = + Property.externalInteger(TIKA_EVAL_NS + "numCommonTokens"); + public static Property NUM_UNIQUE_ALPHA_TOKENS = Property.externalInteger(TIKA_EVAL_NS + "numUniqueAlphaTokens"); @@ -90,6 +93,7 @@ public class TikaEvalMetadataFilter extends MetadataFilter { CommonTokenResult commonTokenResult = (CommonTokenResult) results.get(CommonTokens.class); metadata.set(NUM_ALPHA_TOKENS, commonTokenResult.getAlphabeticTokens()); metadata.set(NUM_UNIQUE_ALPHA_TOKENS, commonTokenResult.getUniqueAlphabeticTokens()); + metadata.set(NUM_COMMON_TOKENS, commonTokenResult.getCommonTokens()); if (commonTokenResult.getAlphabeticTokens() > 0) { metadata.set(OUT_OF_VOCABULARY, commonTokenResult.getOOV()); } else { diff --git a/tika-eval/tika-eval-core/src/test/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilterTest.java b/tika-eval/tika-eval-core/src/test/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilterTest.java 
index 1961698b4..f1fd21c21 100644 --- a/tika-eval/tika-eval-core/src/test/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilterTest.java +++ b/tika-eval/tika-eval-core/src/test/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilterTest.java @@ -42,6 +42,7 @@ public class TikaEvalMetadataFilterTest { assertEquals(11, (int) metadata.getInt(TikaEvalMetadataFilter.NUM_UNIQUE_TOKENS)); assertEquals(10, (int) metadata.getInt(TikaEvalMetadataFilter.NUM_ALPHA_TOKENS)); assertEquals(9, (int) metadata.getInt(TikaEvalMetadataFilter.NUM_UNIQUE_ALPHA_TOKENS)); + assertEquals(9, (int) metadata.getInt(TikaEvalMetadataFilter.NUM_COMMON_TOKENS)); assertEquals(0.0999,
