[1/4] tika git commit: TIKA-1332 -- add English/Spanish common tokens, fix logging
Repository: tika Updated Branches: refs/heads/master a2d214c71 -> dc2dcd4cc http://git-wip-us.apache.org/repos/asf/tika/blob/dc2dcd4c/tika-eval/src/main/resources/log4j.properties -- diff --git a/tika-eval/src/main/resources/log4j.properties b/tika-eval/src/main/resources/log4j.properties new file mode 100644 index 000..925f9f2 --- /dev/null +++ b/tika-eval/src/main/resources/log4j.properties @@ -0,0 +1,11 @@ + +log4j.rootLogger=WARN,A1 + +#for debugging +#log4j.rootLogger=TRACE,A1 + +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n http://git-wip-us.apache.org/repos/asf/tika/blob/dc2dcd4c/tika-eval/src/main/resources/tika-eval-comparison-config.xml -- diff --git a/tika-eval/src/main/resources/tika-eval-comparison-config.xml b/tika-eval/src/main/resources/tika-eval-comparison-config.xml index 04ef658..88fdd0a 100644 --- a/tika-eval/src/main/resources/tika-eval-comparison-config.xml +++ b/tika-eval/src/main/resources/tika-eval-comparison-config.xml @@ -28,8 +28,6 @@ > - - @@ -72,7 +68,7 @@ crawlingInputDir="false" minJsonFileSizeBytes="-1" maxJsonFileSizeBytes="200" - commonTokens="resources/commontokens" + commonTokens="resources/common_tokens" /> http://git-wip-us.apache.org/repos/asf/tika/blob/dc2dcd4c/tika-eval/src/main/resources/tika-eval-profiler-config.xml -- diff --git a/tika-eval/src/main/resources/tika-eval-profiler-config.xml b/tika-eval/src/main/resources/tika-eval-profiler-config.xml index bd94b25..be7adf4 100644 --- a/tika-eval/src/main/resources/tika-eval-profiler-config.xml +++ b/tika-eval/src/main/resources/tika-eval-profiler-config.xml @@ -27,16 +27,13 @@ timeoutThresholdMillis="30"> - - - @@ -66,7 +63,7 @@ + commonTokens="resources/common_tokens"/> http://git-wip-us.apache.org/repos/asf/tika/blob/dc2dcd4c/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java -- diff --git 
a/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java b/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java index 72e8008..6d4d4ef 100644 --- a/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java +++ b/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java @@ -59,7 +59,7 @@ public class SimpleComparerTest extends TikaTest { Paths.get("extractsA"), Paths.get("extractsB"), writer, -1, -1, ExtractReader.ALTER_METADATA_LIST.AS_IS); - AbstractProfiler.loadCommonTokens(this.getResourceAsFile("/commontokens").toPath()); + AbstractProfiler.loadCommonTokens(this.getResourceAsFile("/common_tokens").toPath()); LanguageIDWrapper.loadBuiltInModels(); } http://git-wip-us.apache.org/repos/asf/tika/blob/dc2dcd4c/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java -- diff --git a/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java b/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java index c358149..ff0961c 100644 --- a/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java +++ b/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java @@ -30,7 +30,7 @@ public class TikaEvalCLITest { public void testBasic() throws Exception { List args = new ArrayList<>(); args.add("Profile"); -args.add("-extractDir"); +args.add("-extracts"); args.add("tika"); args.add("-db"); args.add("mydb"); http://git-wip-us.apache.org/repos/asf/tika/blob/dc2dcd4c/tika-eval/src/test/resources/common_tokens/en -- diff --git a/tika-eval/src/test/resources/common_tokens/en b/tika-eval/src/test/resources/common_tokens/en new file mode 100644 index 000..8d442fe --- /dev/null +++ b/tika-eval/src/test/resources/common_tokens/en @@ -0,0 +1,8 @@ +the +of +and +a +or +#quick +brown +fox \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tika/blob/dc2dcd4c/tika-eval/src/test/resources/common_tokens/es -- diff --git a/tika-eval/src/test/resources/common_tokens/es 
b/tika-eval/src/test/resources/common_tokens/es new file mode 100644 index 000..b9bfd03 --- /dev/null +++ b/tika-
[1/4] tika git commit: TIKA-1332 -- add English/Spanish common tokens; fix logging
Repository: tika Updated Branches: refs/heads/2.x 61532258f -> 81150859b http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/main/resources/log4j.properties -- diff --git a/tika-eval/src/main/resources/log4j.properties b/tika-eval/src/main/resources/log4j.properties new file mode 100644 index 000..925f9f2 --- /dev/null +++ b/tika-eval/src/main/resources/log4j.properties @@ -0,0 +1,11 @@ + +log4j.rootLogger=WARN,A1 + +#for debugging +#log4j.rootLogger=TRACE,A1 + +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/main/resources/tika-eval-comparison-config.xml -- diff --git a/tika-eval/src/main/resources/tika-eval-comparison-config.xml b/tika-eval/src/main/resources/tika-eval-comparison-config.xml index 2c51616..8070672 100644 --- a/tika-eval/src/main/resources/tika-eval-comparison-config.xml +++ b/tika-eval/src/main/resources/tika-eval-comparison-config.xml @@ -28,8 +28,6 @@ > - - @@ -72,7 +68,7 @@ crawlingInputDir="false" minJsonFileSizeBytes="-1" maxJsonFileSizeBytes="200" - commonTokens="resources/commontokens" + commonTokens="resources/common_tokens" /> http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/main/resources/tika-eval-profiler-config.xml -- diff --git a/tika-eval/src/main/resources/tika-eval-profiler-config.xml b/tika-eval/src/main/resources/tika-eval-profiler-config.xml index bd94b25..be7adf4 100644 --- a/tika-eval/src/main/resources/tika-eval-profiler-config.xml +++ b/tika-eval/src/main/resources/tika-eval-profiler-config.xml @@ -27,16 +27,13 @@ timeoutThresholdMillis="30"> - - - @@ -66,7 +63,7 @@ + commonTokens="resources/common_tokens"/> http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java -- diff --git 
a/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java b/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java index 72e8008..6d4d4ef 100644 --- a/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java +++ b/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java @@ -59,7 +59,7 @@ public class SimpleComparerTest extends TikaTest { Paths.get("extractsA"), Paths.get("extractsB"), writer, -1, -1, ExtractReader.ALTER_METADATA_LIST.AS_IS); - AbstractProfiler.loadCommonTokens(this.getResourceAsFile("/commontokens").toPath()); + AbstractProfiler.loadCommonTokens(this.getResourceAsFile("/common_tokens").toPath()); LanguageIDWrapper.loadBuiltInModels(); } http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java -- diff --git a/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java b/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java index c358149..ff0961c 100644 --- a/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java +++ b/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java @@ -30,7 +30,7 @@ public class TikaEvalCLITest { public void testBasic() throws Exception { List args = new ArrayList<>(); args.add("Profile"); -args.add("-extractDir"); +args.add("-extracts"); args.add("tika"); args.add("-db"); args.add("mydb"); http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/resources/common_tokens/en -- diff --git a/tika-eval/src/test/resources/common_tokens/en b/tika-eval/src/test/resources/common_tokens/en new file mode 100644 index 000..8d442fe --- /dev/null +++ b/tika-eval/src/test/resources/common_tokens/en @@ -0,0 +1,8 @@ +the +of +and +a +or +#quick +brown +fox \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/resources/common_tokens/es -- diff --git a/tika-eval/src/test/resources/common_tokens/es 
b/tika-eval/src/test/resources/common_tokens/es new file mode 100644 index 000..b9bfd03 --- /dev/null +++ b/tika-eva