Repository: tika Updated Branches: refs/heads/2.x 61532258f -> 81150859b
http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/main/resources/log4j.properties ---------------------------------------------------------------------- diff --git a/tika-eval/src/main/resources/log4j.properties b/tika-eval/src/main/resources/log4j.properties new file mode 100644 index 0000000..925f9f2 --- /dev/null +++ b/tika-eval/src/main/resources/log4j.properties @@ -0,0 +1,11 @@ + +log4j.rootLogger=WARN,A1 + +#for debugging +#log4j.rootLogger=TRACE,A1 + +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/main/resources/tika-eval-comparison-config.xml ---------------------------------------------------------------------- diff --git a/tika-eval/src/main/resources/tika-eval-comparison-config.xml b/tika-eval/src/main/resources/tika-eval-comparison-config.xml index 2c51616..8070672 100644 --- a/tika-eval/src/main/resources/tika-eval-comparison-config.xml +++ b/tika-eval/src/main/resources/tika-eval-comparison-config.xml @@ -28,8 +28,6 @@ > <commandline> - <option opt="c" longOpt="tika-config" hasArg="true" - description="TikaConfig file"/> <option opt="bc" longOpt="batch-config" hasArg="true" description="xml batch config file" required="true"/> <option opt="inputDir" hasArg="true" @@ -47,8 +45,6 @@ process full metadata list ('as_is'=default), take just the first/container document ('first_only'), concatenate all content into the first metadata item ('concatenate_content')"/> - <option opt="includeFilePat" hasArg="true" - description="regex for files to include"/> </commandline> @@ -72,7 +68,7 @@ crawlingInputDir="false" minJsonFileSizeBytes="-1" maxJsonFileSizeBytes="2000000" - commonTokens="resources/commontokens" + commonTokens="resources/common_tokens" /> <!-- reporter and interrupter are optional --> 
http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/main/resources/tika-eval-profiler-config.xml ---------------------------------------------------------------------- diff --git a/tika-eval/src/main/resources/tika-eval-profiler-config.xml b/tika-eval/src/main/resources/tika-eval-profiler-config.xml index bd94b25..be7adf4 100644 --- a/tika-eval/src/main/resources/tika-eval-profiler-config.xml +++ b/tika-eval/src/main/resources/tika-eval-profiler-config.xml @@ -27,16 +27,13 @@ timeoutThresholdMillis="300000"> <commandline> - <option opt="c" longOpt="tika-config" hasArg="true" - description="TikaConfig file"/> - <option opt="bc" longOpt="batch-config" hasArg="true" description="xml batch config file" required="true"/> <option opt="inputDir" hasArg="true" description="dir to start crawling"/> <option opt="numConsumers" hasArg="true" description="number of fileConsumers threads"/> - <option opt="extractDir" hasArg="true" + <option opt="extracts" hasArg="true" description="this dir for analysis" required="false"/> <option opt="db" hasArg="true" description="name of db directory or file to which to write results"/> @@ -66,7 +63,7 @@ <consumers builderClass="org.apache.tika.eval.batch.EvalConsumersBuilder" consumerBuilderClass="org.apache.tika.eval.batch.SingleFileConsumerBuilder" - commonTokens="resources/commontokens"/> + commonTokens="resources/common_tokens"/> <!-- reporter and interrupter are optional --> http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java ---------------------------------------------------------------------- diff --git a/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java b/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java index 72e8008..6d4d4ef 100644 --- a/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java +++ b/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java @@ -59,7 +59,7 @@ 
public class SimpleComparerTest extends TikaTest { Paths.get("extractsA"), Paths.get("extractsB"), writer, -1, -1, ExtractReader.ALTER_METADATA_LIST.AS_IS); - AbstractProfiler.loadCommonTokens(this.getResourceAsFile("/commontokens").toPath()); + AbstractProfiler.loadCommonTokens(this.getResourceAsFile("/common_tokens").toPath()); LanguageIDWrapper.loadBuiltInModels(); } http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java ---------------------------------------------------------------------- diff --git a/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java b/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java index c358149..ff0961c 100644 --- a/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java +++ b/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java @@ -30,7 +30,7 @@ public class TikaEvalCLITest { public void testBasic() throws Exception { List<String> args = new ArrayList<>(); args.add("Profile"); - args.add("-extractDir"); + args.add("-extracts"); args.add("tika"); args.add("-db"); args.add("mydb"); http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/resources/common_tokens/en ---------------------------------------------------------------------- diff --git a/tika-eval/src/test/resources/common_tokens/en b/tika-eval/src/test/resources/common_tokens/en new file mode 100644 index 0000000..8d442fe --- /dev/null +++ b/tika-eval/src/test/resources/common_tokens/en @@ -0,0 +1,8 @@ +the +of +and +a +or +#quick +brown +fox \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/resources/common_tokens/es ---------------------------------------------------------------------- diff --git a/tika-eval/src/test/resources/common_tokens/es b/tika-eval/src/test/resources/common_tokens/es new file mode 100644 index 0000000..b9bfd03 --- /dev/null +++ 
b/tika-eval/src/test/resources/common_tokens/es @@ -0,0 +1,10 @@ +la +de +y +una + + +o +rápido +marrón +zorro \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/resources/common_tokens/zh-cn ---------------------------------------------------------------------- diff --git a/tika-eval/src/test/resources/common_tokens/zh-cn b/tika-eval/src/test/resources/common_tokens/zh-cn new file mode 100644 index 0000000..bec617d --- /dev/null +++ b/tika-eval/src/test/resources/common_tokens/zh-cn @@ -0,0 +1,8 @@ +的 +的 +和 +一個 +要么 +快 +棕色 +狐狸 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/resources/common_tokens/zh-tw ---------------------------------------------------------------------- diff --git a/tika-eval/src/test/resources/common_tokens/zh-tw b/tika-eval/src/test/resources/common_tokens/zh-tw new file mode 100644 index 0000000..bc91291 --- /dev/null +++ b/tika-eval/src/test/resources/common_tokens/zh-tw @@ -0,0 +1,8 @@ +的 +的 +和 +一个 +要么 +快 +棕色 +狐狸 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/resources/commontokens/en ---------------------------------------------------------------------- diff --git a/tika-eval/src/test/resources/commontokens/en b/tika-eval/src/test/resources/commontokens/en deleted file mode 100644 index 8d442fe..0000000 --- a/tika-eval/src/test/resources/commontokens/en +++ /dev/null @@ -1,8 +0,0 @@ -the -of -and -a -or -#quick -brown -fox \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/resources/commontokens/es ---------------------------------------------------------------------- diff --git a/tika-eval/src/test/resources/commontokens/es b/tika-eval/src/test/resources/commontokens/es deleted file mode 100644 index b9bfd03..0000000 --- a/tika-eval/src/test/resources/commontokens/es +++ /dev/null @@ -1,10 +0,0 @@ -la 
-de -y -una - - -o -rápido -marrón -zorro \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/resources/commontokens/zh-cn ---------------------------------------------------------------------- diff --git a/tika-eval/src/test/resources/commontokens/zh-cn b/tika-eval/src/test/resources/commontokens/zh-cn deleted file mode 100644 index bec617d..0000000 --- a/tika-eval/src/test/resources/commontokens/zh-cn +++ /dev/null @@ -1,8 +0,0 @@ -的 -的 -和 -一個 -要么 -快 -棕色 -狐狸 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/resources/commontokens/zh-tw ---------------------------------------------------------------------- diff --git a/tika-eval/src/test/resources/commontokens/zh-tw b/tika-eval/src/test/resources/commontokens/zh-tw deleted file mode 100644 index bc91291..0000000 --- a/tika-eval/src/test/resources/commontokens/zh-tw +++ /dev/null @@ -1,8 +0,0 @@ -的 -的 -和 -一个 -要么 -快 -棕色 -狐狸 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/resources/log4j.properties ---------------------------------------------------------------------- diff --git a/tika-eval/src/test/resources/log4j.properties b/tika-eval/src/test/resources/log4j.properties deleted file mode 100644 index 925f9f2..0000000 --- a/tika-eval/src/test/resources/log4j.properties +++ /dev/null @@ -1,11 +0,0 @@ - -log4j.rootLogger=WARN,A1 - -#for debugging -#log4j.rootLogger=TRACE,A1 - -log4j.appender.A1=org.apache.log4j.ConsoleAppender - -# A1 uses PatternLayout. 
-log4j.appender.A1.layout=org.apache.log4j.PatternLayout -log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/resources/log4j_process.properties ---------------------------------------------------------------------- diff --git a/tika-eval/src/test/resources/log4j_process.properties b/tika-eval/src/test/resources/log4j_process.properties deleted file mode 100644 index cca8871..0000000 --- a/tika-eval/src/test/resources/log4j_process.properties +++ /dev/null @@ -1,11 +0,0 @@ - -log4j.rootLogger=TRACE,A1 - -#for debugging -#log4j.rootLogger=TRACE,A1 - -log4j.appender.A1=org.apache.log4j.ConsoleAppender - -# A1 uses PatternLayout. -log4j.appender.A1.layout=org.apache.log4j.PatternLayout -log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/resources/single-file-profiler-crawl-extract-config.xml ---------------------------------------------------------------------- diff --git a/tika-eval/src/test/resources/single-file-profiler-crawl-extract-config.xml b/tika-eval/src/test/resources/single-file-profiler-crawl-extract-config.xml index 1cb38f9..69e4262 100644 --- a/tika-eval/src/test/resources/single-file-profiler-crawl-extract-config.xml +++ b/tika-eval/src/test/resources/single-file-profiler-crawl-extract-config.xml @@ -36,7 +36,7 @@ description="dir to start crawling"/> <option opt="numConsumers" hasArg="true" description="number of fileConsumers threads"/> - <option opt="extractDir" hasArg="true" + <option opt="extracts" hasArg="true" description="this dir contains the files containing extracted metadata/content" required="false"/> <option opt="db" hasArg="true" description="name of db directory or file to which to write results"/> @@ -61,7 +61,7 @@ <consumers builderClass="org.apache.tika.eval.batch.EvalConsumersBuilder" 
consumerBuilderClass="org.apache.tika.eval.batch.SingleFileConsumerBuilder" errorLogFile="src/test/resources/test-dirs/batch-logs/batch-process-fatal.xml" - extractDir="src/test/resources/test-dirs/extractsA" + extracts="src/test/resources/test-dirs/extractsA" commonTokens="src/test/resources/common_tokens_short.txt"/> http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/resources/single-file-profiler-crawl-input-config.xml ---------------------------------------------------------------------- diff --git a/tika-eval/src/test/resources/single-file-profiler-crawl-input-config.xml b/tika-eval/src/test/resources/single-file-profiler-crawl-input-config.xml index 29b7f3b..1e37b52 100644 --- a/tika-eval/src/test/resources/single-file-profiler-crawl-input-config.xml +++ b/tika-eval/src/test/resources/single-file-profiler-crawl-input-config.xml @@ -36,7 +36,7 @@ description="dir to start crawling"/> <option opt="numConsumers" hasArg="true" description="number of fileConsumers threads"/> - <option opt="extractDir" hasArg="true" + <option opt="extracts" hasArg="true" description="this dir contains the files containing extracted metadata/content" required="false"/> <option opt="db" hasArg="true" description="name of db directory or file to which to write results"/> @@ -61,7 +61,7 @@ <consumers builderClass="org.apache.tika.eval.batch.EvalConsumersBuilder" consumerBuilderClass="org.apache.tika.eval.batch.SingleFileConsumerBuilder" errorLogFile="src/test/resources/test-dirs/batch-logs/batch-process-fatal.xml" - extractDir="src/test/resources/test-dirs/extractsA" + extracts="src/test/resources/test-dirs/extractsA" inputDir="src/test/resources/test-dirs/raw_input" commonTokens="src/test/resources/common_tokens"/>
