[1/4] tika git commit: TIKA-1332 -- add English/Spanish common tokens, fix logging

2017-02-16 Thread tallison
Repository: tika
Updated Branches:
  refs/heads/master a2d214c71 -> dc2dcd4cc


http://git-wip-us.apache.org/repos/asf/tika/blob/dc2dcd4c/tika-eval/src/main/resources/log4j.properties
--
diff --git a/tika-eval/src/main/resources/log4j.properties b/tika-eval/src/main/resources/log4j.properties
new file mode 100644
index 000..925f9f2
--- /dev/null
+++ b/tika-eval/src/main/resources/log4j.properties
@@ -0,0 +1,11 @@
+
+log4j.rootLogger=WARN,A1
+
+#for debugging
+#log4j.rootLogger=TRACE,A1
+
+log4j.appender.A1=org.apache.log4j.ConsoleAppender
+
+# A1 uses PatternLayout.
+log4j.appender.A1.layout=org.apache.log4j.PatternLayout
+log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n

http://git-wip-us.apache.org/repos/asf/tika/blob/dc2dcd4c/tika-eval/src/main/resources/tika-eval-comparison-config.xml
--
diff --git a/tika-eval/src/main/resources/tika-eval-comparison-config.xml b/tika-eval/src/main/resources/tika-eval-comparison-config.xml
index 04ef658..88fdd0a 100644
--- a/tika-eval/src/main/resources/tika-eval-comparison-config.xml
+++ b/tika-eval/src/main/resources/tika-eval-comparison-config.xml
@@ -28,8 +28,6 @@
 >
 
 
-
 
 
-
 
 
 
@@ -72,7 +68,7 @@
crawlingInputDir="false"
minJsonFileSizeBytes="-1"
maxJsonFileSizeBytes="200"
-   commonTokens="resources/commontokens"
+   commonTokens="resources/common_tokens"
 />
 
 

http://git-wip-us.apache.org/repos/asf/tika/blob/dc2dcd4c/tika-eval/src/main/resources/tika-eval-profiler-config.xml
--
diff --git a/tika-eval/src/main/resources/tika-eval-profiler-config.xml b/tika-eval/src/main/resources/tika-eval-profiler-config.xml
index bd94b25..be7adf4 100644
--- a/tika-eval/src/main/resources/tika-eval-profiler-config.xml
+++ b/tika-eval/src/main/resources/tika-eval-profiler-config.xml
@@ -27,16 +27,13 @@
 timeoutThresholdMillis="30">
 
 
-
-
 
 
 
-
 
@@ -66,7 +63,7 @@
 
 
+   commonTokens="resources/common_tokens"/>
 
 
 

http://git-wip-us.apache.org/repos/asf/tika/blob/dc2dcd4c/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java
--
diff --git a/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java b/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java
index 72e8008..6d4d4ef 100644
--- a/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java
+++ b/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java
@@ -59,7 +59,7 @@ public class SimpleComparerTest extends TikaTest {
 Paths.get("extractsA"), Paths.get("extractsB"),
 writer, -1, -1,
 ExtractReader.ALTER_METADATA_LIST.AS_IS);
-AbstractProfiler.loadCommonTokens(this.getResourceAsFile("/commontokens").toPath());
+AbstractProfiler.loadCommonTokens(this.getResourceAsFile("/common_tokens").toPath());
 LanguageIDWrapper.loadBuiltInModels();
 }
 

http://git-wip-us.apache.org/repos/asf/tika/blob/dc2dcd4c/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java
--
diff --git a/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java b/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java
index c358149..ff0961c 100644
--- a/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java
+++ b/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java
@@ -30,7 +30,7 @@ public class TikaEvalCLITest {
 public void testBasic() throws Exception {
 List args = new ArrayList<>();
 args.add("Profile");
-args.add("-extractDir");
+args.add("-extracts");
 args.add("tika");
 args.add("-db");
 args.add("mydb");

http://git-wip-us.apache.org/repos/asf/tika/blob/dc2dcd4c/tika-eval/src/test/resources/common_tokens/en
--
diff --git a/tika-eval/src/test/resources/common_tokens/en b/tika-eval/src/test/resources/common_tokens/en
new file mode 100644
index 000..8d442fe
--- /dev/null
+++ b/tika-eval/src/test/resources/common_tokens/en
@@ -0,0 +1,8 @@
+the
+of
+and
+a
+or
+#quick
+brown
+fox
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/dc2dcd4c/tika-eval/src/test/resources/common_tokens/es
--
diff --git a/tika-eval/src/test/resources/common_tokens/es b/tika-eval/src/test/resources/common_tokens/es
new file mode 100644
index 000..b9bfd03
--- /dev/null
+++ b/tika-

[1/4] tika git commit: TIKA-1332 -- add English Spanish common tokens; fix logging

2017-02-16 Thread tallison
Repository: tika
Updated Branches:
  refs/heads/2.x 61532258f -> 81150859b


http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/main/resources/log4j.properties
--
diff --git a/tika-eval/src/main/resources/log4j.properties b/tika-eval/src/main/resources/log4j.properties
new file mode 100644
index 000..925f9f2
--- /dev/null
+++ b/tika-eval/src/main/resources/log4j.properties
@@ -0,0 +1,11 @@
+
+log4j.rootLogger=WARN,A1
+
+#for debugging
+#log4j.rootLogger=TRACE,A1
+
+log4j.appender.A1=org.apache.log4j.ConsoleAppender
+
+# A1 uses PatternLayout.
+log4j.appender.A1.layout=org.apache.log4j.PatternLayout
+log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n

http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/main/resources/tika-eval-comparison-config.xml
--
diff --git a/tika-eval/src/main/resources/tika-eval-comparison-config.xml b/tika-eval/src/main/resources/tika-eval-comparison-config.xml
index 2c51616..8070672 100644
--- a/tika-eval/src/main/resources/tika-eval-comparison-config.xml
+++ b/tika-eval/src/main/resources/tika-eval-comparison-config.xml
@@ -28,8 +28,6 @@
 >
 
 
-
 
 
-
 
 
 
@@ -72,7 +68,7 @@
crawlingInputDir="false"
minJsonFileSizeBytes="-1"
maxJsonFileSizeBytes="200"
-   commonTokens="resources/commontokens"
+   commonTokens="resources/common_tokens"
 />
 
 

http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/main/resources/tika-eval-profiler-config.xml
--
diff --git a/tika-eval/src/main/resources/tika-eval-profiler-config.xml b/tika-eval/src/main/resources/tika-eval-profiler-config.xml
index bd94b25..be7adf4 100644
--- a/tika-eval/src/main/resources/tika-eval-profiler-config.xml
+++ b/tika-eval/src/main/resources/tika-eval-profiler-config.xml
@@ -27,16 +27,13 @@
 timeoutThresholdMillis="30">
 
 
-
-
 
 
 
-
 
@@ -66,7 +63,7 @@
 
 
+   commonTokens="resources/common_tokens"/>
 
 
 

http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java
--
diff --git a/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java b/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java
index 72e8008..6d4d4ef 100644
--- a/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java
+++ b/tika-eval/src/test/java/org/apache/tika/eval/SimpleComparerTest.java
@@ -59,7 +59,7 @@ public class SimpleComparerTest extends TikaTest {
 Paths.get("extractsA"), Paths.get("extractsB"),
 writer, -1, -1,
 ExtractReader.ALTER_METADATA_LIST.AS_IS);
-AbstractProfiler.loadCommonTokens(this.getResourceAsFile("/commontokens").toPath());
+AbstractProfiler.loadCommonTokens(this.getResourceAsFile("/common_tokens").toPath());
 LanguageIDWrapper.loadBuiltInModels();
 }
 

http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java
--
diff --git a/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java b/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java
index c358149..ff0961c 100644
--- a/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java
+++ b/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java
@@ -30,7 +30,7 @@ public class TikaEvalCLITest {
 public void testBasic() throws Exception {
 List args = new ArrayList<>();
 args.add("Profile");
-args.add("-extractDir");
+args.add("-extracts");
 args.add("tika");
 args.add("-db");
 args.add("mydb");

http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/resources/common_tokens/en
--
diff --git a/tika-eval/src/test/resources/common_tokens/en b/tika-eval/src/test/resources/common_tokens/en
new file mode 100644
index 000..8d442fe
--- /dev/null
+++ b/tika-eval/src/test/resources/common_tokens/en
@@ -0,0 +1,8 @@
+the
+of
+and
+a
+or
+#quick
+brown
+fox
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/81150859/tika-eval/src/test/resources/common_tokens/es
--
diff --git a/tika-eval/src/test/resources/common_tokens/es b/tika-eval/src/test/resources/common_tokens/es
new file mode 100644
index 000..b9bfd03
--- /dev/null
+++ b/tika-eva