This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 9be81d1  TIKA-3317 -- small cleanups
9be81d1 is described below

commit 9be81d17944043ad0003e17fd73b39432995d566
Author: tallison <[email protected]>
AuthorDate: Mon May 17 13:19:44 2021 -0400

    TIKA-3317 -- small cleanups
---
 .../java/org/apache/tika/pipes/PipesClient.java    |   2 +-
 .../java/org/apache/tika/pipes/PipesServer.java    |   2 +-
 tika-pipes/tika-emitters/tika-emitter-solr/pom.xml |   8 ++
 .../tika/pipes/emitter/solr/SolrEmitter.java       |  91 +++++++++---------
 .../src/test/resources/log4j.properties            |  20 ++--
 tika-pipes/tika-httpclient-commons/pom.xml         |   6 ++
 tika-pipes/tika-pipes-integration-tests/pom.xml    |  32 +++++++
 .../tika/pipes/solrtest/TikaPipesSolr6Test.java    |   6 +-
 .../tika/pipes/solrtest/TikaPipesSolr7Test.java    |   6 +-
 .../tika/pipes/solrtest/TikaPipesSolr8Test.java    |   6 +-
 .../tika/pipes/solrtest/TikaPipesSolrTestBase.java | 104 +++++++++++----------
 .../{log4j.properties => log4j2.properties}        |  20 ++--
 .../src/test/resources/tika-async-log4j.properties |  13 ---
 .../test/resources/tika-async-log4j2.properties}   |  21 +++--
 .../tika-pipes-iterator-solr/pom.xml               |  50 +++++-----
 .../tika/pipes/solrtest/SolrPipesIterator.java     |  41 ++++----
 16 files changed, 239 insertions(+), 189 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/pipes/PipesClient.java 
b/tika-core/src/main/java/org/apache/tika/pipes/PipesClient.java
index 2db98b2..1b2eecd 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/PipesClient.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/PipesClient.java
@@ -343,7 +343,7 @@ public class PipesClient implements Closeable {
                 } else if (line.startsWith("error ")) {
                     SERVER_LOG.error(line.substring(6));
                 } else {
-                    SERVER_LOG.error(line);
+                    SERVER_LOG.debug(line);
                 }
                 try {
                     line = reader.readLine();
diff --git a/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java 
b/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java
index e251dc8..6a5f6c3 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java
@@ -184,7 +184,7 @@ public class PipesServer implements Runnable {
     }
 
     private void err(Throwable t) {
-        System.err.println("err " + 
ExceptionUtils.getStackTrace(t).replaceAll("[\r\n]", " "));
+        System.err.println("error " + 
ExceptionUtils.getStackTrace(t).replaceAll("[\r\n]", " "));
         System.err.flush();
     }
 
diff --git a/tika-pipes/tika-emitters/tika-emitter-solr/pom.xml 
b/tika-pipes/tika-emitters/tika-emitter-solr/pom.xml
index 6d0d485..6fe2304 100644
--- a/tika-pipes/tika-emitters/tika-emitter-solr/pom.xml
+++ b/tika-pipes/tika-emitters/tika-emitter-solr/pom.xml
@@ -71,12 +71,20 @@
         </exclusion>
         <exclusion>
           <groupId>org.eclipse.jetty</groupId>
+          <artifactId>jetty-util</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.eclipse.jetty</groupId>
           <artifactId>jetty-http</artifactId>
         </exclusion>
         <exclusion>
           <groupId>log4j</groupId>
           <artifactId>log4j</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>org.apache.httpcomponents</groupId>
+          <artifactId>httpclient</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
     <dependency>
diff --git 
a/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java
 
b/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java
index 78eca0d..534e899 100644
--- 
a/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java
+++ 
b/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java
@@ -31,6 +31,9 @@ import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.impl.LBHttpSolrClient;
 import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.common.SolrInputDocument;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import org.apache.tika.client.HttpClientFactory;
 import org.apache.tika.config.Field;
 import org.apache.tika.config.Initializable;
@@ -41,26 +44,12 @@ import org.apache.tika.metadata.Metadata;
 import org.apache.tika.pipes.emitter.AbstractEmitter;
 import org.apache.tika.pipes.emitter.EmitData;
 import org.apache.tika.pipes.emitter.TikaEmitterException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
-public class SolrEmitter extends AbstractEmitter implements Initializable {
-
-    public enum AttachmentStrategy {
-        SKIP,
-        CONCATENATE_CONTENT,
-        PARENT_CHILD,
-        //anything else?
-    }
 
-    public enum UpdateStrategy {
-        ADD,
-        UPDATE_MUST_EXIST,
-        UPDATE_MUST_NOT_EXIST,
-    }
+public class SolrEmitter extends AbstractEmitter implements Initializable {
 
     private static final Logger LOG = 
LoggerFactory.getLogger(SolrEmitter.class);
-
+    private final HttpClientFactory httpClientFactory;
     private AttachmentStrategy attachmentStrategy = 
AttachmentStrategy.PARENT_CHILD;
     private UpdateStrategy updateStrategy = UpdateStrategy.ADD;
     private String solrCollection;
@@ -75,16 +64,14 @@ public class SolrEmitter extends AbstractEmitter implements 
Initializable {
     private int commitWithin = 1000;
     private int connectionTimeout = 10000;
     private int socketTimeout = 60000;
-    private final HttpClientFactory httpClientFactory;
     private SolrClient solrClient;
-
     public SolrEmitter() throws TikaConfigException {
         httpClientFactory = new HttpClientFactory();
     }
 
     @Override
-    public void emit(String emitKey, List<Metadata> metadataList) throws 
IOException,
-            TikaEmitterException {
+    public void emit(String emitKey, List<Metadata> metadataList)
+            throws IOException, TikaEmitterException {
         if (metadataList == null || metadataList.size() == 0) {
             LOG.warn("metadataList is null or empty");
             return;
@@ -104,8 +91,7 @@ public class SolrEmitter extends AbstractEmitter implements 
Initializable {
         } else if (updateStrategy == UpdateStrategy.UPDATE_MUST_NOT_EXIST) {
             solrInputDocument.setField("_version_", -1);
         }
-        if (attachmentStrategy == AttachmentStrategy.SKIP ||
-                metadataList.size() == 1) {
+        if (attachmentStrategy == AttachmentStrategy.SKIP || 
metadataList.size() == 1) {
             addMetadataToSolrInputDocument(metadataList.get(0), 
solrInputDocument, updateStrategy);
         } else if (attachmentStrategy == 
AttachmentStrategy.CONCATENATE_CONTENT) {
             //this only handles text for now, not xhtml
@@ -128,8 +114,8 @@ public class SolrEmitter extends AbstractEmitter implements 
Initializable {
                 addMetadataToSolrInputDocument(m, childSolrInputDocument, 
updateStrategy);
             }
         } else {
-            throw new IllegalArgumentException("I don't yet support this 
attachment strategy: "
-                    + attachmentStrategy);
+            throw new IllegalArgumentException(
+                    "I don't yet support this attachment strategy: " + 
attachmentStrategy);
         }
         docsToUpdate.add(solrInputDocument);
     }
@@ -142,12 +128,14 @@ public class SolrEmitter extends AbstractEmitter 
implements Initializable {
         }
         List<SolrInputDocument> docsToUpdate = new ArrayList<>();
         for (EmitData d : batch) {
-            addMetadataAsSolrInputDocuments(d.getEmitKey().getEmitKey(), 
d.getMetadataList(), docsToUpdate);
+            addMetadataAsSolrInputDocuments(d.getEmitKey().getEmitKey(), 
d.getMetadataList(),
+                    docsToUpdate);
         }
         emitSolrBatch(docsToUpdate);
     }
 
-    private void emitSolrBatch(List<SolrInputDocument> docsToUpdate) throws 
IOException, TikaEmitterException {
+    private void emitSolrBatch(List<SolrInputDocument> docsToUpdate)
+            throws IOException, TikaEmitterException {
         if (LOG.isDebugEnabled()) {
             LOG.debug("Emitting solr doc batch: {}", docsToUpdate);
         }
@@ -164,7 +152,8 @@ public class SolrEmitter extends AbstractEmitter implements 
Initializable {
         }
     }
 
-    private void addMetadataToSolrInputDocument(Metadata metadata, 
SolrInputDocument solrInputDocument,
+    private void addMetadataToSolrInputDocument(Metadata metadata,
+                                                SolrInputDocument 
solrInputDocument,
                                                 UpdateStrategy updateStrategy) 
{
         for (String n : metadata.names()) {
             String[] vals = metadata.getValues(n);
@@ -175,16 +164,18 @@ public class SolrEmitter extends AbstractEmitter 
implements Initializable {
                     solrInputDocument.setField(n, vals[0]);
                 } else {
                     solrInputDocument.setField(n, new HashMap<String, 
String>() {{
-                        put("set", vals[0]);
-                    }});
+                            put("set", vals[0]);
+                        }
+                    });
                 }
             } else if (vals.length > 1) {
                 if (updateStrategy == UpdateStrategy.ADD) {
                     solrInputDocument.setField(n, vals);
                 } else {
                     solrInputDocument.setField(n, new HashMap<String, 
String[]>() {{
-                        put("set", vals);
-                    }});
+                            put("set", vals);
+                        }
+                    });
                 }
             }
         }
@@ -220,6 +211,10 @@ public class SolrEmitter extends AbstractEmitter 
implements Initializable {
         this.socketTimeout = socketTimeout;
     }
 
+    public String getContentField() {
+        return contentField;
+    }
+
     /**
      * This is the field _after_ metadata mappings have been applied
      * that contains the "content" for each metadata object.
@@ -234,8 +229,8 @@ public class SolrEmitter extends AbstractEmitter implements 
Initializable {
         this.contentField = contentField;
     }
 
-    public String getContentField() {
-        return contentField;
+    public int getCommitWithin() {
+        return commitWithin;
     }
 
     @Field
@@ -243,10 +238,6 @@ public class SolrEmitter extends AbstractEmitter 
implements Initializable {
         this.commitWithin = commitWithin;
     }
 
-    public int getCommitWithin() {
-        return commitWithin;
-    }
-
     /**
      * Specify the field in the first Metadata that should be
      * used as the id field for the document.
@@ -308,21 +299,18 @@ public class SolrEmitter extends AbstractEmitter 
implements Initializable {
     public void initialize(Map<String, Param> params) throws 
TikaConfigException {
         if (solrUrls == null || solrUrls.isEmpty()) {
             solrClient = new CloudSolrClient.Builder(solrZkHosts, 
Optional.ofNullable(solrZkChroot))
-                    .withConnectionTimeout(connectionTimeout)
-                    .withSocketTimeout(socketTimeout)
-                    .withHttpClient(httpClientFactory.build())
-                    .build();
+                    
.withConnectionTimeout(connectionTimeout).withSocketTimeout(socketTimeout)
+                    .withHttpClient(httpClientFactory.build()).build();
         } else {
-            solrClient = new LBHttpSolrClient.Builder()
-                    .withConnectionTimeout(connectionTimeout)
-                    .withSocketTimeout(socketTimeout)
-                    .withHttpClient(httpClientFactory.build())
-                    .withBaseSolrUrls(solrUrls.toArray(new String[] 
{})).build();
+            solrClient = new 
LBHttpSolrClient.Builder().withConnectionTimeout(connectionTimeout)
+                    
.withSocketTimeout(socketTimeout).withHttpClient(httpClientFactory.build())
+                    .withBaseSolrUrls(solrUrls.toArray(new 
String[]{})).build();
         }
     }
 
     @Override
-    public void checkInitialization(InitializableProblemHandler 
problemHandler) throws TikaConfigException {
+    public void checkInitialization(InitializableProblemHandler problemHandler)
+            throws TikaConfigException {
         mustNotBeEmpty("solrCollection", this.solrCollection);
         mustNotBeEmpty("urlFieldName", this.idField);
         if ((this.solrUrls == null || this.solrUrls.isEmpty()) &&
@@ -336,4 +324,13 @@ public class SolrEmitter extends AbstractEmitter 
implements Initializable {
                     "expected either param solrUrls or param solrZkHosts, but 
both were specified");
         }
     }
+
+    public enum AttachmentStrategy {
+        SKIP, CONCATENATE_CONTENT, PARENT_CHILD,
+        //anything else?
+    }
+
+    public enum UpdateStrategy {
+        ADD, UPDATE_MUST_EXIST, UPDATE_MUST_NOT_EXIST,
+    }
 }
diff --git 
a/tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/log4j.properties
 
b/tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/log4j.properties
index 11e5887..d17a4a1 100644
--- 
a/tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/log4j.properties
+++ 
b/tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/log4j.properties
@@ -13,10 +13,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#info,debug, error,fatal ...
-log4j.rootLogger=debug,stderr
-#console
-log4j.appender.stderr=org.apache.log4j.ConsoleAppender
-log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
-log4j.appender.stderr.Target=System.err
-log4j.appender.stderr.layout.ConversionPattern=%-5p %m%n
+status=info
+name=PropertiesConfig
+filters=threshold
+filter.threshold.type=ThresholdFilter
+filter.threshold.level=info
+appenders=console
+appender.console.type=Console
+appender.console.name=STDERR
+appender.console.layout.type=PatternLayout
+appender.console.layout.pattern=%-5p [%t] %d{HH:mm:ss,SSS} %c %m%n
+rootLogger.level=info
+rootLogger.appenderRefs=stderr
+rootLogger.appenderRef.stderr.ref=STDERR
diff --git a/tika-pipes/tika-httpclient-commons/pom.xml 
b/tika-pipes/tika-httpclient-commons/pom.xml
index 3bd4409..80af2b8 100644
--- a/tika-pipes/tika-httpclient-commons/pom.xml
+++ b/tika-pipes/tika-httpclient-commons/pom.xml
@@ -41,6 +41,12 @@
       <groupId>org.apache.httpcomponents</groupId>
       <artifactId>httpclient</artifactId>
       <version>${httpcomponents.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>commons-codec</groupId>
+          <artifactId>commons-codec</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
   </dependencies>
 
diff --git a/tika-pipes/tika-pipes-integration-tests/pom.xml 
b/tika-pipes/tika-pipes-integration-tests/pom.xml
index b4c4036..caa6214 100644
--- a/tika-pipes/tika-pipes-integration-tests/pom.xml
+++ b/tika-pipes/tika-pipes-integration-tests/pom.xml
@@ -76,6 +76,16 @@
       <artifactId>testcontainers</artifactId>
       <version>${test.containers.version}</version>
       <scope>test</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>net.java.dev.jna</groupId>
+          <artifactId>jna</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.fasterxml.jackson.core</groupId>
+          <artifactId>jackson-annotations</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>${project.groupId}</groupId>
@@ -99,6 +109,28 @@
       <groupId>org.apache.solr</groupId>
       <artifactId>solr-solrj</artifactId>
       <version>${solrj.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.eclipse.jetty</groupId>
+          <artifactId>jetty-io</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.eclipse.jetty</groupId>
+          <artifactId>jetty-util</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.eclipse.jetty</groupId>
+          <artifactId>jetty-http</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>log4j</groupId>
+          <artifactId>log4j</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.httpcomponents</groupId>
+          <artifactId>httpclient</artifactId>
+        </exclusion>
+      </exclusions>
       <scope>test</scope>
     </dependency>
   </dependencies>
diff --git 
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/solrtest/TikaPipesSolr6Test.java
 
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/solrtest/TikaPipesSolr6Test.java
index 12e9ac7..5066fee 100644
--- 
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/solrtest/TikaPipesSolr6Test.java
+++ 
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/solrtest/TikaPipesSolr6Test.java
@@ -25,9 +25,9 @@ import org.testcontainers.utility.DockerImageName;
 public class TikaPipesSolr6Test extends TikaPipesSolrTestBase {
 
     @Rule
-    public GenericContainer<?> solr6 = new 
GenericContainer<>(DockerImageName.parse("solr:6"))
-            .withExposedPorts(8983, 9983)
-            .withCommand("-DzkRun");
+    public GenericContainer<?> solr6 =
+            new 
GenericContainer<>(DockerImageName.parse("solr:6")).withExposedPorts(8983, 9983)
+                    .withCommand("-DzkRun");
 
     @Before
     public void setupTest() throws Exception {
diff --git 
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/solrtest/TikaPipesSolr7Test.java
 
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/solrtest/TikaPipesSolr7Test.java
index c9cf566..314659d 100644
--- 
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/solrtest/TikaPipesSolr7Test.java
+++ 
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/solrtest/TikaPipesSolr7Test.java
@@ -25,9 +25,9 @@ import org.testcontainers.utility.DockerImageName;
 public class TikaPipesSolr7Test extends TikaPipesSolrTestBase {
 
     @Rule
-    public GenericContainer<?> solr7 = new 
GenericContainer<>(DockerImageName.parse("solr:7"))
-            .withExposedPorts(8983, 9983)
-            .withCommand("-DzkRun");
+    public GenericContainer<?> solr7 =
+            new 
GenericContainer<>(DockerImageName.parse("solr:7")).withExposedPorts(8983, 9983)
+                    .withCommand("-DzkRun");
 
     @Before
     public void setupTest() throws Exception {
diff --git 
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/solrtest/TikaPipesSolr8Test.java
 
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/solrtest/TikaPipesSolr8Test.java
index d1470df..29411f4 100644
--- 
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/solrtest/TikaPipesSolr8Test.java
+++ 
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/solrtest/TikaPipesSolr8Test.java
@@ -25,9 +25,9 @@ import org.testcontainers.utility.DockerImageName;
 public class TikaPipesSolr8Test extends TikaPipesSolrTestBase {
 
     @Rule
-    public GenericContainer<?> solr8 = new 
GenericContainer<>(DockerImageName.parse("solr:8"))
-            .withExposedPorts(8983, 9983)
-            .withCommand("-DzkRun");
+    public GenericContainer<?> solr8 =
+            new 
GenericContainer<>(DockerImageName.parse("solr:8")).withExposedPorts(8983, 9983)
+                    .withCommand("-DzkRun");
 
     @Before
     public void setupTest() throws Exception {
diff --git 
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/solrtest/TikaPipesSolrTestBase.java
 
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/solrtest/TikaPipesSolrTestBase.java
index 12f3f77..8d80c32 100644
--- 
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/solrtest/TikaPipesSolrTestBase.java
+++ 
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/solrtest/TikaPipesSolrTestBase.java
@@ -25,22 +25,21 @@ import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.impl.LBHttpSolrClient;
 import org.apache.solr.common.SolrInputDocument;
-import org.apache.tika.cli.TikaCLI;
-import org.apache.tika.pipes.PipeIntegrationTests;
-import org.apache.tika.pipes.emitter.solr.SolrEmitter;
 import org.jetbrains.annotations.NotNull;
 import org.junit.Assert;
 import org.testcontainers.containers.GenericContainer;
 import org.testcontainers.shaded.org.apache.commons.io.FileUtils;
 
+import org.apache.tika.cli.TikaCLI;
+import org.apache.tika.pipes.PipeIntegrationTests;
+import org.apache.tika.pipes.emitter.solr.SolrEmitter;
+
 public abstract class TikaPipesSolrTestBase {
 
     private final String collection = "testcol";
     private final int numDocs = 42;
-
-    protected GenericContainer<?> solr;
-
     private final File testFileFolder = new File("target", "test-files");
+    protected GenericContainer<?> solr;
     private String solrHost;
     private int solrPort;
     private int zkPort;
@@ -49,7 +48,8 @@ public abstract class TikaPipesSolrTestBase {
     private void createTestHtmlFiles(String bodyContent) throws Exception {
         testFileFolder.mkdirs();
         for (int i = 0; i < numDocs; ++i) {
-            FileUtils.writeStringToFile(new File(testFileFolder, "test-" + i + 
".html"), "<html><body>" + bodyContent + "</body></html>", 
StandardCharsets.UTF_8);
+            FileUtils.writeStringToFile(new File(testFileFolder, "test-" + i + 
".html"),
+                    "<html><body>" + bodyContent + "</body></html>", 
StandardCharsets.UTF_8);
         }
     }
 
@@ -63,8 +63,8 @@ public abstract class TikaPipesSolrTestBase {
 
         solr.execInContainer("/opt/solr/bin/solr", "create_collection", "-c", 
collection);
 
-        try (SolrClient solrClient = new LBHttpSolrClient.Builder()
-                .withBaseSolrUrls(solrEndpoint).build()) {
+        try (SolrClient solrClient = new 
LBHttpSolrClient.Builder().withBaseSolrUrls(solrEndpoint)
+                .build()) {
 
             for (int i = 0; i < numDocs; ++i) {
                 SolrInputDocument solrDoc = new SolrInputDocument();
@@ -79,76 +79,84 @@ public abstract class TikaPipesSolrTestBase {
 
     /**
      * Runs a test using Solr Pipe Iterator, File Fetcher and Solr Emitter.
+     *
      * @param useZk If true, use zookeeper to connect to solr. Otherwise use 
direct solr URLs.
      */
-    protected void runTikaAsyncSolrPipeIteratorFileFetcherSolrEmitter(boolean 
useZk) throws Exception {
+    protected void runTikaAsyncSolrPipeIteratorFileFetcherSolrEmitter(boolean 
useZk)
+            throws Exception {
         File tikaConfigFile = new File("target", "ta.xml");
-        File log4jPropFile = new File("target", "tmp-log4j.properties");
-        try (InputStream is = 
PipeIntegrationTests.class.getResourceAsStream("/tika-async-log4j.properties")) 
{
+        File log4jPropFile = new File("target", "tmp-log4j2.properties");
+        try (InputStream is = PipeIntegrationTests.class
+                .getResourceAsStream("/tika-async-log4j2.properties")) {
             FileUtils.copyInputStreamToFile(is, log4jPropFile);
         }
         String tikaConfigTemplateXml;
-        try (InputStream is = 
PipeIntegrationTests.class.getResourceAsStream("/tika-config-solr-urls.xml")) {
+        try (InputStream is = PipeIntegrationTests.class
+                .getResourceAsStream("/tika-config-solr-urls.xml")) {
             tikaConfigTemplateXml = IOUtils.toString(is, 
StandardCharsets.UTF_8);
         }
 
-        String tikaConfigXml = createTikaConfigXml(useZk,
-                tikaConfigFile,
-                log4jPropFile,
-                tikaConfigTemplateXml,
-                SolrEmitter.UpdateStrategy.ADD,
-                SolrEmitter.AttachmentStrategy.CONCATENATE_CONTENT);
+        String tikaConfigXml =
+                createTikaConfigXml(useZk, tikaConfigFile, log4jPropFile, 
tikaConfigTemplateXml,
+                        SolrEmitter.UpdateStrategy.ADD,
+                        SolrEmitter.AttachmentStrategy.CONCATENATE_CONTENT);
         FileUtils.writeStringToFile(tikaConfigFile, tikaConfigXml, 
StandardCharsets.UTF_8);
 
         TikaCLI.main(new String[]{"-a", "--config=" + 
tikaConfigFile.getAbsolutePath()});
 
-        try (SolrClient solrClient = new LBHttpSolrClient.Builder()
-                .withBaseSolrUrls(solrEndpoint).build()) {
+        try (SolrClient solrClient = new 
LBHttpSolrClient.Builder().withBaseSolrUrls(solrEndpoint)
+                .build()) {
             solrClient.commit(collection);
-            Assert.assertEquals(numDocs, solrClient.query(collection, new 
SolrQuery("mime_s:\"text/html; 
charset=ISO-8859-1\"")).getResults().getNumFound());
-            Assert.assertEquals(numDocs, solrClient.query(collection, new 
SolrQuery("content_s:*initial*")).getResults().getNumFound());
+            Assert.assertEquals(numDocs, solrClient
+                    .query(collection, new SolrQuery("mime_s:\"text/html; 
charset=ISO-8859-1\""))
+                    .getResults().getNumFound());
+            Assert.assertEquals(numDocs,
+                    solrClient.query(collection, new 
SolrQuery("content_s:*initial*")).getResults()
+                            .getNumFound());
         }
 
-        // update the documents with "update must exist" and run tika async 
again with "UPDATE_MUST_EXIST". It should not fail, and docs should be updated.
+        // update the documents with "update must exist" and run tika async 
again with "UPDATE_MUST_EXIST".
+        // It should not fail, and docs should be updated.
         createTestHtmlFiles("updated");
-        tikaConfigXml = createTikaConfigXml(useZk,
-                tikaConfigFile,
-                log4jPropFile,
-                tikaConfigTemplateXml,
-                SolrEmitter.UpdateStrategy.UPDATE_MUST_EXIST,
-                SolrEmitter.AttachmentStrategy.CONCATENATE_CONTENT);
+        tikaConfigXml =
+                createTikaConfigXml(useZk, tikaConfigFile, log4jPropFile, 
tikaConfigTemplateXml,
+                        SolrEmitter.UpdateStrategy.UPDATE_MUST_EXIST,
+                        SolrEmitter.AttachmentStrategy.CONCATENATE_CONTENT);
         FileUtils.writeStringToFile(tikaConfigFile, tikaConfigXml, 
StandardCharsets.UTF_8);
 
         TikaCLI.main(new String[]{"-a", "--config=" + 
tikaConfigFile.getAbsolutePath()});
 
-        try (SolrClient solrClient = new LBHttpSolrClient.Builder()
-                .withBaseSolrUrls(solrEndpoint).build()) {
+        try (SolrClient solrClient = new 
LBHttpSolrClient.Builder().withBaseSolrUrls(solrEndpoint)
+                .build()) {
             solrClient.commit(collection);
-            Assert.assertEquals(numDocs, solrClient.query(collection, new 
SolrQuery("mime_s:\"text/html; 
charset=ISO-8859-1\"")).getResults().getNumFound());
-            Assert.assertEquals(numDocs, solrClient.query(collection, new 
SolrQuery("content_s:*updated*")).getResults().getNumFound());
+            Assert.assertEquals(numDocs, solrClient
+                    .query(collection, new SolrQuery("mime_s:\"text/html; 
charset=ISO-8859-1\""))
+                    .getResults().getNumFound());
+            Assert.assertEquals(numDocs,
+                    solrClient.query(collection, new 
SolrQuery("content_s:*updated*")).getResults()
+                            .getNumFound());
         }
     }
 
     @NotNull
-    private String createTikaConfigXml(boolean useZk,
-                                       File tikaConfigFile,
-                                       File log4jPropFile,
+    private String createTikaConfigXml(boolean useZk, File tikaConfigFile, 
File log4jPropFile,
                                        String tikaConfigTemplateXml,
                                        SolrEmitter.UpdateStrategy 
updateStrategy,
                                        SolrEmitter.AttachmentStrategy 
attachmentStrategy) {
-        String res = tikaConfigTemplateXml.replace("{TIKA_CONFIG}", 
tikaConfigFile.getAbsolutePath())
-                .replace("{UPDATE_STRATEGY}", updateStrategy.toString())
-                .replace("{ATTACHMENT_STRATEGY}", 
attachmentStrategy.toString())
-                .replace("{LOG4J_PROPERTIES_FILE}", 
log4jPropFile.getAbsolutePath())
-                .replace("{PATH_TO_DOCS}", testFileFolder.getAbsolutePath());
+        String res =
+                tikaConfigTemplateXml.replace("{TIKA_CONFIG}", 
tikaConfigFile.getAbsolutePath())
+                        .replace("{UPDATE_STRATEGY}", 
updateStrategy.toString())
+                        .replace("{ATTACHMENT_STRATEGY}", 
attachmentStrategy.toString())
+                        .replace("{LOG4J_PROPERTIES_FILE}", 
log4jPropFile.getAbsolutePath())
+                        .replace("{PATH_TO_DOCS}", 
testFileFolder.getAbsolutePath());
         if (useZk) {
-            res = res.replace("{SOLR_CONNECTION}", "<solrZkHosts>\n" +
-                    "        <solrZkHost>" + solrHost + ":" + zkPort + 
"</solrZkHost>\n" +
-                    "      </solrZkHosts>\n");
+            res = res.replace("{SOLR_CONNECTION}",
+                    "<solrZkHosts>\n" + "        <solrZkHost>" + solrHost + 
":" + zkPort +
+                            "</solrZkHost>\n" + "      </solrZkHosts>\n");
         } else {
-            res = res.replace("{SOLR_CONNECTION}", "<solrUrls>\n" +
-                    "        <solrUrl>http://" + solrHost + ":" + solrPort + 
"/solr</solrUrl>\n" +
-                    "      </solrUrls>\n");
+            res = res.replace("{SOLR_CONNECTION}",
+                    "<solrUrls>\n" + "        <solrUrl>http://" + solrHost + 
":" + solrPort +
+                            "/solr</solrUrl>\n" + "      </solrUrls>\n");
         }
         return res;
     }
diff --git 
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/log4j.properties 
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/log4j2.properties
similarity index 66%
rename from 
tika-pipes/tika-pipes-integration-tests/src/test/resources/log4j.properties
rename to 
tika-pipes/tika-pipes-integration-tests/src/test/resources/log4j2.properties
index 2b2da1a..d17a4a1 100644
--- 
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/log4j.properties
+++ 
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/log4j2.properties
@@ -13,10 +13,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#info,debug, error,fatal ...
-log4j.rootLogger=info,stderr
-#console
-log4j.appender.stderr=org.apache.log4j.ConsoleAppender
-log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
-log4j.appender.stderr.Target=System.err
-log4j.appender.stderr.layout.ConversionPattern=%-5p [%t]: %m%n
+status=info
+name=PropertiesConfig
+filters=threshold
+filter.threshold.type=ThresholdFilter
+filter.threshold.level=info
+appenders=console
+appender.console.type=Console
+appender.console.name=STDERR
+appender.console.layout.type=PatternLayout
+appender.console.layout.pattern=%-5p [%t] %d{HH:mm:ss,SSS} %c %m%n
+rootLogger.level=info
+rootLogger.appenderRefs=stderr
+rootLogger.appenderRef.stderr.ref=STDERR
diff --git 
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/tika-async-log4j.properties
 
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/tika-async-log4j.properties
deleted file mode 100644
index c7c6821..0000000
--- 
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/tika-async-log4j.properties
+++ /dev/null
@@ -1,13 +0,0 @@
-status=debug
-name=PropertiesConfig
-filters=threshold
-filter.threshold.type=ThresholdFilter
-filter.threshold.level=debug
-appenders=console
-appender.console.type=Console
-appender.console.name=STDERR
-appender.console.layout.type=PatternLayout
-appender.console.layout.pattern=%-5p [%t] %d{HH:mm:ss,SSS} %c %m%n
-rootLogger.level=debug
-rootLogger.appenderRefs=stderr
-rootLogger.appenderRef.stderr.ref=STDERR
diff --git 
a/tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/log4j.properties
 
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/tika-async-log4j2.properties
similarity index 66%
copy from 
tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/log4j.properties
copy to 
tika-pipes/tika-pipes-integration-tests/src/test/resources/tika-async-log4j2.properties
index 11e5887..bc6f2fd 100644
--- 
a/tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/log4j.properties
+++ 
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/tika-async-log4j2.properties
@@ -13,10 +13,17 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#info,debug, error,fatal ...
-log4j.rootLogger=debug,stderr
-#console
-log4j.appender.stderr=org.apache.log4j.ConsoleAppender
-log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
-log4j.appender.stderr.Target=System.err
-log4j.appender.stderr.layout.ConversionPattern=%-5p %m%n
+
+status=debug
+name=PropertiesConfig
+filters=threshold
+filter.threshold.type=ThresholdFilter
+filter.threshold.level=debug
+appenders=console
+appender.console.type=Console
+appender.console.name=STDERR
+appender.console.layout.type=PatternLayout
+appender.console.layout.pattern=%-5p [%t] %d{HH:mm:ss,SSS} %c %m%n
+rootLogger.level=debug
+rootLogger.appenderRefs=stderr
+rootLogger.appenderRef.stderr.ref=STDERR
diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-solr/pom.xml 
b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-solr/pom.xml
index cb80a34..347aa26 100644
--- a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-solr/pom.xml
+++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-solr/pom.xml
@@ -41,29 +41,6 @@
       <scope>provided</scope>
     </dependency>
     <dependency>
-      <groupId>com.amazonaws</groupId>
-      <artifactId>aws-java-sdk-s3</artifactId>
-      <version>${aws.version}</version>
-      <exclusions>
-        <exclusion>
-          <groupId>commons-logging</groupId>
-          <artifactId>commons-logging</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>com.fasterxml.jackson.core</groupId>
-          <artifactId>jackson-core</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>com.fasterxml.jackson.core</groupId>
-          <artifactId>jackson-databind</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>commons-codec</groupId>
-          <artifactId>commons-codec</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-    <dependency>
       <groupId>commons-codec</groupId>
       <artifactId>commons-codec</artifactId>
       <version>${commons.codec.version}</version>
@@ -79,14 +56,31 @@
       <version>${commons.logging.version}</version>
     </dependency>
     <dependency>
-      <groupId>org.apache.logging.log4j</groupId>
-      <artifactId>log4j-slf4j-impl</artifactId>
-      <version>${log4j2.version}</version>
-    </dependency>
-    <dependency>
       <groupId>org.apache.solr</groupId>
       <artifactId>solr-solrj</artifactId>
       <version>${solrj.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.eclipse.jetty</groupId>
+          <artifactId>jetty-io</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.eclipse.jetty</groupId>
+          <artifactId>jetty-util</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.eclipse.jetty</groupId>
+          <artifactId>jetty-http</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>log4j</groupId>
+          <artifactId>log4j</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.httpcomponents</groupId>
+          <artifactId>httpclient</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>${project.groupId}</groupId>
diff --git 
a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-solr/src/main/java/org/apache/tika/pipes/solrtest/SolrPipesIterator.java
 
b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-solr/src/main/java/org/apache/tika/pipes/solrtest/SolrPipesIterator.java
index 78d6a49..85b070c 100644
--- 
a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-solr/src/main/java/org/apache/tika/pipes/solrtest/SolrPipesIterator.java
+++ 
b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-solr/src/main/java/org/apache/tika/pipes/solrtest/SolrPipesIterator.java
@@ -34,6 +34,9 @@ import org.apache.solr.client.solrj.impl.LBHttpSolrClient;
 import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.params.CursorMarkParams;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import org.apache.tika.client.HttpClientFactory;
 import org.apache.tika.config.Field;
 import org.apache.tika.config.Initializable;
@@ -45,8 +48,6 @@ import org.apache.tika.pipes.HandlerConfig;
 import org.apache.tika.pipes.emitter.EmitKey;
 import org.apache.tika.pipes.fetcher.FetchKey;
 import org.apache.tika.pipes.pipesiterator.PipesIterator;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  * Iterates through results from a Solr query.
@@ -54,7 +55,7 @@ import org.slf4j.LoggerFactory;
 public class SolrPipesIterator extends PipesIterator implements Initializable {
 
     private static final Logger LOGGER = 
LoggerFactory.getLogger(SolrPipesIterator.class);
-
+    private final HttpClientFactory httpClientFactory;
     private String solrCollection;
     /**
      * You can specify solrUrls, or you can specify solrZkHosts and use use 
zookeeper to determine the solr server urls.
@@ -72,8 +73,6 @@ public class SolrPipesIterator extends PipesIterator 
implements Initializable {
     private int connectionTimeout = 10000;
     private int socketTimeout = 60000;
 
-    private final HttpClientFactory httpClientFactory;
-
     public SolrPipesIterator() throws TikaConfigException {
         httpClientFactory = new HttpClientFactory();
     }
@@ -203,7 +202,9 @@ public class SolrPipesIterator extends PipesIterator 
implements Initializable {
                 QueryResponse qr = solrClient.query(solrCollection, query);
                 long totalToFetch = qr.getResults().getNumFound();
                 String nextCursorMark = qr.getNextCursorMark();
-                LOGGER.info("Query to fetch files to parse collection={}, 
q={}, onCount={}, totalCount={}", solrCollection, query, fileCount, 
totalToFetch);
+                LOGGER.info(
+                        "Query to fetch files to parse collection={}, q={}, 
onCount={}, totalCount={}",
+                        solrCollection, query, fileCount, totalToFetch);
                 for (SolrDocument sd : qr.getResults()) {
                     ++fileCount;
                     String fetchKey = (String) sd.getFieldValue(idField);
@@ -213,11 +214,8 @@ public class SolrPipesIterator extends PipesIterator 
implements Initializable {
                         metadata.add(nextField, (String) 
sd.getFieldValue(nextField));
                     }
                     LOGGER.info("iterator doc: {}, idField={}, fetchKey={}", 
sd, idField, fetchKey);
-                    tryToAdd(new FetchEmitTuple(fetchKey,
-                            new FetchKey(fetcherName, fetchKey),
-                            new EmitKey(emitterName, emitKey),
-                            new Metadata(),
-                            handlerConfig,
+                    tryToAdd(new FetchEmitTuple(fetchKey, new 
FetchKey(fetcherName, fetchKey),
+                            new EmitKey(emitterName, emitKey), new Metadata(), 
handlerConfig,
                             getOnParseException()));
                 }
                 if (cursorMark.equals(nextCursorMark)) {
@@ -234,14 +232,11 @@ public class SolrPipesIterator extends PipesIterator 
implements Initializable {
         if (solrUrls == null || solrUrls.isEmpty()) {
             return new CloudSolrClient.Builder(solrZkHosts, 
Optional.ofNullable(solrZkChroot))
                     .withHttpClient(httpClientFactory.build())
-                    .withConnectionTimeout(connectionTimeout)
-                    .withSocketTimeout(socketTimeout)
+                    
.withConnectionTimeout(connectionTimeout).withSocketTimeout(socketTimeout)
                     .build();
         }
-        return new LBHttpSolrClient.Builder()
-                .withConnectionTimeout(connectionTimeout)
-                .withSocketTimeout(socketTimeout)
-                .withHttpClient(httpClientFactory.build())
+        return new 
LBHttpSolrClient.Builder().withConnectionTimeout(connectionTimeout)
+                
.withSocketTimeout(socketTimeout).withHttpClient(httpClientFactory.build())
                 .withBaseSolrUrls(solrUrls.toArray(new String[]{})).build();
     }
 
@@ -254,11 +249,15 @@ public class SolrPipesIterator extends PipesIterator 
implements Initializable {
         mustNotBeEmpty("parsingIdField", this.parsingIdField);
         mustNotBeEmpty("failCountField", this.failCountField);
         mustNotBeEmpty("sizeFieldName", this.sizeFieldName);
-        if ((this.solrUrls == null || this.solrUrls.isEmpty()) && 
(this.solrZkHosts == null || this.solrZkHosts.isEmpty())) {
-            throw new IllegalArgumentException("expected either param solrUrls 
or param solrZkHosts, but neither was specified");
+        if ((this.solrUrls == null || this.solrUrls.isEmpty()) &&
+                (this.solrZkHosts == null || this.solrZkHosts.isEmpty())) {
+            throw new IllegalArgumentException(
+                    "expected either param solrUrls or param solrZkHosts, but 
neither was specified");
         }
-        if (this.solrUrls != null && !this.solrUrls.isEmpty() && 
this.solrZkHosts != null && !this.solrZkHosts.isEmpty()) {
-            throw new IllegalArgumentException("expected either param solrUrls 
or param solrZkHosts, but both were specified");
+        if (this.solrUrls != null && !this.solrUrls.isEmpty() && 
this.solrZkHosts != null &&
+                !this.solrZkHosts.isEmpty()) {
+            throw new IllegalArgumentException(
+                    "expected either param solrUrls or param solrZkHosts, but 
both were specified");
         }
     }
 }

Reply via email to