This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch code-cleanup-for-4x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 0b5a17ae834bffb9d6c6ca83a3ca33ca71c949a4
Author: tallison <[email protected]>
AuthorDate: Fri Feb 6 17:01:06 2026 -0500

    cleanup for 4.x
---
 .../src/main/java/org/apache/tika/cli/TikaCLI.java |  13 +-
 .../java/org/apache/tika/mime/MimeTypesReader.java |   9 +-
 .../java/org/apache/tika/parser/NetworkParser.java | 170 ---------------------
 3 files changed, 2 insertions(+), 190 deletions(-)

diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index dfcc299520..9c12d2d804 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -32,7 +32,6 @@ import java.io.Serializable;
 import java.io.UnsupportedEncodingException;
 import java.io.Writer;
 import java.lang.reflect.Field;
-import java.net.URI;
 import java.net.URL;
 import java.nio.file.Files;
 import java.nio.file.Path;
@@ -84,7 +83,6 @@ import org.apache.tika.mime.MimeTypeException;
 import org.apache.tika.mime.MimeTypes;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.CompositeParser;
-import org.apache.tika.parser.NetworkParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
@@ -128,7 +126,6 @@ public class TikaCLI {
     private TikaLoader tikaLoader;
     private String configFilePath;
     private boolean recursiveJSON = false;
-    private URI networkURI = null;
     /**
      * Output character encoding, or <code>null</code> for platform default
      */
@@ -511,10 +508,6 @@ public class TikaCLI {
             prettyPrint = true;
         } else if (arg.equals("-p") || arg.equals("--port") || arg.equals("-s") || arg.equals("--server")) {
             throw new IllegalArgumentException("As of Tika 2.0, the server option is no longer supported in tika-app.\n" + "See https://wiki.apache.org/tika/TikaJAXRS for usage.");
-        } else if (arg.startsWith("-c")) {
-            networkURI = new URI(arg.substring("-c".length()));
-        } else if (arg.startsWith("--client=")) {
-            networkURI = new URI(arg.substring("--client=".length()));
         } else {
             pipeMode = false;
             configure();
@@ -879,11 +872,7 @@ public class TikaCLI {
                 Files.deleteIfExists(tempConfig);
             }
         }
-        if (networkURI != null) {
-            parser = new NetworkParser(networkURI);
-        } else {
-            parser = tikaLoader.loadAutoDetectParser();
-        }
+        parser = tikaLoader.loadAutoDetectParser();
 
         // Load configs from tika-config.json and merge into existing context
         // (preserves EmbeddedDocumentExtractor and other items set before configure())
diff --git a/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java b/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
index 76bc5c7525..72168feac4 100644
--- a/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
+++ b/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
@@ -26,7 +26,6 @@ import java.util.List;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
-import javax.xml.XMLConstants;
 import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParserFactory;
@@ -211,14 +210,8 @@ public class MimeTypesReader extends DefaultHandler implements MimeTypesReaderMe
     }
 
     private static SAXParser newSAXParser() throws TikaException {
-        SAXParserFactory factory = SAXParserFactory.newInstance();
+        SAXParserFactory factory = XMLReaderUtils.getSAXParserFactory();
         factory.setNamespaceAware(false);
-        try {
-            factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
-        } catch (ParserConfigurationException | SAXException e) {
-            LOG.warn("can't set secure processing feature on: " + factory.getClass() +
-                    ". User assumes responsibility for consequences.");
-        }
         try {
             return factory.newSAXParser();
         } catch (ParserConfigurationException | SAXException e) {
diff --git a/tika-core/src/main/java/org/apache/tika/parser/NetworkParser.java b/tika-core/src/main/java/org/apache/tika/parser/NetworkParser.java
deleted file mode 100644
index ff88f17c11..0000000000
--- a/tika-core/src/main/java/org/apache/tika/parser/NetworkParser.java
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser;
-
-import java.io.FilterOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.net.Socket;
-import java.net.URI;
-import java.net.URL;
-import java.net.URLConnection;
-import java.util.Collections;
-import java.util.Set;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.io.input.CloseShieldInputStream;
-import org.xml.sax.Attributes;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.sax.TaggedContentHandler;
-import org.apache.tika.sax.TeeContentHandler;
-import org.apache.tika.utils.XMLReaderUtils;
-
-
-public class NetworkParser implements Parser {
-
-    private final URI uri;
-
-    private final Set<MediaType> supportedTypes;
-
-    public NetworkParser(URI uri, Set<MediaType> supportedTypes) {
-        this.uri = uri;
-        this.supportedTypes = supportedTypes;
-    }
-
-    public NetworkParser(URI uri) {
-        this(uri, Collections.singleton(MediaType.OCTET_STREAM));
-    }
-
-    public Set<MediaType> getSupportedTypes(ParseContext context) {
-        return supportedTypes;
-    }
-
-    public void parse(TikaInputStream tis, ContentHandler handler, Metadata metadata,
-                      ParseContext context) throws IOException, SAXException, TikaException {
-        if ("telnet".equals(uri.getScheme())) {
-            try (Socket socket = new Socket(uri.getHost(), uri.getPort())) {
-                new ParsingTask(tis, new FilterOutputStream(socket.getOutputStream()) {
-                    @Override
-                    public void close() throws IOException {
-                        socket.shutdownOutput();
-                    }
-                }).parse(socket.getInputStream(), handler, metadata, context);
-            }
-        } else {
-            URL url = uri.toURL();
-            URLConnection connection = url.openConnection();
-            connection.setDoOutput(true);
-            connection.connect();
-            try (InputStream input = connection.getInputStream()) {
-                new ParsingTask(tis, connection.getOutputStream())
-                        .parse(CloseShieldInputStream.wrap(input), handler, metadata, context);
-            }
-        }
-
-    }
-
-    private static class ParsingTask implements Runnable {
-
-        private final TikaInputStream input;
-
-        private final OutputStream output;
-
-        private volatile Exception exception = null;
-
-        public ParsingTask(TikaInputStream input, OutputStream output) {
-            this.input = input;
-            this.output = output;
-        }
-
-        public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
-                          ParseContext context) throws IOException, SAXException, TikaException {
-            Thread thread = new Thread(this, "Tika network parser");
-            thread.start();
-
-            TaggedContentHandler tagged =
-                    new TaggedContentHandler(handler);
-            try {
-                XMLReaderUtils
-                        .parseSAX(stream, new TeeContentHandler(tagged, new MetaHandler(metadata)),
-                                context);
-            } catch (SAXException e) {
-                tagged.throwIfCauseOf(e);
-                throw new TikaException("Invalid network parser output", e);
-            } catch (IOException e) {
-                throw new TikaException("Unable to read network parser output", e);
-            } finally {
-                try {
-                    thread.join(1000);
-                } catch (InterruptedException e) {
-                    throw new TikaException("Network parser interrupted", e);
-                }
-
-                if (exception != null) {
-                    input.throwIfCauseOf(exception);
-                    throw new TikaException("Unexpected network parser error", exception);
-                }
-            }
-        }
-
-        //----------------------------------------------------------<Runnable>
-
-        public void run() {
-            try {
-                try {
-                    IOUtils.copy(input, output);
-                } finally {
-                    output.close();
-                }
-            } catch (Exception e) {
-                exception = e;
-            }
-        }
-
-    }
-
-    private static class MetaHandler extends DefaultHandler {
-
-        private final Metadata metadata;
-
-        public MetaHandler(Metadata metadata) {
-            this.metadata = metadata;
-        }
-
-        @Override
-        public void startElement(String uri, String localName, String qName, Attributes attributes)
-                throws SAXException {
-            if ("http://www.w3.org/1999/xhtml".equals(uri) && "meta".equals(localName)) {
-                String name = attributes.getValue("", "name");
-                String content = attributes.getValue("", "content");
-                if (name != null && content != null) {
-                    metadata.add(name, content);
-                }
-            }
-        }
-
-    }
-
-}

Reply via email to