This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new a40c9d93f TIKA-4451 -- remove XML logger updater (#2275)
a40c9d93f is described below

commit a40c9d93fbd76344143841609aa853abea9759d8
Author: Tim Allison <[email protected]>
AuthorDate: Tue Jul 8 17:57:08 2025 -0400

    TIKA-4451 -- remove XML logger updater (#2275)
---
 .../apache/tika/eval/app/XMLErrorLogUpdater.java   | 211 ---------------------
 .../tika/eval/app/batch/DBConsumersManager.java    |  12 --
 .../apache/tika/eval/app/io/XMLLogMsgHandler.java  |  27 ---
 .../org/apache/tika/eval/app/io/XMLLogReader.java  | 116 -----------
 .../tika/eval/app/io/FatalExceptionReaderTest.java |  35 ----
 5 files changed, 401 deletions(-)

diff --git 
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/XMLErrorLogUpdater.java
 
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/XMLErrorLogUpdater.java
deleted file mode 100644
index 81ae5f2be..000000000
--- 
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/XMLErrorLogUpdater.java
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.eval.app;
-
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.StringReader;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.sql.Connection;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import javax.xml.stream.XMLInputFactory;
-import javax.xml.stream.XMLStreamConstants;
-import javax.xml.stream.XMLStreamException;
-import javax.xml.stream.XMLStreamReader;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.slf4j.event.Level;
-
-import org.apache.tika.eval.app.db.Cols;
-import org.apache.tika.eval.app.db.H2Util;
-import org.apache.tika.eval.app.db.JDBCUtil;
-import org.apache.tika.eval.app.db.TableInfo;
-import org.apache.tika.eval.app.io.XMLLogMsgHandler;
-import org.apache.tika.eval.app.io.XMLLogReader;
-import org.apache.tika.eval.app.reports.ResultsReporter;
-
-/**
- * This is a very task specific class that reads a log file and updates
- * the "comparisons" table.  It should not be run in a multithreaded 
environment.
- */
-public class XMLErrorLogUpdater {
-    private static final Logger LOG = 
LoggerFactory.getLogger(ResultsReporter.class);
-
-    private Statement statement;
-
-    public static void main(String[] args) throws Exception {
-        XMLErrorLogUpdater writer = new XMLErrorLogUpdater();
-        Path xmlLogFileA = Paths.get(args[0]);
-        Path xmlLogFileB = Paths.get(args[1]);
-        Path db = Paths.get(args[2]);
-        JDBCUtil dbUtil = new H2Util(db);
-        Connection connection = dbUtil.getConnection();
-        writer.update(connection, ExtractComparer.EXTRACT_EXCEPTION_TABLE_A, 
xmlLogFileA);
-        writer.update(connection, ExtractComparer.EXTRACT_EXCEPTION_TABLE_B, 
xmlLogFileB);
-        connection.commit();
-    }
-
-    public void update(Connection connection, TableInfo tableInfo, Path 
xmlLogFile) throws Exception {
-        statement = connection.createStatement();
-        XMLLogReader reader = new XMLLogReader();
-        try (InputStream is = Files.newInputStream(xmlLogFile)) {
-            reader.read(is, new ErrorMsgUpdater(tableInfo.getName()));
-        } catch (IOException e) {
-            throw new RuntimeException("Problem reading: " + xmlLogFile
-                    .toAbsolutePath()
-                    .toString());
-        } finally {
-            try {
-                connection.commit();
-                statement.close();
-            } catch (SQLException e) {
-                throw new RuntimeException("Failed to close db connection!", 
e);
-            }
-        }
-    }
-
-    private class ErrorMsgUpdater implements XMLLogMsgHandler {
-        private final String errorTablename;
-
-        private ErrorMsgUpdater(String errorTablename) {
-            this.errorTablename = errorTablename;
-        }
-
-        @Override
-        public void handleMsg(Level level, String xml) throws SQLException, 
IOException {
-            if (!level.equals(Level.ERROR)) {
-                return;
-            }
-            XMLStreamReader reader = null;
-            try {
-                reader = XMLInputFactory
-                        .newInstance()
-                        .createXMLStreamReader(new StringReader(xml));
-            } catch (XMLStreamException e) {
-                throw new IOException(e);
-            }
-            String type = null;
-            String resourceId = null;
-            try {
-                while (reader.hasNext() && type == null && resourceId == null) 
{
-                    reader.next();
-                    switch (reader.getEventType()) {
-                        case XMLStreamConstants.START_ELEMENT:
-                            if ("timed_out".equals(reader.getLocalName())) {
-                                resourceId = reader.getAttributeValue("", 
"resourceId");
-                                update(errorTablename, resourceId, 
AbstractProfiler.PARSE_ERROR_TYPE.TIMEOUT);
-
-                            } else if ("oom".equals(reader.getLocalName())) {
-                                resourceId = reader.getAttributeValue("", 
"resourceId");
-                                update(errorTablename, resourceId, 
AbstractProfiler.PARSE_ERROR_TYPE.OOM);
-                            }
-                            break;
-                    }
-                }
-                reader.close();
-            } catch (XMLStreamException e) {
-                throw new IOException(e);
-            }
-        }
-
-        private void update(String errorTableName, String filePath, 
AbstractProfiler.PARSE_ERROR_TYPE type) throws SQLException {
-            int containerId = getContainerId(filePath);
-            String sql = "SELECT count(1) from " + errorTableName + " where " 
+ Cols.CONTAINER_ID + " = " + containerId + " or " + Cols.FILE_PATH + "='" + 
filePath + "'";
-            int hitCount;
-            try (ResultSet rs = statement.executeQuery(sql)) {
-                //now try to figure out if that file already exists
-                //in parse errors
-                hitCount = 0;
-                while (rs.next()) {
-                    hitCount = rs.getInt(1);
-                }
-            }
-
-            //if it does, update all records matching that path or container id
-            if (hitCount > 0) {
-                sql = "UPDATE " + errorTableName + " SET " + 
Cols.PARSE_ERROR_ID + " = " + type.ordinal() + "," + Cols.FILE_PATH + "='" + 
filePath + "'" + " where " +
-                        Cols.CONTAINER_ID + "=" + containerId + " or " + 
Cols.FILE_PATH + "='" + filePath + "'";
-
-            } else {
-                //if not and container id > -1
-                //insert full record
-                if (containerId > -1) {
-                    sql = "INSERT INTO " + errorTableName + " (" + 
Cols.CONTAINER_ID + "," + Cols.FILE_PATH + "," + Cols.PARSE_ERROR_ID + ")" + " 
values (" + containerId + ", '" +
-                            filePath + "'," + type.ordinal() + ");";
-                } else {
-                    //if container id == -1, insert only file path and parse 
error type id
-                    sql = "INSERT INTO " + errorTableName + " (" + 
Cols.FILE_PATH.name() + "," + Cols.PARSE_ERROR_ID + ")" + "values ('" + 
filePath + "'," + type.ordinal() + ");";
-                }
-
-            }
-            int updated = statement.executeUpdate(sql);
-            if (updated == 0) {
-                //TODO: log
-                LOG.warn("made no updates in xmlerrorlogupdater!");
-            } else if (updated > 1) {
-                LOG.warn("made too many updates");
-            }
-        }
-
-        private int getContainerId(String resourceId) throws SQLException {
-            int containerId = -1;
-            String sql = "SELECT " + Cols.CONTAINER_ID.name() + " from " + 
ExtractProfiler.CONTAINER_TABLE.getName() + " where " + Cols.FILE_PATH + " ='" 
+ resourceId + "'";
-            int resultCount;
-            try (ResultSet rs = statement.executeQuery(sql)) {
-                resultCount = 0;
-                while (rs.next()) {
-                    containerId = rs.getInt(1);
-                    resultCount++;
-                }
-            }
-
-            if (resultCount == 0) {
-                LOG.warn("Should have found a container for: {}", resourceId);
-            } else if (resultCount > 1) {
-                LOG.error("Records ids should be unique: {}", resourceId);
-            }
-/*
-            if (containerId < 0) {
-                System.err.println("CONTAINER ID < 0!!!");
-                sql = "SELECT MAX("+ Cols.CONTAINER_ID.name() +
-                        ") from "+ExtractProfiler.CONTAINER_TABLE.getName();
-                rs = statement.executeQuery(sql);
-                while (rs.next()) {
-                    containerId = rs.getInt(1);
-                }
-                rs.close();
-                if (containerId < 0) {
-                    //log and abort
-                    //return -1?
-                } else {
-                    containerId++;
-                }
-
-            }*/
-            return containerId;
-        }
-
-
-    }
-
-}
diff --git 
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/batch/DBConsumersManager.java
 
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/batch/DBConsumersManager.java
index aba35416f..41db03520 100644
--- 
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/batch/DBConsumersManager.java
+++ 
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/batch/DBConsumersManager.java
@@ -26,7 +26,6 @@ import java.util.List;
 import org.apache.tika.batch.ConsumersManager;
 import org.apache.tika.batch.FileResourceConsumer;
 import org.apache.tika.eval.app.AbstractProfiler;
-import org.apache.tika.eval.app.XMLErrorLogUpdater;
 import org.apache.tika.eval.app.db.JDBCUtil;
 import org.apache.tika.eval.app.db.MimeBuffer;
 import org.apache.tika.eval.app.db.TableInfo;
@@ -64,17 +63,6 @@ public class DBConsumersManager extends ConsumersManager {
             throw new RuntimeException(e);
         }
 
-        //MUST HAPPEN AFTER consumers have closed and
-        //committed container information!!!
-        XMLErrorLogUpdater up = new XMLErrorLogUpdater();
-        for (LogTablePair p : errorLogs) {
-            try {
-                up.update(conn, p.tableInfo, p.log);
-            } catch (Exception e) {
-                throw new RuntimeException(e);
-            }
-        }
-
 
         try {
             conn.commit();
diff --git 
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/XMLLogMsgHandler.java
 
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/XMLLogMsgHandler.java
deleted file mode 100644
index 0221849ba..000000000
--- 
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/XMLLogMsgHandler.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.eval.app.io;
-
-import java.io.IOException;
-import java.sql.SQLException;
-
-import org.slf4j.event.Level;
-
-
-public interface XMLLogMsgHandler {
-    public void handleMsg(Level level, String xml) throws IOException, 
SQLException;
-}
diff --git 
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/XMLLogReader.java
 
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/XMLLogReader.java
deleted file mode 100644
index 2b8350894..000000000
--- 
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/XMLLogReader.java
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.eval.app.io;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.sql.SQLException;
-import javax.xml.stream.XMLInputFactory;
-import javax.xml.stream.XMLStreamConstants;
-import javax.xml.stream.XMLStreamException;
-import javax.xml.stream.XMLStreamReader;
-
-import org.apache.commons.io.IOUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.slf4j.event.Level;
-
-import org.apache.tika.utils.XMLReaderUtils;
-
-
-public class XMLLogReader {
-    private static final Logger LOG = 
LoggerFactory.getLogger(XMLLogReader.class);
-    //class that wraps a logger's xml output
-    //into a single xml parseable input stream.
-
-    public void read(InputStream xmlLogFileIs, XMLLogMsgHandler handler) 
throws XMLStreamException {
-        InputStream is = new LogXMLWrappingInputStream(xmlLogFileIs);
-        XMLInputFactory factory = XMLReaderUtils.getXMLInputFactory();
-        XMLStreamReader reader = factory.createXMLStreamReader(is);
-
-        Level level = null;
-        while (reader.hasNext()) {
-            reader.next();
-            switch (reader.getEventType()) {
-                case XMLStreamConstants.START_ELEMENT:
-                    if ("event".equals(reader.getLocalName())) {
-                        String levelString = reader.getAttributeValue("", 
"level");
-                        if (levelString != null) {
-                            level = Level.valueOf(levelString);
-                        } else {
-                            level = Level.DEBUG;
-                        }
-                    } else if ("message".equals(reader.getLocalName())) {
-                        try {
-                            handler.handleMsg(level, reader.getElementText());
-                        } catch (IOException e) {
-                            LOG.warn("Error parsing: {}", 
reader.getElementText());
-                        } catch (SQLException e) {
-                            LOG.warn("SQLException: {}", e.getMessage());
-                        }
-                    }
-                    break;
-                case XMLStreamConstants.END_ELEMENT:
-                    if ("event".equals(reader.getLocalName())) {
-                        level = null;
-                    } else if ("message".equals(reader.getLocalName())) {
-                        //do we care any more?
-                    }
-                    break;
-            }
-        }
-    }
-
-
-    static class LogXMLWrappingInputStream extends InputStream {
-        //plagiarized from log4j's chainsaw
-        private final static String HEADER =
-                "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" + 
"<log4j:eventSet version=\"1.2\" " + 
"xmlns:log4j=\"http://jakarta.apache.org/log4j/\";>";
-        private static final String FOOTER = "</log4j:eventSet>";
-        int currentStreamIndex = 0;
-        private InputStream[] streams;
-
-        private LogXMLWrappingInputStream(InputStream xmlLogFileIs) {
-            streams = new InputStream[3];
-            streams[0] = new 
ByteArrayInputStream(HEADER.getBytes(StandardCharsets.UTF_8));
-            streams[1] = xmlLogFileIs;
-            streams[2] = new 
ByteArrayInputStream(FOOTER.getBytes(StandardCharsets.UTF_8));
-
-        }
-
-        @Override
-        public int read() throws IOException {
-            int c = streams[currentStreamIndex].read();
-            if (c < 0) {
-                IOUtils.closeQuietly(streams[currentStreamIndex]);
-                while (currentStreamIndex < streams.length - 1) {
-                    currentStreamIndex++;
-                    int tmpC = streams[currentStreamIndex].read();
-                    if (tmpC < 0) {
-                        IOUtils.closeQuietly(streams[currentStreamIndex]);
-                    } else {
-                        return tmpC;
-                    }
-                }
-                return -1;
-            }
-            return c;
-        }
-    }
-}
diff --git 
a/tika-eval/tika-eval-app/src/test/java/org/apache/tika/eval/app/io/FatalExceptionReaderTest.java
 
b/tika-eval/tika-eval-app/src/test/java/org/apache/tika/eval/app/io/FatalExceptionReaderTest.java
deleted file mode 100644
index 2cd0d1363..000000000
--- 
a/tika-eval/tika-eval-app/src/test/java/org/apache/tika/eval/app/io/FatalExceptionReaderTest.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.eval.app.io;
-
-
-import java.io.InputStream;
-
-import org.junit.jupiter.api.Test;
-
-public class FatalExceptionReaderTest {
-    @Test
-    public void testSimpleRead() throws Exception {
-        try (InputStream is = this
-                .getClass()
-                
.getResourceAsStream("/test-dirs/batch-logs/batch-process-fatal.xml")) {
-            XMLLogReader reader = new XMLLogReader();
-            //reader.read(is);
-        }
-    }
-}

Reply via email to