This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new a40c9d93f TIKA-4451 -- remove XML logger updater (#2275)
a40c9d93f is described below
commit a40c9d93fbd76344143841609aa853abea9759d8
Author: Tim Allison <[email protected]>
AuthorDate: Tue Jul 8 17:57:08 2025 -0400
TIKA-4451 -- remove XML logger updater (#2275)
---
.../apache/tika/eval/app/XMLErrorLogUpdater.java | 211 ---------------------
.../tika/eval/app/batch/DBConsumersManager.java | 12 --
.../apache/tika/eval/app/io/XMLLogMsgHandler.java | 27 ---
.../org/apache/tika/eval/app/io/XMLLogReader.java | 116 -----------
.../tika/eval/app/io/FatalExceptionReaderTest.java | 35 ----
5 files changed, 401 deletions(-)
diff --git a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/XMLErrorLogUpdater.java b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/XMLErrorLogUpdater.java
deleted file mode 100644
index 81ae5f2be..000000000
--- a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/XMLErrorLogUpdater.java
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.eval.app;
-
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.StringReader;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.sql.Connection;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import javax.xml.stream.XMLInputFactory;
-import javax.xml.stream.XMLStreamConstants;
-import javax.xml.stream.XMLStreamException;
-import javax.xml.stream.XMLStreamReader;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.slf4j.event.Level;
-
-import org.apache.tika.eval.app.db.Cols;
-import org.apache.tika.eval.app.db.H2Util;
-import org.apache.tika.eval.app.db.JDBCUtil;
-import org.apache.tika.eval.app.db.TableInfo;
-import org.apache.tika.eval.app.io.XMLLogMsgHandler;
-import org.apache.tika.eval.app.io.XMLLogReader;
-import org.apache.tika.eval.app.reports.ResultsReporter;
-
-/**
- * This is a very task specific class that reads a log file and updates
- * the "comparisons" table. It should not be run in a multithreaded
environment.
- */
-public class XMLErrorLogUpdater {
- private static final Logger LOG =
LoggerFactory.getLogger(ResultsReporter.class);
-
- private Statement statement;
-
- public static void main(String[] args) throws Exception {
- XMLErrorLogUpdater writer = new XMLErrorLogUpdater();
- Path xmlLogFileA = Paths.get(args[0]);
- Path xmlLogFileB = Paths.get(args[1]);
- Path db = Paths.get(args[2]);
- JDBCUtil dbUtil = new H2Util(db);
- Connection connection = dbUtil.getConnection();
- writer.update(connection, ExtractComparer.EXTRACT_EXCEPTION_TABLE_A,
xmlLogFileA);
- writer.update(connection, ExtractComparer.EXTRACT_EXCEPTION_TABLE_B,
xmlLogFileB);
- connection.commit();
- }
-
- public void update(Connection connection, TableInfo tableInfo, Path
xmlLogFile) throws Exception {
- statement = connection.createStatement();
- XMLLogReader reader = new XMLLogReader();
- try (InputStream is = Files.newInputStream(xmlLogFile)) {
- reader.read(is, new ErrorMsgUpdater(tableInfo.getName()));
- } catch (IOException e) {
- throw new RuntimeException("Problem reading: " + xmlLogFile
- .toAbsolutePath()
- .toString());
- } finally {
- try {
- connection.commit();
- statement.close();
- } catch (SQLException e) {
- throw new RuntimeException("Failed to close db connection!",
e);
- }
- }
- }
-
- private class ErrorMsgUpdater implements XMLLogMsgHandler {
- private final String errorTablename;
-
- private ErrorMsgUpdater(String errorTablename) {
- this.errorTablename = errorTablename;
- }
-
- @Override
- public void handleMsg(Level level, String xml) throws SQLException,
IOException {
- if (!level.equals(Level.ERROR)) {
- return;
- }
- XMLStreamReader reader = null;
- try {
- reader = XMLInputFactory
- .newInstance()
- .createXMLStreamReader(new StringReader(xml));
- } catch (XMLStreamException e) {
- throw new IOException(e);
- }
- String type = null;
- String resourceId = null;
- try {
- while (reader.hasNext() && type == null && resourceId == null)
{
- reader.next();
- switch (reader.getEventType()) {
- case XMLStreamConstants.START_ELEMENT:
- if ("timed_out".equals(reader.getLocalName())) {
- resourceId = reader.getAttributeValue("",
"resourceId");
- update(errorTablename, resourceId,
AbstractProfiler.PARSE_ERROR_TYPE.TIMEOUT);
-
- } else if ("oom".equals(reader.getLocalName())) {
- resourceId = reader.getAttributeValue("",
"resourceId");
- update(errorTablename, resourceId,
AbstractProfiler.PARSE_ERROR_TYPE.OOM);
- }
- break;
- }
- }
- reader.close();
- } catch (XMLStreamException e) {
- throw new IOException(e);
- }
- }
-
- private void update(String errorTableName, String filePath,
AbstractProfiler.PARSE_ERROR_TYPE type) throws SQLException {
- int containerId = getContainerId(filePath);
- String sql = "SELECT count(1) from " + errorTableName + " where "
+ Cols.CONTAINER_ID + " = " + containerId + " or " + Cols.FILE_PATH + "='" +
filePath + "'";
- int hitCount;
- try (ResultSet rs = statement.executeQuery(sql)) {
- //now try to figure out if that file already exists
- //in parse errors
- hitCount = 0;
- while (rs.next()) {
- hitCount = rs.getInt(1);
- }
- }
-
- //if it does, update all records matching that path or container id
- if (hitCount > 0) {
- sql = "UPDATE " + errorTableName + " SET " +
Cols.PARSE_ERROR_ID + " = " + type.ordinal() + "," + Cols.FILE_PATH + "='" +
filePath + "'" + " where " +
- Cols.CONTAINER_ID + "=" + containerId + " or " +
Cols.FILE_PATH + "='" + filePath + "'";
-
- } else {
- //if not and container id > -1
- //insert full record
- if (containerId > -1) {
- sql = "INSERT INTO " + errorTableName + " (" +
Cols.CONTAINER_ID + "," + Cols.FILE_PATH + "," + Cols.PARSE_ERROR_ID + ")" + "
values (" + containerId + ", '" +
- filePath + "'," + type.ordinal() + ");";
- } else {
- //if container id == -1, insert only file path and parse
error type id
- sql = "INSERT INTO " + errorTableName + " (" +
Cols.FILE_PATH.name() + "," + Cols.PARSE_ERROR_ID + ")" + "values ('" +
filePath + "'," + type.ordinal() + ");";
- }
-
- }
- int updated = statement.executeUpdate(sql);
- if (updated == 0) {
- //TODO: log
- LOG.warn("made no updates in xmlerrorlogupdater!");
- } else if (updated > 1) {
- LOG.warn("made too many updates");
- }
- }
-
- private int getContainerId(String resourceId) throws SQLException {
- int containerId = -1;
- String sql = "SELECT " + Cols.CONTAINER_ID.name() + " from " +
ExtractProfiler.CONTAINER_TABLE.getName() + " where " + Cols.FILE_PATH + " ='"
+ resourceId + "'";
- int resultCount;
- try (ResultSet rs = statement.executeQuery(sql)) {
- resultCount = 0;
- while (rs.next()) {
- containerId = rs.getInt(1);
- resultCount++;
- }
- }
-
- if (resultCount == 0) {
- LOG.warn("Should have found a container for: {}", resourceId);
- } else if (resultCount > 1) {
- LOG.error("Records ids should be unique: {}", resourceId);
- }
-/*
- if (containerId < 0) {
- System.err.println("CONTAINER ID < 0!!!");
- sql = "SELECT MAX("+ Cols.CONTAINER_ID.name() +
- ") from "+ExtractProfiler.CONTAINER_TABLE.getName();
- rs = statement.executeQuery(sql);
- while (rs.next()) {
- containerId = rs.getInt(1);
- }
- rs.close();
- if (containerId < 0) {
- //log and abort
- //return -1?
- } else {
- containerId++;
- }
-
- }*/
- return containerId;
- }
-
-
- }
-
-}
diff --git a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/batch/DBConsumersManager.java b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/batch/DBConsumersManager.java
index aba35416f..41db03520 100644
--- a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/batch/DBConsumersManager.java
+++ b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/batch/DBConsumersManager.java
@@ -26,7 +26,6 @@ import java.util.List;
import org.apache.tika.batch.ConsumersManager;
import org.apache.tika.batch.FileResourceConsumer;
import org.apache.tika.eval.app.AbstractProfiler;
-import org.apache.tika.eval.app.XMLErrorLogUpdater;
import org.apache.tika.eval.app.db.JDBCUtil;
import org.apache.tika.eval.app.db.MimeBuffer;
import org.apache.tika.eval.app.db.TableInfo;
@@ -64,17 +63,6 @@ public class DBConsumersManager extends ConsumersManager {
throw new RuntimeException(e);
}
- //MUST HAPPEN AFTER consumers have closed and
- //committed container information!!!
- XMLErrorLogUpdater up = new XMLErrorLogUpdater();
- for (LogTablePair p : errorLogs) {
- try {
- up.update(conn, p.tableInfo, p.log);
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
-
try {
conn.commit();
diff --git a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/XMLLogMsgHandler.java b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/XMLLogMsgHandler.java
deleted file mode 100644
index 0221849ba..000000000
--- a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/XMLLogMsgHandler.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.eval.app.io;
-
-import java.io.IOException;
-import java.sql.SQLException;
-
-import org.slf4j.event.Level;
-
-
-public interface XMLLogMsgHandler {
- public void handleMsg(Level level, String xml) throws IOException,
SQLException;
-}
diff --git a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/XMLLogReader.java b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/XMLLogReader.java
deleted file mode 100644
index 2b8350894..000000000
--- a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/io/XMLLogReader.java
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.eval.app.io;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.sql.SQLException;
-import javax.xml.stream.XMLInputFactory;
-import javax.xml.stream.XMLStreamConstants;
-import javax.xml.stream.XMLStreamException;
-import javax.xml.stream.XMLStreamReader;
-
-import org.apache.commons.io.IOUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.slf4j.event.Level;
-
-import org.apache.tika.utils.XMLReaderUtils;
-
-
-public class XMLLogReader {
- private static final Logger LOG =
LoggerFactory.getLogger(XMLLogReader.class);
- //class that wraps a logger's xml output
- //into a single xml parseable input stream.
-
- public void read(InputStream xmlLogFileIs, XMLLogMsgHandler handler)
throws XMLStreamException {
- InputStream is = new LogXMLWrappingInputStream(xmlLogFileIs);
- XMLInputFactory factory = XMLReaderUtils.getXMLInputFactory();
- XMLStreamReader reader = factory.createXMLStreamReader(is);
-
- Level level = null;
- while (reader.hasNext()) {
- reader.next();
- switch (reader.getEventType()) {
- case XMLStreamConstants.START_ELEMENT:
- if ("event".equals(reader.getLocalName())) {
- String levelString = reader.getAttributeValue("",
"level");
- if (levelString != null) {
- level = Level.valueOf(levelString);
- } else {
- level = Level.DEBUG;
- }
- } else if ("message".equals(reader.getLocalName())) {
- try {
- handler.handleMsg(level, reader.getElementText());
- } catch (IOException e) {
- LOG.warn("Error parsing: {}",
reader.getElementText());
- } catch (SQLException e) {
- LOG.warn("SQLException: {}", e.getMessage());
- }
- }
- break;
- case XMLStreamConstants.END_ELEMENT:
- if ("event".equals(reader.getLocalName())) {
- level = null;
- } else if ("message".equals(reader.getLocalName())) {
- //do we care any more?
- }
- break;
- }
- }
- }
-
-
- static class LogXMLWrappingInputStream extends InputStream {
- //plagiarized from log4j's chainsaw
- private final static String HEADER =
- "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" +
"<log4j:eventSet version=\"1.2\" " +
"xmlns:log4j=\"http://jakarta.apache.org/log4j/\">";
- private static final String FOOTER = "</log4j:eventSet>";
- int currentStreamIndex = 0;
- private InputStream[] streams;
-
- private LogXMLWrappingInputStream(InputStream xmlLogFileIs) {
- streams = new InputStream[3];
- streams[0] = new
ByteArrayInputStream(HEADER.getBytes(StandardCharsets.UTF_8));
- streams[1] = xmlLogFileIs;
- streams[2] = new
ByteArrayInputStream(FOOTER.getBytes(StandardCharsets.UTF_8));
-
- }
-
- @Override
- public int read() throws IOException {
- int c = streams[currentStreamIndex].read();
- if (c < 0) {
- IOUtils.closeQuietly(streams[currentStreamIndex]);
- while (currentStreamIndex < streams.length - 1) {
- currentStreamIndex++;
- int tmpC = streams[currentStreamIndex].read();
- if (tmpC < 0) {
- IOUtils.closeQuietly(streams[currentStreamIndex]);
- } else {
- return tmpC;
- }
- }
- return -1;
- }
- return c;
- }
- }
-}
diff --git a/tika-eval/tika-eval-app/src/test/java/org/apache/tika/eval/app/io/FatalExceptionReaderTest.java b/tika-eval/tika-eval-app/src/test/java/org/apache/tika/eval/app/io/FatalExceptionReaderTest.java
deleted file mode 100644
index 2cd0d1363..000000000
--- a/tika-eval/tika-eval-app/src/test/java/org/apache/tika/eval/app/io/FatalExceptionReaderTest.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.eval.app.io;
-
-
-import java.io.InputStream;
-
-import org.junit.jupiter.api.Test;
-
-public class FatalExceptionReaderTest {
- @Test
- public void testSimpleRead() throws Exception {
- try (InputStream is = this
- .getClass()
-
.getResourceAsStream("/test-dirs/batch-logs/batch-process-fatal.xml")) {
- XMLLogReader reader = new XMLLogReader();
- //reader.read(is);
- }
- }
-}