This is an automated email from the ASF dual-hosted git repository. tballison pushed a commit to branch TIKA-4742-improve-logging in repository https://gitbox.apache.org/repos/asf/tika.git
commit 82099691dd18e8105ce92523df5cbce1c3b3d235 Author: tallison <[email protected]> AuthorDate: Wed May 27 12:30:45 2026 -0400 TIKA-4742 -- refactor logging for beta-1 --- docs/modules/ROOT/pages/pipes/troubleshooting.adoc | 80 ++++++++++++++++++++++ .../resources/pipes-fork-server-default-log4j2.xml | 32 --------- .../tika/parser/ocrencode/EncodeOCRParser.java | 4 +- .../tika/parser/ner/grobid/GrobidNERecogniser.java | 12 ++-- .../tika/parser/ner/nltk/NLTKNERecogniser.java | 2 +- .../detect/microsoft/POIFSContainerDetector.java | 2 +- .../tika/parser/microsoft/libpst/LibPstParser.java | 3 +- .../microsoft/msg/ExtendedMetadataExtractor.java | 2 +- .../microsoft/ooxml/TikaSheetXMLHandler.java | 5 +- .../apache/tika/parser/hwp/HwpTextExtractorV5.java | 4 +- .../apache/tika/parser/ocr/tess4j/ImageDeskew.java | 7 +- .../apache/tika/parser/ocr/tess4j/ImageUtil.java | 7 +- .../org/apache/tika/client/HttpClientFactory.java | 4 +- .../org/apache/tika/pipes/core/PipesClient.java | 2 +- .../tika/pipes/core/async/AsyncProcessor.java | 3 +- .../apache/tika/pipes/core/server/EmitHandler.java | 10 +-- .../tika/pipes/core/server/FetchHandler.java | 4 +- .../tika/pipes/core/server/ParseHandler.java | 22 +++--- .../apache/tika/pipes/core/server/PipesServer.java | 21 +++--- .../apache/tika/pipes/core/server/PipesWorker.java | 4 +- .../resources/pipes-fork-server-default-log4j2.xml | 24 ++++++- .../fetcher/atlassianjwt/AtlassianJwtFetcher.java | 4 +- .../tika/pipes/fetcher/azblob/AZBlobFetcher.java | 2 +- .../tika/pipes/iterator/csv/CSVPipesIterator.java | 2 +- .../apache/tika/pipes/emitter/es/ESEmitter.java | 4 +- .../apache/tika/pipes/fetcher/gcs/GCSFetcher.java | 2 +- .../tika/pipes/fetcher/http/HttpFetcher.java | 5 +- .../tika/pipes/emitter/jdbc/JDBCEmitter.java | 4 +- .../pipes/iterator/jdbc/JDBCPipesIterator.java | 19 +++-- .../pipesiterator/json/JsonPipesIterator.java | 2 +- .../tika/pipes/emitter/kafka/KafkaEmitter.java | 2 +- .../emitter/opensearch/OpenSearchEmitter.java | 6 +- .../apache/tika/pipes/fetcher/s3/S3Fetcher.java | 6 +- .../pipes/iterator/solr/SolrPipesIterator.java | 2 +- .../apache/tika/server/client/TikaClientCLI.java | 6 +- .../tika/server/core/resource/AsyncResource.java | 6 +- .../server/core/resource/DetectorResource.java | 2 +- .../server/core/resource/LanguageResource.java | 2 +- .../server/core/resource/MetadataResource.java | 2 +- .../server/core/resource/TranslateResource.java | 6 +- 40 files changed, 202 insertions(+), 136 deletions(-) diff --git a/docs/modules/ROOT/pages/pipes/troubleshooting.adoc b/docs/modules/ROOT/pages/pipes/troubleshooting.adoc index 3765bae2e7..c596e4ade9 100644 --- a/docs/modules/ROOT/pages/pipes/troubleshooting.adoc +++ b/docs/modules/ROOT/pages/pipes/troubleshooting.adoc @@ -62,6 +62,37 @@ pick them up automatically. The default `pipes-fork-server-default-log4j2.xml` writes to `SYSTEM_ERR`, so inheritance is what makes those records visible to your observability stack. +=== Telling fork lines from parent lines + +Since the fork and parent share a single stdio stream, the bundled +`pipes-fork-server-default-log4j2.xml` pattern adds two orthogonal markers +so you can read the interleaved output: + +* `[fork]` -- present only on lines emitted by a forked `PipesServer` + JVM. Lines from the parent process (`PipesClient`, `AsyncProcessor`, + `ConnectionHandler`, `tika-server`, `tika-grpc`, etc.) do not carry + this tag. Different mechanism on each side: the fork has it injected + via the bundled pattern's literal `[fork]` token; the parent does + not include it in its own log4j2/logback patterns. + +* `pipesClientId=N` -- *the same value on both sides of a pair*. The + parent's `PipesClient #N` always connects to the fork running with + `-DpipesClientId=N`, so the same N threads correlation across the + process boundary. Use it to gather every log line about one + conversation, regardless of which side emitted them. + +A typical interleaved snippet: + +[source] +---- +INFO [main] 14:23:45,123 [fork] pipesClientId=0 o.a.t.p.c.server.PipesServer received SHUT_DOWN +DEBUG [Thread-3] 14:23:45,124 o.a.t.p.c.async.AsyncProcessor pipesClientId=0, status=PARSE_SUCCESS +---- + +The first line is from inside fork 0 (`[fork]` present). The second is +the parent talking *about* fork 0 (`[fork]` absent, but the same client +id appears in the message body). + If you don't want the pipes-server's output interleaved with your own -- e.g. an embedded use case where the parent is producing its own structured stdout, or a test environment where you want a quieter console -- set the @@ -112,6 +143,55 @@ When the watcher fires, the child exits via `System.exit`, which runs `AbstractExternalProcessParser`'s shutdown hook and cleans up any in-flight external subprocesses. +== Log levels and sensitive data + +Tika Pipes treats `FetchKey` and `EmitKey` values as potentially sensitive -- +they typically contain file paths, URLs, object-store keys, or other identifiers +that may be private to the data owner. The convention across pipes core and the +bundled plugins is: + +[cols="1,3"] +|=== +|Level |What is logged + +|`ERROR` / `WARN` +|Failures, exceptions, and configuration problems. *Never* the literal + `fetchKey`/`emitKey` or any file content. When a failure refers to a + specific document, it is identified by the non-sensitive `FetchEmitTuple.id` + (e.g. `parse exception: id=abc-123`). + +|`INFO` +|Lifecycle events -- server start/stop, plugin start/stop, mode banners, + restart events. Per-document or per-request lines have been demoted from + INFO to DEBUG so production logs stay quiet. + +|`DEBUG` +|Per-document progress and aggregated counts (e.g. `pipesClientId=2, + status=PARSE_SUCCESS`, `successfully emitted N docs`). Safe to enable in + production for troubleshooting; correlation is by `FetchEmitTuple.id` only. + +|`TRACE` +|Verbose per-fetch and per-emit detail including the literal + `fetchKey`/`emitKey` (URL, S3 key, blob path, etc.). Enable only when you + need to correlate a Tika log line back to a specific resource, and accept + that those keys will appear in the log destination. +|=== + +The fetcher and emitter SPIs (`Fetcher.fetch`, `Emitter.emit`, +`StreamEmitter.emit`) receive the literal key but not the tuple id, so +plugin code can only log the literal key. Keeping that at TRACE keeps it +out of any log destination that is configured at DEBUG or higher. + +If you write your own fetcher or emitter plugin, please follow the same +convention: literal keys at TRACE, everything else at DEBUG or above with +no key in the message. + +NOTE: Exception messages thrown out of a fetcher may still include +response-body bytes for HTTP-style fetchers (configurable via +`maxErrMsgSize` on `HttpFetcherConfig`). Those bytes appear in whatever +log catches the thrown exception. Lower `maxErrMsgSize` -- or set it to +zero -- if your responses can contain sensitive data. + == Configuration knobs reference [cols="2,3"] diff --git a/tika-core/src/main/resources/pipes-fork-server-default-log4j2.xml b/tika-core/src/main/resources/pipes-fork-server-default-log4j2.xml deleted file mode 100644 index 9e87d34806..0000000000 --- a/tika-core/src/main/resources/pipes-fork-server-default-log4j2.xml +++ /dev/null @@ -1,32 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. ---> -<Configuration status="WARN"> - <Appenders> - <Console name="console" target="SYSTEM_ERR"> - <PatternLayout - pattern="%-5p [%t] %d{HH:mm:ss,SSS} %c %m%n" /> - </Console> - </Appenders> - <Loggers> - <Root level="info" additivity="false"> - <AppenderRef ref="console" /> - </Root> - </Loggers> -</Configuration> diff --git a/tika-parsers/tika-parsers-extended/tika-parser-ocr-encode-module/src/main/java/org/apache/tika/parser/ocrencode/EncodeOCRParser.java b/tika-parsers/tika-parsers-extended/tika-parser-ocr-encode-module/src/main/java/org/apache/tika/parser/ocrencode/EncodeOCRParser.java index 4174992206..f439d07e00 100644 --- a/tika-parsers/tika-parsers-extended/tika-parser-ocr-encode-module/src/main/java/org/apache/tika/parser/ocrencode/EncodeOCRParser.java +++ b/tika-parsers/tika-parsers-extended/tika-parser-ocr-encode-module/src/main/java/org/apache/tika/parser/ocrencode/EncodeOCRParser.java @@ -229,7 +229,7 @@ public class EncodeOCRParser int processed = counter != null ? counter.get() : config.getMaxImagesToOcr(); - LOG.info("Skipping OCR encode for image because " + LOG.debug("Skipping OCR encode for image because " + "the configured limit of {} images " + "has been reached ({} already processed)", config.getMaxImagesToOcr(), processed); @@ -288,7 +288,7 @@ public class EncodeOCRParser xhtml.endElement(XHTML, "div", "div"); long durationMs = (System.nanoTime() - startTime) / 1_000_000; - LOG.info("OCR encoding - input file size: {} bytes, " + LOG.debug("OCR encoding - input file size: {} bytes, " + "output size: {} characters, " + "time taken: {} ms", fileSize, sink.totalChars(), durationMs); diff --git a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java index 4e314f4322..958584137c 100644 --- a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java +++ b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java @@ -74,7 +74,7 @@ public class GrobidNERecogniser implements NERecogniser { this.available = isServerAlive(restHostUrlStr); } catch (Exception e) { - LOG.info(e.getMessage(), e); + LOG.warn(e.getMessage(), e); } } @@ -88,10 +88,10 @@ public class GrobidNERecogniser implements NERecogniser { if (responseCode == 200) { available = true; } else { - LOG.info("Grobid Quantities REST Server is not running"); + LOG.warn("Grobid Quantities REST Server is not running"); } } catch (Exception e) { - LOG.info("Grobid Quantities REST Server is not running", e); + LOG.warn("Grobid Quantities REST Server is not running", e); } return available; @@ -146,7 +146,7 @@ public class GrobidNERecogniser implements NERecogniser { try { jsonArray = (JSONArray) obj.get(key); } catch (Exception e) { - LOG.info(e.getMessage(), e); + LOG.warn(e.getMessage(), e); } return jsonArray; } @@ -162,7 +162,7 @@ public class GrobidNERecogniser implements NERecogniser { try { jsonObject = (JSONObject) parser.parse(jsonString); } catch (Exception e) { - LOG.info(e.getMessage(), e); + LOG.warn(e.getMessage(), e); } return jsonObject; } @@ -261,7 +261,7 @@ public class GrobidNERecogniser implements NERecogniser { } } } catch (Exception e) { - LOG.info(e.getMessage(), e); + LOG.warn(e.getMessage(), e); } ENTITY_TYPES.clear(); diff --git a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java index 15cdda86cc..2aa200e366 100644 --- a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java +++ b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java @@ -79,7 +79,7 @@ public class NLTKNERecogniser implements NERecogniser { if (responseCode == 200) { available = true; } else { - LOG.info("NLTKRest Server is not running"); + LOG.debug("NLTKRest Server is not running"); } } catch (Exception e) { diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/POIFSContainerDetector.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/POIFSContainerDetector.java index fb716cb1f2..7700267efc 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/POIFSContainerDetector.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/POIFSContainerDetector.java @@ -560,7 +560,7 @@ public class POIFSContainerDetector implements Detector { Path file = stream.getPath(); if (file == null) { - LOG.warn("Stream does not support file access; skipping POIFS detection"); + LOG.debug("Stream does not support file access; skipping POIFS detection"); return Collections.emptySet(); } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/libpst/LibPstParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/libpst/LibPstParser.java index 242380caa3..6211c15bf9 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/libpst/LibPstParser.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/libpst/LibPstParser.java @@ -101,7 +101,8 @@ public class LibPstParser implements Parser, Initializable { throw new TikaException("Timeout exception: " + fileProcessResult.getProcessTimeMillis()); } if (fileProcessResult.getExitValue() != 0) { - LOGGER.warn("libpst bad exit value {}: {}", fileProcessResult.getExitValue(), fileProcessResult.getStderr()); + LOGGER.warn("libpst bad exit value {}", fileProcessResult.getExitValue()); + LOGGER.debug("libpst stderr: {}", fileProcessResult.getStderr()); throw new TikaException("Bad exit value: " + fileProcessResult.getExitValue()); } xhtml.endDocument(); diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/msg/ExtendedMetadataExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/msg/ExtendedMetadataExtractor.java index 877bd796e5..a9f35695b4 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/msg/ExtendedMetadataExtractor.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/msg/ExtendedMetadataExtractor.java @@ -261,7 +261,7 @@ public class ExtendedMetadataExtractor { if (knownClassIDs.containsKey(s)) { return knownClassIDs.get(s); } - LOGGER.warn("Add '{}' to list of known property set IDs", s); + LOGGER.debug("Add '{}' to list of known property set IDs", s); ClassID classID = new ClassID(s); knownClassIDs.put(classID.toUUIDString(), classID); return classID; diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/TikaSheetXMLHandler.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/TikaSheetXMLHandler.java index ec96a40c2e..e95506be5d 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/TikaSheetXMLHandler.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/TikaSheetXMLHandler.java @@ -147,11 +147,8 @@ class TikaSheetXMLHandler extends DefaultHandler { String ref = attributes.getValue("ref"); if (ref != null) { fIsOpen = true; - } else { - if (formulasNotResults) { - LOG.warn("shared formulas not yet supported!"); - } } + // shared-formula reference without a `ref` attribute is not yet supported } else { fIsOpen = true; } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/hwp/HwpTextExtractorV5.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/hwp/HwpTextExtractorV5.java index 7a517f84e0..822ab0e6ef 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/hwp/HwpTextExtractorV5.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/hwp/HwpTextExtractorV5.java @@ -264,7 +264,7 @@ public class HwpTextExtractorV5 implements Serializable { parse(reader, xhtml); } else { - LOG.warn("Unknown Entry '{}'({})", entry.getName(), entry); + LOG.debug("Unknown Entry '{}'({})", entry.getName(), entry); } } } @@ -309,7 +309,7 @@ public class HwpTextExtractorV5 implements Serializable { IOUtils.closeQuietly(input); } } else { - LOG.warn("unknown Entry '{}'({})", entry.getName(), entry); + LOG.debug("unknown Entry '{}'({})", entry.getName(), entry); } } } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/tess4j/ImageDeskew.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/tess4j/ImageDeskew.java index 2c4b919812..c76b280b4a 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/tess4j/ImageDeskew.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/tess4j/ImageDeskew.java @@ -18,14 +18,10 @@ package org.apache.tika.parser.ocr.tess4j; import java.awt.image.BufferedImage; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - /** * Copied and pasted from Tess4j (https://sourceforge.net/projects/tess4j/) */ public class ImageDeskew { - private static final Logger LOG = LoggerFactory.getLogger(ImageDeskew.class); private final BufferedImage cImage; private final int cSteps = 200; @@ -112,7 +108,8 @@ public class ImageDeskew { try { this.cHMatrix[var6]++; } catch (Exception var9) { - LOG.warn("", var9); + // out-of-bounds increments are skipped intentionally; + // the Hough transform tolerates dropped pixels } } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/tess4j/ImageUtil.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/tess4j/ImageUtil.java index fdc3649eff..44711cf00e 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/tess4j/ImageUtil.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/tess4j/ImageUtil.java @@ -23,11 +23,7 @@ import java.awt.geom.AffineTransform; import java.awt.image.BufferedImage; import java.awt.image.WritableRaster; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - public class ImageUtil { - private static final Logger LOG = LoggerFactory.getLogger(ImageUtil.class); public ImageUtil() { } @@ -53,7 +49,8 @@ public class ImageUtil { int var7 = var4 & 255; var8 = (double) var5 * 0.299D + (double) var6 * 0.587D + (double) var7 * 0.114D; } catch (Exception var11) { - LOG.warn("", var11); + // pixel access out of bounds is benign here — the + // algorithm handles it via the default var8=0 path } return var8 < (double) var3; diff --git a/tika-pipes/tika-httpclient-commons/src/main/java/org/apache/tika/client/HttpClientFactory.java b/tika-pipes/tika-httpclient-commons/src/main/java/org/apache/tika/client/HttpClientFactory.java index 5bd9c29753..f7bc3304cc 100644 --- a/tika-pipes/tika-httpclient-commons/src/main/java/org/apache/tika/client/HttpClientFactory.java +++ b/tika-pipes/tika-httpclient-commons/src/main/java/org/apache/tika/client/HttpClientFactory.java @@ -272,7 +272,7 @@ public class HttpClientFactory { sslsf = new SSLConnectionSocketFactory(sslContext, SSLConnectionSocketFactory.getDefaultHostnameVerifier()); } else { - LOG.info("http client does not verify ssl at this point. " + + LOG.warn("http client does not verify ssl at this point. " + "If you need that, please open a ticket."); TrustStrategy acceptingTrustStrategy = (cert, authType) -> true; try { @@ -438,7 +438,7 @@ public class HttpClientFactory { return true; } if (!allowedHosts.isEmpty() && !allowedHosts.contains(uri.getHost())) { - LOG.info("Not allowing external redirect. OriginalUrl={}," + + LOG.warn("Not allowing external redirect. OriginalUrl={}," + " RedirectLocation={}", request.getRequestLine().getUri(), location); return false; } diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesClient.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesClient.java index be1b1e6034..6eeacefa05 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesClient.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesClient.java @@ -382,7 +382,7 @@ public class PipesClient implements Closeable { throw new IOException("Unexpected message type from server: " + msg.type()); } } catch (SocketTimeoutException e) { - LOG.info("clientId={}: Socket timeout exception while waiting for server", pipesClientId, e); + LOG.warn("clientId={}: Socket timeout exception while waiting for server", pipesClientId, e); // Mark for restart - server is stuck on current request and needs to be restarted serverManager.markServerForRestart(); closeConnection(); diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncProcessor.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncProcessor.java index cc5f424af5..69c781eb64 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncProcessor.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/AsyncProcessor.java @@ -416,8 +416,7 @@ public class AsyncProcessor implements Closeable { long start = System.currentTimeMillis(); try { result = pipesClient.process(t); - //TODO -- drop this back to debug or even trace once we have stability in ci - LOG.info("pipesClientId={}, status={}", pipesClient.getPipesClientId(), result.status()); + LOG.debug("pipesClientId={}, status={}", pipesClient.getPipesClientId(), result.status()); } catch (IOException e) { LOG.warn("pipesClientId={} crash", pipesClient.getPipesClientId(), e); result = PipesResults.UNSPECIFIED_CRASH; diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/EmitHandler.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/EmitHandler.java index aeede97346..78a21bfa23 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/EmitHandler.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/EmitHandler.java @@ -107,10 +107,10 @@ class EmitHandler { emitter = emitterManager.getEmitter(emitKey.getEmitterId()); } catch (org.apache.tika.pipes.api.emitter.EmitterNotFoundException e) { String noEmitterMsg = getNoEmitterMsg(taskId); - LOG.info(noEmitterMsg); + LOG.warn(noEmitterMsg); return new PipesResult(PipesResult.RESULT_STATUS.EMITTER_NOT_FOUND, noEmitterMsg); } catch (IOException | TikaException e) { - LOG.info("Couldn't initialize emitter for task id '" + taskId + "'", e); + LOG.warn("Couldn't initialize emitter for task id '" + taskId + "'", e); return new PipesResult(PipesResult.RESULT_STATUS.EMITTER_INITIALIZATION_EXCEPTION, ExceptionUtils.getStackTrace(e)); } try { @@ -124,7 +124,7 @@ class EmitHandler { emitter.emit(emitKey.getEmitKey(), parseData.getMetadataList(), parseContext); } } catch (IOException e) { - LOG.info("emit exception", e); + LOG.warn("emit exception", e); String msg = ExceptionUtils.getStackTrace(e); //for now, we're hiding the parse exception if there was also an emit exception return new PipesResult(PipesResult.RESULT_STATUS.EMIT_EXCEPTION, msg); @@ -134,7 +134,7 @@ class EmitHandler { try { passbackFilter.filter(parseData.metadataList); } catch (TikaException e) { - LOG.info("problem filtering for pass back", e); + LOG.warn("problem filtering for pass back", e); } if (StringUtils.isBlank(parseExceptionStack)) { return new PipesResult(PipesResult.RESULT_STATUS.EMIT_SUCCESS_PASSBACK, new EmitDataImpl(emitKey.getEmitKey(), parseData.metadataList)); @@ -250,7 +250,7 @@ class EmitHandler { try { parseData.filter(filter, parseContext); } catch (TikaException e) { - LOG.info("failed to filter metadata list", e); + LOG.warn("failed to filter metadata list", e); } } diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/FetchHandler.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/FetchHandler.java index 915ba5b057..c14ee24656 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/FetchHandler.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/FetchHandler.java @@ -60,10 +60,10 @@ class FetchHandler { return new FetcherOrResult(fetcherManager.getFetcher(t.getFetchKey().getFetcherId()), null); } catch (IllegalArgumentException e) { String noFetcherMsg = getNoFetcherMsg(t.getFetchKey().getFetcherId()); - LOG.info(noFetcherMsg); + LOG.warn(noFetcherMsg); return new FetcherOrResult(null, new PipesResult(PipesResult.RESULT_STATUS.FETCHER_NOT_FOUND, noFetcherMsg)); } catch (IOException | TikaException e) { - LOG.info("Couldn't initialize fetcher for fetch id={}", t.getId(), e); + LOG.warn("Couldn't initialize fetcher for fetch id={}", t.getId(), e); return new FetcherOrResult(null, new PipesResult(PipesResult.RESULT_STATUS.FETCHER_INITIALIZATION_EXCEPTION, ExceptionUtils.getStackTrace(e))); } diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java index 8916acbafe..cd02d99767 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java @@ -142,7 +142,7 @@ class ParseHandler { parseContext.set(SkipContainerDocumentDigest.class, SkipContainerDocumentDigest.INSTANCE); } catch (IOException e) { - LOG.info("problem digesting: " + t.getId(), e); + LOG.warn("problem digesting: " + t.getId(), e); } } // Signal to detectors that parsing will follow, so they can prepare @@ -154,7 +154,7 @@ class ParseHandler { EmbeddedDocumentUtil.normalizeMediaType(mt.toString())); metadata.set(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE, mt.toString()); } catch (IOException e) { - LOG.info("problem detecting: " + t.getId(), e); + LOG.warn("problem detecting: " + t.getId(), e); } UnpackConfig unpackConfig = parseContext.get(UnpackConfig.class); if (unpackConfig != null && @@ -163,7 +163,7 @@ class ParseHandler { try (InputStream is = Files.newInputStream(tis.getPath())) { unpackHandler.add(0, metadata, is); } catch (IOException e) { - LOG.info("problem reading source file into embedded document byte store", e); + LOG.warn("problem reading source file into embedded document byte store", e); } } } @@ -201,14 +201,14 @@ class ParseHandler { try { recursiveParserWrapper.parse(stream, handler, metadata, parseContext); } catch (SAXException e) { - LOG.info("sax problem:" + fetchEmitTuple.getId(), e); + LOG.warn("sax problem:" + fetchEmitTuple.getId(), e); } catch (EncryptedDocumentException e) { - LOG.info("encrypted document:" + fetchEmitTuple.getId(), e); + LOG.warn("encrypted document:" + fetchEmitTuple.getId(), e); } catch (SecurityException e) { - LOG.info("security exception:" + fetchEmitTuple.getId(), e); + LOG.warn("security exception:" + fetchEmitTuple.getId(), e); throw e; } catch (Exception e) { - LOG.info("parse exception: " + fetchEmitTuple.getId(), e); + LOG.warn("parse exception: " + fetchEmitTuple.getId(), e); } finally { if (LOG.isTraceEnabled()) { LOG.trace("timer -- parse only time: {} ms", System.currentTimeMillis() - start); @@ -242,19 +242,19 @@ class ParseHandler { autoDetectParser.parse(stream, handler, metadata, parseContext); } catch (SAXException e) { containerException = ExceptionUtils.getStackTrace(e); - LOG.info("sax problem:" + fetchEmitTuple.getId(), e); + LOG.warn("sax problem:" + fetchEmitTuple.getId(), e); if (WriteLimitReachedException.isWriteLimitReached(e)) { writeLimitReached = true; } } catch (EncryptedDocumentException e) { containerException = ExceptionUtils.getStackTrace(e); - LOG.info("encrypted document:" + fetchEmitTuple.getId(), e); + LOG.warn("encrypted document:" + fetchEmitTuple.getId(), e); } catch (SecurityException e) { - LOG.info("security exception:" + fetchEmitTuple.getId(), e); + LOG.warn("security exception:" + fetchEmitTuple.getId(), e); throw e; } catch (Exception e) { containerException = ExceptionUtils.getStackTrace(e); - LOG.info("parse exception: " + fetchEmitTuple.getId(), e); + LOG.warn("parse exception: " + fetchEmitTuple.getId(), e); } finally { metadata.add(TikaCoreProperties.TIKA_CONTENT, handler.toString()); metadata.set(TikaCoreProperties.TIKA_CONTENT_HANDLER_TYPE, diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java index e84e0c9048..06f1f98ea2 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java @@ -131,7 +131,7 @@ public class PipesServer implements AutoCloseable { public static PipesServer load(int port, Path tikaConfigPath) throws Exception { String pipesClientId = System.getProperty("pipesClientId", "unknown"); - LOG.debug("pipesClientId={}: connecting to client on port={}", pipesClientId, port); + LOG.debug("connecting to client on port={}", port); Socket socket = new Socket(); socket.connect(new InetSocketAddress(InetAddress.getLoopbackAddress(), port), PipesClient.SOCKET_CONNECT_TIMEOUT_MS); socket.setTcpNoDelay(true); // Disable Nagle's algorithm to avoid ~40ms delays on small writes @@ -152,7 +152,7 @@ public class PipesServer implements AutoCloseable { MetadataWriteLimiterFactory metadataWriteLimiterFactory = tikaLoader.loadParseContext().get(MetadataWriteLimiterFactory.class); PipesServer pipesServer = new PipesServer(pipesClientId, tikaLoader, pipesConfig, socket, dis, dos, metadataFilter, contentHandlerFactory, metadataWriteLimiterFactory); pipesServer.initializeResources(); - LOG.debug("pipesClientId={}: PipesServer loaded and ready", pipesClientId); + LOG.debug("PipesServer loaded and ready"); return pipesServer; } catch (Exception e) { LOG.error("Failed to start up", e); @@ -231,14 +231,14 @@ public class PipesServer implements AutoCloseable { int port = Integer.parseInt(args[0]); Path tikaConfig = Paths.get(args[1]); String pipesClientId = System.getProperty("pipesClientId", "unknown"); - LOG.debug("pipesClientId={}: starting pipes server on port={}", pipesClientId, port); + LOG.debug("starting pipes server on port={}", port); try (PipesServer server = PipesServer.load(port, tikaConfig)) { server.mainLoop(); } catch (Throwable t) { - LOG.error("pipesClientId={}: crashed", pipesClientId, t); + LOG.error("crashed", t); throw t; } finally { - LOG.debug("pipesClientId={}: server shutting down", pipesClientId); + LOG.debug("server shutting down"); } } } @@ -324,11 +324,11 @@ public class PipesServer implements AutoCloseable { try { PipesMessage.ready().write(output); } catch (IOException e) { - LOG.error("pipesClientId={}: failed to send READY", pipesClientId, e); + LOG.error("failed to send READY", e); exit(PipesMessageType.UNSPECIFIED_CRASH.getExitCode().orElse(19)); return; } - LOG.debug("pipesClientId={}: sent READY, entering main loop", pipesClientId); + LOG.debug("sent READY, entering main loop"); ArrayBlockingQueue<Metadata> intermediateResult = new ArrayBlockingQueue<>(1); //main loop @@ -340,8 +340,7 @@ public class PipesServer implements AutoCloseable { } catch (SocketTimeoutException e) { // Socket timeout while idle is the normal inactivity shutdown path. // Exit cleanly — PipesClient will restart the server if needed. - LOG.info("pipesClientId={}: socket timeout while waiting for task, shutting down", - pipesClientId); + LOG.info("socket timeout while waiting for task, shutting down"); try { close(); } catch (Exception ex) { @@ -350,7 +349,7 @@ public class PipesServer implements AutoCloseable { System.exit(0); return; // unreachable, but needed for compilation } - LOG.trace("pipesClientId={}: received message type={}", pipesClientId, msg.type()); + LOG.trace("received message type={}", msg.type()); switch (msg.type()) { case PING: @@ -651,7 +650,7 @@ public class PipesServer implements AutoCloseable { } private void handleShutDown() { - LOG.info("pipesClientId={}: received SHUT_DOWN, shutting down gracefully", pipesClientId); + LOG.info("received SHUT_DOWN, shutting down gracefully"); try { close(); } catch (Exception e) { diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java index 9354517c7a..bd1b54ab08 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java @@ -480,7 +480,7 @@ class PipesWorker implements Callable<PipesResult> { try { localContext = setupParseContext(); } catch (IOException e) { - LOG.info("fetcher initialization exception id={}", fetchEmitTuple.getId(), e); + LOG.warn("fetcher initialization exception id={}", fetchEmitTuple.getId(), e); return new ParseDataOrPipesResult(null, new PipesResult(PipesResult.RESULT_STATUS.FETCHER_INITIALIZATION_EXCEPTION, ExceptionUtils.getStackTrace(e))); } @@ -509,7 +509,7 @@ class PipesWorker implements Callable<PipesResult> { LOG.error("security exception id={}", fetchEmitTuple.getId(), e); throw e; } catch (TikaException | IOException e) { - LOG.info("fetch exception id={}", fetchEmitTuple.getId(), e); + LOG.warn("fetch exception id={}", fetchEmitTuple.getId(), e); return new ParseDataOrPipesResult(null, new PipesResult(PipesResult.RESULT_STATUS.UNSPECIFIED_CRASH, ExceptionUtils.getStackTrace(e))); } diff --git a/tika-pipes/tika-pipes-core/src/main/resources/pipes-fork-server-default-log4j2.xml b/tika-pipes/tika-pipes-core/src/main/resources/pipes-fork-server-default-log4j2.xml index 9e87d34806..dbf51846cd 100644 --- a/tika-pipes/tika-pipes-core/src/main/resources/pipes-fork-server-default-log4j2.xml +++ b/tika-pipes/tika-pipes-core/src/main/resources/pipes-fork-server-default-log4j2.xml @@ -21,8 +21,30 @@ <Appenders> <Console name="console" target="SYSTEM_ERR"> <PatternLayout - pattern="%-5p [%t] %d{HH:mm:ss,SSS} %c %m%n" /> + pattern="%-5p [%t] %d{HH:mm:ss,SSS} [fork] pipesClientId=${sys:pipesClientId:-?} %c %m%n" /> </Console> + + <!-- + Per-fork file logging — to give each PipesServer fork its own log file + (e.g. pipes-server-0.log, pipes-server-1.log, ...), uncomment this + RollingFile appender and switch the Root logger's AppenderRef below + from "console" to "forkFile" (or list both for tee'd output). + + The ${sys:pipesClientId} substitution reads the -DpipesClientId=N + property set by the parent when spawning this fork, so the file name + and the log lines self-identify which fork they came from. + + <RollingFile name="forkFile" + fileName="logs/pipes-server-${sys:pipesClientId:-unknown}.log" + filePattern="logs/pipes-server-${sys:pipesClientId:-unknown}-%d{yyyy-MM-dd}-%i.log.gz"> + <PatternLayout pattern="%-5p [%t] %d{HH:mm:ss,SSS} %c %m%n" /> + <Policies> + <TimeBasedTriggeringPolicy /> + <SizeBasedTriggeringPolicy size="50MB" /> + </Policies> + <DefaultRolloverStrategy max="10" /> + </RollingFile> + --> </Appenders> <Loggers> <Root level="info" additivity="false"> diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java index e93ac5701b..a1e136939a 100644 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/main/java/org/apache/tika/pipes/fetcher/atlassianjwt/AtlassianJwtFetcher.java @@ -201,7 +201,7 @@ public class AtlassianJwtFetcher extends AbstractTikaExtension implements Fetche updateMetadata(get.getURI().toString(), response, context, metadata); int code = response.getStatusLine().getStatusCode(); - LOG.info("Fetch id {} status code {}", get.getURI(), code); + LOG.trace("Fetch fetchKey={} status code {}", get.getURI(), code); if (code < 200 || code > 299) { throw new IOException("bad status code: " + code + " :: " + responseToString(response)); } @@ -210,7 +210,7 @@ public class AtlassianJwtFetcher extends AbstractTikaExtension implements Fetche } } catch (ConnectionClosedException e) { if (retryOnBadLength && e.getMessage() != null && e.getMessage().contains("Premature end of Content-Length delimited message")) { - LOG.warn("premature end of content-length delimited message; retrying with content compression disabled for {}", get.getURI()); + LOG.warn("premature end of content-length delimited message; retrying with content compression disabled"); return execute(get, metadata, noCompressHttpClient, false); } throw e; diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcher.java b/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcher.java index 423445e056..27534a4af8 100644 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcher.java +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/main/java/org/apache/tika/pipes/fetcher/azblob/AZBlobFetcher.java @@ -96,7 +96,7 @@ public class AZBlobFetcher extends AbstractTikaExtension implements Fetcher { public TikaInputStream fetch(String fetchKey, Metadata metadata, ParseContext parseContext) throws TikaException, IOException { - LOGGER.debug("about to fetch fetchkey={} from endpoint ({})", fetchKey, config.getEndpoint()); + LOGGER.trace("about to fetch fetchkey={} from endpoint ({})", fetchKey, config.getEndpoint()); try { BlobClient blobClient = blobClientFactory.getClient(fetchKey); diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/main/java/org/apache/tika/pipes/iterator/csv/CSVPipesIterator.java b/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/main/java/org/apache/tika/pipes/iterator/csv/CSVPipesIterator.java index 317db26e13..89c3fb89c5 100644 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/main/java/org/apache/tika/pipes/iterator/csv/CSVPipesIterator.java +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/main/java/org/apache/tika/pipes/iterator/csv/CSVPipesIterator.java @@ -111,7 +111,7 @@ public class CSVPipesIterator extends PipesIteratorBase { String fetchKey = record.get(fetchEmitKeyIndices.fetchKeyIndex); String emitKey = record.get(fetchEmitKeyIndices.emitKeyIndex); if (StringUtils.isBlank(fetchKey) && !StringUtils.isBlank(fetcherId)) { - LOGGER.debug("Fetcher specified ({}), but no fetchkey was found in ({})", fetcherId, record); + LOGGER.debug("Fetcher specified ({}), but no fetchkey was found in record id={}", fetcherId, id); } if (StringUtils.isBlank(emitKey)) { throw new IOException("emitKey must not be blank in :" + record); diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/main/java/org/apache/tika/pipes/emitter/es/ESEmitter.java b/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/main/java/org/apache/tika/pipes/emitter/es/ESEmitter.java index 4fdc58ae97..31315a8c1a 100644 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/main/java/org/apache/tika/pipes/emitter/es/ESEmitter.java +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/main/java/org/apache/tika/pipes/emitter/es/ESEmitter.java @@ -79,7 +79,7 @@ public class ESEmitter extends AbstractEmitter { try { LOG.debug("about to emit {} docs", emitData.size()); esClient.emitDocuments(emitData); - LOG.info("successfully emitted {} docs", emitData.size()); + LOG.debug("successfully emitted {} docs", emitData.size()); } catch (TikaClientException e) { LOG.warn("problem emitting docs", e); throw new IOException(e.getMessage(), e); @@ -97,7 +97,7 @@ public class ESEmitter extends AbstractEmitter { LOG.debug("about to emit one doc with {} metadata entries", metadataList.size()); esClient.emitDocument(emitKey, metadataList); - LOG.info("successfully emitted one doc"); + LOG.debug("successfully emitted one doc"); } catch (TikaClientException e) { LOG.warn("problem emitting doc", e); throw new IOException("failed to add document", e); diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java b/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java index 32887a047e..dfa422a568 100644 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/main/java/org/apache/tika/pipes/fetcher/gcs/GCSFetcher.java @@ -76,7 +76,7 @@ public class GCSFetcher extends AbstractTikaExtension implements Fetcher { public TikaInputStream fetch(String fetchKey, Metadata metadata, ParseContext parseContext) throws TikaException, IOException { - LOGGER.debug("about to fetch fetchkey={} from bucket ({})", fetchKey, config.getBucket()); + LOGGER.trace("about to fetch fetchkey={} from bucket ({})", fetchKey, config.getBucket()); try { Blob blob = storage.get(BlobId.of(config.getBucket(), fetchKey)); diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java b/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java index e059a53565..a6251cbb2e 100644 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java @@ -291,7 +291,7 @@ public class HttpFetcher extends AbstractTikaExtension implements Fetcher, Range int code = response .getStatusLine() .getStatusCode(); - LOG.info("Fetch id {} status code {}", get.getURI(), code); + LOG.trace("Fetch fetchKey={} status code {}", get.getURI(), code); if (code < 200 || code > 299) { throw new IOException("bad status code: " + code + " :: " + responseToString(response)); } @@ -307,8 +307,7 @@ public class HttpFetcher extends AbstractTikaExtension implements Fetcher, Range .contains("Premature " + "end of " + "Content-Length delimited message")) { //one trigger for this is if the server sends the uncompressed length //and then compresses the stream. See HTTPCLIENT-2176 - LOG.warn("premature end of content-length delimited message; retrying with " + "content compression" + - " disabled for {}", get.getURI()); + LOG.warn("premature end of content-length delimited message; retrying with content compression disabled"); return execute(get, metadata, noCompressHttpClient, false); } throw e; diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/main/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitter.java b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/main/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitter.java index 86ea09edee..0d9dece048 100644 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/main/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitter.java +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/main/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitter.java @@ -217,11 +217,11 @@ public class JDBCEmitter extends AbstractEmitter implements Closeable { } else { insertAll(emitKey, metadataList); } - if (LOGGER.isDebugEnabled()) { + if (LOGGER.isTraceEnabled()) { long start = System.currentTimeMillis(); insertStatement.executeBatch(); connection.commit(); - LOGGER.debug("took {}ms to insert row for key: {}", System.currentTimeMillis() - start, emitKey); + LOGGER.trace("took {}ms to insert row for key: {}", System.currentTimeMillis() - start, emitKey); } else { insertStatement.executeBatch(); connection.commit(); diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/main/java/org/apache/tika/pipes/iterator/jdbc/JDBCPipesIterator.java b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/main/java/org/apache/tika/pipes/iterator/jdbc/JDBCPipesIterator.java index be0fccfdfa..d638c30347 100644 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/main/java/org/apache/tika/pipes/iterator/jdbc/JDBCPipesIterator.java +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/main/java/org/apache/tika/pipes/iterator/jdbc/JDBCPipesIterator.java @@ -139,11 +139,11 @@ public class JDBCPipesIterator extends PipesIteratorBase { try { processRow(fetcherId, emitterId, headers, fetchEmitKeyIndices, rs); } catch (SQLException e) { - LOGGER.warn("Failed to insert: " + rs, e); + LOGGER.warn("Failed to insert row", e); } rowCount++; if (rowCount % 1000 == 0) { - LOGGER.info("added " + rowCount + " rows to the queue"); + LOGGER.debug("added " + rowCount + " rows to the queue"); } } } @@ -190,7 +190,10 @@ public class JDBCPipesIterator extends PipesIteratorBase { if (i == fetchEmitKeyIndices.fetchKeyIndex) { fetchKey = getString(i, rs); if (StringUtils.isBlank(fetchKey)) { - LOGGER.debug("fetchKey is empty for record " + toString(rs)); + LOGGER.debug("fetchKey is empty for a record (enable TRACE on this class for row contents)"); + if (LOGGER.isTraceEnabled()) { + LOGGER.trace("fetchKey is empty for record {}", toString(rs)); + } } fetchKey = (fetchKey == null) ? "" : fetchKey; isUsed = true; @@ -198,7 +201,10 @@ public class JDBCPipesIterator extends PipesIteratorBase { if (i == fetchEmitKeyIndices.emitKeyIndex) { emitKey = getString(i, rs); if (StringUtils.isBlank(emitKey)) { - LOGGER.debug("emitKey is empty for record " + toString(rs)); + LOGGER.debug("emitKey is empty for a record (enable TRACE on this class for row contents)"); + if (LOGGER.isTraceEnabled()) { + LOGGER.trace("emitKey is empty for record {}", toString(rs)); + } } emitKey = (emitKey == null) ? "" : emitKey; isUsed = true; @@ -206,7 +212,10 @@ public class JDBCPipesIterator extends PipesIteratorBase { if (i == fetchEmitKeyIndices.idIndex) { id = getString(i, rs); if (StringUtils.isBlank(id)) { - LOGGER.warn("id is empty for record " + toString(rs)); + LOGGER.warn("id is empty for a record (enable TRACE on this class for row contents)"); + if (LOGGER.isTraceEnabled()) { + LOGGER.trace("id is empty for record {}", toString(rs)); + } } id = (id == null) ? "" : id; isUsed = true; diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-json/src/main/java/org/apache/tika/pipes/pipesiterator/json/JsonPipesIterator.java b/tika-pipes/tika-pipes-plugins/tika-pipes-json/src/main/java/org/apache/tika/pipes/pipesiterator/json/JsonPipesIterator.java index 8ed44719a0..bf7ceecb26 100644 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-json/src/main/java/org/apache/tika/pipes/pipesiterator/json/JsonPipesIterator.java +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-json/src/main/java/org/apache/tika/pipes/pipesiterator/json/JsonPipesIterator.java @@ -64,7 +64,7 @@ public class JsonPipesIterator extends PipesIteratorBase { while (line != null) { try (Reader r = new StringReader(line)) { FetchEmitTuple t = JsonFetchEmitTuple.fromJson(r); - LOGGER.info("from json: " + t); + LOGGER.debug("from json: id={}", t.getId()); tryToAdd(t); line = reader.readLine(); } diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/main/java/org/apache/tika/pipes/emitter/kafka/KafkaEmitter.java b/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/main/java/org/apache/tika/pipes/emitter/kafka/KafkaEmitter.java index 9d9b3a00c9..7807bad73f 100644 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/main/java/org/apache/tika/pipes/emitter/kafka/KafkaEmitter.java +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/main/java/org/apache/tika/pipes/emitter/kafka/KafkaEmitter.java @@ -132,7 +132,7 @@ public class KafkaEmitter extends AbstractEmitter { throw new IOException("metadata list must not be null or of size 0"); } for (Metadata metadata : metadataList) { - LOGGER.debug("about to emit to target topic: ({}) path:({})", config.topic(), emitKey); + LOGGER.trace("about to emit to target topic: ({}) path:({})", config.topic(), emitKey); Map<String, Object> fields = new HashMap<>(); for (String n : metadata.names()) { diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchEmitter.java b/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchEmitter.java index f7040b34c3..fa6b5a82ce 100644 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchEmitter.java +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchEmitter.java @@ -66,7 +66,7 @@ public class OpenSearchEmitter extends AbstractEmitter { try { LOG.debug("about to emit {} docs", emitData.size()); openSearchClient.emitDocuments(emitData); - LOG.info("successfully emitted {} docs", emitData.size()); + LOG.debug("successfully emitted {} docs", emitData.size()); } catch (TikaClientException e) { LOG.warn("problem emitting docs", e); throw new IOException(e.getMessage(), e); @@ -81,9 +81,9 @@ public class OpenSearchEmitter extends AbstractEmitter { return; } try { - LOG.warn("about to emit one doc {}", metadataList.size()); + LOG.debug("about to emit one doc {}", metadataList.size()); openSearchClient.emitDocument(emitKey, metadataList); - LOG.info("successfully emitted one doc"); + LOG.debug("successfully emitted one doc"); } catch (TikaClientException e) { LOG.warn("problem emitting doc", e); throw new IOException("failed to add document", e); diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java b/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java index 35503aad7a..4d4ba92afb 100644 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java @@ -163,12 +163,12 @@ public class S3Fetcher extends AbstractTikaExtension implements Fetcher, RangeFe String prefix = config.getPrefix(); String theFetchKey = StringUtils.isBlank(prefix) ? fetchKey : prefix + fetchKey; - if (LOGGER.isDebugEnabled()) { + if (LOGGER.isTraceEnabled()) { if (startRange > -1) { - LOGGER.debug("about to fetch fetchkey={} (start={} end={}) from bucket ({})", + LOGGER.trace("about to fetch fetchkey={} (start={} end={}) from bucket ({})", theFetchKey, startRange, endRange, config.getBucket()); } else { - LOGGER.debug("about to fetch fetchkey={} from bucket ({})", + LOGGER.trace("about to fetch fetchkey={} from bucket ({})", theFetchKey, config.getBucket()); } } diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/main/java/org/apache/tika/pipes/iterator/solr/SolrPipesIterator.java b/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/main/java/org/apache/tika/pipes/iterator/solr/SolrPipesIterator.java index 993b4b6467..1cffca33f9 100644 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/main/java/org/apache/tika/pipes/iterator/solr/SolrPipesIterator.java +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/main/java/org/apache/tika/pipes/iterator/solr/SolrPipesIterator.java @@ -160,7 +160,7 @@ public class SolrPipesIterator extends PipesIteratorBase { for (String nextField : allFields) { metadata.add(nextField, (String) sd.getFieldValue(nextField)); } - LOGGER.info("iterator doc: {}, idField={}, fetchKey={}", sd, config.getIdField(), fetchKey); + LOGGER.debug("iterator doc: idField={}", config.getIdField()); ParseContext parseContext = new ParseContext(); tryToAdd(new FetchEmitTuple(fetchKey, new FetchKey(fetcherId, fetchKey), new EmitKey(emitterId, emitKey), new Metadata(), parseContext, FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT)); diff --git a/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientCLI.java b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientCLI.java index 0da685418e..2034c7cc58 100644 --- a/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientCLI.java +++ b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientCLI.java @@ -142,12 +142,10 @@ public class TikaClientCLI { return 1l; } try { - LOGGER.debug("about to parse: {}", t.getFetchKey()); + LOGGER.debug("about to parse: id={}", t.getId()); client.parse(t); } catch (IOException | TikaException e) { - LOGGER.warn(t - .getFetchKey() - .toString(), e); + LOGGER.warn("parse failure id={}", t.getId(), e); } } } diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncResource.java index 60b675b879..4a2efcf909 100644 --- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncResource.java +++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncResource.java @@ -127,14 +127,14 @@ public class AsyncResource { try { boolean offered = asyncProcessor.offer(request.getTuples(), maxQueuePauseMs); if (offered) { - LOG.info("accepted {} tuples, capacity={}", request + LOG.debug("accepted {} tuples, capacity={}", request .getTuples() .size(), asyncProcessor.getCapacity()); return ok(request .getTuples() .size()); } else { - LOG.info("throttling {} tuples, capacity={}", request + LOG.debug("throttling {} tuples, capacity={}", request .getTuples() .size(), asyncProcessor.getCapacity()); return throttle(request @@ -142,7 +142,7 @@ public class AsyncResource { .size()); } } catch (OfferLargerThanQueueSize e) { - LOG.info("throttling {} tuples, capacity={}", request + LOG.debug("throttling {} tuples, capacity={}", request .getTuples() .size(), asyncProcessor.getCapacity()); return throttle(request diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/DetectorResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/DetectorResource.java index 6c3d3e254a..cd34bff138 100644 --- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/DetectorResource.java +++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/DetectorResource.java @@ -55,7 +55,7 @@ public class DetectorResource { Metadata met = Metadata.newInstance(parseContext); String filename = TikaResource.detectFilename(httpHeaders.getRequestHeaders()); - LOG.info("Detecting media type for Filename: {}", filename); + LOG.debug("Detecting media type for Filename: {}", filename); met.add(TikaCoreProperties.RESOURCE_NAME_KEY, filename); long taskId = serverStatus.start(ServerStatus.TASK.DETECT, filename); diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/LanguageResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/LanguageResource.java index 6b48d18d9f..4f14edbeba 100644 --- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/LanguageResource.java +++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/LanguageResource.java @@ -81,7 +81,7 @@ public class LanguageResource { .loadModels() .detect(string); String detectedLang = toIso1(language.getLanguage()); - LOG.info("Detecting language for incoming resource: [{}]", detectedLang); + LOG.debug("Detecting language for incoming resource: [{}]", detectedLang); return detectedLang; } diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/MetadataResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/MetadataResource.java index 6a7e5a5b2d..123a780e18 100644 --- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/MetadataResource.java +++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/MetadataResource.java @@ -149,7 +149,7 @@ public class MetadataResource { defaultErrorResponse = Response.Status.NOT_FOUND; success = true; } catch (Exception e) { - LOG.info("Failed to process field {}", field, e); + LOG.warn("Failed to process field {}", field, e); } if (success == false || metadata.get(field) == null) { diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TranslateResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TranslateResource.java index e74b5ff287..d90a48d763 100644 --- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TranslateResource.java +++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TranslateResource.java @@ -91,11 +91,11 @@ public class TranslateResource { } private String doTranslate(String content, String translator, String sLang, String dLang) throws TikaException, IOException { - LOG.info("Using translator: [{}]: src: [{}]: dest: [{}]", translator, sLang, dLang); + LOG.debug("Using translator: [{}]: src: [{}]: dest: [{}]", translator, sLang, dLang); Translator translate = byClassName(translator); if (translate == null) { translate = this.defaultTranslator; - LOG.info("Using default translator"); + LOG.debug("Using default translator"); } long taskId = serverStatus.start(ServerStatus.TASK.TRANSLATE, null); try { @@ -117,7 +117,7 @@ public class TranslateResource { } String sLang = language.getLanguage(); - LOG.info("LanguageIdentifier: detected source lang: [{}]", sLang); + LOG.debug("LanguageIdentifier: detected source lang: [{}]", sLang); return doTranslate(content, translator, sLang, dLang); }
