This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4034 in repository https://gitbox.apache.org/repos/asf/tika.git
commit d4d6333885638c0e1fbcb75342929c0801abdb1e Author: tallison <talli...@apache.org> AuthorDate: Wed May 10 10:19:58 2023 -0400 TIKA-4034 -- enable configuration of pretty print in FileSystemEmitter --- CHANGES.txt | 2 ++ .../test/java/org/apache/tika/cli/TikaCLITest.java | 22 ++++++++++++++++++++-- .../tika/pipes/emitter/fs/FileSystemEmitter.java | 12 +++++++++++- .../metadata/serialization/JsonMetadataList.java | 18 +++++++++++++++--- 4 files changed, 48 insertions(+), 6 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index cfe44d844..4692e6b2c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,5 +1,7 @@ Release 2.8.1 - ???? + * Allow pretty printing in FileSystemEmitter (TIKA-4034). + Release 2.8.0 - 5/9/2023 diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java index 03544163d..170ccb500 100644 --- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java +++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java @@ -30,6 +30,7 @@ import java.net.URI; import java.nio.file.Files; import java.nio.file.Path; +import org.apache.commons.io.FileUtils; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; @@ -71,8 +72,8 @@ public class TikaCLITest { "</basePath>" + "</fetcher>" + "</fetchers>" + "<emitters>" + "<emitter class=\"org.apache.tika.pipes.emitter.fs.FileSystemEmitter\">" + "<name>fse</name>" + "<basePath>" + ASYNC_OUTPUT_DIR.toAbsolutePath() + - "</basePath>" + "</emitter>" + "</emitters>" + "<pipesIterator " + - "class=\"org.apache.tika.pipes.pipesiterator.fs.FileSystemPipesIterator\">" + + "</basePath>" + "<prettyPrint>true</prettyPrint>" + "</emitter>" + "</emitters>" + + "<pipesIterator class=\"org.apache.tika.pipes.pipesiterator.fs.FileSystemPipesIterator\">" + "<basePath>" + TEST_DATA_FILE.getAbsolutePath() + "</basePath>" + "<fetcherName>fsf</fetcherName>" + "<emitterName>fse</emitterName>" + "</pipesIterator>" + "</properties>"; @@ -595,12 +596,29 @@ public class TikaCLITest { int json = 0; for (File f : ASYNC_OUTPUT_DIR.toFile().listFiles()) { if (f.getName().endsWith(".json")) { + //check first file for pretty print + if (json == 0) { + checkForPrettyPrint(f); + } json++; } } assertEquals(17, json); } + private void checkForPrettyPrint(File f) throws IOException { + String json = FileUtils.readFileToString(f, UTF_8); + int previous = json.indexOf("Content-Length"); + assertTrue(previous > -1); + for (String k : new String[]{"Content-Type", "dc:creator", + "dcterms:created", "dcterms:modified", "X-TIKA:content\""}) { + int i = json.indexOf(k); + assertTrue( i > -1, "should have found " + k); + assertTrue(i > previous, "bad order: " + k + " at " + i + " not less than " + previous); + previous = i; + } + } + /** * reset outContent and errContent if they are not empty diff --git a/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java b/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java index 47c517721..a90c5e509 100644 --- a/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java +++ b/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java @@ -56,6 +56,9 @@ import org.apache.tika.pipes.emitter.TikaEmitterException; * options ('skip', 'replace', 'exception') * default is 'exception' --> * <param name="onExists" type="string">skip</param> + * <!-- optional; whether or not to pretty print the output + * default is false --> + * <param name="prettyPrint" type="boolean">true</param> * </params> * </emitter> * </emitters> @@ -67,6 +70,8 @@ public class FileSystemEmitter extends AbstractEmitter implements StreamEmitter private String fileExtension = "json"; private ON_EXISTS onExists = ON_EXISTS.EXCEPTION; + private boolean prettyPrint = false; + @Override public void emit(String emitKey, List<Metadata> metadataList) throws IOException, TikaEmitterException { @@ -88,7 +93,7 @@ public class FileSystemEmitter extends AbstractEmitter implements StreamEmitter Files.createDirectories(output.getParent()); } try (Writer writer = Files.newBufferedWriter(output, StandardCharsets.UTF_8)) { - JsonMetadataList.toJson(metadataList, writer); + JsonMetadataList.toJson(metadataList, writer, prettyPrint); } } @@ -133,6 +138,11 @@ public class FileSystemEmitter extends AbstractEmitter implements StreamEmitter } } + @Field + public void setPrettyPrint(boolean prettyPrint) { + this.prettyPrint = prettyPrint; + } + @Override public void emit(String path, InputStream inputStream, Metadata userMetadata) throws IOException, TikaEmitterException { diff --git a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataList.java b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataList.java index 1b8968dbf..e008e0564 100644 --- a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataList.java +++ b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonMetadataList.java @@ -40,26 +40,38 @@ public class JsonMetadataList { * * @param metadataList list of metadata to write * @param writer writer + * @param prettyPrint whether or not to pretty print the output * @throws org.apache.tika.exception.TikaException if there is an IOException during writing */ - public static void toJson(List<Metadata> metadataList, Writer writer) throws IOException { + public static void toJson(List<Metadata> metadataList, Writer writer, boolean prettyPrint) throws IOException { if (metadataList == null) { writer.write("null"); return; } try (JsonGenerator jsonGenerator = new JsonFactory() .createGenerator(new CloseShieldWriter(writer))) { - if (PRETTY_PRINT) { + if (prettyPrint) { jsonGenerator.useDefaultPrettyPrinter(); } jsonGenerator.writeStartArray(); for (Metadata m : metadataList) { - JsonMetadata.writeMetadataObject(m, jsonGenerator, PRETTY_PRINT); + JsonMetadata.writeMetadataObject(m, jsonGenerator, prettyPrint); } jsonGenerator.writeEndArray(); } } + /** + * Serializes a Metadata object to Json. This does not flush or close the writer. + * + * @param metadataList list of metadata to write + * @param writer writer + * @throws org.apache.tika.exception.TikaException if there is an IOException during writing + */ + public static void toJson(List<Metadata> metadataList, Writer writer) throws IOException { + toJson(metadataList, writer, PRETTY_PRINT); + } + /** * Read metadata from reader. This does not close the reader *