This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 2468e43b1 TIKA-3799 -- Refactor FuzzingCLI to use PipesParser
2468e43b1 is described below
commit 2468e43b1eed19409bfeb8749b02e0d0350d872b
Author: tallison <[email protected]>
AuthorDate: Wed Jun 22 16:14:50 2022 -0400
TIKA-3799 -- Refactor FuzzingCLI to use PipesParser
---
CHANGES.txt | 2 +
tika-fuzzing/pom.xml | 11 +-
.../org/apache/tika/fuzzing/cli/FuzzingCLI.java | 325 ++++++++++++---------
.../apache/tika/fuzzing/cli/FuzzingCLIConfig.java | 97 +++---
.../tika/fuzzing/general/GeneralTransformer.java | 1 +
tika-fuzzing/src/main/resources/log4j2.xml | 6 +
.../test/resources/configs/tika-fuzzing-config.xml | 69 +++++
tika-fuzzing/src/test/resources/log4j2.xml | 14 +-
8 files changed, 319 insertions(+), 206 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 76fa225c0..11f170b1b 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,7 @@
Release 2.4.2 - ???
+ * Refactor FuzzingCLI to use PipesParser (TIKA-3799).
+
* ServiceLoader's loadServiceProviders() now guarantees
unique classes (TIKA-3797).
diff --git a/tika-fuzzing/pom.xml b/tika-fuzzing/pom.xml
index 6cd5d0a0a..835d4e125 100644
--- a/tika-fuzzing/pom.xml
+++ b/tika-fuzzing/pom.xml
@@ -37,7 +37,16 @@
<groupId>${project.groupId}</groupId>
<artifactId>tika-core</artifactId>
<version>${project.version}</version>
- <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-serialization</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-emitter-fs</artifactId>
+ <version>${project.version}</version>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
diff --git
a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLI.java
b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLI.java
index 10453e6be..53cb22b40 100644
--- a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLI.java
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLI.java
@@ -17,12 +17,12 @@
package org.apache.tika.fuzzing.cli;
import java.io.IOException;
-import java.nio.file.FileVisitResult;
-import java.nio.file.FileVisitor;
+import java.io.InputStream;
+import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.nio.file.attribute.BasicFileAttributes;
+import java.util.Locale;
+import java.util.UUID;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
@@ -33,205 +33,250 @@ import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.FilenameUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.tika.utils.ProcessUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.fuzzing.general.ByteDeleter;
+import org.apache.tika.fuzzing.general.ByteFlipper;
+import org.apache.tika.fuzzing.general.ByteInjector;
+import org.apache.tika.fuzzing.general.GeneralTransformer;
+import org.apache.tika.fuzzing.general.SpanSwapper;
+import org.apache.tika.fuzzing.general.Truncator;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.pipes.FetchEmitTuple;
+import org.apache.tika.pipes.PipesConfig;
+import org.apache.tika.pipes.PipesParser;
+import org.apache.tika.pipes.PipesResult;
+import org.apache.tika.pipes.emitter.EmitKey;
+import org.apache.tika.pipes.fetcher.FetchKey;
+import org.apache.tika.pipes.fetcher.FetcherManager;
+import org.apache.tika.pipes.pipesiterator.PipesIterator;
public class FuzzingCLI {
- private static final Logger LOG =
LoggerFactory.getLogger(FuzzingCLI.class);
-
- private static final Path POISON = Paths.get("");
- private final int maxFiles = -1;
+ private static final Logger LOG =
LoggerFactory.getLogger(FuzzingCLI.class);
+ private static final String TEMP_FETCHER_NAME = "temp";
+ private static final String TEMP_EMITTER_NAME = "temp";
public static void main(String[] args) throws Exception {
FuzzingCLIConfig config = FuzzingCLIConfig.parse(args);
if (config.getMaxTransformers() == 0) {
LOG.warn("max transformers == 0!");
}
- if (!Files.isDirectory(config.inputDir)) {
- throw new IllegalArgumentException("input directory doesn't exist:
" + config.inputDir);
- }
+
FuzzingCLI fuzzingCLI = new FuzzingCLI();
- Files.createDirectories(config.getOutputDirectory());
+ Files.createDirectories(config.getProblemsDirectory());
fuzzingCLI.execute(config);
}
- private void execute(FuzzingCLIConfig config) {
- ArrayBlockingQueue<Path> q = new ArrayBlockingQueue(10000);
- ExecutorService executorService =
Executors.newFixedThreadPool(config.getNumThreads() + 1);
+
+ private void execute(FuzzingCLIConfig config) throws Exception {
+ ArrayBlockingQueue<FetchEmitTuple> q = new ArrayBlockingQueue(10000);
+
+ PipesConfig pipesConfig = PipesConfig.load(config.getTikaConfig());
+ FetcherManager fetcherManager =
FetcherManager.load(config.getTikaConfig());
+
+ int totalThreads = pipesConfig.getNumClients() + 1;
+
+ ExecutorService executorService =
Executors.newFixedThreadPool(totalThreads);
ExecutorCompletionService executorCompletionService =
new ExecutorCompletionService(executorService);
- FileAdder fileAdder = new FileAdder(config.getInputDirectory(),
config.getNumThreads(), q);
+ PipesIterator pipesIterator =
PipesIterator.build(config.getTikaConfig());
+
+ FileAdder fileAdder = new FileAdder(pipesIterator, q);
executorCompletionService.submit(fileAdder);
- for (int i = 0; i < config.numThreads; i++) {
- executorCompletionService.submit(new Fuzzer(q, config));
- }
- int finished = 0;
- while (finished < config.getNumThreads() + 1) {
- Future<Integer> future = null;
- try {
- future = executorCompletionService.poll(1, TimeUnit.SECONDS);
- if (future != null) {
- future.get();
- finished++;
+ try (PipesParser parser = new PipesParser(pipesConfig)) {
+
+ for (int i = 0; i < pipesConfig.getNumClients(); i++) {
+ executorCompletionService.submit(new Fuzzer(q, config, parser,
fetcherManager));
+ }
+ int finished = 0;
+ while (finished < totalThreads) {
+ Future<Integer> future = null;
+ try {
+ future = executorCompletionService.poll(1,
TimeUnit.SECONDS);
+ if (future != null) {
+ future.get();
+ finished++;
+ }
+ LOG.info("Finished thread {} threads of {}", finished,
totalThreads);
+ } catch (InterruptedException | ExecutionException e) {
+ e.printStackTrace();
+ break;
}
- } catch (InterruptedException | ExecutionException e) {
- e.printStackTrace();
- break;
}
+ executorService.shutdown();
+ executorService.shutdownNow();
}
- executorService.shutdownNow();
+
}
private static class Fuzzer implements Callable<Integer> {
static AtomicInteger COUNTER = new AtomicInteger();
+ static AtomicInteger FUZZED = new AtomicInteger();
+ static AtomicInteger SOURCE_FILES = new AtomicInteger();
private final int threadId = COUNTER.getAndIncrement();
- private final ArrayBlockingQueue<Path> q;
+ private final ArrayBlockingQueue<FetchEmitTuple> q;
private final FuzzingCLIConfig config;
- public Fuzzer(ArrayBlockingQueue<Path> q, FuzzingCLIConfig config) {
+ private final PipesParser pipesParser;
+
+ private final Transformer transformer;
+
+ private final FetcherManager fetcherManager;
+
+ public Fuzzer(ArrayBlockingQueue<FetchEmitTuple> q, FuzzingCLIConfig
config,
+ PipesParser pipesParser, FetcherManager fetcherManager) {
this.q = q;
this.config = config;
+ this.pipesParser = pipesParser;
+ //TODO - parameterize this
+ this.transformer =
+ new GeneralTransformer(config.getMaxTransformers(), new
ByteDeleter(),
+ new ByteFlipper(), new ByteInjector(), new
Truncator(),
+ new SpanSwapper());
+ this.fetcherManager = fetcherManager;
}
@Override
public Integer call() throws Exception {
while (true) {
- Path p = q.take();
- if (p.equals(POISON)) {
+ FetchEmitTuple fetchEmitTuple = q.take();
+ if (fetchEmitTuple.equals(PipesIterator.COMPLETED_SEMAPHORE)) {
LOG.debug("Thread " + threadId + " stopping");
+ q.put(PipesIterator.COMPLETED_SEMAPHORE);
return 1;
}
- boolean success = false;
- int tries = 0;
- while (!success && tries < config.getRetries()) {
- if (tries > 0) {
- LOG.warn("Retrying (" + tries + ") " + p);
+ int inputFiles = SOURCE_FILES.getAndIncrement();
+ if (inputFiles % 100 == 0) {
+ LOG.info("Processed {} source files", inputFiles);
+ }
+ for (int i = 0; i < config.perFileIterations; i++) {
+ try {
+ fuzzIt(fetchEmitTuple);
+ } catch (InterruptedException e) {
+ throw e;
+ } catch (Exception e) {
+ LOG.warn("serious problem with", e);
}
- success = fuzzIt(config, p, tries);
- tries++;
}
}
}
- private boolean fuzzIt(FuzzingCLIConfig config, Path p, int retryId) {
- //the target files should be flattened so that
- //problematic files are all in one directory...may rethink this
option later
- Path target = config.getOutputDirectory().resolve(p.getFileName());
- String cp = System.getProperty("java.class.path");
-
- String[] args =
- new String[]{"java", "-XX:-OmitStackTraceInFastThrow",
"-Xmx" + config.xmx,
- "-ea", "-cp", ProcessUtils.escapeCommandLine(cp),
- "org.apache.tika.fuzzing.cli.FuzzOne", "-i",
-
ProcessUtils.escapeCommandLine(p.toAbsolutePath().toString()), "-o",
-
ProcessUtils.escapeCommandLine(target.toAbsolutePath().toString()),
- "-p",
Integer.toString(config.getPerFileIterations()), "-t",
- Integer.toString(config.getMaxTransformers()),
"-n",
- Integer.toString(threadId), "-r",
Integer.toString(retryId), "-m",
- Long.toString(config.getTimeoutMs())};
- ProcessBuilder pb = new ProcessBuilder(args);
- pb.inheritIO();
- Process process = null;
- boolean success = false;
- try {
- process = pb.start();
- } catch (IOException e) {
- LOG.warn("problem starting process", e);
- }
+ private void fuzzIt(FetchEmitTuple fetchEmitTuple)
+ throws IOException, InterruptedException, TikaException {
+ Path cwd = Files.createTempDirectory("tika-fuzz-");
try {
- long totalTime = 2 * config.getTimeoutMs() *
config.getPerFileIterations();
- success = process.waitFor(totalTime, TimeUnit.MILLISECONDS);
- } catch (InterruptedException e) {
- LOG.warn("problem waiting for process to finish", e);
- } finally {
- if (process.isAlive()) {
- LOG.warn("process still alive for " +
target.toAbsolutePath());
- process.destroyForcibly();
+ Path fuzzedPath = fuzz(fetchEmitTuple, cwd);
+ Path extract = Files.createTempFile(cwd, "tika-extract-",
".json");
+ FetchEmitTuple fuzzedTuple = new
FetchEmitTuple(fetchEmitTuple.getId(),
+ new FetchKey(TEMP_FETCHER_NAME,
fuzzedPath.toAbsolutePath().toString()),
+ new EmitKey(TEMP_EMITTER_NAME,
extract.toAbsolutePath().toString()));
+ int count = FUZZED.getAndIncrement();
+ if (count % 100 == 0) {
+ LOG.info("processed {} fuzzed files", count);
}
- try {
- int exitValue = process.exitValue();
- if (exitValue != 0) {
- success = false;
- LOG.warn("bad exit value for " +
target.toAbsolutePath());
+ boolean tryAgain = true;
+ int tries = 0;
+ while (tryAgain && tries < config.getRetries()) {
+ tries++;
+ try {
+ PipesResult result = pipesParser.parse(fuzzedTuple);
+ tryAgain = handleResult(result.getStatus(),
+ fetchEmitTuple.getFetchKey().getFetchKey(),
fuzzedPath, tries,
+ config.getRetries());
+ } catch (InterruptedException e) {
+ throw e;
+ } catch (Exception e) {
+ tryAgain =
handleResult(PipesResult.STATUS.UNSPECIFIED_CRASH,
+ fetchEmitTuple.getFetchKey().getFetchKey(),
fuzzedPath, tries,
+ config.getRetries());
}
- } catch (IllegalThreadStateException e) {
- success = false;
- LOG.warn("not exited");
- process.destroyForcibly();
+ }
+ } finally {
+ try {
+ FileUtils.deleteDirectory(cwd.toFile());
+ } catch (IOException e) {
+ e.printStackTrace();
+ LOG.warn("Couldn't delete " + cwd.toAbsolutePath(), e);
+ }
+ }
+ }
+
+ private Path fuzz(FetchEmitTuple fetchEmitTuple, Path cwd)
+ throws IOException, TikaException {
+ Path target = Files.createTempFile(cwd, "tika-fuzz-target-",
+ "." +
FilenameUtils.getExtension(fetchEmitTuple.getFetchKey().getFetchKey()));
+ try (InputStream is = fetcherManager.getFetcher(
+ fetchEmitTuple.getFetchKey().getFetcherName())
+ .fetch(fetchEmitTuple.getFetchKey().getFetchKey(), new
Metadata())) {
+ try (OutputStream os = Files.newOutputStream(target)) {
+ transformer.transform(is, os);
}
}
- return success;
+ return target;
+ }
+
+ private boolean handleResult(PipesResult.STATUS status, String
origFetchKey,
+ Path fuzzedPath, int tries, int
maxRetries)
+ throws IOException {
+ switch (status) {
+ case OOM:
+ case TIMEOUT:
+ case UNSPECIFIED_CRASH:
+ if (tries < maxRetries) {
+ LOG.info("trying again ({} of {}) {} : {}", tries,
maxRetries,
+ status.name());
+ return true;
+ }
+ Path problemFilePath = getProblemFile(status,
origFetchKey);
+ LOG.info("found a problem {} -> {} : {}", origFetchKey,
problemFilePath,
+ status.name());
+ Files.copy(fuzzedPath, problemFilePath);
+ return false;
+ default:
+ //if there wasn't a problem
+ return false;
+ }
+ }
+
+ private Path getProblemFile(PipesResult.STATUS status, String
origFetchKey)
+ throws IOException {
+ String name = FilenameUtils.getName(origFetchKey) + "-" +
UUID.randomUUID();
+ Path problemFile =
+
config.getProblemsDirectory().resolve(status.name().toLowerCase(Locale.US))
+ .resolve(name);
+ Files.createDirectories(problemFile.getParent());
+ return problemFile;
}
}
private class FileAdder implements Callable<Integer> {
- private final Path inputDir;
- private final int numThreads;
- private final ArrayBlockingQueue<Path> queue;
+ private final PipesIterator pipesIterator;
+ private final ArrayBlockingQueue<FetchEmitTuple> queue;
private int added = 0;
- public FileAdder(Path inputDirectory, int numThreads,
ArrayBlockingQueue<Path> queue) {
- this.inputDir = inputDirectory;
- this.numThreads = numThreads;
+ public FileAdder(PipesIterator pipesIterator,
ArrayBlockingQueue<FetchEmitTuple> queue) {
+ this.pipesIterator = pipesIterator;
this.queue = queue;
}
@Override
public Integer call() throws Exception {
- Files.walkFileTree(inputDir, new DirWalker());
- for (int i = 0; i < numThreads; i++) {
- queue.add(POISON);
+ int added = 0;
+ for (FetchEmitTuple tuple : pipesIterator) {
+ //put() blocks indefinitely while the queue is full -- should offer with a timeout
+ queue.put(tuple);
+ added++;
}
+ queue.put(PipesIterator.COMPLETED_SEMAPHORE);
+ LOG.info("file adder finished " + added);
return 1;
}
-
- private class DirWalker implements FileVisitor<Path> {
-
- @Override
- public FileVisitResult preVisitDirectory(Path dir,
BasicFileAttributes attrs)
- throws IOException {
- return FileVisitResult.CONTINUE;
- }
-
- @Override
- public FileVisitResult visitFile(Path file, BasicFileAttributes
attrs)
- throws IOException {
- if (maxFiles > -1 && added >= maxFiles) {
- LOG.info("hit maxfiles; file crawler is stopping early");
- return FileVisitResult.TERMINATE;
- }
- if (!file.getFileName().toString().contains("sas7bdat")) {
- return FileVisitResult.CONTINUE;
- }
- try {
- boolean offered = queue.offer(file, 10, TimeUnit.MINUTES);
- if (offered) {
- added++;
- return FileVisitResult.CONTINUE;
- } else {
- LOG.error("couldn't add a file after 10 minutes!");
- return FileVisitResult.TERMINATE;
- }
- } catch (InterruptedException e) {
- e.printStackTrace();
- return FileVisitResult.TERMINATE;
- }
- }
-
- @Override
- public FileVisitResult visitFileFailed(Path file, IOException exc)
throws IOException {
- return FileVisitResult.CONTINUE;
- }
-
- @Override
- public FileVisitResult postVisitDirectory(Path dir, IOException
exc)
- throws IOException {
- return FileVisitResult.CONTINUE;
- }
- }
}
}
diff --git
a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLIConfig.java
b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLIConfig.java
index c741616d8..1a58b72a5 100644
---
a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLIConfig.java
+++
b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLIConfig.java
@@ -28,105 +28,76 @@ import org.apache.commons.cli.ParseException;
public class FuzzingCLIConfig {
- private static final int DEFAULT_NUM_THREADS = 4;
- private static final int DEFAULT_NUM_ITERATIONS = 1000;
- //allow all transformers to operate
- private static final int DEFAULT_MAX_TRANSFORMERS = -1;
-
- private static final long DEFAULT_TIMEOUT_MS = 120000;
+ private static final int DEFAULT_NUM_ITERATIONS = 100;
- private static final int DEFAULT_RETRIES = 2;
+ //by default, run a single transformer per iteration
+ private static final int DEFAULT_MAX_TRANSFORMERS = 1;
- private static final String DEFAULT_XMX = "512m";
+ private static final int DEFAULT_RETRIES = 1;
static Options OPTIONS;
static {
- //By the time this commandline is parsed, there should be both an
extracts and an inputDir
- Option extracts = new Option("extracts", true, "directory for extract
files");
- extracts.setRequired(true);
-
-
- OPTIONS = new Options().addOption(
- Option.builder("i").longOpt("inputDir").desc("input directory
for seed files")
- .hasArg(true).required(true).build()).addOption(
- Option.builder("o").longOpt("outputDir")
- .desc("output directory for files that triggered
problems").hasArg(true)
- .required(true).build()).addOption(
- Option.builder("n").longOpt("numThreads").desc("number of
threads").hasArg(true)
-
.required(false).build()).addOption(Option.builder("p").longOpt("perFile")
+ Option problems = new Option("o", "output", true, "directory for
problems files");
+ problems.setRequired(true);
+
+
+ OPTIONS = new Options().addOption(problems)
+ .addOption(Option.builder("c").longOpt("config").hasArg(true)
+ .desc("tika config " +
+ "file with " +
+ "specs for pipes parser, pipes iterator,
fetchers and emitters")
+ .required(true).build())
+ .addOption(Option.builder("p").longOpt("perFile")
.desc("number of iterations to run per seed
file").hasArg(true).required(false)
-
.build()).addOption(Option.builder("t").longOpt("maxTransformers")
+ .build())
+ .addOption(Option.builder("t").longOpt("maxTransformers")
.desc("maximum number of transformers to run per
iteration").hasArg(true)
-
.required(false).build()).addOption(Option.builder("m").longOpt("timeoutMs")
- .desc("timeout in ms -- max time allowed to parse a
file").hasArg(true)
-
.required(false).build()).addOption(Option.builder("x").longOpt("xmx")
- .desc("e.g. 1G, max heap appended to -Xmx in the forked
process").hasArg(true)
-
.required(false).build()).addOption(Option.builder("r").longOpt("retries")
+ .required(false).build())
+ .addOption(Option.builder("r").longOpt("retries")
.desc("number of times to retry a seed file if there's a
catastrophic failure")
.hasArg(true).required(false).build());
}
-
- int numThreads;
//number of variants tried per file
- int perFileIterations;
+ int perFileIterations = DEFAULT_NUM_ITERATIONS;
//maxTransformers per file
- int maxTransformers;
+ int maxTransformers = DEFAULT_MAX_TRANSFORMERS;
//max time allowed to process each file in milliseconds
long timeoutMS;
//times to retry a seed file after a catastrophic failure
- int retries;
- //xmx for forked process, e.g. 512m or 1G
- String xmx;
- Path inputDir;
- Path outputDir;
+ int retries = DEFAULT_RETRIES;
+
+ Path tikaConfig;
+
+ Path problemsDir;
public static FuzzingCLIConfig parse(String[] args) throws ParseException {
CommandLineParser parser = new DefaultParser();
CommandLine commandLine = parser.parse(OPTIONS, args);
FuzzingCLIConfig config = new FuzzingCLIConfig();
- config.inputDir = Paths.get(commandLine.getOptionValue("i"));
- config.outputDir = Paths.get(commandLine.getOptionValue("o"));
- config.numThreads =
- (commandLine.hasOption("n")) ?
Integer.parseInt(commandLine.getOptionValue("n")) :
- DEFAULT_NUM_THREADS;
- config.perFileIterations =
- (commandLine.hasOption("p")) ?
Integer.parseInt(commandLine.getOptionValue("p")) :
- DEFAULT_NUM_ITERATIONS;
- config.maxTransformers =
- (commandLine.hasOption("t")) ?
Integer.parseInt(commandLine.getOptionValue("t")) :
- DEFAULT_MAX_TRANSFORMERS;
- config.timeoutMS =
- (commandLine.hasOption("m")) ?
Integer.parseInt(commandLine.getOptionValue("m")) :
- DEFAULT_TIMEOUT_MS;
+ config.tikaConfig = Paths.get(commandLine.getOptionValue("c"));
+ config.problemsDir = Paths.get(commandLine.getOptionValue("o"));
config.retries =
(commandLine.hasOption("r")) ?
Integer.parseInt(commandLine.getOptionValue("r")) :
DEFAULT_RETRIES;
- config.xmx = (commandLine.hasOption("x")) ?
commandLine.getOptionValue("x") : DEFAULT_XMX;
+ config.maxTransformers = (commandLine.hasOption("t")) ?
+ Integer.parseInt(commandLine.getOptionValue("t")) :
DEFAULT_MAX_TRANSFORMERS;
return config;
}
- public int getNumThreads() {
- return numThreads;
- }
-
- public Path getInputDirectory() {
- return inputDir;
+ public Path getProblemsDirectory() {
+ return problemsDir;
}
- public Path getOutputDirectory() {
- return outputDir;
+ public Path getTikaConfig() {
+ return tikaConfig;
}
public int getMaxTransformers() {
return maxTransformers;
}
- public long getTimeoutMs() {
- return timeoutMS;
- }
-
public int getPerFileIterations() {
return perFileIterations;
}
diff --git
a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/GeneralTransformer.java
b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/GeneralTransformer.java
index 62f3fc940..20ca55ff3 100644
---
a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/GeneralTransformer.java
+++
b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/GeneralTransformer.java
@@ -70,6 +70,7 @@ public class GeneralTransformer implements Transformer {
public void transform(InputStream is, OutputStream os) throws IOException,
TikaException {
//used for debugging
if (maxTransforms == 0) {
+ IOUtils.copy(is, os);
return;
}
int transformerCount = (maxTransforms == 1) ? 1 : 1 +
random.nextInt(maxTransforms);
diff --git a/tika-fuzzing/src/main/resources/log4j2.xml
b/tika-fuzzing/src/main/resources/log4j2.xml
index 513b667a8..94ac22b3e 100644
--- a/tika-fuzzing/src/main/resources/log4j2.xml
+++ b/tika-fuzzing/src/main/resources/log4j2.xml
@@ -28,5 +28,11 @@
<Root level="info">
<AppenderRef ref="Console"/>
</Root>
+ <Logger name="org.apache.tika.pipes" level="error" additivity="false">
+ <AppenderRef ref="Console"/>
+ </Logger>
+ <Logger name="com.github.junrar" level="error" additivity="false">
+ <AppenderRef ref="Console"/>
+ </Logger>
</Loggers>
</Configuration>
diff --git a/tika-fuzzing/src/test/resources/configs/tika-fuzzing-config.xml
b/tika-fuzzing/src/test/resources/configs/tika-fuzzing-config.xml
new file mode 100644
index 000000000..4b255b06b
--- /dev/null
+++ b/tika-fuzzing/src/test/resources/configs/tika-fuzzing-config.xml
@@ -0,0 +1,69 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<!-- This is an example configuration file for running the fuzzer against
+ an input directory. Make sure to specify the input file directory
+ in the basePath elements. The "temp" fetcher and emitter (which have no
+ basePath) are needed to handle the temp files that are created during fuzzing. -->
+<properties>
+ <fetchers>
+ <fetcher
class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher">
+ <params>
+ <name>fsf</name>
+ <basePath>{FILL_IN_HERE}</basePath>
+ </params>
+ </fetcher>
+ <fetcher
class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher">
+ <params>
+ <name>temp</name>
+ </params>
+ </fetcher>
+ </fetchers>
+ <emitters>
+ <emitter
class="org.apache.tika.pipes.emitter.fs.FileSystemEmitter">
+ <params>
+ <name>fse</name>
+ <basePath>{FILL_IN_HERE}</basePath>
+ </params>
+ </emitter>
+ <emitter
class="org.apache.tika.pipes.emitter.fs.FileSystemEmitter">
+ <params>
+ <name>temp</name>
+ </params>
+ </emitter>
+ </emitters>
+ <pipesIterator
class="org.apache.tika.pipes.pipesiterator.fs.FileSystemPipesIterator">
+ <params>
+ <basePath>{FILL_IN_HERE}</basePath>
+ <fetcherName>fsf</fetcherName>
+ <emitterName>fse</emitterName>
+ </params>
+ </pipesIterator>
+ <pipes>
+ <params>
+ <numClients>5</numClients>
+ <forkedJvmArgs>
+ <arg>-Xmx1g</arg>
+ <arg>-XX:ParallelGCThreads=2</arg>
+
<arg>-Dlog4j.configurationFile={FILL_IN_HERE}</arg>
+ </forkedJvmArgs>
+ <timeoutMillis>10000</timeoutMillis>
+ </params>
+ </pipes>
+</properties>
\ No newline at end of file
diff --git a/tika-fuzzing/src/test/resources/log4j2.xml
b/tika-fuzzing/src/test/resources/log4j2.xml
index 611f36d37..eaeca677e 100644
--- a/tika-fuzzing/src/test/resources/log4j2.xml
+++ b/tika-fuzzing/src/test/resources/log4j2.xml
@@ -25,8 +25,18 @@
</Console>
</Appenders>
<Loggers>
- <Root level="debug">
+ <Root level="info">
<AppenderRef ref="Console"/>
</Root>
+ <Logger name="org.apache.tika.pipes" level="error" additivity="false">
+ <AppenderRef ref="Console"/>
+ </Logger>
+ <Logger name="com.github.junrar" level="error" additivity="false">
+ <AppenderRef ref="Console"/>
+ </Logger>
+ <Logger name="org.apache.pdfbox" level="fatal" additivity="false">
+ <AppenderRef ref="Console"/>
+ </Logger>
+
</Loggers>
-</Configuration>
+</Configuration>
\ No newline at end of file