This is an automated email from the ASF dual-hosted git repository.
lewismc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new da5570826 [NUTCH-3160] Remove System.exit(..) from reusable code (#903)
da5570826 is described below
commit da557082662227cde52959652f406446496044da
Author: Luca <[email protected]>
AuthorDate: Fri Feb 27 00:25:16 2026 +0000
[NUTCH-3160] Remove System.exit(..) from reusable code (#903)
---
src/java/org/apache/nutch/metrics/NutchMetrics.java | 3 +++
.../apache/nutch/tools/CommonCrawlDataDumper.java | 8 +++++++-
.../nutch/tools/TestCommonCrawlDataDumper.java | 21 +++++++++++++++++++++
3 files changed, 31 insertions(+), 1 deletion(-)
diff --git a/src/java/org/apache/nutch/metrics/NutchMetrics.java b/src/java/org/apache/nutch/metrics/NutchMetrics.java
index c65a4f0ce..ef4fe79e5 100644
--- a/src/java/org/apache/nutch/metrics/NutchMetrics.java
+++ b/src/java/org/apache/nutch/metrics/NutchMetrics.java
@@ -82,6 +82,9 @@ public final class NutchMetrics {
/** Counter group for WARC export operations. */
public static final String GROUP_WARC_EXPORTER = "nutch_warc_exporter";
+ /** Counter group for Common Crawl data dumper tool. */
+ public static final String GROUP_COMMONCRAWL_DUMPER = "nutch_commoncrawl_dumper";
+
/** Counter group for domain statistics operations. */
public static final String GROUP_DOMAIN_STATS = "nutch_domain_stats";
diff --git a/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java b/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java
index d5d5035e8..8e37c21fc 100644
--- a/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java
+++ b/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java
@@ -67,6 +67,8 @@ import org.apache.nutch.crawl.Inlinks;
import org.apache.nutch.crawl.LinkDbReader;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.metadata.Nutch;
+import org.apache.nutch.metrics.ErrorTracker;
+import org.apache.nutch.metrics.NutchMetrics;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.util.DumpFileUtil;
import org.apache.nutch.util.NutchConfiguration;
@@ -188,6 +190,7 @@ public class CommonCrawlDataDumper extends NutchTool implements Tool {
private GzipCompressorOutputStream gzipOutput = null;
private TarArchiveOutputStream tarOutput = null;
private ArrayList<String> fileList = null;
+ private ErrorTracker errorTracker;
/**
* Main method for invoking this tool
@@ -210,6 +213,7 @@ public class CommonCrawlDataDumper extends NutchTool implements Tool {
* @param config A populated {@link CommonCrawlConfig}
*/
public CommonCrawlDataDumper(CommonCrawlConfig config) {
+ this();
this.config = config;
}
@@ -217,6 +221,7 @@ public class CommonCrawlDataDumper extends NutchTool implements Tool {
* Constructor
*/
public CommonCrawlDataDumper() {
+ this.errorTracker = new ErrorTracker(NutchMetrics.GROUP_COMMONCRAWL_DUMPER);
}
/**
@@ -274,7 +279,8 @@ public class CommonCrawlDataDumper extends NutchTool implements Tool {
if (parts == null || parts.size() == 0) {
LOG.error( "No segment directories found in {} ",
segmentRootDir.getAbsolutePath());
- System.exit(1);
+ this.errorTracker.recordError(ErrorTracker.ErrorType.OTHER);
+ return;
}
LOG.info("Found {} segment parts", parts.size());
if (gzip && !warc) {
diff --git a/src/test/org/apache/nutch/tools/TestCommonCrawlDataDumper.java b/src/test/org/apache/nutch/tools/TestCommonCrawlDataDumper.java
index fee72b65a..8124fe20b 100644
--- a/src/test/org/apache/nutch/tools/TestCommonCrawlDataDumper.java
+++ b/src/test/org/apache/nutch/tools/TestCommonCrawlDataDumper.java
@@ -17,14 +17,19 @@
package org.apache.nutch.tools;
import java.io.File;
+import java.lang.reflect.Field;
import java.nio.file.Files;
import java.util.Collection;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.FileFilterUtils;
+import org.apache.nutch.metrics.ErrorTracker;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
/**
*
@@ -106,6 +111,22 @@ public class TestCommonCrawlDataDumper {
}
+ @Test
+ public void testDumpWithNoSegmentDirectoriesRecordsOtherError() throws Exception {
+ File emptySegmentDir = Files.createTempDirectory("empty-segments").toFile();
+ File outputDir = Files.createTempDirectory("dump-output").toFile();
+
+ ErrorTracker mockErrorTracker = mock(ErrorTracker.class);
+ CommonCrawlDataDumper dumper = new CommonCrawlDataDumper();
+ Field errorTrackerField = CommonCrawlDataDumper.class.getDeclaredField("errorTracker");
+ errorTrackerField.setAccessible(true);
+ errorTrackerField.set(dumper, mockErrorTracker);
+
+ dumper.dump(outputDir, emptySegmentDir, null, false, null, false, "", false);
+
+ verify(mockErrorTracker, times(1)).recordError(ErrorTracker.ErrorType.OTHER);
+ }
+
private boolean hasFile(String fileName, Collection<File> files) {
for (File f : files) {
if (f.getName().equals(fileName)) {