[
https://issues.apache.org/jira/browse/TIKA-4704?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18071452#comment-18071452
]
ASF GitHub Bot commented on TIKA-4704:
--------------------------------------
Copilot commented on code in PR #2743:
URL: https://github.com/apache/tika/pull/2743#discussion_r3039981543
##########
tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/UnpackModeTest.java:
##########
@@ -63,202 +63,203 @@ private PipesClient init(Path tmp, String testFileName)
throws Exception {
@Test
public void testUnpackModeBasic(@TempDir Path tmp) throws Exception {
// Test that UNPACK mode works and returns metadata like RMETA
- PipesClient pipesClient = init(tmp, testDocWithEmbedded);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
- new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(), "UNPACK mode should succeed.
Status: " + pipesResult.status() +
- ", Message: " + pipesResult.message());
-
- // UNPACK mode may return EMIT_SUCCESS (without emitData) if passback
filter is not used
- // Check if we have emitData, otherwise just verify success
- if (pipesResult.emitData() != null &&
pipesResult.emitData().getMetadataList() != null) {
- // With RMETA-like behavior, we should get metadata for container
+ embedded docs
- // mock-embedded.xml has 4 embedded documents, so we expect 5
metadata objects
- List<Metadata> metadataList =
pipesResult.emitData().getMetadataList();
- assertEquals(5, metadataList.size(),
- "UNPACK should return RMETA-style metadata list (container
+ 4 embedded docs)");
-
- // Verify container metadata
- assertEquals("Nikolai Lobachevsky",
metadataList.get(0).get("author"));
-
- // Verify embedded metadata
- for (int i = 1; i < metadataList.size(); i++) {
- assertEquals("embeddedAuthor",
metadataList.get(i).get("author"),
- "Embedded document " + i + " should have embedded
author");
+ try (PipesClient pipesClient = init(tmp, testDocWithEmbedded)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
+ new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(), "UNPACK mode should succeed.
Status: " + pipesResult.status() +
+ ", Message: " + pipesResult.message());
+
+ // UNPACK mode may return EMIT_SUCCESS (without emitData) if
passback filter is not used
+ // Check if we have emitData, otherwise just verify success
+ if (pipesResult.emitData() != null &&
pipesResult.emitData().getMetadataList() != null) {
+ // With RMETA-like behavior, we should get metadata for
container + embedded docs
+ // mock-embedded.xml has 4 embedded documents, so we expect 5
metadata objects
+ List<Metadata> metadataList =
pipesResult.emitData().getMetadataList();
+ assertEquals(5, metadataList.size(),
+ "UNPACK should return RMETA-style metadata list
(container + 4 embedded docs)");
+
+ // Verify container metadata
+ assertEquals("Nikolai Lobachevsky",
metadataList.get(0).get("author"));
+
+ // Verify embedded metadata
+ for (int i = 1; i < metadataList.size(); i++) {
+ assertEquals("embeddedAuthor",
metadataList.get(i).get("author"),
+ "Embedded document " + i + " should have embedded
author");
+ }
}
+ // Even without emitData passback, the fact that isSuccess() is
true means UNPACK worked
}
- // Even without emitData passback, the fact that isSuccess() is true
means UNPACK worked
}
@Test
public void testUnpackModeAutoSetup(@TempDir Path tmp) throws Exception {
// Test that UNPACK mode works without explicit UnpackConfig
- // It should automatically set up UnpackExtractor and
EmittingUnpackHandler
- PipesClient pipesClient = init(tmp, testDocWithEmbedded);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
- // No UnpackConfig set - should be created automatically
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
- new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(),
- "UNPACK should work without explicit UnpackConfig. Status: " +
pipesResult.status() +
- ", Message: " + pipesResult.message());
+ try (PipesClient pipesClient = init(tmp, testDocWithEmbedded)) {
+ // It should automatically set up UnpackExtractor and
EmittingUnpackHandler
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+ // No UnpackConfig set - should be created automatically
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
+ new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(),
+ "UNPACK should work without explicit UnpackConfig. Status:
" + pipesResult.status() +
+ ", Message: " + pipesResult.message());
+ }
}
@Test
- public void testUnpackModeRequiresEmitter(@TempDir Path tmp) throws
Exception {
+ public void testUnpackModeRequiresEmitter(@TempDir Path tmp) throws
Exception
+ {
Review Comment:
`testUnpackModeRequiresEmitter` uses a brace-on-next-line style for the
method body, which is inconsistent with the rest of this class and the
project's usual K&R formatting. This can also cause formatting/lint tools
(e.g., Spotless) to fail. Please move the opening `{` onto the same line as the
method signature.
```suggestion
public void testUnpackModeRequiresEmitter(@TempDir Path tmp) throws
Exception {
```
> Avoid remaining temp files
> --------------------------
>
> Key: TIKA-4704
> URL: https://issues.apache.org/jira/browse/TIKA-4704
> Project: Tika
> Issue Type: Task
> Affects Versions: 3.3.0
> Reporter: Tilman Hausherr
> Priority: Minor
> Fix For: 4.0.0, 3.3.1
>
> Attachments: screenshot-1.png
>
>
> This is my temp directory after a successful build of Tika 3. We should try
> to reduce the number of temp files left behind.
> !screenshot-1.png!
--
This message was sent by Atlassian Jira
(v8.20.10#820010)