[
https://issues.apache.org/jira/browse/TIKA-4704?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18071432#comment-18071432
]
ASF GitHub Bot commented on TIKA-4704:
--------------------------------------
Copilot commented on code in PR #2743:
URL: https://github.com/apache/tika/pull/2743#discussion_r3039537074
##########
tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/MetadataWriteLimiterTest.java:
##########
@@ -88,31 +88,32 @@ public void testWriteLimiterFromConfig(@TempDir Path tmp)
throws Exception {
*/
@Test
public void testWriteLimiterOverrideViaParseContext(@TempDir Path tmp)
throws Exception {
- PipesClient pipesClient = initWithWriteLimiter(tmp, TEST_DOC);
-
+ Metadata metadata;
// Create a ParseContext with an override that allows
X-TIKA:parse_time_millis
// The default config's includeFields (dc:creator, Content-Type,
X-TIKA:content)
// does NOT include X-TIKA:parse_time_millis, but this override does.
- ParseContext parseContext = new ParseContext();
- String overrideJson = """
- {
- "includeFields": ["Content-Type",
"X-TIKA:parse_time_millis"],
- "maxKeySize": 100,
- "maxFieldSize": 1000,
- "maxTotalBytes": 10000,
- "maxValuesPerField": 5
- }
- """;
- parseContext.setJsonConfig("standard-metadata-limiter-factory", () ->
overrideJson);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC, new FetchKey(FETCHER_NAME,
TEST_DOC),
- new EmitKey(), new Metadata(), parseContext,
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
-
- assertNotNull(pipesResult.emitData().getMetadataList());
- assertEquals(1, pipesResult.emitData().getMetadataList().size());
-
- Metadata metadata = pipesResult.emitData().getMetadataList().get(0);
+ try (PipesClient pipesClient = initWithWriteLimiter(tmp, TEST_DOC)) {
+ // Create a ParseContext with an override that allows
X-TIKA:parse_time_millis
+ // The default config's includeFields (dc:creator, Content-Type,
X-TIKA:content)
+ // does NOT include X-TIKA:parse_time_millis, but this override
does.
Review Comment:
The ParseContext override explanation comment is duplicated (once before the
try-with-resources block and again immediately inside it). Please remove one of
the duplicates to avoid drift and make the test easier to read.
```suggestion
```
##########
tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/MetadataWriteLimiterTest.java:
##########
@@ -88,31 +88,32 @@ public void testWriteLimiterFromConfig(@TempDir Path tmp)
throws Exception {
*/
@Test
public void testWriteLimiterOverrideViaParseContext(@TempDir Path tmp)
throws Exception {
- PipesClient pipesClient = initWithWriteLimiter(tmp, TEST_DOC);
-
+ Metadata metadata;
// Create a ParseContext with an override that allows
X-TIKA:parse_time_millis
// The default config's includeFields (dc:creator, Content-Type,
X-TIKA:content)
// does NOT include X-TIKA:parse_time_millis, but this override does.
- ParseContext parseContext = new ParseContext();
- String overrideJson = """
- {
- "includeFields": ["Content-Type",
"X-TIKA:parse_time_millis"],
- "maxKeySize": 100,
- "maxFieldSize": 1000,
- "maxTotalBytes": 10000,
- "maxValuesPerField": 5
- }
- """;
- parseContext.setJsonConfig("standard-metadata-limiter-factory", () ->
overrideJson);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC, new FetchKey(FETCHER_NAME,
TEST_DOC),
- new EmitKey(), new Metadata(), parseContext,
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
-
- assertNotNull(pipesResult.emitData().getMetadataList());
- assertEquals(1, pipesResult.emitData().getMetadataList().size());
-
- Metadata metadata = pipesResult.emitData().getMetadataList().get(0);
+ try (PipesClient pipesClient = initWithWriteLimiter(tmp, TEST_DOC)) {
+ // Create a ParseContext with an override that allows
X-TIKA:parse_time_millis
+ // The default config's includeFields (dc:creator, Content-Type,
X-TIKA:content)
+ // does NOT include X-TIKA:parse_time_millis, but this override
does.
+ ParseContext parseContext = new ParseContext();
+ String overrideJson = """
+ {
+ "includeFields":
["Content-Type", "X-TIKA:parse_time_millis"],
+ "maxKeySize": 100,
+ "maxFieldSize": 1000,
+ "maxTotalBytes": 10000,
+ "maxValuesPerField": 5
+ }
+ """;
Review Comment:
The JSON text block for overrideJson is indented far more than surrounding
code, which makes it harder to read and increases the risk of accidentally
introducing leading whitespace into the JSON if the closing delimiter
indentation changes. Consider reindenting the text block similar to other
tests’ JSON text blocks in this module.
```suggestion
{
"includeFields": ["Content-Type",
"X-TIKA:parse_time_millis"],
"maxKeySize": 100,
"maxFieldSize": 1000,
"maxTotalBytes": 10000,
"maxValuesPerField": 5
}
""";
```
> Avoid remaining temp files
> --------------------------
>
> Key: TIKA-4704
> URL: https://issues.apache.org/jira/browse/TIKA-4704
> Project: Tika
> Issue Type: Task
> Affects Versions: 3.3.0
> Reporter: Tilman Hausherr
> Priority: Minor
> Fix For: 4.0.0, 3.3.1
>
> Attachments: screenshot-1.png
>
>
> This is my temp directory after a successful build of Tika 3. We should try
> to reduce the number of temp files left behind.
> !screenshot-1.png!
--
This message was sent by Atlassian Jira
(v8.20.10#820010)