[ 
https://issues.apache.org/jira/browse/TIKA-4739?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18083905#comment-18083905
 ] 

Hudson commented on TIKA-4739:
------------------------------

SUCCESS: Integrated in Jenkins build Tika » tika-main-jdk17 #1389 (See 
[https://ci-builds.apache.org/job/Tika/job/tika-main-jdk17/1389/])
TIKA-4739 (#2837) (github: 
[https://github.com/apache/tika/commit/4f6ad8b0f3c98c47ed87c92c103999f0d30a06ea])
* (edit) 
tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/java/org/apache/tika/pipes/gcs/ConfigExamplesTest.java
* (edit) docs/modules/ROOT/pages/pipes/configuration.adoc
* (edit) 
tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/test/java/org/apache/tika/pipes/atlassianjwt/ConfigExamplesTest.java
* (edit) 
tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/test/java/org/apache/tika/pipes/csv/ConfigExamplesTest.java
* (edit) 
tika-pipes/tika-async-cli/src/test/resources/configs/config-template.json
* (edit) 
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesConfig.java
* (edit) docs/modules/ROOT/pages/migration-to-4x/migrating-to-4x.adoc
* (edit) 
tika-app/src/main/java/org/apache/tika/cli/XmlToJsonConfigConverter.java
* (edit) 
tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/java/org/apache/tika/pipes/azblob/ConfigExamplesTest.java
* (edit) 
tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInstantiator.java
* (edit) tika-e2e-tests/tika-grpc/src/test/resources/tika-config-ignite.json
* (edit) tika-pipes/tika-pipes-plugins/pom.xml
* (edit) 
tika-pipes/tika-pipes-plugins/tika-pipes-json/src/test/java/org/apache/tika/pipes/json/ConfigExamplesTest.java
* (edit) 
tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-fetcher.json
* (edit) 
tika-serialization/src/main/java/org/apache/tika/config/loader/AbstractSpiComponentLoader.java
* (edit) 
tika-pipes/tika-pipes-fork-parser/src/main/java/org/apache/tika/pipes/fork/PipesForkParserConfig.java
* (edit) 
tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/PluginsWriter.java
* (edit) tika-pipes/tika-pipes-core/pom.xml
* (edit) 
tika-serialization/src/main/java/org/apache/tika/config/loader/ParserLoader.java
* (edit) 
tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-pipeline.json
* (edit) 
tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-emitter.json
* (edit) 
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/configs/tika-libpst-eml-config.json
* (edit) 
tika-pipes/tika-pipes-fork-parser/src/main/java/org/apache/tika/pipes/fork/PipesForkParser.java
* (edit) 
tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/test/java/org/apache/tika/pipes/googledrive/ConfigExamplesTest.java
* (edit) docs/modules/ROOT/pages/migration-to-4x/migrating-tika-server-4x.adoc
* (edit) tika-app/src/test/resources/configs/tika-config2.json
* (edit) 
tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/java/org/apache/tika/pipes/s3/ConfigExamplesTest.java
* (edit) 
tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
* (edit) 
tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json
* (edit) 
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/AbstractComponentManager.java
* (edit) 
tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
* (add) 
tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/testutil/AbstractConfigExamplesTest.java
* (edit) 
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/configs/tika-libpst-config.json
* (edit) 
tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/java/org/apache/tika/pipes/kafka/ConfigExamplesTest.java
* (edit) 
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/config/ConfigOverrides.java
* (edit) 
tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/java/org/apache/tika/pipes/jdbc/ConfigExamplesTest.java
* (edit) 
tika-parsers/tika-parsers-standard/tika-parsers-standard-integration-tests/src/test/resources/configs/tika-config-rendering.json
* (edit) 
tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/java/org/apache/tika/pipes/es/ConfigExamplesTest.java
* (edit) 
tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/java/org/apache/tika/pipes/solr/ConfigExamplesTest.java
* (edit) docs/modules/ROOT/pages/advanced/integration-testing/tika-app.adoc
* (edit) 
tika-app/src/test/java/org/apache/tika/cli/XmlToJsonConfigConverterTest.java
* (edit) 
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/config/ConfigMerger.java
* (edit) 
tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/java/org/apache/tika/pipes/fs/ConfigExamplesTest.java
* (edit) 
tika-pipes/tika-pipes-plugins/tika-pipes-http/src/test/java/org/apache/tika/pipes/http/ConfigExamplesTest.java
* (edit) 
tika-pipes/tika-async-cli/src/test/resources/configs/config-content-only-default.json
* (edit) 
tika-pipes/tika-pipes-plugins/tika-pipes-microsoft-graph/src/test/java/org/apache/tika/pipes/microsoftgraph/ConfigExamplesTest.java
* (edit) 
tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/config/ConfigMergerTest.java
* (edit) tika-app/src/test/resources/configs/config-template.json
* (edit) 
tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/java/org/apache/tika/pipes/opensearch/ConfigExamplesTest.java
* (edit) 
tika-e2e-tests/tika-grpc/src/test/resources/tika-config-ignite-local.json


> tika-4.0.0-alpha1 - configuration file issues
> ---------------------------------------------
>
>                 Key: TIKA-4739
>                 URL: https://issues.apache.org/jira/browse/TIKA-4739
>             Project: Tika
>          Issue Type: Bug
>            Reporter: Adrian Bird
>            Priority: Major
>             Fix For: 4.0.0
>
>
> I've got some issues with the configuration and I've put them all in here. 
> *1.* Error in Tika-App Integration Test 20
> The 
> [test|https://tika.apache.org/docs/4.0.0-SNAPSHOT/advanced/integration-testing/tika-app.html#_test_20_create_custom_config_file]
>  has a custom tika-config.json file. When I tried it I got the following 
> error:
> {code:java}
> Exception in thread "main" 
> com.fasterxml.jackson.databind.exc.UnrecognizedPropertyException: 
> Unrecognized field "timeoutMillis" (class 
> org.apache.tika.pipes.core.PipesConfig), not marked as ignorable (25 known 
> properties: "sleepOnStartupTimeoutMillis", "shutdownClientAfterMillis", 
> "numClients", "emitWithinMillis", "configStoreParams", "emitStrategy", 
> "heartbeatIntervalMs", "startupTimeoutMillis", "numEmitters", 
> "staleFetcherTimeoutSeconds", "maxFilesProcessedPerProcess", 
> "useSharedServer", "queueSize", "socketTimeoutMs", "parseMode", 
> "stopOnlyOnFatal", "tempDirectory", "onParseException", "forkedJvmArgs", 
> "maxWaitForClientMillis", "javaPath", "staleFetcherDelaySeconds", 
> "configStoreType", "emitMaxEstimatedBytes", "emitIntermediateResults")
>  at [Source: UNKNOWN; byte offset: #UNKNOWN] (through reference chain: 
> org.apache.tika.pipes.core.PipesConfig["timeoutMillis"]) {code}
> *2.* parsers '_exclude' doesn't seem to work
> Using the config file from Test 20 above, and fixing the issue in 1. by using 
> 'startupTimeoutMillis' instead of 'timeoutMillis', I tried excluding a parser. 
> I really wanted to do it for Tesseract but decided an easier option was PDF.
> I removed the 'pdf-parser' section from the config and did this:
> {code:java}
>     {
>       "default-parser": {
>         "_exclude": ["pdf-parser"]
>       }
>     },{code}
> When I ran Tika it produced the same output as previously and processed my 
> PDF file.
> *2a.* There is a documentation example that has 'exclude' rather than 
> '_exclude' 
> [vlm-pdf-parsing.json|https://github.com/apache/tika/blob/main/docs/modules/ROOT/examples/vlm-pdf-parsing.json]
> *3.* [Getting Started with Tika 
> Pipes|https://tika.apache.org/docs/4.0.0-SNAPSHOT/pipes/getting-started.html#_json_configuration]
>  JSON Configuration example doesn't work.
> When I try the example using the JSON Configuration I get the following:
> {code:java}
> INFO  [pool-2-thread-1] 08:52:11,748 
> org.apache.tika.pipes.core.server.FetchHandler Couldn't initialize fetcher 
> for fetch id=MyTestFile.pdf
> org.apache.tika.pipes.api.fetcher.FetcherNotFoundException: Can't find 
> fetcher for id=fsf. Available: []{code}
> I assume it is because there is no 'pipes-iterator' in the configuration and 
> it is picking up a default.
> In my tika-config.json I changed the ids to 'fsf' and 'fse' and got the same 
> error.
> I noticed that the structure of the 'fetchers' and 'emitters' is different in 
> this example and the one in 1. above.
> This example uses an array whose entries have an 'id' key/value pair, whereas 
> the one in 1. above uses a map with the 'id' being the key.
> I changed the structure to reflect what is in 1. above and it worked (if I 
> left the 'id' key in there I got an error saying 'id' wasn't valid).
> I noticed a lot of test files in the repository that have the format listed 
> in the Getting Started section.
> *My questions are:*
> a. what structure(s) of the 'fetchers' and 'emitters' are supported?
> b. what should the example configuration be?
> *3a.* There is a note below the command to run the config file: ??'The -i and 
> -o flags override the basePath values in the config when used with 
> tika-app.'?? 
> I'm not seeing this. The values used are from the 'basePath'. If neither the 
> '-i' directory given on the command line nor the one in the config file 
> exists, I get this message about the value in the config file: 
> Exception in thread "main" java.lang.RuntimeException: 
> java.lang.IllegalArgumentException: "basePath" directory does not exist: 
> L:\Apache-Tika\batch-inputxxx
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to