[
https://issues.apache.org/jira/browse/TIKA-3226?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17272443#comment-17272443
]
Hudson commented on TIKA-3226:
------------------------------
SUCCESS: Integrated in Jenkins build Tika » tika-main-jdk8 #139 (See
[https://ci-builds.apache.org/job/Tika/job/tika-main-jdk8/139/])
TIKA-3226 (#399) (github:
[https://github.com/apache/tika/commit/4ec97b87e14593e7120f73dc1d614eb5b6b7ec78])
* (add)
tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/IntegrationTestBase.java
* (add)
tika-pipes/tika-httpclient-commons/src/main/java/org/apache/tika/client/HttpClientUtil.java
* (add)
tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaEmitterResult.java
* (delete)
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/URLEnabledInputStreamFactory.java
* (edit) tika-server/tika-server-core/pom.xml
* (add)
tika-core/src/test/resources/org/apache/tika/config/fetchers-nobasepath-config.xml
* (add)
tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaEmitterTest.java
* (add) tika-pipes/tika-httpclient-commons/pom.xml
* (add)
tika-pipes/tika-httpclient-commons/src/main/java/org/apache/tika/client/TikaClientException.java
* (add)
tika-pipes/tika-fetchers/tika-fetcher-s3/src/test/java/org/apache/tika/pipes/fetcher/s3/TestS3Fetcher.java
* (add) tika-core/src/main/java/org/apache/tika/pipes/emitter/Emitter.java
* (add) tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/pom.xml
* (edit)
tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
* (edit)
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
* (add)
tika-core/src/test/resources/org/apache/tika/config/fetch-iterator-config.xml
* (add) tika-server/tika-server-client/src/test/resources/log4j.properties
* (edit)
tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java
* (add) tika-core/src/test/resources/org/apache/tika/config/fetchers-config.xml
* (add) tika-core/src/main/java/org/apache/tika/pipes/fetcher/EmptyFetcher.java
* (add)
tika-pipes/tika-fetch-iterators/tika-fetch-iterator-csv/src/test/java/TestCSVFetchIterator.java
* (edit)
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/InputStreamFactory.java
* (add)
tika-core/src/main/java/org/apache/tika/pipes/fetcher/SimpleUrlFetcher.java
* (add)
tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/PipeIntegrationTests.java
* (add) tika-pipes/tika-fetch-iterators/tika-fetch-iterator-jdbc/pom.xml
* (add) tika-pipes/tika-fetch-iterators/pom.xml
* (add) tika-pipes/tika-fetch-iterators/tika-fetch-iterator-csv/pom.xml
* (add)
tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/FetcherTest.java
* (add) tika-pipes/pom.xml
* (edit)
tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/StackTraceTest.java
* (add)
tika-core/src/test/resources/org/apache/tika/config/emitters-duplicate-config.xml
* (add)
tika-pipes/tika-emitters/tika-emitter-solr/src/test/java/org/apache/tika/emitter/solr/TestBasic.java
* (add)
tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/log4j.properties
* (add)
tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/src/test/resources/log4j.properties
* (edit)
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/DefaultInputStreamFactory.java
* (add)
tika-core/src/main/java/org/apache/tika/pipes/emitter/EmitterManager.java
* (add)
tika-core/src/main/java/org/apache/tika/pipes/fetcher/FileSystemFetcher.java
* (add)
tika-core/src/main/java/org/apache/tika/pipes/fetcher/FetcherStringException.java
* (add)
tika-pipes/tika-fetch-iterators/tika-fetch-iterator-jdbc/src/test/resources/log4j.properties
* (edit)
tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/StackTraceOffTest.java
* (add)
tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerEmitterIntegrationTest.java
* (add)
tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/src/test/java/org/apache/tika/pipes/fetchiterator/s3/TestS3FetchIterator.java
* (add)
tika-core/src/test/resources/org/apache/tika/config/fetchers-noname-config.xml
* (edit) tika-server/pom.xml
* (add) tika-core/src/main/java/org/apache/tika/pipes/fetcher/FetchId.java
* (add) tika-pipes/tika-emitters/pom.xml
* (add)
tika-core/src/main/java/org/apache/tika/pipes/emitter/AbstractEmitter.java
* (add) tika-server/tika-server-client/src/main/resources/log4j.properties
* (edit)
tika-server/tika-server-core/src/test/resources/test-documents/mock/hello_world.xml
* (add)
tika-server/tika-server-client/src/test/java/org/apache/tika/server/client/TestBasic.java
* (add)
tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/emitter/solr/SolrEmitter.java
* (add)
tika-pipes/tika-pipes-integration-tests/src/test/resources/tika-config-s3ToFs.xml
* (add)
tika-pipes/tika-fetchers/tika-fetcher-s3/src/main/java/org/apache/tika/pipes/fetcher/s3/S3Fetcher.java
* (add)
tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/tika-config-simple-emitter.xml
* (edit) tika-server/tika-server-client/pom.xml
* (add)
tika-pipes/tika-fetch-iterators/tika-fetch-iterator-csv/src/main/java/org/apache/tika/pipes/fetchiterator/csv/CSVFetchIterator.java
* (edit)
tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
* (add)
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java
* (edit) tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java
* (add) tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml
* (add)
tika-core/src/main/java/org/apache/tika/pipes/emitter/TikaEmitterException.java
* (add) tika-pipes/tika-emitters/tika-emitter-fs/pom.xml
* (edit)
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
* (add)
tika-core/src/test/resources/org/apache/tika/config/fetch-iterator-multiple-config.xml
* (add)
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
* (add) tika-core/src/main/java/org/apache/tika/pipes/fetcher/Fetcher.java
* (add) tika-pipes/tika-pipes-integration-tests/pom.xml
* (add)
tika-pipes/tika-pipes-integration-tests/src/test/resources/log4j.properties
* (add)
tika-core/src/test/resources/org/apache/tika/config/fetchers-duplicate-config.xml
* (add)
tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientConfigException.java
* (add)
tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClient.java
* (add) tika-core/src/test/resources/org/apache/tika/config/emitters-config.xml
* (add)
tika-server/tika-server-client/src/test/resources/tika-config-simple-fs-emitter.xml
* (add)
tika-core/src/main/java/org/apache/tika/pipes/fetcher/AbstractFetcher.java
* (add)
tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaHttpClient.java
* (edit)
tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java
* (add) tika-pipes/tika-fetchers/pom.xml
* (add)
tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientCLI.java
* (add)
tika-core/src/main/java/org/apache/tika/pipes/fetchiterator/FetchIterator.java
* (edit) tika-core/src/main/java/org/apache/tika/config/Param.java
* (add)
tika-server/tika-server-classic/src/test/resources/config/tika-config-url-fetcher.xml
* (edit) pom.xml
* (add)
tika-pipes/tika-fetch-iterators/tika-fetch-iterator-jdbc/src/test/java/org/apache/tika/pipes/fetchiterator/jdbc/TestJDBCFetchIterator.java
* (edit) tika-parent/pom.xml
* (add)
tika-pipes/tika-fetch-iterators/tika-fetch-iterator-csv/src/test/resources/test-simple.csv
* (add) tika-core/src/main/java/org/apache/tika/pipes/emitter/EmptyEmitter.java
* (add)
tika-core/src/main/java/org/apache/tika/pipes/fetcher/FetcherManager.java
* (add) tika-pipes/tika-emitters/tika-emitter-solr/pom.xml
* (add)
tika-core/src/main/java/org/apache/tika/pipes/fetchiterator/FileSystemFetchIterator.java
* (add)
tika-core/src/main/java/org/apache/tika/pipes/fetcher/FetchIdMetadataPair.java
* (add)
tika-core/src/test/java/org/apache/tika/pipes/fetchiterator/FileSystemFetchIteratorTest.java
* (add)
tika-core/src/test/java/org/apache/tika/pipes/fetcher/FileSystemFetcherTest.java
* (edit) tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
* (add)
tika-pipes/tika-fetchers/tika-fetcher-s3/src/test/resources/tika-config-s3.xml
* (add)
tika-pipes/tika-fetch-iterators/tika-fetch-iterator-jdbc/src/main/java/org/apache/tika/pipes/fetchiterator/jdbc/JDBCFetchIterator.java
* (edit) tika-server/tika-server-classic/pom.xml
* (add)
tika-pipes/tika-fetch-iterators/tika-fetch-iterator-s3/src/main/java/org/apache/tika/pipes/fetchiterator/s3/S3FetchIterator.java
* (edit)
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerCli.java
* (add)
tika-core/src/main/java/org/apache/tika/pipes/fetchiterator/EmptyFetchIterator.java
* (add) tika-core/src/test/java/org/apache/tika/pipes/emitter/MockEmitter.java
* (add)
tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/emitter/fs/FileSystemEmitter.java
* (add)
tika-core/src/main/java/org/apache/tika/metadata/filter/FieldNameMappingFilter.java
> Add custom connector endpoint
> -----------------------------
>
> Key: TIKA-3226
> URL: https://issues.apache.org/jira/browse/TIKA-3226
> Project: Tika
> Issue Type: New Feature
> Components: server
> Reporter: Nicholas DiPiazza
> Assignee: Tim Allison
> Priority: Major
> Fix For: 2.0.0
>
>
> Let's say you call the following api to parse a file and get its metadata and
> body content:
> {code}
> /rmeta/text
> {code}
> In order to do this, the caller needs to send the file to the tika server,
> then get the metadata and body sent to the caller. When you are working in
> microservices, this causes a lot of inter-service network communication.
> You can cut down on a majority of this overhead by using the local file
> system optimization, so that you send a file path instead of the entire file.
> But this obviously only works when you are on the same machine.
> Ideally - we would have a way to deploy "connector plugins" into tika, and be
> able to send files to be parsed with these plugins (asynchronously?).
> {code}
> /connector/{fetcherId}/{emitterId}
> {code}
> The Fetcher interface:
> init(Map initParams)
> - initializes the fetcher (for example, initialize an http connection pool,
> etc)
> void fetch(Map parseParams, Metadata metadata, OutputStream bodyOutputStream)
> - fetches the document indicated by parseParams and does whatever it is you
> want with it (for example, download a file from a web data source, then index
> the document into Solr). Sends the body to bodyOutputStream and populates the
> metadata object with the metadata.
> The Emitter interface would be
> init(Map initParams)
> - initializes the emitter. (for example, initialize a buffer to store
> output documents to solr, connect to solr, etc)
> void emit(Map parseParams, Fetcher fetcher)
> - fetches and parses the "document" using the passed in fetcher, then emits
> it meaningfully.
> We could provide the most common fetchers and emitters such as:
> HttpFetcher
> S3Fetcher
> SolrEmitter
> ...
--
This message was sent by Atlassian Jira
(v8.3.4#803005)