This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-3251 in repository https://gitbox.apache.org/repos/asf/tika.git
commit 94efdade049c29717829455224fa335d204b4f15 Author: tallison <[email protected]> AuthorDate: Wed Dec 16 17:29:26 2020 -0500 TIKA-3251 -- WIP -- do not merge -- first steps towards adding fetchers --- pom.xml | 1 + .../java/org/apache/tika/config/TikaConfig.java | 101 +++++++++++++++++++++ .../exception/NoFetcherAvailableException.java | 27 ++++++ .../org/apache/tika/fetcher/DefaultFetcher.java | 79 ++++++++++++++++ .../main/java/org/apache/tika/fetcher/Fetcher.java | 37 ++++++++ .../java/org/apache/tika/fetcher/FileFetcher.java | 44 +++++++++ .../java/org/apache/tika/fetcher/URLFetcher.java | 49 ++++++++++ .../services/org.apache.tika.fetcher.Fetcher | 16 ++++ tika-fetchers/pom.xml | 21 +++++ tika-fetchers/s3-fetcher/pom.xml | 34 +++++++ .../java/org/apache/tika/fetcher/s3/S3Fetcher.java | 96 ++++++++++++++++++++ .../services/org.apache.tika.fetcher.Fetcher | 15 +++ .../org/apache/tika/fetcher/s3/S3FetcherTest.java | 27 ++++++ .../org/apache/tika/fetcher/s3/S3TikaConfig.xml | 28 ++++++ 14 files changed, 575 insertions(+) diff --git a/pom.xml b/pom.xml index 87328c4..920e937 100644 --- a/pom.xml +++ b/pom.xml @@ -50,6 +50,7 @@ <module>tika-example</module> <module>tika-java7</module> <module>tika-eval</module> + <module>tika-fetchers</module> </modules> <profiles> diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java index 3eeb3e7..0a50914 100644 --- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java +++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java @@ -48,6 +48,8 @@ import org.apache.tika.detect.Detector; import org.apache.tika.detect.EncodingDetector; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.exception.TikaException; +import org.apache.tika.fetcher.DefaultFetcher; +import org.apache.tika.fetcher.Fetcher; import org.apache.tika.language.translate.DefaultTranslator; import org.apache.tika.language.translate.Translator; import org.apache.tika.metadata.filter.CompositeMetadataFilter; @@ -112,6 +114,10 @@ public class TikaConfig { return new DefaultMetadataFilter(loader); } + private static Fetcher getDefaultFetcher(ServiceLoader loader) { + return new DefaultFetcher(loader); + } + //use this to look for unneeded instantiations of TikaConfig protected static AtomicInteger TIMES_INSTANTIATED = new AtomicInteger(); @@ -124,6 +130,7 @@ public class TikaConfig { private final ExecutorService executorService; private final EncodingDetector encodingDetector; private final MetadataFilter metadataFilter; + private final Fetcher fetcher; public TikaConfig(String file) throws TikaException, IOException, SAXException { @@ -200,6 +207,7 @@ public class TikaConfig { this.translator = translatorLoader.loadOverall(element, mimeTypes, loader); this.executorService = executorLoader.loadOverall(element, mimeTypes, loader); this.metadataFilter = metadataFilterXmlLoader.loadOverall(element, mimeTypes, loader); + this.fetcher = new FetcherXmlLoader().loadOverall(element, mimeTypes, loader); this.serviceLoader = loader; TIMES_INSTANTIATED.incrementAndGet(); } @@ -226,6 +234,7 @@ public class TikaConfig { this.translator = getDefaultTranslator(serviceLoader); this.executorService = getDefaultExecutorService(); this.metadataFilter = getDefaultMetadataFilter(serviceLoader); + this.fetcher = getDefaultFetcher(serviceLoader); TIMES_INSTANTIATED.incrementAndGet(); } @@ -262,6 +271,7 @@ public class TikaConfig { this.translator = getDefaultTranslator(serviceLoader); this.executorService = getDefaultExecutorService(); this.metadataFilter = getDefaultMetadataFilter(serviceLoader); + this.fetcher = getDefaultFetcher(serviceLoader); } else { ServiceLoader tmpServiceLoader = new ServiceLoader(); try (InputStream stream = getConfigInputStream(config, tmpServiceLoader)) { @@ -284,6 +294,7 @@ public class TikaConfig { this.translator = translatorLoader.loadOverall(element, mimeTypes, serviceLoader); this.executorService = executorLoader.loadOverall(element, mimeTypes, serviceLoader); this.metadataFilter = metadataFilterXmlLoader.loadOverall(element, mimeTypes, serviceLoader); + this.fetcher = new FetcherXmlLoader().loadOverall(element, mimeTypes, serviceLoader); } catch (SAXException e) { throw new TikaException( "Specified Tika configuration has syntax errors: " @@ -560,6 +571,10 @@ public class TikaConfig { initializableProblemHandler)); } + public Fetcher getFetcher() { + return fetcher; + } + private static abstract class XmlLoader<CT,T> { protected static final String PARAMS_TAG_NAME = "params"; @@ -1262,4 +1277,90 @@ public class TikaConfig { } } + private static class FetcherXmlLoader extends + XmlLoader<Fetcher, Fetcher> { + + boolean supportsComposite() { + return true; + } + + String getParentTagName() { + return "fetchers"; + } + + String getLoaderTagName() { + return "fetcher"; + } + + @Override + Class<? extends Fetcher> getLoaderClass() { + return Fetcher.class; + } + + + @Override + boolean isComposite(Fetcher loaded) { + return loaded instanceof DefaultFetcher; + } + + @Override + boolean isComposite(Class<? extends Fetcher> loadedClass) { + return DefaultFetcher.class.isAssignableFrom(loadedClass); + } + + @Override + Fetcher preLoadOne(Class<? extends Fetcher> loadedClass, + String classname, MimeTypes mimeTypes) throws TikaException { + // Check for classes which can't be set in config + // Continue with normal loading + return null; + } + + @Override + Fetcher createDefault(MimeTypes mimeTypes, ServiceLoader loader) { + return getDefaultFetcher(loader); + } + + //this ignores the service loader + @Override + Fetcher createComposite(List<Fetcher> loaded, MimeTypes mimeTypes, ServiceLoader loader) { + return new DefaultFetcher(loaded); + } + + @Override + Fetcher createComposite(Class<? extends Fetcher> fetcherClass, + List<Fetcher> childFetchers, + Set<Class<? extends Fetcher>> excludeFilters, + Map<String, Param> params, MimeTypes mimeTypes, ServiceLoader loader) + throws InvocationTargetException, IllegalAccessException, + InstantiationException { + Fetcher fetcher = null; + Constructor<? extends Fetcher> c; + + // Try the possible default and composite detector constructors + if (fetcher == null) { + try { + c = fetcherClass.getConstructor(ServiceLoader.class, Collection.class); + fetcher = c.newInstance(loader, excludeFilters); + } catch (NoSuchMethodException me) { + me.printStackTrace(); + } + } + if (fetcher == null) { + try { + c = fetcherClass.getConstructor(List.class); + fetcher = c.newInstance(childFetchers); + } catch (NoSuchMethodException me) { + me.printStackTrace(); + } + } + + return fetcher; + } + + @Override + Fetcher decorate(Fetcher created, Element element) { + return created; // No decoration of Fetchers + } + } } diff --git a/tika-core/src/main/java/org/apache/tika/exception/NoFetcherAvailableException.java b/tika-core/src/main/java/org/apache/tika/exception/NoFetcherAvailableException.java new file mode 100644 index 0000000..7f02ff9 --- /dev/null +++ b/tika-core/src/main/java/org/apache/tika/exception/NoFetcherAvailableException.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.exception; + +public class NoFetcherAvailableException extends TikaException { + public NoFetcherAvailableException(String msg) { + super(msg); + } + + public NoFetcherAvailableException(String msg, Throwable cause) { + super(msg, cause); + } +} diff --git a/tika-core/src/main/java/org/apache/tika/fetcher/DefaultFetcher.java b/tika-core/src/main/java/org/apache/tika/fetcher/DefaultFetcher.java new file mode 100644 index 0000000..868edab --- /dev/null +++ b/tika-core/src/main/java/org/apache/tika/fetcher/DefaultFetcher.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.fetcher; + +import org.apache.tika.config.ServiceLoader; +import org.apache.tika.exception.NoFetcherAvailableException; +import org.apache.tika.exception.TikaException; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.utils.ServiceLoaderUtils; + +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import java.util.Optional; + +public class DefaultFetcher implements Fetcher { + + private final List<Fetcher> fetchers; + + public DefaultFetcher() { + this(new ServiceLoader()); + } + + public DefaultFetcher(ServiceLoader serviceLoader) { + fetchers = serviceLoader.loadServiceProviders(Fetcher.class); + ServiceLoaderUtils.sortLoadedClasses(fetchers); + } + + public DefaultFetcher(List<Fetcher> fetchers) { + this.fetchers = fetchers; + } + + @Override + public boolean canFetch(String url) { + for (Fetcher fetcher : fetchers) { + if (fetcher.canFetch(url)) { + return true; + } + } + return false; + } + + @Override + public Optional<InputStream> fetch(String url, Metadata metadata) throws TikaException, IOException { + for (Fetcher fetcher : fetchers) { + if (fetcher.canFetch(url)) { + return fetcher.fetch(url, metadata); + } + } + StringBuilder sb = new StringBuilder(); + int i = 0; + for (Fetcher fetcher : fetchers) { + if (i++ > 0) { + sb.append(", "); + } + sb.append(fetcher.getClass()); + } + throw new NoFetcherAvailableException("No suitable fetcher found for: " + + url + " in " + sb.toString()); + } + + public List<Fetcher> getFetchers() { + return fetchers; + } +} diff --git a/tika-core/src/main/java/org/apache/tika/fetcher/Fetcher.java b/tika-core/src/main/java/org/apache/tika/fetcher/Fetcher.java new file mode 100644 index 0000000..688e687 --- /dev/null +++ b/tika-core/src/main/java/org/apache/tika/fetcher/Fetcher.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.fetcher; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.metadata.Metadata; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Optional; + +/** + * Based on a key, this will fetch a resource and update the + * metadata. There are some use cases, where the goal is simply + * to update the metadata, in which cases, the InputStream is + * not present. + */ +public interface Fetcher { + + boolean canFetch(String url); + + Optional<InputStream> fetch(String url, Metadata metadata) throws TikaException, IOException; +} diff --git a/tika-core/src/main/java/org/apache/tika/fetcher/FileFetcher.java b/tika-core/src/main/java/org/apache/tika/fetcher/FileFetcher.java new file mode 100644 index 0000000..6b8ad90 --- /dev/null +++ b/tika-core/src/main/java/org/apache/tika/fetcher/FileFetcher.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.fetcher; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Paths; +import java.util.Optional; + +public class FileFetcher implements Fetcher { + + private static final String PREFIX = "file:"; + + @Override + public boolean canFetch(String url) { + if (url.startsWith(PREFIX)) { + return true; + } + return false; + } + + @Override + public Optional<InputStream> fetch(String url, Metadata metadata) throws TikaException, IOException { + return Optional.of(TikaInputStream.get(Paths.get(url))); + } +} diff --git a/tika-core/src/main/java/org/apache/tika/fetcher/URLFetcher.java b/tika-core/src/main/java/org/apache/tika/fetcher/URLFetcher.java new file mode 100644 index 0000000..4a09e26 --- /dev/null +++ b/tika-core/src/main/java/org/apache/tika/fetcher/URLFetcher.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.fetcher; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.util.Optional; + +public class URLFetcher implements Fetcher { + + private static final String HTTP_PREFIX = "http:"; + private static final String HTTPS_PREFIX = "https:"; + private static final String FTP_PREFIX = "ftp:"; + + @Override + public boolean canFetch(String url) { + if (url.startsWith(HTTP_PREFIX) || + url.startsWith(HTTPS_PREFIX) || + url.startsWith(FTP_PREFIX)) { + return true; + } + return false; + } + + @Override + public Optional<InputStream> fetch(String url, Metadata metadata) + throws TikaException, IOException { + return Optional.of(TikaInputStream.get(new URL(url))); + } +} diff --git a/tika-core/src/main/resources/META-INF/services/org.apache.tika.fetcher.Fetcher b/tika-core/src/main/resources/META-INF/services/org.apache.tika.fetcher.Fetcher new file mode 100644 index 0000000..decdd2f --- /dev/null +++ b/tika-core/src/main/resources/META-INF/services/org.apache.tika.fetcher.Fetcher @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +org.apache.tika.fetcher.FileFetcher +org.apache.tika.fetcher.URLFetcher \ No newline at end of file diff --git a/tika-fetchers/pom.xml b/tika-fetchers/pom.xml new file mode 100644 index 0000000..e7954d8 --- /dev/null +++ b/tika-fetchers/pom.xml @@ -0,0 +1,21 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>tika-parent</artifactId> + <groupId>org.apache.tika</groupId> + <version>2.0.0-SNAPSHOT</version> + <relativePath>../pom.xml</relativePath> + </parent> + <modelVersion>4.0.0</modelVersion> + + <artifactId>tika-fetchers</artifactId> + <packaging>pom</packaging> + + <modules> + <module>s3-fetcher</module> + </modules> + + +</project> \ No newline at end of file diff --git a/tika-fetchers/s3-fetcher/pom.xml b/tika-fetchers/s3-fetcher/pom.xml new file mode 100644 index 0000000..a8e4b6d --- /dev/null +++ b/tika-fetchers/s3-fetcher/pom.xml @@ -0,0 +1,34 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>tika-fetchers</artifactId> + <groupId>org.apache.tika</groupId> + <version>2.0.0-SNAPSHOT</version> + </parent> + <modelVersion>4.0.0</modelVersion> + + <artifactId>s3-fetcher</artifactId> + + <dependencies> + <dependency> + <groupId>org.apache.tika</groupId> + <artifactId>tika-core</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>com.amazonaws</groupId> + <artifactId>aws-java-sdk-s3</artifactId> + <version>1.11.920</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <scope>test</scope> + </dependency> + </dependencies> + +</project> \ No newline at end of file diff --git a/tika-fetchers/s3-fetcher/src/main/java/org/apache/tika/fetcher/s3/S3Fetcher.java b/tika-fetchers/s3-fetcher/src/main/java/org/apache/tika/fetcher/s3/S3Fetcher.java new file mode 100644 index 0000000..0ea5452 --- /dev/null +++ b/tika-fetchers/s3-fetcher/src/main/java/org/apache/tika/fetcher/s3/S3Fetcher.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.fetcher.s3; + +import com.amazonaws.auth.profile.ProfileCredentialsProvider; +import com.amazonaws.regions.Regions; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.amazonaws.services.s3.model.GetObjectRequest; +import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.S3Object; +import org.apache.tika.config.Field; +import org.apache.tika.exception.TikaException; +import org.apache.tika.fetcher.Fetcher; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Map; +import java.util.Optional; + +public class S3Fetcher implements Fetcher { + + private static final String PREFIX = "s3:"; + + @Field + private String bucket; + + @Field + private String key; + + @Field + private String region; + + @Override + public boolean canFetch(String url) { + return url.startsWith(PREFIX); + } + + @Override + public Optional<InputStream> fetch(String url, Metadata metadata) throws TikaException, IOException { + //TODO cache this client so we're not starting a new one with every request + S3Object fullObject = null; + try { + AmazonS3 s3Client = AmazonS3ClientBuilder.standard() + .withRegion(getRegion()) + .withCredentials(new ProfileCredentialsProvider()) + .build(); + fullObject = s3Client.getObject(new GetObjectRequest(bucket, key)); + updateMetadata(fullObject.getObjectMetadata(), metadata); + return Optional.of(TikaInputStream.get(fullObject.getObjectContent())); + } finally { + if (fullObject != null) { + fullObject.close(); + } + } + } + + private void updateMetadata(ObjectMetadata objectMetadata, Metadata metadata) { + //TODO: what else do we want to grab? + for (Map.Entry<String, String> e : objectMetadata.getUserMetadata().entrySet()) { + metadata.add(PREFIX+e.getKey(), e.getValue()); + } + } + + public Regions getRegion() { + if (region == null) { + return Regions.DEFAULT_REGION; + } else { + return Regions.fromName(region); + } + } + + public String getBucket() { + return bucket; + } + + public String getKey() { + return key; + } +} diff --git a/tika-fetchers/s3-fetcher/src/main/resources/META-INF/services/org.apache.tika.fetcher.Fetcher b/tika-fetchers/s3-fetcher/src/main/resources/META-INF/services/org.apache.tika.fetcher.Fetcher new file mode 100644 index 0000000..a41f741 --- /dev/null +++ b/tika-fetchers/s3-fetcher/src/main/resources/META-INF/services/org.apache.tika.fetcher.Fetcher @@ -0,0 +1,15 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +org.apache.tika.fetcher.s3.S3Fetcher \ No newline at end of file diff --git a/tika-fetchers/s3-fetcher/src/test/java/org/apache/tika/fetcher/s3/S3FetcherTest.java b/tika-fetchers/s3-fetcher/src/test/java/org/apache/tika/fetcher/s3/S3FetcherTest.java new file mode 100644 index 0000000..e964739 --- /dev/null +++ b/tika-fetchers/s3-fetcher/src/test/java/org/apache/tika/fetcher/s3/S3FetcherTest.java @@ -0,0 +1,27 @@ +package org.apache.tika.fetcher.s3; + +import com.amazonaws.regions.Regions; +import org.apache.tika.config.TikaConfig; +import org.apache.tika.fetcher.DefaultFetcher; +import org.apache.tika.fetcher.Fetcher; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class S3FetcherTest { + + @Test + public void testBasic() throws Exception { + TikaConfig config = new TikaConfig( + S3FetcherTest.class.getResourceAsStream("/org/apache/tika/fetcher/s3/S3TikaConfig.xml")); + Fetcher defaultFetcher = config.getFetcher(); + for (Fetcher fetcher : ((DefaultFetcher)defaultFetcher).getFetchers()) { + if (fetcher instanceof S3Fetcher) { + S3Fetcher s3Fetcher = (S3Fetcher) fetcher; + assertEquals(Regions.US_WEST_2, s3Fetcher.getRegion()); + assertEquals("myKey", s3Fetcher.getKey()); + assertEquals("myBucket", s3Fetcher.getBucket()); + } + } + } +} diff --git a/tika-fetchers/s3-fetcher/src/test/resources/org/apache/tika/fetcher/s3/S3TikaConfig.xml b/tika-fetchers/s3-fetcher/src/test/resources/org/apache/tika/fetcher/s3/S3TikaConfig.xml new file mode 100644 index 0000000..ffbb022 --- /dev/null +++ b/tika-fetchers/s3-fetcher/src/test/resources/org/apache/tika/fetcher/s3/S3TikaConfig.xml @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<properties> + <fetchers class="org.apache.tika.fetcher.DefaultFetcher"> + <fetcher class="org.apache.tika.fetcher.s3.S3Fetcher"> + <params> + <param name="region" type="string">us-west-2</param> + <param name="bucket" type="string">myBucket</param> + <param name="key" type="string">myKey</param> + </params> + </fetcher> + </fetchers> +</properties> \ No newline at end of file
