This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 93d2211037b01ca237a51f83879ae35f3f76dca8
Author: tallison <[email protected]>
AuthorDate: Tue May 18 05:42:18 2021 -0400

    TIKA-3384 -- convert transcribe to a traditional parser
---
 pom.xml                                            |   1 -
 .../org/apache/tika/transcribe/Transcriber.java    |  60 ---
 tika-example/pom.xml                               |   8 +-
 .../tika/example/TranscribeTranslateExample.java   |  71 +--
 tika-parsers/tika-parsers-ml/pom.xml               |   1 +
 tika-transcribe/pom.xml                            | 159 -------
 .../apache/tika/transcribe/AmazonTranscribe.java   | 406 ----------------
 .../org.apache.tika.language.translate.Translator  |  16 -
 .../transcribe.amazon.properties                   |  18 -
 .../tika/transcribe/AmazonTranscribeTest.java      | 527 ---------------------
 .../src/test/resources/ShortAudioSampleFrench.mp3  | Bin 25861 -> 0 bytes
 .../test/resources/de-DE_(We_Are_At_School_x2).mp3 | Bin 38547 -> 0 bytes
 .../resources/en-AU_(A_Little_Bottle_Of_Water).mp3 | Bin 33365 -> 0 bytes
 .../resources/en-GB_(A_Little_Bottle_Of_Water).mp3 | Bin 35872 -> 0 bytes
 .../resources/en-US_(A_Little_Bottle_Of_Water).mp3 | Bin 29603 -> 0 bytes
 tika-transcribe/src/test/resources/en-US_(Hi).mp4  | Bin 21739 -> 0 bytes
 .../resources/it-IT_(We_Are_Having_Class_x2).mp3   | Bin 42219 -> 0 bytes
 .../test/resources/ja-JP_(We_Are_At_School).mp3    | Bin 21699 -> 0 bytes
 .../src/test/resources/ko-KR_(Annyeonghaseyo).mp4  | Bin 144151 -> 0 bytes
 .../resources/ko-KR_(We_Are_Having_Class_x2).mp3   | Bin 66843 -> 0 bytes
 .../test/resources/pt-BR_(We_Are_At_School).mp3    | Bin 29043 -> 0 bytes
 21 files changed, 47 insertions(+), 1220 deletions(-)

diff --git a/pom.xml b/pom.xml
index f8c6591..d0e43d4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -52,7 +52,6 @@
     <module>tika-translate</module>
     <module>tika-example</module>
     <module>tika-java7</module>
-    <module>tika-transcribe</module>
   </modules>
 
   <profiles>
diff --git 
a/tika-core/src/main/java/org/apache/tika/transcribe/Transcriber.java 
b/tika-core/src/main/java/org/apache/tika/transcribe/Transcriber.java
deleted file mode 100644
index 3546256..0000000
--- a/tika-core/src/main/java/org/apache/tika/transcribe/Transcriber.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.transcribe;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.tika.exception.TikaException;
-
-/**
- * Interface for Transcriber services.
- *
- * @see <a href="https://issues.apache.org/jira/browse/TIKA-94">TIKA-94</a>
- * @since Tika 2.1
- */
-public interface Transcriber {
-    /**
-     * Transcribe the given file.
-     *
-     * @param inputStream the source input stream.
-     * @return The transcribed string result, NULL if the job failed.
-     * @throws TikaException When there is an error transcribing.
-     * @throws IOException   If an I/O exception of some sort has occurred.
-     * @since 2.1
-     */
-    public String transcribe(InputStream inputStream) throws TikaException, 
IOException;
-
-    /**
-     * Transcribe the given the file and the source language.
-     *
-     * @param inputStream    the source input stream.
-     * @param sourceLanguage The language code for the language used in the 
input media file.
-     * @return The transcribed string result, NULL if the job failed.
-     * @throws TikaException When there is an error transcribing.
-     * @throws IOException   If an I/O exception of some sort has occurred.
-     * @since 2.1
-     */
-    public String transcribe(InputStream inputStream, String sourceLanguage) 
throws TikaException, IOException;
-
-    /**
-     * @return true if this Transcriber is probably able to transcribe right 
now.
-     * @since Tika 2.1
-     */
-    public boolean isAvailable();
-}
diff --git a/tika-example/pom.xml b/tika-example/pom.xml
index f12304e..ce6a2b3 100644
--- a/tika-example/pom.xml
+++ b/tika-example/pom.xml
@@ -64,13 +64,13 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
-      <groupId>org.apache.tika</groupId>
+      <groupId>${project.groupId}</groupId>
       <artifactId>tika-eval-core</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
-      <groupId>org.apache.tika</groupId>
-      <artifactId>tika-transcribe</artifactId>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-transcribe-aws</artifactId>
       <version>${project.version}</version>
       <exclusions>
         <exclusion>
@@ -88,7 +88,7 @@
       </exclusions>
     </dependency>
     <dependency>
-      <groupId>org.apache.tika</groupId>
+      <groupId>${project.groupId}</groupId>
       <artifactId>tika-core</artifactId>
       <version>${project.version}</version>
       <type>test-jar</type>
diff --git 
a/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java
 
b/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java
index 12dd7e5..f77af72 100644
--- 
a/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java
+++ 
b/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java
@@ -17,12 +17,14 @@
 
 package org.apache.tika.example;
 
-import java.io.FileInputStream;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 
+import org.apache.tika.Tika;
+import org.apache.tika.config.TikaConfig;
 import org.apache.tika.language.translate.GoogleTranslator;
 import org.apache.tika.language.translate.Translator;
-import org.apache.tika.transcribe.AmazonTranscribe;
-import org.apache.tika.transcribe.Transcriber;
+import org.apache.tika.parser.transcribe.aws.AmazonTranscribe;
 
 /**
  * This example demonstrates primitive logic for
@@ -30,8 +32,8 @@ import org.apache.tika.transcribe.Transcriber;
  * could be considered as a downstream process to 
  * transcription.
  * We simply pass the output of
- * a call to {@link Transcriber#transcribe(java.io.InputStream)}
- * into {@link Translator#translate(String, String)}. 
+ * a call to {@link Tika#parseToString(Path)}
+ * into {@link Translator#translate(String, String)}.
  * The {@link GoogleTranslator} is configured with a target 
  * language of "en-US".
  * @author lewismc
@@ -62,42 +64,53 @@ public class TranscribeTranslateExample {
 
     /**
      * Use {@link AmazonTranscribe} to execute transcription on input data.
-     * This implementation needs configured as explained in the Javadoc.
+     * This implementation needs to be configured as explained in the Javadoc.
      * @param file the name of the file (which needs to be on the Java 
Classpath) to transcribe.
      * @return transcribed text.
      */
-    public static String amazonTranscribe(String file) {
-        String filePath = 
TranscribeTranslateExample.class.getClassLoader().getResource(file).getPath();
-        String result = null;
-        Transcriber transcriber = new AmazonTranscribe();
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(filePath));
-            } catch (Exception e) {
-                e.printStackTrace();
-            }
-        }
-        return result;
+    public static String amazonTranscribe(Path tikaConfig, Path file) throws 
Exception {
+        return new Tika(new TikaConfig(tikaConfig)).parseToString(file);
     }
 
     /**
      * Main method to run this example. This program can be invoked as follows
      * <ol>
-     * <li><code>transcribe-translate ${file}</code>; which executes both 
+     * <li><code>transcribe-translate ${tika-config.xml} ${file}</code>; which 
executes both
      * transcription then translation on the given resource, or 
-     * <li><code>transcribe ${file}</code>; which executes only 
translation</li>
+     * <li><code>transcribe ${tika-config.xml} ${file}</code>; which executes 
only transcription</li>
      * @param args either of the commands described above and the input file 
-     * (which needs to be on the Java Classpath). 
+     * (which needs to be on the Java Classpath).
+     *
+     *
+     *
+     * ${tika-config.xml} must include credentials for aws and a temporary 
storage bucket:
+     * <pre>
+     * {@code
+     *  <properties>
+     *   <parsers>
+     *     <parser class="org.apache.tika.parser.DefaultParser"/>
+     *     <parser 
class="org.apache.tika.parser.transcribe.aws.AmazonTranscribe">
+     *       <params>
+     *         <param name="bucket" type="string">bucket</param>
+     *         <param name="clientId" type="string">clientId</param>
+     *         <param name="clientSecret" type="string">clientSecret</param>
+     *       </params>
+     *     </parser>
+     *   </parsers>
+     * </properties>
+     * }
+     * </pre>
      */
-    public static void main (String[] args) {
+    public static void main (String[] args) throws Exception {
         String text = null;
-        if (args.length != 0) {
-            if ("transcribe-translate".equals(args[0])) {
-                text = googleTranslateToEnglish(amazonTranscribe(args[1]));
-                System.out.print("Transcription and translation 
successful!\nEXTRAXCTED TEXT: " + text);
-            } else if ("transcribe".equals(args[0])) {
-                text = amazonTranscribe(args[1]);
-                System.out.print("Transcription successful!\nEXTRAXCTED TEXT: 
" + text);
+        if (args.length > 1) {
+            if ("transcribe-translate".equals(args[1])) {
+                text = 
googleTranslateToEnglish(amazonTranscribe(Paths.get(args[0]),
+                        Paths.get(args[1])));
+                System.out.print("Transcription and translation 
successful!\nEXTRACTED TEXT: " + text);
+            } else if ("transcribe".equals(args[1])) {
+                text = amazonTranscribe(Paths.get(args[0]), 
Paths.get(args[1]));
+                System.out.print("Transcription successful!\nEXTRACTED TEXT: " 
+ text);
             } else {
                 System.out.print("Incorrect invocation, see Javadoc.");
             }
diff --git a/tika-parsers/tika-parsers-ml/pom.xml 
b/tika-parsers/tika-parsers-ml/pom.xml
index ba9bd38..2dcde9e 100644
--- a/tika-parsers/tika-parsers-ml/pom.xml
+++ b/tika-parsers/tika-parsers-ml/pom.xml
@@ -40,6 +40,7 @@
     <module>tika-age-recogniser</module>
     <module>tika-parser-advancedmedia-module</module>
     <module>tika-dl</module>
+    <module>tika-transcribe-aws</module>
   </modules>
 
   <build>
diff --git a/tika-transcribe/pom.xml b/tika-transcribe/pom.xml
deleted file mode 100644
index aadb137..0000000
--- a/tika-transcribe/pom.xml
+++ /dev/null
@@ -1,159 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd">
-    <modelVersion>4.0.0</modelVersion>
-
-    <parent>
-        <groupId>org.apache.tika</groupId>
-        <artifactId>tika-parent</artifactId>
-        <version>2.0.0-SNAPSHOT</version>
-        <relativePath>../tika-parent/pom.xml</relativePath>
-    </parent>
-
-    <artifactId>tika-transcribe</artifactId>
-    <packaging>bundle</packaging>
-    <name>Apache Tika transcribe</name>
-    <url>http://tika.apache.org/</url>
-    <!--TODO use latest aws version or the one defined in the tika-parent-->
-    <dependencies>
-        <dependency>
-            <groupId>org.apache.tika</groupId>
-            <artifactId>tika-core</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>com.amazonaws</groupId>
-            <artifactId>aws-java-sdk-transcribe</artifactId>
-            <version>${aws.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>commons-logging</groupId>
-                    <artifactId>commons-logging</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>commons-codec</groupId>
-                    <artifactId>commons-codec</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-        <dependency>
-            <groupId>com.amazonaws</groupId>
-            <artifactId>aws-java-sdk-s3</artifactId>
-            <version>${aws.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>com.googlecode.json-simple</groupId>
-            <artifactId>json-simple</artifactId>
-            <version>${json.simple.version}</version>
-        </dependency>
-        <!-- Test dependencies -->
-        <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
-        </dependency>
-    </dependencies>
-    <build>
-        <plugins>
-            <plugin>
-                <groupId>org.apache.felix</groupId>
-                <artifactId>maven-bundle-plugin</artifactId>
-                <version>${maven.bundle.version}</version>
-                <extensions>true</extensions>
-                <configuration>
-                    <instructions>
-                        <Bundle-DocURL>${project.url}</Bundle-DocURL>
-                        <Bundle-Activator>
-                            org.apache.tika.parser.internal.Activator
-                        </Bundle-Activator>
-                        <Import-Package>
-                            org.w3c.dom,
-                            org.apache.tika.*,
-                            *;resolution:=optional
-                        </Import-Package>
-                    </instructions>
-                </configuration>
-            </plugin>
-            <plugin>
-                <groupId>org.apache.rat</groupId>
-                <artifactId>apache-rat-plugin</artifactId>
-                <version>${rat.version}</version>
-                <configuration>
-                    <excludes>
-                        
<exclude>src/main/java/org/apache/tika/parser/txt/Charset*.java</exclude>
-                        <exclude>src/test/resources/test-documents/**</exclude>
-                    </excludes>
-                </configuration>
-            </plugin>
-            <plugin>
-                <groupId>org.apache.maven.plugins</groupId>
-                <artifactId>maven-jar-plugin</artifactId>
-                <configuration>
-                    <archive>
-                        <manifestEntries>
-                            
<Automatic-Module-Name>org.apache.tika.translate</Automatic-Module-Name>
-                        </manifestEntries>
-                    </archive>
-                </configuration>
-                <executions>
-                    <execution>
-                        <goals>
-                            <goal>test-jar</goal>
-                        </goals>
-                    </execution>
-                </executions>
-            </plugin>
-        </plugins>
-
-        <pluginManagement>
-            <plugins>
-                <!-- This plugin's configuration is used to store Eclipse m2e  
    -->
-                <!-- settings only. It has no influence on the Maven build 
itself. -->
-                <plugin>
-                    <groupId>org.eclipse.m2e</groupId>
-                    <artifactId>lifecycle-mapping</artifactId>
-                    <version>1.0.0</version>
-                    <configuration>
-                        <lifecycleMappingMetadata>
-                            <pluginExecutions>
-                                <pluginExecution>
-                                    <pluginExecutionFilter>
-                                        <groupId>org.apache.felix</groupId>
-                                        
<artifactId>maven-scr-plugin</artifactId>
-                                        <version>${maven.scr.version}</version>
-                                        <goals>
-                                            <goal>scr</goal>
-                                        </goals>
-                                    </pluginExecutionFilter>
-                                    <action>
-                                        <execute/>
-                                    </action>
-                                </pluginExecution>
-                            </pluginExecutions>
-                        </lifecycleMappingMetadata>
-                    </configuration>
-                </plugin>
-            </plugins>
-        </pluginManagement>
-    </build>
-</project>
\ No newline at end of file
diff --git 
a/tika-transcribe/src/main/java/org/apache/tika/transcribe/AmazonTranscribe.java
 
b/tika-transcribe/src/main/java/org/apache/tika/transcribe/AmazonTranscribe.java
deleted file mode 100644
index 5b50491..0000000
--- 
a/tika-transcribe/src/main/java/org/apache/tika/transcribe/AmazonTranscribe.java
+++ /dev/null
@@ -1,406 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.transcribe;
-
-import com.amazonaws.AmazonServiceException;
-import com.amazonaws.SdkClientException;
-import com.amazonaws.auth.AWSStaticCredentialsProvider;
-import com.amazonaws.auth.BasicAWSCredentials;
-import com.amazonaws.services.s3.AmazonS3;
-import com.amazonaws.services.s3.AmazonS3ClientBuilder;
-import com.amazonaws.services.s3.model.AmazonS3Exception;
-import com.amazonaws.services.s3.model.CompressionType;
-import com.amazonaws.services.s3.model.ExpressionType;
-import com.amazonaws.services.s3.model.InputSerialization;
-import com.amazonaws.services.s3.model.JSONInput;
-import com.amazonaws.services.s3.model.JSONOutput;
-import com.amazonaws.services.s3.model.JSONType;
-import com.amazonaws.services.s3.model.OutputSerialization;
-import com.amazonaws.services.s3.model.PutObjectRequest;
-import com.amazonaws.services.s3.model.PutObjectResult;
-import com.amazonaws.services.s3.model.SelectObjectContentEvent;
-import com.amazonaws.services.s3.model.SelectObjectContentEventVisitor;
-import com.amazonaws.services.s3.model.SelectObjectContentRequest;
-import com.amazonaws.services.s3.model.SelectObjectContentResult;
-import com.amazonaws.services.transcribe.AmazonTranscribeAsync;
-import com.amazonaws.services.transcribe.AmazonTranscribeAsyncClientBuilder;
-import com.amazonaws.services.transcribe.model.Media;
-import com.amazonaws.services.transcribe.model.StartTranscriptionJobRequest;
-import com.amazonaws.services.transcribe.model.TranscriptionJob;
-import com.amazonaws.services.transcribe.model.TranscriptionJobStatus;
-import com.amazonaws.services.transcribe.model.GetTranscriptionJobRequest;
-import com.amazonaws.services.transcribe.model.GetTranscriptionJobResult;
-import com.amazonaws.services.transcribe.model.LanguageCode;
-import org.apache.tika.exception.TikaException;
-import org.json.simple.JSONObject;
-import org.json.simple.parser.JSONParser;
-import org.json.simple.parser.ParseException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.nio.charset.StandardCharsets;
-import java.util.Properties;
-import java.util.UUID;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.stream.Collectors;
-
-/**
- * <a href="https://aws.amazon.com/transcribe/">Amazon Transcribe</a> 
- * {@link Transcriber} implementation. See Javadoc for configiration options.
- *
- * @since Tika 2.1
- */
-public class AmazonTranscribe implements Transcriber {
-
-    public static final String PROPERTIES_FILE = 
"transcribe.amazon.properties";
-    public static final String ID_PROPERTY = "transcribe.AWS_ACCESS_KEY";
-    public static final String SECRET_PROPERTY = "transcribe.AWS_SECRET_KEY";
-    public static final String DEFAULT_ID = "dummy-id";
-    public static final String DEFAULT_SECRET = "dummy-secret";
-    public static final String DEFAULT_BUCKET = "dummy-bucket";
-    public static final String BUCKET_NAME = "transcribe.BUCKET_NAME";
-    public static final String REGION = "transcribe.REGION";
-    private static final Logger LOG = LoggerFactory
-            .getLogger(AmazonTranscribe.class);
-    private AmazonTranscribeAsync amazonTranscribeAsync;
-    private AmazonS3 amazonS3;
-    private String bucketName;
-    private String region;
-    private boolean isAvailable; // Flag for whether or not transcription is
-    // available.
-    private String clientId;
-    private String clientSecret; // Keys used for the API calls.
-    private AWSStaticCredentialsProvider credsProvider;
-
-    /**
-     * Create a new AmazonTranscribe instance with the client keys specified in
-     * <code>transcribe.amazon.properties</code> which needs to be available on
-     * the Java Classpath.
-     * Silently becomes unavailable when client keys are unavailable.
-     * <code>transcribe.AWS_ACCESS_KEY</code>,
-     * <code>transcribe.AWS_SECRET_KEY</code>,
-     * <code>transcribe.BUCKET_NAME</code> and 
-     * <code>transcribe.REGION</code> must be set in
-     * <code>transcribe.amazon.properties</code>.
-     * <b>N.B.</b> it is not necessary to create the bucket before hand. 
-     * This implementation will automatically create the bucket if one
-     * does not alrerady exist, per the name defined above.
-     *
-     * @since Tika 2.0
-     */
-    public AmazonTranscribe() {
-        Properties config = new Properties();
-        try {
-            config.load(AmazonTranscribe.class
-                    .getResourceAsStream(PROPERTIES_FILE));
-            this.clientId = config.getProperty(ID_PROPERTY);
-            this.clientSecret = config.getProperty(SECRET_PROPERTY);
-            this.bucketName = config.getProperty(BUCKET_NAME);
-            this.region = config.getProperty(REGION);
-            BasicAWSCredentials creds = new BasicAWSCredentials(this.clientId,
-                    this.clientSecret);
-            this.credsProvider = new AWSStaticCredentialsProvider(creds);
-            amazonS3 = AmazonS3ClientBuilder.standard()
-                    .withCredentials(credsProvider).withRegion(this.region)
-                    .build();
-            this.isAvailable = checkAvailable();
-            if (!this.amazonS3.doesBucketExistV2(this.bucketName)) {
-                try {
-                    amazonS3.createBucket(this.bucketName);
-                } catch (AmazonS3Exception e) {
-                    throw new RuntimeException(e.getErrorMessage());
-                }
-            }
-            this.amazonTranscribeAsync = AmazonTranscribeAsyncClientBuilder
-                    .standard().withCredentials(credsProvider)
-                    .withRegion(this.region).build();
-        } catch (Exception e) {
-            LOG.warn("Exception reading config file", e);
-            isAvailable = false;
-        }
-    }
-
-    /**
-     * private method to get a unique job key.
-     *
-     * @return unique job key.
-     */
-    private String getJobKey() {
-        return UUID.randomUUID().toString();
-    }
-
-    /**
-     * Constructs a new {@link PutObjectRequest} object to upload a file to the
-     * specified bucket and jobName. After constructing the request, users may
-     * optionally specify object metadata or a canned ACL as well.
-     *
-     * @param inputStream, null
-     *            The file to upload to Amazon S3.
-     * @param jobName
-     *            The unique job name for each job(UUID).
-     */
-    private void uploadFileToBucket(InputStream inputStream, String jobName)
-            throws TikaException {
-        PutObjectRequest request = new PutObjectRequest(this.bucketName,
-                jobName, inputStream, null);
-        try {
-            @SuppressWarnings("unused")
-            PutObjectResult response = amazonS3.putObject(request);
-        } catch (SdkClientException e) {
-            throw (new TikaException("File Upload to AWS Failed"));
-        }
-    }
-
-    /**
-     * Starts AWS Transcribe Job without language specification.
-     *
-     * @param inputStream
-     *            the source input stream.
-     * @return The transcribed string result, NULL if the job failed.
-     * @throws TikaException
-     *             When there is an error transcribing.
-     * @throws IOException
-     *             If an I/O exception of some sort has occurred.
-     */
-    @Override
-    public String transcribe(InputStream inputStream)
-            throws TikaException, IOException {
-        if (!isAvailable())
-            return null;
-        String jobName = getJobKey();
-        uploadFileToBucket(inputStream, jobName);
-        StartTranscriptionJobRequest startTranscriptionJobRequest = new 
StartTranscriptionJobRequest();
-        Media media = new Media();
-        media.setMediaFileUri(amazonS3.getUrl(bucketName, jobName).toString());
-        
startTranscriptionJobRequest.withIdentifyLanguage(true).withMedia(media)
-        .withOutputBucketName(this.bucketName)
-        .withTranscriptionJobName(jobName)
-        .setRequestCredentialsProvider(credsProvider);
-        amazonTranscribeAsync
-        .startTranscriptionJob(startTranscriptionJobRequest);
-        return getTranscriptText(jobName);
-    }
-
-    /**
-     * Starts AWS Transcribe Job with language specification.
-     *
-     * @param inputStream
-     *            the source input stream.
-     * @param sourceLanguage
-     *            <a href=
-     *            
"https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/transcribe/model/LanguageCode.html">AWS
-     *            Language Code</a> for the language used in the input media
-     *            file.
-     * @return The transcribed string result, NULL if the job failed.
-     * @throws TikaException
-     *             When there is an error transcribing.
-     * @throws IOException
-     *             If an I/O exception of some sort has occurred.
-     * @see <a href=
-     *      
"https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/transcribe/model/LanguageCode.html">AWS
-     *      Language Code</a>
-     */
-    @Override
-    public String transcribe(InputStream inputStream, String sourceLanguage)
-            throws TikaException, IOException {
-        if (!isAvailable())
-            return null;
-        String jobName = getJobKey();
-        uploadFileToBucket(inputStream, jobName);
-        StartTranscriptionJobRequest startTranscriptionJobRequest = new 
StartTranscriptionJobRequest();
-        Media media = new Media();
-        media.setMediaFileUri(amazonS3.getUrl(bucketName, jobName).toString());
-        ((StartTranscriptionJobRequest) startTranscriptionJobRequest
-                .withMedia(media).withOutputBucketName(this.bucketName)
-                .withTranscriptionJobName(jobName)
-                .withRequestCredentialsProvider(credsProvider))
-        .withLanguageCode(
-                LanguageCode.fromValue(sourceLanguage));
-        amazonTranscribeAsync
-        .startTranscriptionJob(startTranscriptionJobRequest);
-        return getTranscriptText(jobName);
-    }
-
-    /**
-     * @return true if this Transcriber is probably able to transcribe right
-     *         now.
-     * @since Tika 2.1
-     */
-    @Override
-    public boolean isAvailable() {
-        return this.isAvailable;
-    }
-
-    /**
-     * Sets the client Id for the transcriber API.
-     *
-     * @param id
-     *            The ID to set.
-     */
-    public void setId(String id) {
-        this.clientId = id;
-        this.isAvailable = checkAvailable();
-    }
-
-    /**
-     * Sets the client secret for the transcriber API.
-     *
-     * @param secret
-     *            The secret to set.
-     */
-    public void setSecret(String secret) {
-        this.clientSecret = secret;
-        this.isAvailable = checkAvailable();
-    }
-
-    /**
-     * Sets the client secret for the transcriber API.
-     *
-     * @param bucket
-     *            The bucket to set.
-     */
-    public void setBucket(String bucket) {
-        this.bucketName = bucket;
-        this.isAvailable = checkAvailable();
-    }
-
-    /**
-     * Private method check if the service is available.
-     *
-     * @return if the service is available
-     */
-    private boolean checkAvailable() {
-        return clientId != null && !clientId.equals(DEFAULT_ID)
-                && clientSecret != null && !clientSecret.equals(DEFAULT_SECRET)
-                && bucketName != null && !bucketName.equals(DEFAULT_BUCKET);
-    }
-
-    /**
-     * Gets Transcription result from AWS S3 bucket given the jobName.
-     *
-     * @param fileNameS3
-     *            The path of the file to upload to Amazon S3.
-     * @return The transcribed string result, NULL if the job failed.
-     * @throws IOException possible reasons include (i) an End Event is not 
received
-     * from AWS S3 SelectObjectContentResult operation and (ii) a parse 
exception
-     * whilst processing JSON from the AWS S3 SelectObjectContentResult 
operation.
-     * @throws SdkClientException a AWS-specific exception related to 
SelectObjectContentResult
-     * operation.
-     * @throws AmazonServiceException possibly thrown if there is an issue 
selecting object content
-     * from AWS S3 objects.
-     */
-    private String getTranscriptText(String fileNameS3) throws 
AmazonServiceException, SdkClientException, IOException {
-        TranscriptionJob transcriptionJob = retrieveObjectWhenJobCompleted(
-                fileNameS3);
-        String text = null;
-        if (transcriptionJob != null && !TranscriptionJobStatus.FAILED.name()
-                .equals(transcriptionJob.getTranscriptionJobStatus())) {
-            InputSerialization inputSerialization = new 
InputSerialization().withJson(new JSONInput().withType(JSONType.DOCUMENT))
-                    .withCompressionType(CompressionType.NONE);
-            OutputSerialization outputSerialization = new 
OutputSerialization().withJson(new JSONOutput());
-            SelectObjectContentRequest request = new 
SelectObjectContentRequest()
-                    .withBucketName(this.bucketName).withKey(fileNameS3 + 
".json")
-                    .withExpression("Select 
s.results.transcripts[0].transcript from S3Object s")//WHERE transcript IS NOT 
MISSING
-                    
.withExpressionType(ExpressionType.SQL).withRequestCredentialsProvider(credsProvider);
-            request.setInputSerialization(inputSerialization);
-            request.setOutputSerialization(outputSerialization);
-
-            final AtomicBoolean isResultComplete = new AtomicBoolean(false);
-
-            try (SelectObjectContentResult result = amazonS3
-                    .selectObjectContent(request)) {
-                InputStream resultInputStream = result.getPayload()
-                        .getRecordsInputStream(
-                                new SelectObjectContentEventVisitor() {
-                                    @Override
-                                    public void visit(
-                                            
SelectObjectContentEvent.StatsEvent event) {
-                                        LOG.debug(
-                                                "Received Stats, Bytes 
Scanned: "
-                                                        + event.getDetails()
-                                                        .getBytesScanned()
-                                                        + " Bytes Processed: "
-                                                        + event.getDetails()
-                                                        .getBytesProcessed());
-                                    }
-
-                                    /*
-                                     * An End Event informs that the request 
has
-                                     * finished successfully.
-                                     */
-                                    @Override
-                                    public void visit(
-                                            SelectObjectContentEvent.EndEvent 
event) {
-                                        isResultComplete.set(true);
-                                        LOG.debug(
-                                                "Received End Event. Result is 
complete.");
-                                    }
-                                });
-                text = new BufferedReader(
-                        new InputStreamReader(resultInputStream, 
StandardCharsets.UTF_8))
-                        .lines()
-                        .collect(Collectors.joining("\n"));
-            }
-            /*
-             * The End Event indicates all matching records have been
-             * transmitted. If the End Event is not received, the results
-             * may be incomplete.
-             */
-            if (!isResultComplete.get()) {
-                throw new IOException(
-                        "S3 Select request was incomplete as End Event was not 
received.");
-            }
-        }
-        JSONParser parser = new JSONParser();
-        JSONObject obj = null;
-        try {
-            obj = (JSONObject) parser.parse(text);
-        } catch (ParseException e) {
-            throw new IOException(e.getMessage(), e);
-        }
-        return obj.get("transcript").toString();
-    }
-
-    /**
-     * Private helper function to get object from s3.
-     *
-     * @param jobName
-     *            The unique job name for each job(UUID).
-     * @return TranscriptionJob object
-     */
-    private TranscriptionJob retrieveObjectWhenJobCompleted(String jobName) {
-        GetTranscriptionJobRequest getTranscriptionJobRequest = new 
GetTranscriptionJobRequest();
-        getTranscriptionJobRequest
-        .withRequestCredentialsProvider(credsProvider);
-        getTranscriptionJobRequest.setTranscriptionJobName(jobName);
-        while (true) {
-            GetTranscriptionJobResult innerResult = amazonTranscribeAsync
-                    .getTranscriptionJob(getTranscriptionJobRequest);
-            String status = innerResult.getTranscriptionJob()
-                    .getTranscriptionJobStatus();
-            if (TranscriptionJobStatus.COMPLETED.name().equals(status)
-                    || TranscriptionJobStatus.FAILED.name().equals(status)) {
-                return innerResult.getTranscriptionJob();
-            }
-        }
-    }
-}
\ No newline at end of file
diff --git 
a/tika-transcribe/src/main/resources/META-INF.services/org.apache.tika.language.translate.Translator
 
b/tika-transcribe/src/main/resources/META-INF.services/org.apache.tika.language.translate.Translator
deleted file mode 100644
index 1256ab6..0000000
--- 
a/tika-transcribe/src/main/resources/META-INF.services/org.apache.tika.language.translate.Translator
+++ /dev/null
@@ -1,16 +0,0 @@
-#  Licensed to the Apache Software Foundation (ASF) under one or more
-#  contributor license agreements.  See the NOTICE file distributed with
-#  this work for additional information regarding copyright ownership.
-#  The ASF licenses this file to You under the Apache License, Version 2.0
-#  (the "License"); you may not use this file except in compliance with
-#  the License.  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-
-org.apache.tika.language.translate.amazontranscribe
diff --git 
a/tika-transcribe/src/main/resources/org.apache.tika.transcribe/transcribe.amazon.properties
 
b/tika-transcribe/src/main/resources/org.apache.tika.transcribe/transcribe.amazon.properties
deleted file mode 100644
index 043a66f..0000000
--- 
a/tika-transcribe/src/main/resources/org.apache.tika.transcribe/transcribe.amazon.properties
+++ /dev/null
@@ -1,18 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transcribe.AWS_ACCESS_KEY=dummy_key
-transcribe.AWS_SECRET_KEY=dummy_key
-transcribe.BUCKET_NAME=dummy_name
diff --git 
a/tika-transcribe/src/test/java/org/apache/tika/transcribe/AmazonTranscribeTest.java
 
b/tika-transcribe/src/test/java/org/apache/tika/transcribe/AmazonTranscribeTest.java
deleted file mode 100644
index 3b424f9..0000000
--- 
a/tika-transcribe/src/test/java/org/apache/tika/transcribe/AmazonTranscribeTest.java
+++ /dev/null
@@ -1,527 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.transcribe;
-
-import org.junit.Before;
-import org.junit.Ignore;
-import org.junit.Test;
-
-import java.io.FileInputStream;
-
-import static junit.framework.TestCase.assertNotNull;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.fail;
-
-//TODO: Check the ACTUAL output of Amazon Transcribe.
-
-/**
- * Tests tika-trancribe by creating an AmazonTranscribe() object.
- * 1) Tests that transcribe functions properly when it is given just a 
filepath.
- * 2) Both audio (mp3) and video (mp4) files are used in these tests.
- */
-@Ignore("Ignore until finalize AmazonTransribe Interface & build Tika")
-public class AmazonTranscribeTest {
-    AmazonTranscribe transcriber;
-
-    @Before
-    public void setUp() {
-        transcriber = new AmazonTranscribe();
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is en-US (English - United States)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_enUS() {
-        String audioFilePath = 
"src/test/resources/en-US_(A_Little_Bottle_Of_Water).mp3";
-        String expected = "a little bottle of water.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new 
FileInputStream(audioFilePath), "en-US");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source 
language.
-     * The source language of the file is en-US (English - United States)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_enUS() {
-        String audioFilePath = 
"src/test/resources/en-US_(A_Little_Bottle_Of_Water).mp3";
-        String expected = "a little bottle of water.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new 
FileInputStream(audioFilePath));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is en-US (English - United States)
-     */
-    @Test
-    public void testAmazonTranscribeVideo_enUS() {
-        String videoFilePath = "en-US_(Hi).mp4";
-        String expected = "Hi";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new 
FileInputStream(videoFilePath), "en-US");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with a video file without passing in the source 
language.
-     * The source language of the file is en-US (English - United States)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownVideo_enUS() {
-        String videoFilePath = "en-US_(Hi).mp4";
-        String expected = "Hi";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new 
FileInputStream(videoFilePath));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is en-GB (English - Great Britain)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_enGB() {
-        String audioFilePath = 
"src/test/resources/en-GB_(A_Little_Bottle_Of_Water).mp3";
-        String expected = "a little bottle of water.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new 
FileInputStream(audioFilePath), "en-GB");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source 
language.
-     * The source language of the file is en-GB (English - Great Britain)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_enGB() {
-        String audioFilePath = 
"src/test/resources/en-GB_(A_Little_Bottle_Of_Water).mp3";
-        String expected = "a little bottle of water.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new 
FileInputStream(audioFilePath));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is en-AU (English - Australia)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_enAU() {
-        String source = 
"src/test/resources/en-AU_(A_Little_Bottle_Of_Water).mp3";
-        String expected = "a little bottle of water.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(source), 
"en-AU");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source 
language.
-     * The source language of the file is en-AU (English - Australian)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_enAU() {
-        String videoFilePath = 
"src/test/resources/en-AU_(A_Little_Bottle_Of_Water).mp3";
-        String expected = "a little bottle of water.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new 
FileInputStream(videoFilePath));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is de-DE (German)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_deDE() {
-        String audioFilePath = 
"src/test/resources/de-DE_(We_Are_At_School_x2).mp3";
-        String expected = "Wir sind in der Schule. Wir sind in der Schule.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new 
FileInputStream(audioFilePath), "de-DE");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source 
language.
-     * The source language of the file is de-DE (German)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_deDE() {
-        String audioFilePath = 
"src/test/resources/de-DE_(We_Are_At_School_x2).mp3";
-        String expected = "Wir sind in der Schule. Wir sind in der Schule.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new 
FileInputStream(audioFilePath));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is it-IT (Italian)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_itIT() {
-        String audioFilePath = 
"src/test/resources/it-IT_(We_Are_Having_Class_x2).mp3";
-        String expected = "stiamo facendo lezione. stiamo facendo lezione.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new 
FileInputStream(audioFilePath), "it-IT");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source 
language.
-     * The source language of the file is it-IT (Italian)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_itIT() {
-        String audioFilePath = 
"src/test/resources/it-IT_(We_Are_Having_Class_x2).mp3";
-        String expected = "stiamo facendo lezione. stiamo facendo lezione.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new 
FileInputStream(audioFilePath));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is ja-JP (Japanese)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_jaJP() {
-        String audioFilePath = 
"src/test/resources/ja-JP_(We_Are_At_School).mp3";
-        String expected = "私達は学校にいます"; //TODO or Watashitachi wa gakkō ni imasu
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new 
FileInputStream(audioFilePath), "ja-JP");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source 
language.
-     * The source language of the file is ja-JP (Japanese)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_jaJP() {
-        String audioFilePath = 
"src/test/resources/ja-JP_(We_Are_At_School).mp3";
-        String expected = "私達は学校にいます"; //TODO or Watashitachi wa gakkō ni imasu
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new 
FileInputStream(audioFilePath));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is ko-KR (Korean)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_koKR() {
-        String audioFilePath = 
"src/test/resources/ko-KR_(We_Are_Having_Class_x2).mp3";
-        String expected = "우리는 수업을하고있다"; //TODO or ulineun sueob-eulhagoissda
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new 
FileInputStream(audioFilePath), "ko-KR");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source 
language.
-     * The source language of the file is ko-KR (Korean)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_koKR() {
-        String audioFilePath = 
"src/test/resources/ko-KR_(We_Are_Having_Class_x2).mp3";
-        String expected = "우리는 수업을하고있다"; //TODO or ulineun sueob-eulhagoissda
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new 
FileInputStream(audioFilePath));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with a video file given the source language
-     * The source language of the file is ko-KR (Korean)
-     */
-    @Test
-    public void testAmazonTranscribeVideo_koKR() {
-        String source = "src/test/resources/ko-KR_(Annyeonghaseyo).mp4";
-        //TODO: Check whether output is Annyeonghaseyo or 안녕하세요
-        String expected = "Annyeonghaseyo";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(source), 
"ko-KR");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an video file without passing in the source 
language.
-     * The source language of the file is ko-KR (Korean)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownVideo_koKR() {
-        String source = "src/test/resources/ko-KR_(Annyeonghaseyo).mp4";
-        //TODO: Check whether output is Annyeonghaseyo or 안녕하세요
-        String expected = "Annyeonghaseyo";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(source));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is pt-BR (Portuguese - Brazil)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_ptBR() {
-        String audioFilePath = 
"src/test/resources/pt-BR_(We_Are_At_School).mp3";
-        String expected = "nós estamos na escola.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new 
FileInputStream(audioFilePath), "pt-BR");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source 
language.
-     * The source language of the file is pt-BR (Portuguese - Brazil)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_ptBR() {
-        String audioFilePath = 
"src/test/resources/pt-BR_(We_Are_At_School).mp3";
-        String expected = "nós estamos na escola.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new 
FileInputStream(audioFilePath));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-}
diff --git a/tika-transcribe/src/test/resources/ShortAudioSampleFrench.mp3 
b/tika-transcribe/src/test/resources/ShortAudioSampleFrench.mp3
deleted file mode 100644
index a718047..0000000
Binary files a/tika-transcribe/src/test/resources/ShortAudioSampleFrench.mp3 
and /dev/null differ
diff --git a/tika-transcribe/src/test/resources/de-DE_(We_Are_At_School_x2).mp3 
b/tika-transcribe/src/test/resources/de-DE_(We_Are_At_School_x2).mp3
deleted file mode 100644
index 9d4df04..0000000
Binary files 
a/tika-transcribe/src/test/resources/de-DE_(We_Are_At_School_x2).mp3 and 
/dev/null differ
diff --git 
a/tika-transcribe/src/test/resources/en-AU_(A_Little_Bottle_Of_Water).mp3 
b/tika-transcribe/src/test/resources/en-AU_(A_Little_Bottle_Of_Water).mp3
deleted file mode 100644
index 16f840d..0000000
Binary files 
a/tika-transcribe/src/test/resources/en-AU_(A_Little_Bottle_Of_Water).mp3 and 
/dev/null differ
diff --git 
a/tika-transcribe/src/test/resources/en-GB_(A_Little_Bottle_Of_Water).mp3 
b/tika-transcribe/src/test/resources/en-GB_(A_Little_Bottle_Of_Water).mp3
deleted file mode 100644
index 2c6ae35..0000000
Binary files 
a/tika-transcribe/src/test/resources/en-GB_(A_Little_Bottle_Of_Water).mp3 and 
/dev/null differ
diff --git 
a/tika-transcribe/src/test/resources/en-US_(A_Little_Bottle_Of_Water).mp3 
b/tika-transcribe/src/test/resources/en-US_(A_Little_Bottle_Of_Water).mp3
deleted file mode 100644
index 3d69b68..0000000
Binary files 
a/tika-transcribe/src/test/resources/en-US_(A_Little_Bottle_Of_Water).mp3 and 
/dev/null differ
diff --git a/tika-transcribe/src/test/resources/en-US_(Hi).mp4 
b/tika-transcribe/src/test/resources/en-US_(Hi).mp4
deleted file mode 100644
index d697b13..0000000
Binary files a/tika-transcribe/src/test/resources/en-US_(Hi).mp4 and /dev/null 
differ
diff --git 
a/tika-transcribe/src/test/resources/it-IT_(We_Are_Having_Class_x2).mp3 
b/tika-transcribe/src/test/resources/it-IT_(We_Are_Having_Class_x2).mp3
deleted file mode 100644
index 5fa69c3..0000000
Binary files 
a/tika-transcribe/src/test/resources/it-IT_(We_Are_Having_Class_x2).mp3 and 
/dev/null differ
diff --git a/tika-transcribe/src/test/resources/ja-JP_(We_Are_At_School).mp3 
b/tika-transcribe/src/test/resources/ja-JP_(We_Are_At_School).mp3
deleted file mode 100644
index 5ddf6e5..0000000
Binary files a/tika-transcribe/src/test/resources/ja-JP_(We_Are_At_School).mp3 
and /dev/null differ
diff --git a/tika-transcribe/src/test/resources/ko-KR_(Annyeonghaseyo).mp4 
b/tika-transcribe/src/test/resources/ko-KR_(Annyeonghaseyo).mp4
deleted file mode 100644
index d757d42..0000000
Binary files a/tika-transcribe/src/test/resources/ko-KR_(Annyeonghaseyo).mp4 
and /dev/null differ
diff --git 
a/tika-transcribe/src/test/resources/ko-KR_(We_Are_Having_Class_x2).mp3 
b/tika-transcribe/src/test/resources/ko-KR_(We_Are_Having_Class_x2).mp3
deleted file mode 100644
index 444098c..0000000
Binary files 
a/tika-transcribe/src/test/resources/ko-KR_(We_Are_Having_Class_x2).mp3 and 
/dev/null differ
diff --git a/tika-transcribe/src/test/resources/pt-BR_(We_Are_At_School).mp3 
b/tika-transcribe/src/test/resources/pt-BR_(We_Are_At_School).mp3
deleted file mode 100644
index 7dfc811..0000000
Binary files a/tika-transcribe/src/test/resources/pt-BR_(We_Are_At_School).mp3 
and /dev/null differ

Reply via email to