This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 2c951a3  TIKA-3384 -- convert transcribe to a traditional parser
2c951a3 is described below

commit 2c951a35e57cf6624457798d51c1b8cbffff0f7b
Author: tallison <[email protected]>
AuthorDate: Tue May 18 07:44:18 2021 -0400

    TIKA-3384 -- convert transcribe to a traditional parser
---
 pom.xml                                            |   1 -
 .../org/apache/tika/transcribe/Transcriber.java    |  60 ---
 .../tika/example/TranscribeTranslateExample.java   |  83 ++--
 tika-parsers/tika-parsers-ml/pom.xml               |   1 +
 .../tika-parsers-ml/tika-transcribe-aws}/pom.xml   |  47 +-
 .../parser/transcribe/aws/AmazonTranscribe.java    | 398 ++++++++++++++++
 .../transcribe/aws/AmazonTranscribeTest.java       | 310 ++++++++++++
 .../test-documents}/ShortAudioSampleFrench.mp3     | Bin
 .../de-DE_(We_Are_At_School_x2).mp3                | Bin
 .../en-AU_(A_Little_Bottle_Of_Water).mp3           | Bin
 .../en-GB_(A_Little_Bottle_Of_Water).mp3           | Bin
 .../en-US_(A_Little_Bottle_Of_Water).mp3           | Bin
 .../test/resources/test-documents}/en-US_(Hi).mp4  | Bin
 .../it-IT_(We_Are_Having_Class_x2).mp3             | Bin
 .../test-documents}/ja-JP_(We_Are_At_School).mp3   | Bin
 .../test-documents}/ko-KR_(Annyeonghaseyo).mp4     | Bin
 .../ko-KR_(We_Are_Having_Class_x2).mp3             | Bin
 .../test-documents}/pt-BR_(We_Are_At_School).mp3   | Bin
 .../test/resources/tika-config-transcribe-aws.xml  |  32 ++
 .../apache/tika/transcribe/AmazonTranscribe.java   | 406 ----------------
 .../org.apache.tika.language.translate.Translator  |  16 -
 .../transcribe.amazon.properties                   |  18 -
 .../tika/transcribe/AmazonTranscribeTest.java      | 527 ---------------------
 23 files changed, 830 insertions(+), 1069 deletions(-)

diff --git a/pom.xml b/pom.xml
index f8c6591..d0e43d4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -52,7 +52,6 @@
     <module>tika-translate</module>
     <module>tika-example</module>
     <module>tika-java7</module>
-    <module>tika-transcribe</module>
   </modules>
 
   <profiles>
diff --git 
a/tika-core/src/main/java/org/apache/tika/transcribe/Transcriber.java 
b/tika-core/src/main/java/org/apache/tika/transcribe/Transcriber.java
deleted file mode 100644
index 3546256..0000000
--- a/tika-core/src/main/java/org/apache/tika/transcribe/Transcriber.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.transcribe;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.tika.exception.TikaException;
-
-/**
- * Interface for Transcriber services.
- *
- * @see <a href="https://issues.apache.org/jira/browse/TIKA-94">TIKA-94</a>
- * @since Tika 2.1
- */
-public interface Transcriber {
-    /**
-     * Transcribe the given file.
-     *
-     * @param inputStream the source input stream.
-     * @return The transcribed string result, NULL if the job failed.
-     * @throws TikaException When there is an error transcribing.
-     * @throws IOException   If an I/O exception of some sort has occurred.
-     * @since 2.1
-     */
-    public String transcribe(InputStream inputStream) throws TikaException, 
IOException;
-
-    /**
-     * Transcribe the given the file and the source language.
-     *
-     * @param inputStream    the source input stream.
-     * @param sourceLanguage The language code for the language used in the 
input media file.
-     * @return The transcribed string result, NULL if the job failed.
-     * @throws TikaException When there is an error transcribing.
-     * @throws IOException   If an I/O exception of some sort has occurred.
-     * @since 2.1
-     */
-    public String transcribe(InputStream inputStream, String sourceLanguage) 
throws TikaException, IOException;
-
-    /**
-     * @return true if this Transcriber is probably able to transcribe right 
now.
-     * @since Tika 2.1
-     */
-    public boolean isAvailable();
-}
diff --git 
a/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java
 
b/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java
index 12dd7e5..a90d322 100644
--- 
a/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java
+++ 
b/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java
@@ -17,22 +17,23 @@
 
 package org.apache.tika.example;
 
-import java.io.FileInputStream;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 
+import org.apache.tika.Tika;
+import org.apache.tika.config.TikaConfig;
 import org.apache.tika.language.translate.GoogleTranslator;
 import org.apache.tika.language.translate.Translator;
-import org.apache.tika.transcribe.AmazonTranscribe;
-import org.apache.tika.transcribe.Transcriber;
 
 /**
  * This example demonstrates primitive logic for
  * chaining Tika API calls. In this case translation
- * could be considered as a downstream process to 
+ * could be considered as a downstream process to
  * transcription.
  * We simply pass the output of
- * a call to {@link Transcriber#transcribe(java.io.InputStream)}
- * into {@link Translator#translate(String, String)}. 
- * The {@link GoogleTranslator} is configured with a target 
+ * a call to {@link Tika#parseToString(Path)}
+ * into {@link Translator#translate(String, String)}.
+ * The {@link GoogleTranslator} is configured with a target
  * language of "en-US".
  * @author lewismc
  *
@@ -42,7 +43,7 @@ public class TranscribeTranslateExample {
     /**
      * Use {@link GoogleTranslator} to execute translation on
      * input data. This implementation needs configured as explained in the 
Javadoc.
-     * In this implementation, Google will try to guess the input language. 
The target 
+     * In this implementation, Google will try to guess the input language. 
The target
      * language is "en-US".
      * @param text input text to translate.
      * @return translated text String.
@@ -61,43 +62,55 @@ public class TranscribeTranslateExample {
     }
 
     /**
-     * Use {@link AmazonTranscribe} to execute transcription on input data.
-     * This implementation needs configured as explained in the Javadoc.
+     * Use {@link org.apache.tika.parser.transcribe.aws.AmazonTranscribe} to 
execute transcription
+     * on input data.
+     * This implementation needs to be configured as explained in the Javadoc.
      * @param file the name of the file (which needs to be on the Java 
Classpath) to transcribe.
      * @return transcribed text.
      */
-    public static String amazonTranscribe(String file) {
-        String filePath = 
TranscribeTranslateExample.class.getClassLoader().getResource(file).getPath();
-        String result = null;
-        Transcriber transcriber = new AmazonTranscribe();
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(filePath));
-            } catch (Exception e) {
-                e.printStackTrace();
-            }
-        }
-        return result;
+    public static String amazonTranscribe(Path tikaConfig, Path file) throws 
Exception {
+        return new Tika(new TikaConfig(tikaConfig)).parseToString(file);
     }
 
     /**
      * Main method to run this example. This program can be invoked as follows
      * <ol>
-     * <li><code>transcribe-translate ${file}</code>; which executes both 
-     * transcription then translation on the given resource, or 
-     * <li><code>transcribe ${file}</code>; which executes only 
translation</li>
-     * @param args either of the commands described above and the input file 
-     * (which needs to be on the Java Classpath). 
+     * <li><code>transcribe-translate ${tika-config.xml} ${file}</code>; which 
executes both
+     * transcription then translation on the given resource, or
+     * <li><code>transcribe ${tika-config.xml} ${file}</code>; which executes 
only transcription</li>
+     * @param args either of the commands described above and the input file
+     * (which needs to be on the Java Classpath).
+     *
+     *
+     *
+     * ${tika-config.xml} must include credentials for aws and a temporary 
storage bucket:
+     * <pre>
+     * {@code
+     *  <properties>
+     *   <parsers>
+     *     <parser class="org.apache.tika.parser.DefaultParser"/>
+     *     <parser 
class="org.apache.tika.parser.transcribe.aws.AmazonTranscribe">
+     *       <params>
+     *         <param name="bucket" type="string">bucket</param>
+     *         <param name="clientId" type="string">clientId</param>
+     *         <param name="clientSecret" type="string">clientSecret</param>
+     *       </params>
+     *     </parser>
+     *   </parsers>
+     * </properties>
+     * }
+     * </pre>
      */
-    public static void main (String[] args) {
+    public static void main (String[] args) throws Exception {
         String text = null;
-        if (args.length != 0) {
-            if ("transcribe-translate".equals(args[0])) {
-                text = googleTranslateToEnglish(amazonTranscribe(args[1]));
-                System.out.print("Transcription and translation 
successful!\nEXTRAXCTED TEXT: " + text);
-            } else if ("transcribe".equals(args[0])) {
-                text = amazonTranscribe(args[1]);
-                System.out.print("Transcription successful!\nEXTRAXCTED TEXT: 
" + text);
+        if (args.length > 1) {
+            if ("transcribe-translate".equals(args[1])) {
+                text = 
googleTranslateToEnglish(amazonTranscribe(Paths.get(args[0]),
+                        Paths.get(args[1])));
+                System.out.print("Transcription and translation 
successful!\nEXTRACTED TEXT: " + text);
+            } else if ("transcribe".equals(args[1])) {
+                text = amazonTranscribe(Paths.get(args[0]), 
Paths.get(args[1]));
+                System.out.print("Transcription successful!\nEXTRACTED TEXT: " 
+ text);
             } else {
                 System.out.print("Incorrect invocation, see Javadoc.");
             }
diff --git a/tika-parsers/tika-parsers-ml/pom.xml 
b/tika-parsers/tika-parsers-ml/pom.xml
index ba9bd38..2dcde9e 100644
--- a/tika-parsers/tika-parsers-ml/pom.xml
+++ b/tika-parsers/tika-parsers-ml/pom.xml
@@ -40,6 +40,7 @@
     <module>tika-age-recogniser</module>
     <module>tika-parser-advancedmedia-module</module>
     <module>tika-dl</module>
+    <module>tika-transcribe-aws</module>
   </modules>
 
   <build>
diff --git a/tika-transcribe/pom.xml 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/pom.xml
similarity index 78%
rename from tika-transcribe/pom.xml
rename to tika-parsers/tika-parsers-ml/tika-transcribe-aws/pom.xml
index aadb137..1e287c5 100644
--- a/tika-transcribe/pom.xml
+++ b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/pom.xml
@@ -25,20 +25,19 @@
     <modelVersion>4.0.0</modelVersion>
 
     <parent>
+        <artifactId>tika-parsers-ml</artifactId>
         <groupId>org.apache.tika</groupId>
-        <artifactId>tika-parent</artifactId>
         <version>2.0.0-SNAPSHOT</version>
-        <relativePath>../tika-parent/pom.xml</relativePath>
     </parent>
 
-    <artifactId>tika-transcribe</artifactId>
+    <artifactId>tika-transcribe-aws</artifactId>
     <packaging>bundle</packaging>
-    <name>Apache Tika transcribe</name>
+    <name>Apache Tika transcribe aws</name>
     <url>http://tika.apache.org/</url>
     <!--TODO use latest aws version or the one defined in the tika-parent-->
     <dependencies>
         <dependency>
-            <groupId>org.apache.tika</groupId>
+            <groupId>${project.groupId}</groupId>
             <artifactId>tika-core</artifactId>
             <version>${project.version}</version>
         </dependency>
@@ -55,9 +54,37 @@
                     <groupId>commons-codec</groupId>
                     <artifactId>commons-codec</artifactId>
                 </exclusion>
+                <exclusion>
+                    <groupId>com.fasterxml.jackson.core</groupId>
+                    <artifactId>jackson-core</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.fasterxml.jackson.core</groupId>
+                    <artifactId>jackson-databind</artifactId>
+                </exclusion>
             </exclusions>
         </dependency>
         <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-core</artifactId>
+            <version>${jackson.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>${jackson.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>commons-logging</groupId>
+            <artifactId>commons-logging</artifactId>
+            <version>${commons.logging.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>commons-codec</groupId>
+            <artifactId>commons-codec</artifactId>
+            <version>${commons.codec.version}</version>
+        </dependency>
+        <dependency>
             <groupId>com.amazonaws</groupId>
             <artifactId>aws-java-sdk-s3</artifactId>
             <version>${aws.version}</version>
@@ -71,6 +98,14 @@
         <dependency>
             <groupId>junit</groupId>
             <artifactId>junit</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>${project.groupId}</groupId>
+            <artifactId>tika-core</artifactId>
+            <version>${project.version}</version>
+            <scope>test</scope>
+            <type>test-jar</type>
         </dependency>
     </dependencies>
     <build>
@@ -111,7 +146,7 @@
                 <configuration>
                     <archive>
                         <manifestEntries>
-                            
<Automatic-Module-Name>org.apache.tika.translate</Automatic-Module-Name>
+                            
<Automatic-Module-Name>org.apache.tika.parser.transcribe.aws</Automatic-Module-Name>
                         </manifestEntries>
                     </archive>
                 </configuration>
diff --git 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/main/java/org/apache/tika/parser/transcribe/aws/AmazonTranscribe.java
 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/main/java/org/apache/tika/parser/transcribe/aws/AmazonTranscribe.java
new file mode 100644
index 0000000..91e8452
--- /dev/null
+++ 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/main/java/org/apache/tika/parser/transcribe/aws/AmazonTranscribe.java
@@ -0,0 +1,398 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.transcribe.aws;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.UUID;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.stream.Collectors;
+
+import com.amazonaws.AmazonServiceException;
+import com.amazonaws.SdkClientException;
+import com.amazonaws.auth.AWSStaticCredentialsProvider;
+import com.amazonaws.auth.BasicAWSCredentials;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+import com.amazonaws.services.s3.model.AmazonS3Exception;
+import com.amazonaws.services.s3.model.CompressionType;
+import com.amazonaws.services.s3.model.ExpressionType;
+import com.amazonaws.services.s3.model.InputSerialization;
+import com.amazonaws.services.s3.model.JSONInput;
+import com.amazonaws.services.s3.model.JSONOutput;
+import com.amazonaws.services.s3.model.JSONType;
+import com.amazonaws.services.s3.model.OutputSerialization;
+import com.amazonaws.services.s3.model.PutObjectRequest;
+import com.amazonaws.services.s3.model.PutObjectResult;
+import com.amazonaws.services.s3.model.SelectObjectContentEvent;
+import com.amazonaws.services.s3.model.SelectObjectContentEventVisitor;
+import com.amazonaws.services.s3.model.SelectObjectContentRequest;
+import com.amazonaws.services.s3.model.SelectObjectContentResult;
+import com.amazonaws.services.transcribe.AmazonTranscribeAsync;
+import com.amazonaws.services.transcribe.AmazonTranscribeAsyncClientBuilder;
+import com.amazonaws.services.transcribe.model.GetTranscriptionJobRequest;
+import com.amazonaws.services.transcribe.model.GetTranscriptionJobResult;
+import com.amazonaws.services.transcribe.model.LanguageCode;
+import com.amazonaws.services.transcribe.model.Media;
+import com.amazonaws.services.transcribe.model.StartTranscriptionJobRequest;
+import com.amazonaws.services.transcribe.model.TranscriptionJob;
+import com.amazonaws.services.transcribe.model.TranscriptionJobStatus;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.config.Initializable;
+import org.apache.tika.config.InitializableProblemHandler;
+import org.apache.tika.config.Param;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+
+/**
+ * <a href="https://aws.amazon.com/transcribe/">Amazon Transcribe</a>
+ * implementation. See Javadoc for configuration options.
+ * <p>
+ * Silently becomes unavailable when client keys are unavailable.
+ *
+ * <b>N.B.</b> it is not necessary to create the bucket beforehand.
+ * This implementation will automatically create the bucket if one
+ * does not already exist, using the configured bucket name.
+ *
+ * @since Tika 2.0
+ */
+
+public class AmazonTranscribe extends AbstractParser implements Initializable {
+    private static final Logger LOG = 
LoggerFactory.getLogger(AmazonTranscribe.class);
+    private AmazonTranscribeAsync amazonTranscribeAsync;
+    private AmazonS3 amazonS3;
+    private String bucketName;
+    private String region;
+    private boolean isAvailable; // Flag for whether or not transcription is
+    // available.
+    private String clientId;
+    private String clientSecret; // Keys used for the API calls.
+    private AWSStaticCredentialsProvider credsProvider;
+
+    //https://docs.aws.amazon.com/transcribe/latest/dg/input.html
+    protected static final Set<MediaType> SUPPORTED_TYPES = 
Collections.unmodifiableSet(
+            new HashSet<>(Arrays.asList(MediaType.audio("x-flac"), 
MediaType.audio("mp3"),
+                    MediaType.audio("mpeg"), MediaType.video("ogg"), 
MediaType.audio("vnd.wave"),
+                    MediaType.audio("mp4"), MediaType.video("mp4"), 
MediaType.application("mp4"),
+                    MediaType.video("quicktime"))));
+
+
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        if (!isAvailable) {
+            return Collections.EMPTY_SET;
+        }
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Starts AWS Transcribe Job with language specification.
+     *
+     * @param stream   the source input stream.
+     * @param handler  handler to use
+     * @param metadata
+     * @param context  -- set the {@link LanguageCode} in the ParseContext if 
known
+     * @throws TikaException When there is an error transcribing.
+     * @throws IOException   If an I/O exception of some sort has occurred.
+     * @see <a href=
+     * 
"https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/transcribe/model/LanguageCode.html">AWS
+     * Language Code</a>
+     */
+    @Override
+    public void parse(InputStream stream, ContentHandler handler, Metadata 
metadata,
+                      ParseContext context) throws IOException, SAXException, 
TikaException {
+
+        if (!isAvailable) {
+            return;
+        }
+        String jobName = getJobKey();
+        LanguageCode languageCode = context.get(LanguageCode.class);
+        uploadFileToBucket(stream, jobName);
+        StartTranscriptionJobRequest startTranscriptionJobRequest =
+                new StartTranscriptionJobRequest();
+        Media media = new Media();
+        media.setMediaFileUri(amazonS3.getUrl(bucketName, jobName).toString());
+        
startTranscriptionJobRequest.withMedia(media).withOutputBucketName(this.bucketName)
+                
.withTranscriptionJobName(jobName).setRequestCredentialsProvider(credsProvider);
+
+        if (languageCode != null) {
+            startTranscriptionJobRequest.withLanguageCode(languageCode);
+        } else {
+            startTranscriptionJobRequest.withIdentifyLanguage(true);
+        }
+        
amazonTranscribeAsync.startTranscriptionJob(startTranscriptionJobRequest);
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        String text = getTranscriptText(jobName);
+        xhtml.startElement("p");
+        xhtml.characters(text);
+        xhtml.endElement("p");
+        xhtml.endDocument();
+
+    }
+
+
+    /**
+     * @return true if this Transcriber is probably able to transcribe right
+     * now.
+     * @since Tika 2.1
+     */
+    public boolean isAvailable() {
+        return this.isAvailable;
+    }
+
+    /**
+     * Sets the client Id for the transcriber API.
+     *
+     * @param id The ID to set.
+     */
+    @Field
+    public void setClientId(String id) {
+        this.clientId = id;
+        this.isAvailable = checkAvailable();
+    }
+
+    /**
+     * Sets the client secret for the transcriber API.
+     *
+     * @param secret The secret to set.
+     */
+    @Field
+    public void setClientSecret(String secret) {
+        this.clientSecret = secret;
+        this.isAvailable = checkAvailable();
+    }
+
+    /**
+     * Sets the name of the S3 bucket used by the transcriber API.
+     *
+     * @param bucket The bucket to set.
+     */
+    @Field
+    public void setBucket(String bucket) {
+        this.bucketName = bucket;
+        this.isAvailable = checkAvailable();
+    }
+
+    @Field
+    public void setRegion(String region) {
+        this.region = region;
+        this.isAvailable = checkAvailable();
+    }
+
+    /**
+     * Private method check if the service is available.
+     *
+     * @return if the service is available
+     */
+    private boolean checkAvailable() {
+        return clientId != null && clientSecret != null && bucketName != null;
+    }
+
+    /**
+     * private method to get a unique job key.
+     *
+     * @return unique job key.
+     */
+    private String getJobKey() {
+        return UUID.randomUUID().toString();
+    }
+
+    /**
+     * Constructs a new {@link PutObjectRequest} object to upload a file to the
+     * specified bucket and jobName. After constructing the request, users may
+     * optionally specify object metadata or a canned ACL as well.
+     *
+     * @param inputStream  The stream whose content is uploaded to Amazon S3
+     *                     (no object metadata is supplied with the request).
+     * @param jobName      The unique job name for each job(UUID).
+     */
+    private void uploadFileToBucket(InputStream inputStream, String jobName) 
throws TikaException {
+        PutObjectRequest request =
+                new PutObjectRequest(this.bucketName, jobName, inputStream, 
null);
+        try {
+            @SuppressWarnings("unused") PutObjectResult response = 
amazonS3.putObject(request);
+        } catch (SdkClientException e) {
+            throw (new TikaException("File Upload to AWS Failed"));
+        }
+    }
+
+    /**
+     * Gets Transcription result from AWS S3 bucket given the jobName.
+     *
+     * @param fileNameS3 The job name (S3 key of the uploaded media) whose transcript is fetched.
+     * @return The transcribed string result, NULL if the job failed.
+     * @throws IOException            possible reasons include (i) an End 
Event is not received
+     *                                from AWS S3 SelectObjectContentResult 
operation and (ii) a parse exception
+     *                                whilst processing JSON from the AWS S3 
SelectObjectContentResult operation.
+     * @throws SdkClientException     a AWS-specific exception related to 
SelectObjectContentResult
+     *                                operation.
+     * @throws AmazonServiceException possibly thrown if there is an issue 
selecting object content
+     *                                from AWS S3 objects.
+     */
+    private String getTranscriptText(String fileNameS3)
+            throws AmazonServiceException, SdkClientException, IOException {
+        TranscriptionJob transcriptionJob = 
retrieveObjectWhenJobCompleted(fileNameS3);
+        String text = null;
+        if (transcriptionJob != null && !TranscriptionJobStatus.FAILED.name()
+                .equals(transcriptionJob.getTranscriptionJobStatus())) {
+            InputSerialization inputSerialization =
+                    new InputSerialization().withJson(new 
JSONInput().withType(JSONType.DOCUMENT))
+                            .withCompressionType(CompressionType.NONE);
+            OutputSerialization outputSerialization =
+                    new OutputSerialization().withJson(new JSONOutput());
+            SelectObjectContentRequest request =
+                    new 
SelectObjectContentRequest().withBucketName(this.bucketName)
+                            .withKey(fileNameS3 + ".json").withExpression(
+                            "Select s.results.transcripts[0].transcript from 
S3Object s")
+                            //WHERE transcript IS NOT MISSING
+                            .withExpressionType(ExpressionType.SQL)
+                            .withRequestCredentialsProvider(credsProvider);
+            request.setInputSerialization(inputSerialization);
+            request.setOutputSerialization(outputSerialization);
+
+            final AtomicBoolean isResultComplete = new AtomicBoolean(false);
+
+            try (SelectObjectContentResult result = 
amazonS3.selectObjectContent(request)) {
+                InputStream resultInputStream = result.getPayload()
+                        .getRecordsInputStream(new 
SelectObjectContentEventVisitor() {
+                            @Override
+                            public void 
visit(SelectObjectContentEvent.StatsEvent event) {
+                                LOG.debug("Received Stats, Bytes Scanned: " +
+                                        event.getDetails().getBytesScanned() +
+                                        " Bytes Processed: " +
+                                        
event.getDetails().getBytesProcessed());
+                            }
+
+                            /*
+                             * An End Event informs that the request has
+                             * finished successfully.
+                             */
+                            @Override
+                            public void 
visit(SelectObjectContentEvent.EndEvent event) {
+                                isResultComplete.set(true);
+                                LOG.debug("Received End Event. Result is 
complete.");
+                            }
+                        });
+                text = new BufferedReader(
+                        new InputStreamReader(resultInputStream, 
StandardCharsets.UTF_8)).lines()
+                        .collect(Collectors.joining("\n"));
+            }
+            /*
+             * The End Event indicates all matching records have been
+             * transmitted. If the End Event is not received, the results
+             * may be incomplete.
+             */
+            if (!isResultComplete.get()) {
+                throw new IOException(
+                        "S3 Select request was incomplete as End Event was not 
received.");
+            }
+        }
+        JSONParser parser = new JSONParser();
+        JSONObject obj = null;
+        try {
+            obj = (JSONObject) parser.parse(text);
+        } catch (ParseException e) {
+            throw new IOException(e.getMessage(), e);
+        }
+        return obj.get("transcript").toString();
+    }
+
+    /**
+     * Private helper function to get object from s3.
+     *
+     * @param jobName The unique job name for each job(UUID).
+     * @return TranscriptionJob object
+     */
+    private TranscriptionJob retrieveObjectWhenJobCompleted(String jobName) {
+        GetTranscriptionJobRequest getTranscriptionJobRequest = new 
GetTranscriptionJobRequest();
+        
getTranscriptionJobRequest.withRequestCredentialsProvider(credsProvider);
+        getTranscriptionJobRequest.setTranscriptionJobName(jobName);
+        while (true) {
+            GetTranscriptionJobResult innerResult =
+                    
amazonTranscribeAsync.getTranscriptionJob(getTranscriptionJobRequest);
+            String status = 
innerResult.getTranscriptionJob().getTranscriptionJobStatus();
+            if (TranscriptionJobStatus.COMPLETED.name().equals(status) ||
+                    TranscriptionJobStatus.FAILED.name().equals(status)) {
+                return innerResult.getTranscriptionJob();
+            }
+        }
+    }
+
+    @Override
+    public void initialize(Map<String, Param> params) throws 
TikaConfigException {
+        if (!checkAvailable()) {
+            return;
+        }
+
+        try {
+            BasicAWSCredentials creds = new BasicAWSCredentials(this.clientId, 
this.clientSecret);
+            this.credsProvider = new AWSStaticCredentialsProvider(creds);
+            if (region != null) {
+                this.amazonS3 = 
AmazonS3ClientBuilder.standard().withCredentials(credsProvider)
+                        .withRegion(this.region).build();
+            } else {
+                this.amazonS3 =
+                        
AmazonS3ClientBuilder.standard().withCredentials(credsProvider).build();
+
+            }
+            if (!this.amazonS3.doesBucketExistV2(this.bucketName)) {
+                try {
+                    amazonS3.createBucket(this.bucketName);
+                } catch (AmazonS3Exception e) {
+                    throw new TikaConfigException("couldn't create bucket", e);
+                }
+            }
+            this.amazonTranscribeAsync =
+                    
AmazonTranscribeAsyncClientBuilder.standard().withCredentials(credsProvider)
+                            .withRegion(this.region).build();
+        } catch (Exception e) {
+            LOG.warn("Exception reading config file", e);
+            isAvailable = false;
+        }
+
+    }
+
+    @Override
+    public void checkInitialization(InitializableProblemHandler problemHandler)
+            throws TikaConfigException {
+        //TODO alert user if they've gotten 1 or 2 out of three?
+        this.isAvailable = checkAvailable();
+    }
+}
diff --git 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/java/org/apache/tika/parser/transcribe/aws/AmazonTranscribeTest.java
 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/java/org/apache/tika/parser/transcribe/aws/AmazonTranscribeTest.java
new file mode 100644
index 0000000..be4f76a
--- /dev/null
+++ 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/java/org/apache/tika/parser/transcribe/aws/AmazonTranscribeTest.java
@@ -0,0 +1,310 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.transcribe.aws;
+
+import java.io.InputStream;
+
+import com.amazonaws.services.transcribe.model.LanguageCode;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+
+//TODO: Check the ACTUAL output of Amazon Transcribe.
+
+/**
+ * Tests the AmazonTranscribe parser as loaded from
+ * <code>tika-config-transcribe-aws.xml</code>.
+ * 1) Each sample is parsed both with an explicit {@link LanguageCode} in the
+ *    {@link ParseContext} and without one.
+ * 2) Both audio (mp3) and video (mp4) files are used in these tests.
+ */
+@Ignore("Ignore until finalize AmazonTrancsribe Interface & build Tika")
+public class AmazonTranscribeTest extends TikaTest {
+
+    static Parser PARSER;
+
+    /**
+     * Loads the parser under test from the test configuration file.
+     *
+     * @throws Exception if the config cannot be found or parsed
+     */
+    @BeforeClass
+    public static void setUp() throws Exception {
+        // The config sits at the root of src/test/resources, so the name needs a
+        // leading '/'; a relative name would be resolved against this class's package.
+        try (InputStream is = AmazonTranscribeTest.class
+                .getResourceAsStream("/tika-config-transcribe-aws.xml")) {
+            if (is == null) {
+                throw new IllegalStateException(
+                        "couldn't find /tika-config-transcribe-aws.xml on the classpath");
+            }
+            PARSER = new TikaConfig(is).getParser();
+        }
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is en-US (English - United States)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_enUS() throws Exception {
+        ParseContext context = new ParseContext();
+        context.set(LanguageCode.class, LanguageCode.EnUS);
+        String xml = getXML("en-US_(A_Little_Bottle_Of_Water).mp3", PARSER, context).xml;
+        String expected = "a little bottle of water.";
+        assertContains(expected, xml);
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is en-US (English - United States)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_enUS() throws Exception {
+        String xml = getXML("en-US_(A_Little_Bottle_Of_Water).mp3", PARSER).xml;
+        String expected = "a little bottle of water.";
+        assertContains(expected, xml);
+    }
+
+    /**
+     * Tests transcribe with a video file given the source language
+     * The source language of the file is en-US (English - United States)
+     */
+    @Test
+    public void testAmazonTranscribeVideo_enUS() throws Exception {
+        String expected = "Hi";
+        ParseContext context = new ParseContext();
+        context.set(LanguageCode.class, LanguageCode.EnUS);
+        String xml = getXML("en-US_(Hi).mp4", PARSER, context).xml;
+        assertContains(expected, xml);
+    }
+
+    /**
+     * Tests transcribe with a video file without passing in the source language.
+     * The source language of the file is en-US (English - United States)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownVideo_enUS() throws Exception {
+        String expected = "Hi";
+        String xml = getXML("en-US_(Hi).mp4", PARSER).xml;
+        assertContains(expected, xml);
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is en-GB (English - Great Britain)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_enGB() throws Exception {
+        String file = "en-GB_(A_Little_Bottle_Of_Water).mp3";
+        String expected = "a little bottle of water.";
+        ParseContext context = new ParseContext();
+        context.set(LanguageCode.class, LanguageCode.EnGB);
+        String xml = getXML(file, PARSER, context).xml;
+        assertContains(expected, xml);
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is en-GB (English - Great Britain)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_enGB() throws Exception {
+        String file = "en-GB_(A_Little_Bottle_Of_Water).mp3";
+        String expected = "a little bottle of water.";
+        String xml = getXML(file, PARSER).xml;
+        assertContains(expected, xml);
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is en-AU (English - Australia)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_enAU() throws Exception {
+        String file = "en-AU_(A_Little_Bottle_Of_Water).mp3";
+        String expected = "a little bottle of water.";
+        ParseContext context = new ParseContext();
+        context.set(LanguageCode.class, LanguageCode.EnAU);
+        String xml = getXML(file, PARSER, context).xml;
+        assertContains(expected, xml);
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is en-AU (English - Australia)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_enAU() throws Exception {
+        String file = "en-AU_(A_Little_Bottle_Of_Water).mp3";
+        String expected = "a little bottle of water.";
+        String xml = getXML(file, PARSER).xml;
+        assertContains(expected, xml);
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is de-DE (German)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_deDE() throws Exception {
+        String file = "de-DE_(We_Are_At_School_x2).mp3";
+        String expected = "Wir sind in der Schule. Wir sind in der Schule.";
+        ParseContext context = new ParseContext();
+        context.set(LanguageCode.class, LanguageCode.DeDE);
+        String xml = getXML(file, PARSER, context).xml;
+        assertContains(expected, xml);
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is de-DE (German)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_deDE() throws Exception {
+        String file = "de-DE_(We_Are_At_School_x2).mp3";
+        String expected = "Wir sind in der Schule. Wir sind in der Schule.";
+        String xml = getXML(file, PARSER).xml;
+        assertContains(expected, xml);
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is it-IT (Italian)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_itIT() throws Exception {
+        String file = "it-IT_(We_Are_Having_Class_x2).mp3";
+        String expected = "stiamo facendo lezione. stiamo facendo lezione.";
+        ParseContext context = new ParseContext();
+        context.set(LanguageCode.class, LanguageCode.ItIT);
+        String xml = getXML(file, PARSER, context).xml;
+        assertContains(expected, xml);
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is it-IT (Italian)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_itIT() throws Exception {
+        String file = "it-IT_(We_Are_Having_Class_x2).mp3";
+        String expected = "stiamo facendo lezione. stiamo facendo lezione.";
+        String xml = getXML(file, PARSER).xml;
+        assertContains(expected, xml);
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is ja-JP (Japanese)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_jaJP() throws Exception {
+        String file = "ja-JP_(We_Are_At_School).mp3";
+        String expected = "私達は学校にいます"; //TODO or Watashitachi wa gakkō ni imasu
+        ParseContext context = new ParseContext();
+        context.set(LanguageCode.class, LanguageCode.JaJP);
+        String xml = getXML(file, PARSER, context).xml;
+        assertContains(expected, xml);
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is ja-JP (Japanese)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_jaJP() throws Exception {
+        String file = "ja-JP_(We_Are_At_School).mp3";
+        String expected = "私達は学校にいます"; //TODO or Watashitachi wa gakkō ni imasu
+        String xml = getXML(file, PARSER).xml;
+        assertContains(expected, xml);
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is ko-KR (Korean)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_koKR() throws Exception {
+        String file = "ko-KR_(We_Are_Having_Class_x2).mp3";
+        String expected = "우리는 수업을하고있다"; //TODO or ulineun sueob-eulhagoissda
+        ParseContext context = new ParseContext();
+        context.set(LanguageCode.class, LanguageCode.KoKR);
+        String xml = getXML(file, PARSER, context).xml;
+        assertContains(expected, xml);
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is ko-KR (Korean)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_koKR() throws Exception {
+        String file = "ko-KR_(We_Are_Having_Class_x2).mp3";
+        String expected = "우리는 수업을하고있다"; //TODO or ulineun sueob-eulhagoissda
+        String xml = getXML(file, PARSER).xml;
+        assertContains(expected, xml);
+    }
+
+    /**
+     * Tests transcribe with a video file given the source language
+     * The source language of the file is ko-KR (Korean)
+     */
+    @Test
+    public void testAmazonTranscribeVideo_koKR() throws Exception {
+        String file = "ko-KR_(Annyeonghaseyo).mp4";
+        //TODO: Check whether output is Annyeonghaseyo or 안녕하세요
+        String expected = "Annyeonghaseyo";
+        ParseContext context = new ParseContext();
+        context.set(LanguageCode.class, LanguageCode.KoKR);
+        String xml = getXML(file, PARSER, context).xml;
+        assertContains(expected, xml);
+    }
+
+    /**
+     * Tests transcribe with a video file without passing in the source language.
+     * The source language of the file is ko-KR (Korean)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownVideo_koKR() throws Exception {
+        String file = "ko-KR_(Annyeonghaseyo).mp4";
+        //TODO: Check whether output is Annyeonghaseyo or 안녕하세요
+        String expected = "Annyeonghaseyo";
+        String xml = getXML(file, PARSER).xml;
+        assertContains(expected, xml);
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is pt-BR (Portuguese - Brazil)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_ptBR() throws Exception {
+        String file = "pt-BR_(We_Are_At_School).mp3";
+        String expected = "nós estamos na escola.";
+        ParseContext context = new ParseContext();
+        context.set(LanguageCode.class, LanguageCode.PtBR);
+        String xml = getXML(file, PARSER, context).xml;
+        assertContains(expected, xml);
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is pt-BR (Portuguese - Brazil)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_ptBR() throws Exception {
+        String file = "pt-BR_(We_Are_At_School).mp3";
+        String expected = "nós estamos na escola.";
+        String xml = getXML(file, PARSER).xml;
+        assertContains(expected, xml);
+    }
+
+}
diff --git a/tika-transcribe/src/test/resources/ShortAudioSampleFrench.mp3 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/ShortAudioSampleFrench.mp3
similarity index 100%
rename from tika-transcribe/src/test/resources/ShortAudioSampleFrench.mp3
rename to 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/ShortAudioSampleFrench.mp3
diff --git a/tika-transcribe/src/test/resources/de-DE_(We_Are_At_School_x2).mp3 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/de-DE_(We_Are_At_School_x2).mp3
similarity index 100%
rename from tika-transcribe/src/test/resources/de-DE_(We_Are_At_School_x2).mp3
rename to 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/de-DE_(We_Are_At_School_x2).mp3
diff --git 
a/tika-transcribe/src/test/resources/en-AU_(A_Little_Bottle_Of_Water).mp3 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/en-AU_(A_Little_Bottle_Of_Water).mp3
similarity index 100%
rename from 
tika-transcribe/src/test/resources/en-AU_(A_Little_Bottle_Of_Water).mp3
rename to 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/en-AU_(A_Little_Bottle_Of_Water).mp3
diff --git 
a/tika-transcribe/src/test/resources/en-GB_(A_Little_Bottle_Of_Water).mp3 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/en-GB_(A_Little_Bottle_Of_Water).mp3
similarity index 100%
rename from 
tika-transcribe/src/test/resources/en-GB_(A_Little_Bottle_Of_Water).mp3
rename to 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/en-GB_(A_Little_Bottle_Of_Water).mp3
diff --git 
a/tika-transcribe/src/test/resources/en-US_(A_Little_Bottle_Of_Water).mp3 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/en-US_(A_Little_Bottle_Of_Water).mp3
similarity index 100%
rename from 
tika-transcribe/src/test/resources/en-US_(A_Little_Bottle_Of_Water).mp3
rename to 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/en-US_(A_Little_Bottle_Of_Water).mp3
diff --git a/tika-transcribe/src/test/resources/en-US_(Hi).mp4 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/en-US_(Hi).mp4
similarity index 100%
rename from tika-transcribe/src/test/resources/en-US_(Hi).mp4
rename to 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/en-US_(Hi).mp4
diff --git 
a/tika-transcribe/src/test/resources/it-IT_(We_Are_Having_Class_x2).mp3 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/it-IT_(We_Are_Having_Class_x2).mp3
similarity index 100%
rename from 
tika-transcribe/src/test/resources/it-IT_(We_Are_Having_Class_x2).mp3
rename to 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/it-IT_(We_Are_Having_Class_x2).mp3
diff --git a/tika-transcribe/src/test/resources/ja-JP_(We_Are_At_School).mp3 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/ja-JP_(We_Are_At_School).mp3
similarity index 100%
rename from tika-transcribe/src/test/resources/ja-JP_(We_Are_At_School).mp3
rename to 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/ja-JP_(We_Are_At_School).mp3
diff --git a/tika-transcribe/src/test/resources/ko-KR_(Annyeonghaseyo).mp4 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/ko-KR_(Annyeonghaseyo).mp4
similarity index 100%
rename from tika-transcribe/src/test/resources/ko-KR_(Annyeonghaseyo).mp4
rename to 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/ko-KR_(Annyeonghaseyo).mp4
diff --git 
a/tika-transcribe/src/test/resources/ko-KR_(We_Are_Having_Class_x2).mp3 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/ko-KR_(We_Are_Having_Class_x2).mp3
similarity index 100%
rename from 
tika-transcribe/src/test/resources/ko-KR_(We_Are_Having_Class_x2).mp3
rename to 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/ko-KR_(We_Are_Having_Class_x2).mp3
diff --git a/tika-transcribe/src/test/resources/pt-BR_(We_Are_At_School).mp3 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/pt-BR_(We_Are_At_School).mp3
similarity index 100%
rename from tika-transcribe/src/test/resources/pt-BR_(We_Are_At_School).mp3
rename to 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/pt-BR_(We_Are_At_School).mp3
diff --git 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/tika-config-transcribe-aws.xml
 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/tika-config-transcribe-aws.xml
new file mode 100644
index 0000000..875fe5b
--- /dev/null
+++ 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/tika-config-transcribe-aws.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <parsers>
+    <parser class="org.apache.tika.parser.DefaultParser"/>
+    <parser class="org.apache.tika.parser.transcribe.aws.AmazonTranscribe">
+      <params>
+        <!-- first three are required -->
+        <param name="bucket" type="string">bucket</param>
+        <param name="clientId" type="string">clientId</param>
+        <param name="clientSecret" type="string">clientSecret</param>
+        <!-- region is optional -->
+        <param name="region" type="string">region</param>
+      </params>
+    </parser>
+  </parsers>
+</properties>
\ No newline at end of file
diff --git 
a/tika-transcribe/src/main/java/org/apache/tika/transcribe/AmazonTranscribe.java
 
b/tika-transcribe/src/main/java/org/apache/tika/transcribe/AmazonTranscribe.java
deleted file mode 100644
index 5b50491..0000000
--- 
a/tika-transcribe/src/main/java/org/apache/tika/transcribe/AmazonTranscribe.java
+++ /dev/null
@@ -1,406 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.transcribe;
-
-import com.amazonaws.AmazonServiceException;
-import com.amazonaws.SdkClientException;
-import com.amazonaws.auth.AWSStaticCredentialsProvider;
-import com.amazonaws.auth.BasicAWSCredentials;
-import com.amazonaws.services.s3.AmazonS3;
-import com.amazonaws.services.s3.AmazonS3ClientBuilder;
-import com.amazonaws.services.s3.model.AmazonS3Exception;
-import com.amazonaws.services.s3.model.CompressionType;
-import com.amazonaws.services.s3.model.ExpressionType;
-import com.amazonaws.services.s3.model.InputSerialization;
-import com.amazonaws.services.s3.model.JSONInput;
-import com.amazonaws.services.s3.model.JSONOutput;
-import com.amazonaws.services.s3.model.JSONType;
-import com.amazonaws.services.s3.model.OutputSerialization;
-import com.amazonaws.services.s3.model.PutObjectRequest;
-import com.amazonaws.services.s3.model.PutObjectResult;
-import com.amazonaws.services.s3.model.SelectObjectContentEvent;
-import com.amazonaws.services.s3.model.SelectObjectContentEventVisitor;
-import com.amazonaws.services.s3.model.SelectObjectContentRequest;
-import com.amazonaws.services.s3.model.SelectObjectContentResult;
-import com.amazonaws.services.transcribe.AmazonTranscribeAsync;
-import com.amazonaws.services.transcribe.AmazonTranscribeAsyncClientBuilder;
-import com.amazonaws.services.transcribe.model.Media;
-import com.amazonaws.services.transcribe.model.StartTranscriptionJobRequest;
-import com.amazonaws.services.transcribe.model.TranscriptionJob;
-import com.amazonaws.services.transcribe.model.TranscriptionJobStatus;
-import com.amazonaws.services.transcribe.model.GetTranscriptionJobRequest;
-import com.amazonaws.services.transcribe.model.GetTranscriptionJobResult;
-import com.amazonaws.services.transcribe.model.LanguageCode;
-import org.apache.tika.exception.TikaException;
-import org.json.simple.JSONObject;
-import org.json.simple.parser.JSONParser;
-import org.json.simple.parser.ParseException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.nio.charset.StandardCharsets;
-import java.util.Properties;
-import java.util.UUID;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.stream.Collectors;
-
-/**
- * <a href="https://aws.amazon.com/transcribe/";>Amazon Transcribe</a> 
- * {@link Transcriber} implementation. See Javadoc for configiration options.
- *
- * @since Tika 2.1
- */
-public class AmazonTranscribe implements Transcriber {
-
-    public static final String PROPERTIES_FILE = 
"transcribe.amazon.properties";
-    public static final String ID_PROPERTY = "transcribe.AWS_ACCESS_KEY";
-    public static final String SECRET_PROPERTY = "transcribe.AWS_SECRET_KEY";
-    public static final String DEFAULT_ID = "dummy-id";
-    public static final String DEFAULT_SECRET = "dummy-secret";
-    public static final String DEFAULT_BUCKET = "dummy-bucket";
-    public static final String BUCKET_NAME = "transcribe.BUCKET_NAME";
-    public static final String REGION = "transcribe.REGION";
-    private static final Logger LOG = LoggerFactory
-            .getLogger(AmazonTranscribe.class);
-    private AmazonTranscribeAsync amazonTranscribeAsync;
-    private AmazonS3 amazonS3;
-    private String bucketName;
-    private String region;
-    private boolean isAvailable; // Flag for whether or not transcription is
-    // available.
-    private String clientId;
-    private String clientSecret; // Keys used for the API calls.
-    private AWSStaticCredentialsProvider credsProvider;
-
-    /**
-     * Create a new AmazonTranscribe instance with the client keys specified in
-     * <code>transcribe.amazon.properties</code> which needs to be available on
-     * the Java Classpath.
-     * Silently becomes unavailable when client keys are unavailable.
-     * <code>transcribe.AWS_ACCESS_KEY</code>,
-     * <code>transcribe.AWS_SECRET_KEY</code>,
-     * <code>transcribe.BUCKET_NAME</code> and 
-     * <code>transcribe.REGION</code> must be set in
-     * <code>transcribe.amazon.properties</code>.
-     * <b>N.B.</b> it is not necessary to create the bucket before hand. 
-     * This implementation will automatically create the bucket if one
-     * does not alrerady exist, per the name defined above.
-     *
-     * @since Tika 2.0
-     */
-    public AmazonTranscribe() {
-        Properties config = new Properties();
-        try {
-            config.load(AmazonTranscribe.class
-                    .getResourceAsStream(PROPERTIES_FILE));
-            this.clientId = config.getProperty(ID_PROPERTY);
-            this.clientSecret = config.getProperty(SECRET_PROPERTY);
-            this.bucketName = config.getProperty(BUCKET_NAME);
-            this.region = config.getProperty(REGION);
-            BasicAWSCredentials creds = new BasicAWSCredentials(this.clientId,
-                    this.clientSecret);
-            this.credsProvider = new AWSStaticCredentialsProvider(creds);
-            amazonS3 = AmazonS3ClientBuilder.standard()
-                    .withCredentials(credsProvider).withRegion(this.region)
-                    .build();
-            this.isAvailable = checkAvailable();
-            if (!this.amazonS3.doesBucketExistV2(this.bucketName)) {
-                try {
-                    amazonS3.createBucket(this.bucketName);
-                } catch (AmazonS3Exception e) {
-                    throw new RuntimeException(e.getErrorMessage());
-                }
-            }
-            this.amazonTranscribeAsync = AmazonTranscribeAsyncClientBuilder
-                    .standard().withCredentials(credsProvider)
-                    .withRegion(this.region).build();
-        } catch (Exception e) {
-            LOG.warn("Exception reading config file", e);
-            isAvailable = false;
-        }
-    }
-
-    /**
-     * private method to get a unique job key.
-     *
-     * @return unique job key.
-     */
-    private String getJobKey() {
-        return UUID.randomUUID().toString();
-    }
-
-    /**
-     * Constructs a new {@link PutObjectRequest} object to upload a file to the
-     * specified bucket and jobName. After constructing the request, users may
-     * optionally specify object metadata or a canned ACL as well.
-     *
-     * @param inputStream, null
-     *            The file to upload to Amazon S3.
-     * @param jobName
-     *            The unique job name for each job(UUID).
-     */
-    private void uploadFileToBucket(InputStream inputStream, String jobName)
-            throws TikaException {
-        PutObjectRequest request = new PutObjectRequest(this.bucketName,
-                jobName, inputStream, null);
-        try {
-            @SuppressWarnings("unused")
-            PutObjectResult response = amazonS3.putObject(request);
-        } catch (SdkClientException e) {
-            throw (new TikaException("File Upload to AWS Failed"));
-        }
-    }
-
-    /**
-     * Starts AWS Transcribe Job without language specification.
-     *
-     * @param inputStream
-     *            the source input stream.
-     * @return The transcribed string result, NULL if the job failed.
-     * @throws TikaException
-     *             When there is an error transcribing.
-     * @throws IOException
-     *             If an I/O exception of some sort has occurred.
-     */
-    @Override
-    public String transcribe(InputStream inputStream)
-            throws TikaException, IOException {
-        if (!isAvailable())
-            return null;
-        String jobName = getJobKey();
-        uploadFileToBucket(inputStream, jobName);
-        StartTranscriptionJobRequest startTranscriptionJobRequest = new 
StartTranscriptionJobRequest();
-        Media media = new Media();
-        media.setMediaFileUri(amazonS3.getUrl(bucketName, jobName).toString());
-        
startTranscriptionJobRequest.withIdentifyLanguage(true).withMedia(media)
-        .withOutputBucketName(this.bucketName)
-        .withTranscriptionJobName(jobName)
-        .setRequestCredentialsProvider(credsProvider);
-        amazonTranscribeAsync
-        .startTranscriptionJob(startTranscriptionJobRequest);
-        return getTranscriptText(jobName);
-    }
-
-    /**
-     * Starts AWS Transcribe Job with language specification.
-     *
-     * @param inputStream
-     *            the source input stream.
-     * @param sourceLanguage
-     *            <a href=
-     *            
"https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/transcribe/model/LanguageCode.html";>AWS
-     *            Language Code</a> for the language used in the input media
-     *            file.
-     * @return The transcribed string result, NULL if the job failed.
-     * @throws TikaException
-     *             When there is an error transcribing.
-     * @throws IOException
-     *             If an I/O exception of some sort has occurred.
-     * @see <a href=
-     *      
"https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/transcribe/model/LanguageCode.html";>AWS
-     *      Language Code</a>
-     */
-    @Override
-    public String transcribe(InputStream inputStream, String sourceLanguage)
-            throws TikaException, IOException {
-        if (!isAvailable())
-            return null;
-        String jobName = getJobKey();
-        uploadFileToBucket(inputStream, jobName);
-        StartTranscriptionJobRequest startTranscriptionJobRequest = new 
StartTranscriptionJobRequest();
-        Media media = new Media();
-        media.setMediaFileUri(amazonS3.getUrl(bucketName, jobName).toString());
-        ((StartTranscriptionJobRequest) startTranscriptionJobRequest
-                .withMedia(media).withOutputBucketName(this.bucketName)
-                .withTranscriptionJobName(jobName)
-                .withRequestCredentialsProvider(credsProvider))
-        .withLanguageCode(
-                LanguageCode.fromValue(sourceLanguage));
-        amazonTranscribeAsync
-        .startTranscriptionJob(startTranscriptionJobRequest);
-        return getTranscriptText(jobName);
-    }
-
-    /**
-     * @return true if this Transcriber is probably able to transcribe right
-     *         now.
-     * @since Tika 2.1
-     */
-    @Override
-    public boolean isAvailable() {
-        return this.isAvailable;
-    }
-
-    /**
-     * Sets the client Id for the transcriber API.
-     *
-     * @param id
-     *            The ID to set.
-     */
-    public void setId(String id) {
-        this.clientId = id;
-        this.isAvailable = checkAvailable();
-    }
-
-    /**
-     * Sets the client secret for the transcriber API.
-     *
-     * @param secret
-     *            The secret to set.
-     */
-    public void setSecret(String secret) {
-        this.clientSecret = secret;
-        this.isAvailable = checkAvailable();
-    }
-
-    /**
-     * Sets the client secret for the transcriber API.
-     *
-     * @param bucket
-     *            The bucket to set.
-     */
-    public void setBucket(String bucket) {
-        this.bucketName = bucket;
-        this.isAvailable = checkAvailable();
-    }
-
-    /**
-     * Private method check if the service is available.
-     *
-     * @return if the service is available
-     */
-    private boolean checkAvailable() {
-        return clientId != null && !clientId.equals(DEFAULT_ID)
-                && clientSecret != null && !clientSecret.equals(DEFAULT_SECRET)
-                && bucketName != null && !bucketName.equals(DEFAULT_BUCKET);
-    }
-
-    /**
-     * Gets Transcription result from AWS S3 bucket given the jobName.
-     *
-     * @param fileNameS3
-     *            The path of the file to upload to Amazon S3.
-     * @return The transcribed string result, NULL if the job failed.
-     * @throws IOException possible reasons include (i) an End Event is not received
-     * from AWS S3 SelectObjectContentResult operation and (ii) a parse exception
-     * whilst processing JSON from the AWS S3 SelectObjectContentResult operation.
-     * @throws SdkClientException a AWS-specific exception related to SelectObjectContentResult
-     * operation.
-     * @throws AmazonServiceException possibly thrown if there is an issue selecting object content
-     * from AWS S3 objects.
-     */
-    private String getTranscriptText(String fileNameS3) throws AmazonServiceException, SdkClientException, IOException {
-        TranscriptionJob transcriptionJob = retrieveObjectWhenJobCompleted(
-                fileNameS3);
-        String text = null;
-        if (transcriptionJob != null && !TranscriptionJobStatus.FAILED.name()
-                .equals(transcriptionJob.getTranscriptionJobStatus())) {
-            InputSerialization inputSerialization = new InputSerialization().withJson(new JSONInput().withType(JSONType.DOCUMENT))
-                    .withCompressionType(CompressionType.NONE);
-            OutputSerialization outputSerialization = new OutputSerialization().withJson(new JSONOutput());
-            SelectObjectContentRequest request = new SelectObjectContentRequest()
-                    .withBucketName(this.bucketName).withKey(fileNameS3 + ".json")
-                    .withExpression("Select s.results.transcripts[0].transcript from S3Object s")//WHERE transcript IS NOT MISSING
-                    .withExpressionType(ExpressionType.SQL).withRequestCredentialsProvider(credsProvider);
-            request.setInputSerialization(inputSerialization);
-            request.setOutputSerialization(outputSerialization);
-
-            final AtomicBoolean isResultComplete = new AtomicBoolean(false);
-
-            try (SelectObjectContentResult result = amazonS3
-                    .selectObjectContent(request)) {
-                InputStream resultInputStream = result.getPayload()
-                        .getRecordsInputStream(
-                                new SelectObjectContentEventVisitor() {
-                                    @Override
-                                    public void visit(
-                                            SelectObjectContentEvent.StatsEvent event) {
-                                        LOG.debug(
-                                                "Received Stats, Bytes Scanned: "
-                                                        + event.getDetails()
-                                                        .getBytesScanned()
-                                                        + " Bytes Processed: "
-                                                        + event.getDetails()
-                                                        .getBytesProcessed());
-                                    }
-
-                                    /*
-                                     * An End Event informs that the request has
-                                     * finished successfully.
-                                     */
-                                    @Override
-                                    public void visit(
-                                            SelectObjectContentEvent.EndEvent event) {
-                                        isResultComplete.set(true);
-                                        LOG.debug(
-                                                "Received End Event. Result is complete.");
-                                    }
-                                });
-                text = new BufferedReader(
-                        new InputStreamReader(resultInputStream, StandardCharsets.UTF_8))
-                        .lines()
-                        .collect(Collectors.joining("\n"));
-            }
-            /*
-             * The End Event indicates all matching records have been
-             * transmitted. If the End Event is not received, the results
-             * may be incomplete.
-             */
-            if (!isResultComplete.get()) {
-                throw new IOException(
-                        "S3 Select request was incomplete as End Event was not received.");
-            }
-        }
-        JSONParser parser = new JSONParser();
-        JSONObject obj = null;
-        try {
-            obj = (JSONObject) parser.parse(text);
-        } catch (ParseException e) {
-            throw new IOException(e.getMessage(), e);
-        }
-        return obj.get("transcript").toString();
-    }
-
-    /**
-     * Private helper function to get object from s3.
-     *
-     * @param jobName
-     *            The unique job name for each job(UUID).
-     * @return TranscriptionJob object
-     */
-    private TranscriptionJob retrieveObjectWhenJobCompleted(String jobName) {
-        GetTranscriptionJobRequest getTranscriptionJobRequest = new GetTranscriptionJobRequest();
-        getTranscriptionJobRequest
-        .withRequestCredentialsProvider(credsProvider);
-        getTranscriptionJobRequest.setTranscriptionJobName(jobName);
-        while (true) {
-            GetTranscriptionJobResult innerResult = amazonTranscribeAsync
-                    .getTranscriptionJob(getTranscriptionJobRequest);
-            String status = innerResult.getTranscriptionJob()
-                    .getTranscriptionJobStatus();
-            if (TranscriptionJobStatus.COMPLETED.name().equals(status)
-                    || TranscriptionJobStatus.FAILED.name().equals(status)) {
-                return innerResult.getTranscriptionJob();
-            }
-        }
-    }
-}
\ No newline at end of file
diff --git a/tika-transcribe/src/main/resources/META-INF.services/org.apache.tika.language.translate.Translator b/tika-transcribe/src/main/resources/META-INF.services/org.apache.tika.language.translate.Translator
deleted file mode 100644
index 1256ab6..0000000
--- a/tika-transcribe/src/main/resources/META-INF.services/org.apache.tika.language.translate.Translator
+++ /dev/null
@@ -1,16 +0,0 @@
-#  Licensed to the Apache Software Foundation (ASF) under one or more
-#  contributor license agreements.  See the NOTICE file distributed with
-#  this work for additional information regarding copyright ownership.
-#  The ASF licenses this file to You under the Apache License, Version 2.0
-#  (the "License"); you may not use this file except in compliance with
-#  the License.  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-
-org.apache.tika.language.translate.amazontranscribe
diff --git a/tika-transcribe/src/main/resources/org.apache.tika.transcribe/transcribe.amazon.properties b/tika-transcribe/src/main/resources/org.apache.tika.transcribe/transcribe.amazon.properties
deleted file mode 100644
index 043a66f..0000000
--- a/tika-transcribe/src/main/resources/org.apache.tika.transcribe/transcribe.amazon.properties
+++ /dev/null
@@ -1,18 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-transcribe.AWS_ACCESS_KEY=dummy_key
-transcribe.AWS_SECRET_KEY=dummy_key
-transcribe.BUCKET_NAME=dummy_name
diff --git a/tika-transcribe/src/test/java/org/apache/tika/transcribe/AmazonTranscribeTest.java b/tika-transcribe/src/test/java/org/apache/tika/transcribe/AmazonTranscribeTest.java
deleted file mode 100644
index 3b424f9..0000000
--- a/tika-transcribe/src/test/java/org/apache/tika/transcribe/AmazonTranscribeTest.java
+++ /dev/null
@@ -1,527 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.transcribe;
-
-import org.junit.Before;
-import org.junit.Ignore;
-import org.junit.Test;
-
-import java.io.FileInputStream;
-
-import static junit.framework.TestCase.assertNotNull;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.fail;
-
-//TODO: Check the ACTUAL output of Amazon Transcribe.
-
-/**
- * Tests tika-trancribe by creating an AmazonTranscribe() object.
- * 1) Tests that transcribe functions properly when it is given just a filepath.
- * 2) Both audio (mp3) and video (mp4) files are used in these tests.
- */
-@Ignore("Ignore until finalize AmazonTransribe Interface & build Tika")
-public class AmazonTranscribeTest {
-    AmazonTranscribe transcriber;
-
-    @Before
-    public void setUp() {
-        transcriber = new AmazonTranscribe();
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is en-US (English - United States)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_enUS() {
-        String audioFilePath = "src/test/resources/en-US_(A_Little_Bottle_Of_Water).mp3";
-        String expected = "a little bottle of water.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(audioFilePath), "en-US");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source language.
-     * The source language of the file is en-US (English - United States)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_enUS() {
-        String audioFilePath = "src/test/resources/en-US_(A_Little_Bottle_Of_Water).mp3";
-        String expected = "a little bottle of water.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(audioFilePath));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is en-US (English - United States)
-     */
-    @Test
-    public void testAmazonTranscribeVideo_enUS() {
-        String videoFilePath = "en-US_(Hi).mp4";
-        String expected = "Hi";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(videoFilePath), "en-US");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with a video file without passing in the source language.
-     * The source language of the file is en-US (English - United States)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownVideo_enUS() {
-        String videoFilePath = "en-US_(Hi).mp4";
-        String expected = "Hi";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(videoFilePath));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is en-GB (English - Great Britain)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_enGB() {
-        String audioFilePath = "src/test/resources/en-GB_(A_Little_Bottle_Of_Water).mp3";
-        String expected = "a little bottle of water.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(audioFilePath), "en-GB");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source language.
-     * The source language of the file is en-GB (English - Great Britain)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_enGB() {
-        String audioFilePath = "src/test/resources/en-GB_(A_Little_Bottle_Of_Water).mp3";
-        String expected = "a little bottle of water.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(audioFilePath));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is en-AU (English - Australia)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_enAU() {
-        String source = "src/test/resources/en-AU_(A_Little_Bottle_Of_Water).mp3";
-        String expected = "a little bottle of water.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(source), "en-AU");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source language.
-     * The source language of the file is en-AU (English - Australian)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_enAU() {
-        String videoFilePath = "src/test/resources/en-AU_(A_Little_Bottle_Of_Water).mp3";
-        String expected = "a little bottle of water.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(videoFilePath));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is de-DE (German)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_deDE() {
-        String audioFilePath = "src/test/resources/de-DE_(We_Are_At_School_x2).mp3";
-        String expected = "Wir sind in der Schule. Wir sind in der Schule.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(audioFilePath), "de-DE");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source language.
-     * The source language of the file is de-DE (German)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_deDE() {
-        String audioFilePath = "src/test/resources/de-DE_(We_Are_At_School_x2).mp3";
-        String expected = "Wir sind in der Schule. Wir sind in der Schule.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(audioFilePath));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is it-IT (Italian)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_itIT() {
-        String audioFilePath = "src/test/resources/it-IT_(We_Are_Having_Class_x2).mp3";
-        String expected = "stiamo facendo lezione. stiamo facendo lezione.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(audioFilePath), "it-IT");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source language.
-     * The source language of the file is it-IT (Italian)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_itIT() {
-        String audioFilePath = "src/test/resources/it-IT_(We_Are_Having_Class_x2).mp3";
-        String expected = "stiamo facendo lezione. stiamo facendo lezione.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(audioFilePath));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is ja-JP (Japanese)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_jaJP() {
-        String audioFilePath = "src/test/resources/ja-JP_(We_Are_At_School).mp3";
-        String expected = "私達は学校にいます"; //TODO or Watashitachi wa gakkō ni imasu
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(audioFilePath), "ja-JP");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source language.
-     * The source language of the file is ja-JP (Japanese)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_jaJP() {
-        String audioFilePath = "src/test/resources/ja-JP_(We_Are_At_School).mp3";
-        String expected = "私達は学校にいます"; //TODO or Watashitachi wa gakkō ni imasu
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(audioFilePath));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is ko-KR (Korean)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_koKR() {
-        String audioFilePath = "src/test/resources/ko-KR_(We_Are_Having_Class_x2).mp3";
-        String expected = "우리는 수업을하고있다"; //TODO or ulineun sueob-eulhagoissda
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(audioFilePath), "ko-KR");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source language.
-     * The source language of the file is ko-KR (Korean)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_koKR() {
-        String audioFilePath = "src/test/resources/ko-KR_(We_Are_Having_Class_x2).mp3";
-        String expected = "우리는 수업을하고있다"; //TODO or ulineun sueob-eulhagoissda
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(audioFilePath));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with a video file given the source language
-     * The source language of the file is ko-KR (Korean)
-     */
-    @Test
-    public void testAmazonTranscribeVideo_koKR() {
-        String source = "src/test/resources/ko-KR_(Annyeonghaseyo).mp4";
-        //TODO: Check whether output is Annyeonghaseyo or 안녕하세요
-        String expected = "Annyeonghaseyo";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(source), "ko-KR");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an video file without passing in the source language.
-     * The source language of the file is ko-KR (Korean)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownVideo_koKR() {
-        String source = "src/test/resources/ko-KR_(Annyeonghaseyo).mp4";
-        //TODO: Check whether output is Annyeonghaseyo or 안녕하세요
-        String expected = "Annyeonghaseyo";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(source));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is pt-BR (Portuguese - Brazil)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_ptBR() {
-        String audioFilePath = "src/test/resources/pt-BR_(We_Are_At_School).mp3";
-        String expected = "nós estamos na escola.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(audioFilePath), "pt-BR");
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source language.
-     * The source language of the file is pt-BR (Portuguese - Brazil)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_ptBR() {
-        String audioFilePath = "src/test/resources/pt-BR_(We_Are_At_School).mp3";
-        String expected = "nós estamos na escola.";
-        String result;
-
-        if (transcriber.isAvailable()) {
-            try {
-                result = transcriber.transcribe(new FileInputStream(audioFilePath));
-                assertNotNull(result);
-                assertEquals("Result: [" + result
-                        + "]: not equal to expected: [" + expected + "]",
-                    expected, result);
-            } catch (Exception e) {
-                e.printStackTrace();
-                fail(e.getMessage());
-            }
-        }
-    }
-
-}

Reply via email to