This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit ee5a82ff2e79a7aa1b5a364037fe67efe4ae0be8
Author: tallison <[email protected]>
AuthorDate: Tue May 18 08:49:07 2021 -0400

    Revert "TIKA-3384 -- convert transcribe to a traditional parser"
    
    This reverts commit 2c951a35e57cf6624457798d51c1b8cbffff0f7b.
---
 pom.xml                                            |   1 +
 .../org/apache/tika/transcribe/Transcriber.java    |  60 +++
 .../tika/example/TranscribeTranslateExample.java   |  83 ++--
 tika-parsers/tika-parsers-ml/pom.xml               |   1 -
 .../parser/transcribe/aws/AmazonTranscribe.java    | 398 ----------------
 .../transcribe/aws/AmazonTranscribeTest.java       | 310 ------------
 .../test/resources/tika-config-transcribe-aws.xml  |  32 --
 .../pom.xml                                        |  47 +-
 .../apache/tika/transcribe/AmazonTranscribe.java   | 406 ++++++++++++++++
 .../org.apache.tika.language.translate.Translator  |  16 +
 .../transcribe.amazon.properties                   |  18 +
 .../tika/transcribe/AmazonTranscribeTest.java      | 527 +++++++++++++++++++++
 .../src/test/resources}/ShortAudioSampleFrench.mp3 | Bin
 .../resources}/de-DE_(We_Are_At_School_x2).mp3     | Bin
 .../en-AU_(A_Little_Bottle_Of_Water).mp3           | Bin
 .../en-GB_(A_Little_Bottle_Of_Water).mp3           | Bin
 .../en-US_(A_Little_Bottle_Of_Water).mp3           | Bin
 .../src/test/resources}/en-US_(Hi).mp4             | Bin
 .../resources}/it-IT_(We_Are_Having_Class_x2).mp3  | Bin
 .../test/resources}/ja-JP_(We_Are_At_School).mp3   | Bin
 .../src/test/resources}/ko-KR_(Annyeonghaseyo).mp4 | Bin
 .../resources}/ko-KR_(We_Are_Having_Class_x2).mp3  | Bin
 .../test/resources}/pt-BR_(We_Are_At_School).mp3   | Bin
 23 files changed, 1069 insertions(+), 830 deletions(-)

diff --git a/pom.xml b/pom.xml
index d0e43d4..f8c6591 100644
--- a/pom.xml
+++ b/pom.xml
@@ -52,6 +52,7 @@
     <module>tika-translate</module>
     <module>tika-example</module>
     <module>tika-java7</module>
+    <module>tika-transcribe</module>
   </modules>
 
   <profiles>
diff --git 
a/tika-core/src/main/java/org/apache/tika/transcribe/Transcriber.java 
b/tika-core/src/main/java/org/apache/tika/transcribe/Transcriber.java
new file mode 100644
index 0000000..3546256
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/transcribe/Transcriber.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.transcribe;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.tika.exception.TikaException;
+
+/**
+ * Interface for Transcriber services.
+ *
+ * @see <a href="https://issues.apache.org/jira/browse/TIKA-94">TIKA-94</a>
+ * @since Tika 2.1
+ */
+public interface Transcriber {
+    /**
+     * Transcribe the given file.
+     *
+     * @param inputStream the source input stream.
+     * @return The transcribed string result, NULL if the job failed.
+     * @throws TikaException When there is an error transcribing.
+     * @throws IOException   If an I/O exception of some sort has occurred.
+     * @since 2.1
+     */
+    public String transcribe(InputStream inputStream) throws TikaException, 
IOException;
+
+    /**
+     * Transcribe the given file, given its source language.
+     *
+     * @param inputStream    the source input stream.
+     * @param sourceLanguage The language code for the language used in the 
input media file.
+     * @return The transcribed string result, NULL if the job failed.
+     * @throws TikaException When there is an error transcribing.
+     * @throws IOException   If an I/O exception of some sort has occurred.
+     * @since 2.1
+     */
+    public String transcribe(InputStream inputStream, String sourceLanguage) 
throws TikaException, IOException;
+
+    /**
+     * @return true if this Transcriber is probably able to transcribe right 
now.
+     * @since Tika 2.1
+     */
+    public boolean isAvailable();
+}
diff --git 
a/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java
 
b/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java
index a90d322..12dd7e5 100644
--- 
a/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java
+++ 
b/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java
@@ -17,23 +17,22 @@
 
 package org.apache.tika.example;
 
-import java.nio.file.Path;
-import java.nio.file.Paths;
+import java.io.FileInputStream;
 
-import org.apache.tika.Tika;
-import org.apache.tika.config.TikaConfig;
 import org.apache.tika.language.translate.GoogleTranslator;
 import org.apache.tika.language.translate.Translator;
+import org.apache.tika.transcribe.AmazonTranscribe;
+import org.apache.tika.transcribe.Transcriber;
 
 /**
  * This example demonstrates primitive logic for
  * chaining Tika API calls. In this case translation
- * could be considered as a downstream process to
+ * could be considered as a downstream process to 
  * transcription.
  * We simply pass the output of
- * a call to {@link Tika#parseToString(Path)}
- * into {@link Translator#translate(String, String)}.
- * The {@link GoogleTranslator} is configured with a target
+ * a call to {@link Transcriber#transcribe(java.io.InputStream)}
+ * into {@link Translator#translate(String, String)}. 
+ * The {@link GoogleTranslator} is configured with a target 
  * language of "en-US".
  * @author lewismc
  *
@@ -43,7 +42,7 @@ public class TranscribeTranslateExample {
     /**
      * Use {@link GoogleTranslator} to execute translation on
      * input data. This implementation needs configured as explained in the 
Javadoc.
-     * In this implementation, Google will try to guess the input language. 
The target
+     * In this implementation, Google will try to guess the input language. 
The target 
      * language is "en-US".
      * @param text input text to translate.
      * @return translated text String.
@@ -62,55 +61,43 @@ public class TranscribeTranslateExample {
     }
 
     /**
-     * Use {@link org.apache.tika.parser.transcribe.aws.AmazonTranscribe} to 
execute transcription
-     * on input data.
-     * This implementation needs to be configured as explained in the Javadoc.
+     * Use {@link AmazonTranscribe} to execute transcription on input data.
+     * This implementation needs to be configured as explained in the Javadoc.
      * @param file the name of the file (which needs to be on the Java 
Classpath) to transcribe.
      * @return transcribed text.
      */
-    public static String amazonTranscribe(Path tikaConfig, Path file) throws 
Exception {
-        return new Tika(new TikaConfig(tikaConfig)).parseToString(file);
+    public static String amazonTranscribe(String file) {
+        String filePath = 
TranscribeTranslateExample.class.getClassLoader().getResource(file).getPath();
+        String result = null;
+        Transcriber transcriber = new AmazonTranscribe();
+        if (transcriber.isAvailable()) {
+            try {
+                result = transcriber.transcribe(new FileInputStream(filePath));
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+        }
+        return result;
     }
 
     /**
      * Main method to run this example. This program can be invoked as follows
      * <ol>
-     * <li><code>transcribe-translate ${tika-config.xml} ${file}</code>; which 
executes both
-     * transcription then translation on the given resource, or
-     * <li><code>transcribe ${tika-config.xml} ${file}</code>; which executes 
only translation</li>
-     * @param args either of the commands described above and the input file
-     * (which needs to be on the Java Classpath).
-     *
-     *
-     *
-     * ${tika-config.xml} must include credentials for aws and a temporary 
storage bucket:
-     * <pre>
-     * {@code
-     *  <properties>
-     *   <parsers>
-     *     <parser class="org.apache.tika.parser.DefaultParser"/>
-     *     <parser 
class="org.apache.tika.parser.transcribe.aws.AmazonTranscribe">
-     *       <params>
-     *         <param name="bucket" type="string">bucket</param>
-     *         <param name="clientId" type="string">clientId</param>
-     *         <param name="clientSecret" type="string">clientSecret</param>
-     *       </params>
-     *     </parser>
-     *   </parsers>
-     * </properties>
-     * }
-     * </pre>
+     * <li><code>transcribe-translate ${file}</code>; which executes both 
+     * transcription then translation on the given resource, or 
+     * <li><code>transcribe ${file}</code>; which executes only 
transcription</li>
+     * @param args either of the commands described above and the input file 
+     * (which needs to be on the Java Classpath). 
      */
-    public static void main (String[] args) throws Exception {
+    public static void main (String[] args) {
         String text = null;
-        if (args.length > 1) {
-            if ("transcribe-translate".equals(args[1])) {
-                text = 
googleTranslateToEnglish(amazonTranscribe(Paths.get(args[0]),
-                        Paths.get(args[1])));
-                System.out.print("Transcription and translation 
successful!\nEXTRACTED TEXT: " + text);
-            } else if ("transcribe".equals(args[1])) {
-                text = amazonTranscribe(Paths.get(args[0]), 
Paths.get(args[1]));
-                System.out.print("Transcription successful!\nEXTRACTED TEXT: " 
+ text);
+        if (args.length != 0) {
+            if ("transcribe-translate".equals(args[0])) {
+                text = googleTranslateToEnglish(amazonTranscribe(args[1]));
+                System.out.print("Transcription and translation 
successful!\nEXTRAXCTED TEXT: " + text);
+            } else if ("transcribe".equals(args[0])) {
+                text = amazonTranscribe(args[1]);
+                System.out.print("Transcription successful!\nEXTRAXCTED TEXT: 
" + text);
             } else {
                 System.out.print("Incorrect invocation, see Javadoc.");
             }
diff --git a/tika-parsers/tika-parsers-ml/pom.xml 
b/tika-parsers/tika-parsers-ml/pom.xml
index 2dcde9e..ba9bd38 100644
--- a/tika-parsers/tika-parsers-ml/pom.xml
+++ b/tika-parsers/tika-parsers-ml/pom.xml
@@ -40,7 +40,6 @@
     <module>tika-age-recogniser</module>
     <module>tika-parser-advancedmedia-module</module>
     <module>tika-dl</module>
-    <module>tika-transcribe-aws</module>
   </modules>
 
   <build>
diff --git 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/main/java/org/apache/tika/parser/transcribe/aws/AmazonTranscribe.java
 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/main/java/org/apache/tika/parser/transcribe/aws/AmazonTranscribe.java
deleted file mode 100644
index 91e8452..0000000
--- 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/main/java/org/apache/tika/parser/transcribe/aws/AmazonTranscribe.java
+++ /dev/null
@@ -1,398 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.parser.transcribe.aws;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.nio.charset.StandardCharsets;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-import java.util.UUID;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.stream.Collectors;
-
-import com.amazonaws.AmazonServiceException;
-import com.amazonaws.SdkClientException;
-import com.amazonaws.auth.AWSStaticCredentialsProvider;
-import com.amazonaws.auth.BasicAWSCredentials;
-import com.amazonaws.services.s3.AmazonS3;
-import com.amazonaws.services.s3.AmazonS3ClientBuilder;
-import com.amazonaws.services.s3.model.AmazonS3Exception;
-import com.amazonaws.services.s3.model.CompressionType;
-import com.amazonaws.services.s3.model.ExpressionType;
-import com.amazonaws.services.s3.model.InputSerialization;
-import com.amazonaws.services.s3.model.JSONInput;
-import com.amazonaws.services.s3.model.JSONOutput;
-import com.amazonaws.services.s3.model.JSONType;
-import com.amazonaws.services.s3.model.OutputSerialization;
-import com.amazonaws.services.s3.model.PutObjectRequest;
-import com.amazonaws.services.s3.model.PutObjectResult;
-import com.amazonaws.services.s3.model.SelectObjectContentEvent;
-import com.amazonaws.services.s3.model.SelectObjectContentEventVisitor;
-import com.amazonaws.services.s3.model.SelectObjectContentRequest;
-import com.amazonaws.services.s3.model.SelectObjectContentResult;
-import com.amazonaws.services.transcribe.AmazonTranscribeAsync;
-import com.amazonaws.services.transcribe.AmazonTranscribeAsyncClientBuilder;
-import com.amazonaws.services.transcribe.model.GetTranscriptionJobRequest;
-import com.amazonaws.services.transcribe.model.GetTranscriptionJobResult;
-import com.amazonaws.services.transcribe.model.LanguageCode;
-import com.amazonaws.services.transcribe.model.Media;
-import com.amazonaws.services.transcribe.model.StartTranscriptionJobRequest;
-import com.amazonaws.services.transcribe.model.TranscriptionJob;
-import com.amazonaws.services.transcribe.model.TranscriptionJobStatus;
-import org.json.simple.JSONObject;
-import org.json.simple.parser.JSONParser;
-import org.json.simple.parser.ParseException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-import org.apache.tika.config.Field;
-import org.apache.tika.config.Initializable;
-import org.apache.tika.config.InitializableProblemHandler;
-import org.apache.tika.config.Param;
-import org.apache.tika.exception.TikaConfigException;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AbstractParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.XHTMLContentHandler;
-
-/**
- * <a href="https://aws.amazon.com/transcribe/">Amazon Transcribe</a>
- * implementation. See Javadoc for configuration options.
- * <p>
- * Silently becomes unavailable when client keys are unavailable.
- *
- * <b>N.B.</b> it is not necessary to create the bucket before hand.
- * This implementation will automatically create the bucket if one
- * does not already exist, per the name defined above.
- *
- * @since Tika 2.0
- */
-
-public class AmazonTranscribe extends AbstractParser implements Initializable {
-    private static final Logger LOG = 
LoggerFactory.getLogger(AmazonTranscribe.class);
-    private AmazonTranscribeAsync amazonTranscribeAsync;
-    private AmazonS3 amazonS3;
-    private String bucketName;
-    private String region;
-    private boolean isAvailable; // Flag for whether or not transcription is
-    // available.
-    private String clientId;
-    private String clientSecret; // Keys used for the API calls.
-    private AWSStaticCredentialsProvider credsProvider;
-
-    //https://docs.aws.amazon.com/transcribe/latest/dg/input.html
-    protected static final Set<MediaType> SUPPORTED_TYPES = 
Collections.unmodifiableSet(
-            new HashSet<>(Arrays.asList(MediaType.audio("x-flac"), 
MediaType.audio("mp3"),
-                    MediaType.audio("mpeg"), MediaType.video("ogg"), 
MediaType.audio("vnd.wave"),
-                    MediaType.audio("mp4"), MediaType.video("mp4"), 
MediaType.application("mp4"),
-                    MediaType.video("quicktime"))));
-
-
-    @Override
-    public Set<MediaType> getSupportedTypes(ParseContext context) {
-        if (!isAvailable) {
-            return Collections.EMPTY_SET;
-        }
-        return SUPPORTED_TYPES;
-    }
-
-    /**
-     * Starts AWS Transcribe Job with language specification.
-     *
-     * @param stream   the source input stream.
-     * @param handler  handler to use
-     * @param metadata
-     * @param context  -- set the {@link LanguageCode} in the ParseContext if 
known
-     * @throws TikaException When there is an error transcribing.
-     * @throws IOException   If an I/O exception of some sort has occurred.
-     * @see <a href=
-     * 
"https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/transcribe/model/LanguageCode.html">AWS
-     * Language Code</a>
-     */
-    @Override
-    public void parse(InputStream stream, ContentHandler handler, Metadata 
metadata,
-                      ParseContext context) throws IOException, SAXException, 
TikaException {
-
-        if (!isAvailable) {
-            return;
-        }
-        String jobName = getJobKey();
-        LanguageCode languageCode = context.get(LanguageCode.class);
-        uploadFileToBucket(stream, jobName);
-        StartTranscriptionJobRequest startTranscriptionJobRequest =
-                new StartTranscriptionJobRequest();
-        Media media = new Media();
-        media.setMediaFileUri(amazonS3.getUrl(bucketName, jobName).toString());
-        
startTranscriptionJobRequest.withMedia(media).withOutputBucketName(this.bucketName)
-                
.withTranscriptionJobName(jobName).setRequestCredentialsProvider(credsProvider);
-
-        if (languageCode != null) {
-            startTranscriptionJobRequest.withLanguageCode(languageCode);
-        } else {
-            startTranscriptionJobRequest.withIdentifyLanguage(true);
-        }
-        
amazonTranscribeAsync.startTranscriptionJob(startTranscriptionJobRequest);
-        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
-        xhtml.startDocument();
-        String text = getTranscriptText(jobName);
-        xhtml.startElement("p");
-        xhtml.characters(text);
-        xhtml.endElement("p");
-        xhtml.endDocument();
-
-    }
-
-
-    /**
-     * @return true if this Transcriber is probably able to transcribe right
-     * now.
-     * @since Tika 2.1
-     */
-    public boolean isAvailable() {
-        return this.isAvailable;
-    }
-
-    /**
-     * Sets the client Id for the transcriber API.
-     *
-     * @param id The ID to set.
-     */
-    @Field
-    public void setClientId(String id) {
-        this.clientId = id;
-        this.isAvailable = checkAvailable();
-    }
-
-    /**
-     * Sets the client secret for the transcriber API.
-     *
-     * @param secret The secret to set.
-     */
-    @Field
-    public void setClientSecret(String secret) {
-        this.clientSecret = secret;
-        this.isAvailable = checkAvailable();
-    }
-
-    /**
-     * Sets the client secret for the transcriber API.
-     *
-     * @param bucket The bucket to set.
-     */
-    @Field
-    public void setBucket(String bucket) {
-        this.bucketName = bucket;
-        this.isAvailable = checkAvailable();
-    }
-
-    @Field
-    public void setRegion(String region) {
-        this.region = region;
-        this.isAvailable = checkAvailable();
-    }
-
-    /**
-     * Private method check if the service is available.
-     *
-     * @return if the service is available
-     */
-    private boolean checkAvailable() {
-        return clientId != null && clientSecret != null && bucketName != null;
-    }
-
-    /**
-     * private method to get a unique job key.
-     *
-     * @return unique job key.
-     */
-    private String getJobKey() {
-        return UUID.randomUUID().toString();
-    }
-
-    /**
-     * Constructs a new {@link PutObjectRequest} object to upload a file to the
-     * specified bucket and jobName. After constructing the request, users may
-     * optionally specify object metadata or a canned ACL as well.
-     *
-     * @param inputStream, null
-     *                     The file to upload to Amazon S3.
-     * @param jobName      The unique job name for each job(UUID).
-     */
-    private void uploadFileToBucket(InputStream inputStream, String jobName) 
throws TikaException {
-        PutObjectRequest request =
-                new PutObjectRequest(this.bucketName, jobName, inputStream, 
null);
-        try {
-            @SuppressWarnings("unused") PutObjectResult response = 
amazonS3.putObject(request);
-        } catch (SdkClientException e) {
-            throw (new TikaException("File Upload to AWS Failed"));
-        }
-    }
-
-    /**
-     * Gets Transcription result from AWS S3 bucket given the jobName.
-     *
-     * @param fileNameS3 The path of the file to upload to Amazon S3.
-     * @return The transcribed string result, NULL if the job failed.
-     * @throws IOException            possible reasons include (i) an End 
Event is not received
-     *                                from AWS S3 SelectObjectContentResult 
operation and (ii) a parse exception
-     *                                whilst processing JSON from the AWS S3 
SelectObjectContentResult operation.
-     * @throws SdkClientException     a AWS-specific exception related to 
SelectObjectContentResult
-     *                                operation.
-     * @throws AmazonServiceException possibly thrown if there is an issue 
selecting object content
-     *                                from AWS S3 objects.
-     */
-    private String getTranscriptText(String fileNameS3)
-            throws AmazonServiceException, SdkClientException, IOException {
-        TranscriptionJob transcriptionJob = 
retrieveObjectWhenJobCompleted(fileNameS3);
-        String text = null;
-        if (transcriptionJob != null && !TranscriptionJobStatus.FAILED.name()
-                .equals(transcriptionJob.getTranscriptionJobStatus())) {
-            InputSerialization inputSerialization =
-                    new InputSerialization().withJson(new 
JSONInput().withType(JSONType.DOCUMENT))
-                            .withCompressionType(CompressionType.NONE);
-            OutputSerialization outputSerialization =
-                    new OutputSerialization().withJson(new JSONOutput());
-            SelectObjectContentRequest request =
-                    new 
SelectObjectContentRequest().withBucketName(this.bucketName)
-                            .withKey(fileNameS3 + ".json").withExpression(
-                            "Select s.results.transcripts[0].transcript from 
S3Object s")
-                            //WHERE transcript IS NOT MISSING
-                            .withExpressionType(ExpressionType.SQL)
-                            .withRequestCredentialsProvider(credsProvider);
-            request.setInputSerialization(inputSerialization);
-            request.setOutputSerialization(outputSerialization);
-
-            final AtomicBoolean isResultComplete = new AtomicBoolean(false);
-
-            try (SelectObjectContentResult result = 
amazonS3.selectObjectContent(request)) {
-                InputStream resultInputStream = result.getPayload()
-                        .getRecordsInputStream(new 
SelectObjectContentEventVisitor() {
-                            @Override
-                            public void 
visit(SelectObjectContentEvent.StatsEvent event) {
-                                LOG.debug("Received Stats, Bytes Scanned: " +
-                                        event.getDetails().getBytesScanned() +
-                                        " Bytes Processed: " +
-                                        
event.getDetails().getBytesProcessed());
-                            }
-
-                            /*
-                             * An End Event informs that the request has
-                             * finished successfully.
-                             */
-                            @Override
-                            public void 
visit(SelectObjectContentEvent.EndEvent event) {
-                                isResultComplete.set(true);
-                                LOG.debug("Received End Event. Result is 
complete.");
-                            }
-                        });
-                text = new BufferedReader(
-                        new InputStreamReader(resultInputStream, 
StandardCharsets.UTF_8)).lines()
-                        .collect(Collectors.joining("\n"));
-            }
-            /*
-             * The End Event indicates all matching records have been
-             * transmitted. If the End Event is not received, the results
-             * may be incomplete.
-             */
-            if (!isResultComplete.get()) {
-                throw new IOException(
-                        "S3 Select request was incomplete as End Event was not 
received.");
-            }
-        }
-        JSONParser parser = new JSONParser();
-        JSONObject obj = null;
-        try {
-            obj = (JSONObject) parser.parse(text);
-        } catch (ParseException e) {
-            throw new IOException(e.getMessage(), e);
-        }
-        return obj.get("transcript").toString();
-    }
-
-    /**
-     * Private helper function to get object from s3.
-     *
-     * @param jobName The unique job name for each job(UUID).
-     * @return TranscriptionJob object
-     */
-    private TranscriptionJob retrieveObjectWhenJobCompleted(String jobName) {
-        GetTranscriptionJobRequest getTranscriptionJobRequest = new 
GetTranscriptionJobRequest();
-        
getTranscriptionJobRequest.withRequestCredentialsProvider(credsProvider);
-        getTranscriptionJobRequest.setTranscriptionJobName(jobName);
-        while (true) {
-            GetTranscriptionJobResult innerResult =
-                    
amazonTranscribeAsync.getTranscriptionJob(getTranscriptionJobRequest);
-            String status = 
innerResult.getTranscriptionJob().getTranscriptionJobStatus();
-            if (TranscriptionJobStatus.COMPLETED.name().equals(status) ||
-                    TranscriptionJobStatus.FAILED.name().equals(status)) {
-                return innerResult.getTranscriptionJob();
-            }
-        }
-    }
-
-    @Override
-    public void initialize(Map<String, Param> params) throws 
TikaConfigException {
-        if (!checkAvailable()) {
-            return;
-        }
-
-        try {
-            BasicAWSCredentials creds = new BasicAWSCredentials(this.clientId, 
this.clientSecret);
-            this.credsProvider = new AWSStaticCredentialsProvider(creds);
-            if (region != null) {
-                this.amazonS3 = 
AmazonS3ClientBuilder.standard().withCredentials(credsProvider)
-                        .withRegion(this.region).build();
-            } else {
-                this.amazonS3 =
-                        
AmazonS3ClientBuilder.standard().withCredentials(credsProvider).build();
-
-            }
-            if (!this.amazonS3.doesBucketExistV2(this.bucketName)) {
-                try {
-                    amazonS3.createBucket(this.bucketName);
-                } catch (AmazonS3Exception e) {
-                    throw new TikaConfigException("couldn't create bucket", e);
-                }
-            }
-            this.amazonTranscribeAsync =
-                    
AmazonTranscribeAsyncClientBuilder.standard().withCredentials(credsProvider)
-                            .withRegion(this.region).build();
-        } catch (Exception e) {
-            LOG.warn("Exception reading config file", e);
-            isAvailable = false;
-        }
-
-    }
-
-    @Override
-    public void checkInitialization(InitializableProblemHandler problemHandler)
-            throws TikaConfigException {
-        //TODO alert user if they've gotten 1 or 2 out of three?
-        this.isAvailable = checkAvailable();
-    }
-}
diff --git 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/java/org/apache/tika/parser/transcribe/aws/AmazonTranscribeTest.java
 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/java/org/apache/tika/parser/transcribe/aws/AmazonTranscribeTest.java
deleted file mode 100644
index be4f76a..0000000
--- 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/java/org/apache/tika/parser/transcribe/aws/AmazonTranscribeTest.java
+++ /dev/null
@@ -1,310 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.transcribe.aws;
-
-import java.io.InputStream;
-
-import com.amazonaws.services.transcribe.model.LanguageCode;
-import org.junit.BeforeClass;
-import org.junit.Ignore;
-import org.junit.Test;
-
-import org.apache.tika.TikaTest;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-
-//TODO: Check the ACTUAL output of Amazon Transcribe.
-
-/**
- * Tests tika-transcribe by creating an AmazonTranscribe() object.
- * 1) Tests that transcribe functions properly when it is given just a 
filepath.
- * 2) Both audio (mp3) and video (mp4) files are used in these tests.
- */
-@Ignore("Ignore until finalize AmazonTrancsribe Interface & build Tika")
-public class AmazonTranscribeTest extends TikaTest {
-
-    static Parser PARSER;
-
-    @BeforeClass
-    public static void setUp() throws Exception {
-        try (InputStream is = AmazonTranscribeTest.class
-                .getResourceAsStream("tika-config-aws-transcribe.xml")) {
-            PARSER = new TikaConfig(is).getParser();
-        }
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is en-US (English - United States)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_enUS() throws Exception {
-        ParseContext context = new ParseContext();
-        context.set(LanguageCode.class, LanguageCode.EnUS);
-        String xml = getXML("en-US_(A_Little_Bottle_Of_Water).mp3", PARSER, 
context).xml;
-        String expected = "a little bottle of water.";
-        assertContains(expected, xml);
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source 
language.
-     * The source language of the file is en-US (English - United States)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_enUS() throws Exception {
-        String xml = getXML("en-US_(A_Little_Bottle_Of_Water).mp3", 
PARSER).xml;
-        String expected = "a little bottle of water.";
-        assertContains(expected, xml);
-    }
-
-    /**
-     * Tests transcribe with a video file given the source language
-     * The source language of the file is en-US (English - United States)
-     */
-    @Test
-    public void testAmazonTranscribeVideo_enUS() throws Exception {
-        String expected = "Hi";
-        ParseContext context = new ParseContext();
-        context.set(LanguageCode.class, LanguageCode.EnUS);
-        String xml = getXML("en-US_(Hi).mp4", PARSER, context).xml;
-        assertContains(expected, xml);
-    }
-
-    /**
-     * Tests transcribe with a video file without passing in the source 
language.
-     * The source language of the file is en-US (English - United States)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownVideo_enUS() throws Exception {
-        String expected = "Hi";
-        String xml = getXML("en-US_(Hi).mp4", PARSER).xml;
-        assertContains(expected, xml);
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is en-GB (English - Great Britain)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_enGB() throws Exception {
-        String file = "en-GB_(A_Little_Bottle_Of_Water).mp3";
-        String expected = "a little bottle of water.";
-        ParseContext context = new ParseContext();
-        context.set(LanguageCode.class, LanguageCode.EnGB);
-        String xml = getXML(file, PARSER, context).xml;
-        assertContains(expected, xml);
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source 
language.
-     * The source language of the file is en-GB (English - Great Britain)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_enGB() throws Exception {
-        String file = "en-GB_(A_Little_Bottle_Of_Water).mp3";
-        String expected = "a little bottle of water.";
-        String xml = getXML(file, PARSER).xml;
-        assertContains(expected, xml);
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is en-AU (English - Australia)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_enAU() throws Exception {
-        String file = "en-AU_(A_Little_Bottle_Of_Water).mp3";
-        String expected = "a little bottle of water.";
-        ParseContext context = new ParseContext();
-        context.set(LanguageCode.class, LanguageCode.EnAU);
-        String xml = getXML(file, PARSER, context).xml;
-        assertContains(expected, xml);
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source 
language.
-     * The source language of the file is en-AU (English - Australian)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_enAU() throws Exception {
-        String file = "en-AU_(A_Little_Bottle_Of_Water).mp3";
-        String expected = "a little bottle of water.";
-        String xml = getXML(file, PARSER).xml;
-        assertContains(expected, xml);
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is de-DE (German)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_deDE() throws Exception {
-        String file = "de-DE_(We_Are_At_School_x2).mp3";
-        String expected = "Wir sind in der Schule. Wir sind in der Schule.";
-        ParseContext context = new ParseContext();
-        context.set(LanguageCode.class, LanguageCode.DeDE);
-        String xml = getXML(file, PARSER, context).xml;
-        assertContains(expected, xml);
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source 
language.
-     * The source language of the file is de-DE (German)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_deDE() throws Exception {
-        String file = "de-DE_(We_Are_At_School_x2).mp3";
-        String expected = "Wir sind in der Schule. Wir sind in der Schule.";
-        String xml = getXML(file, PARSER).xml;
-        assertContains(expected, xml);
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is it-IT (Italian)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_itIT() throws Exception {
-        String file = "it-IT_(We_Are_Having_Class_x2).mp3";
-        String expected = "stiamo facendo lezione. stiamo facendo lezione.";
-        ParseContext context = new ParseContext();
-        context.set(LanguageCode.class, LanguageCode.ItIT);
-        String xml = getXML(file, PARSER, context).xml;
-        assertContains(expected, xml);
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source 
language.
-     * The source language of the file is it-IT (Italian)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_itIT() throws Exception {
-        String file = "it-IT_(We_Are_Having_Class_x2).mp3";
-        String expected = "stiamo facendo lezione. stiamo facendo lezione.";
-        String xml = getXML(file, PARSER).xml;
-        assertContains(expected, xml);
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is ja-JP (Japanese)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_jaJP() throws Exception {
-        String file = "ja-JP_(We_Are_At_School).mp3";
-        String expected = "私達は学校にいます"; //TODO or Watashitachi wa gakkō ni imasu
-        ParseContext context = new ParseContext();
-        context.set(LanguageCode.class, LanguageCode.JaJP);
-        String xml = getXML(file, PARSER, context).xml;
-        assertContains(expected, xml);
-
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source 
language.
-     * The source language of the file is ja-JP (Japanese)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_jaJP() throws Exception {
-        String file = "ja-JP_(We_Are_At_School).mp3";
-        String expected = "私達は学校にいます"; //TODO or Watashitachi wa gakkō ni imasu
-        String xml = getXML(file, PARSER).xml;
-        assertContains(expected, xml);
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is ko-KR (Korean)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_koKR() throws Exception {
-        String file = "ko-KR_(We_Are_Having_Class_x2).mp3";
-        String expected = "우리는 수업을하고있다"; //TODO or ulineun sueob-eulhagoissda
-        ParseContext context = new ParseContext();
-        context.set(LanguageCode.class, LanguageCode.KoKR);
-        String xml = getXML(file, PARSER, context).xml;
-        assertContains(expected, xml);
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source 
language.
-     * The source language of the file is ko-KR (Korean)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_koKR() throws Exception {
-        String file = "ko-KR_(We_Are_Having_Class_x2).mp3";
-        String expected = "우리는 수업을하고있다"; //TODO or ulineun sueob-eulhagoissda
-        String xml = getXML(file, PARSER).xml;
-        assertContains(expected, xml);
-    }
-
-    /**
-     * Tests transcribe with a video file given the source language
-     * The source language of the file is ko-KR (Korean)
-     */
-    @Test
-    public void testAmazonTranscribeVideo_koKR() throws Exception {
-        String file = "ko-KR_(Annyeonghaseyo).mp4";
-        //TODO: Check whether output is Annyeonghaseyo or 안녕하세요
-        String expected = "Annyeonghaseyo";
-        ParseContext context = new ParseContext();
-        context.set(LanguageCode.class, LanguageCode.KoKR);
-        String xml = getXML(file, PARSER, context).xml;
-        assertContains(expected, xml);
-    }
-
-    /**
-     * Tests transcribe with a video file without passing in the source 
language.
-     * The source language of the file is ko-KR (Korean)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownVideo_koKR() throws Exception {
-        String file = "ko-KR_(Annyeonghaseyo).mp4";
-        //TODO: Check whether output is Annyeonghaseyo or 안녕하세요
-        String expected = "Annyeonghaseyo";
-        String xml = getXML(file, PARSER).xml;
-        assertContains(expected, xml);
-    }
-
-    /**
-     * Tests transcribe with an audio file given the source language
-     * The source language of the file is pt-BR (Portuguese - Brazil)
-     */
-    @Test
-    public void testAmazonTranscribeAudio_ptBR() throws Exception {
-        String file = "pt-BR_(We_Are_At_School).mp3";
-        String expected = "nós estamos na escola.";
-        ParseContext context = new ParseContext();
-        context.set(LanguageCode.class, LanguageCode.PtBR);
-        String xml = getXML(file, PARSER, context).xml;
-        assertContains(expected, xml);
-    }
-
-    /**
-     * Tests transcribe with an audio file without passing in the source 
language.
-     * The source language of the file is pt-BR (Portuguese - Brazil)
-     */
-    @Test
-    public void testAmazonTranscribeUnknownAudio_ptBR() throws Exception {
-        String file = "pt-BR_(We_Are_At_School).mp3";
-        String expected = "nós estamos na escola.";
-        String xml = getXML(file, PARSER).xml;
-        assertContains(expected, xml);
-    }
-
-}
diff --git 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/tika-config-transcribe-aws.xml
 
b/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/tika-config-transcribe-aws.xml
deleted file mode 100644
index 875fe5b..0000000
--- 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/tika-config-transcribe-aws.xml
+++ /dev/null
@@ -1,32 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<properties>
-  <parsers>
-    <parser class="org.apache.tika.parser.DefaultParser"/>
-    <parser class="org.apache.tika.parser.transcribe.aws.AmazonTranscribe">
-      <params>
-        <!-- first three are required -->
-        <param name="bucket" type="string">bucket</param>
-        <param name="clientId" type="string">clientId</param>
-        <param name="clientSecret" type="string">clientSecret</param>
-        <!-- region is optional -->
-        <param name="region" type="string">region</param>
-      </params>
-    </parser>
-  </parsers>
-</properties>
\ No newline at end of file
diff --git a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/pom.xml 
b/tika-transcribe/pom.xml
similarity index 78%
rename from tika-parsers/tika-parsers-ml/tika-transcribe-aws/pom.xml
rename to tika-transcribe/pom.xml
index 1e287c5..aadb137 100644
--- a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/pom.xml
+++ b/tika-transcribe/pom.xml
@@ -25,19 +25,20 @@
     <modelVersion>4.0.0</modelVersion>
 
     <parent>
-        <artifactId>tika-parsers-ml</artifactId>
         <groupId>org.apache.tika</groupId>
+        <artifactId>tika-parent</artifactId>
         <version>2.0.0-SNAPSHOT</version>
+        <relativePath>../tika-parent/pom.xml</relativePath>
     </parent>
 
-    <artifactId>tika-transcribe-aws</artifactId>
+    <artifactId>tika-transcribe</artifactId>
     <packaging>bundle</packaging>
-    <name>Apache Tika transcribe aws</name>
+    <name>Apache Tika transcribe</name>
     <url>http://tika.apache.org/</url>
     <!--TODO use latest aws version or the one defined in the tika-parent-->
     <dependencies>
         <dependency>
-            <groupId>${project.groupId}</groupId>
+            <groupId>org.apache.tika</groupId>
             <artifactId>tika-core</artifactId>
             <version>${project.version}</version>
         </dependency>
@@ -54,37 +55,9 @@
                     <groupId>commons-codec</groupId>
                     <artifactId>commons-codec</artifactId>
                 </exclusion>
-                <exclusion>
-                    <groupId>com.fasterxml.jackson.core</groupId>
-                    <artifactId>jackson-core</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>com.fasterxml.jackson.core</groupId>
-                    <artifactId>jackson-databind</artifactId>
-                </exclusion>
             </exclusions>
         </dependency>
         <dependency>
-            <groupId>com.fasterxml.jackson.core</groupId>
-            <artifactId>jackson-core</artifactId>
-            <version>${jackson.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>com.fasterxml.jackson.core</groupId>
-            <artifactId>jackson-databind</artifactId>
-            <version>${jackson.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>commons-logging</groupId>
-            <artifactId>commons-logging</artifactId>
-            <version>${commons.logging.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>commons-codec</groupId>
-            <artifactId>commons-codec</artifactId>
-            <version>${commons.codec.version}</version>
-        </dependency>
-        <dependency>
             <groupId>com.amazonaws</groupId>
             <artifactId>aws-java-sdk-s3</artifactId>
             <version>${aws.version}</version>
@@ -98,14 +71,6 @@
         <dependency>
             <groupId>junit</groupId>
             <artifactId>junit</artifactId>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-            <groupId>${project.groupId}</groupId>
-            <artifactId>tika-core</artifactId>
-            <version>${project.version}</version>
-            <scope>test</scope>
-            <type>test-jar</type>
         </dependency>
     </dependencies>
     <build>
@@ -146,7 +111,7 @@
                 <configuration>
                     <archive>
                         <manifestEntries>
-                            
<Automatic-Module-Name>org.apache.tika.parser.transcribe.aws</Automatic-Module-Name>
+                            
<Automatic-Module-Name>org.apache.tika.translate</Automatic-Module-Name>
                         </manifestEntries>
                     </archive>
                 </configuration>
diff --git 
a/tika-transcribe/src/main/java/org/apache/tika/transcribe/AmazonTranscribe.java
 
b/tika-transcribe/src/main/java/org/apache/tika/transcribe/AmazonTranscribe.java
new file mode 100644
index 0000000..5b50491
--- /dev/null
+++ 
b/tika-transcribe/src/main/java/org/apache/tika/transcribe/AmazonTranscribe.java
@@ -0,0 +1,406 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.transcribe;
+
+import com.amazonaws.AmazonServiceException;
+import com.amazonaws.SdkClientException;
+import com.amazonaws.auth.AWSStaticCredentialsProvider;
+import com.amazonaws.auth.BasicAWSCredentials;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+import com.amazonaws.services.s3.model.AmazonS3Exception;
+import com.amazonaws.services.s3.model.CompressionType;
+import com.amazonaws.services.s3.model.ExpressionType;
+import com.amazonaws.services.s3.model.InputSerialization;
+import com.amazonaws.services.s3.model.JSONInput;
+import com.amazonaws.services.s3.model.JSONOutput;
+import com.amazonaws.services.s3.model.JSONType;
+import com.amazonaws.services.s3.model.OutputSerialization;
+import com.amazonaws.services.s3.model.PutObjectRequest;
+import com.amazonaws.services.s3.model.PutObjectResult;
+import com.amazonaws.services.s3.model.SelectObjectContentEvent;
+import com.amazonaws.services.s3.model.SelectObjectContentEventVisitor;
+import com.amazonaws.services.s3.model.SelectObjectContentRequest;
+import com.amazonaws.services.s3.model.SelectObjectContentResult;
+import com.amazonaws.services.transcribe.AmazonTranscribeAsync;
+import com.amazonaws.services.transcribe.AmazonTranscribeAsyncClientBuilder;
+import com.amazonaws.services.transcribe.model.Media;
+import com.amazonaws.services.transcribe.model.StartTranscriptionJobRequest;
+import com.amazonaws.services.transcribe.model.TranscriptionJob;
+import com.amazonaws.services.transcribe.model.TranscriptionJobStatus;
+import com.amazonaws.services.transcribe.model.GetTranscriptionJobRequest;
+import com.amazonaws.services.transcribe.model.GetTranscriptionJobResult;
+import com.amazonaws.services.transcribe.model.LanguageCode;
+import org.apache.tika.exception.TikaException;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.Properties;
+import java.util.UUID;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.stream.Collectors;
+
+/**
+ * <a href="https://aws.amazon.com/transcribe/">Amazon Transcribe</a> 
+ * {@link Transcriber} implementation. See Javadoc for configuration options.
+ *
+ * @since Tika 2.1
+ */
+public class AmazonTranscribe implements Transcriber {
+
+    public static final String PROPERTIES_FILE = 
"transcribe.amazon.properties";
+    public static final String ID_PROPERTY = "transcribe.AWS_ACCESS_KEY";
+    public static final String SECRET_PROPERTY = "transcribe.AWS_SECRET_KEY";
+    public static final String DEFAULT_ID = "dummy-id";
+    public static final String DEFAULT_SECRET = "dummy-secret";
+    public static final String DEFAULT_BUCKET = "dummy-bucket";
+    public static final String BUCKET_NAME = "transcribe.BUCKET_NAME";
+    public static final String REGION = "transcribe.REGION";
+    private static final Logger LOG = LoggerFactory
+            .getLogger(AmazonTranscribe.class);
+    private AmazonTranscribeAsync amazonTranscribeAsync;
+    private AmazonS3 amazonS3;
+    private String bucketName;
+    private String region;
+    private boolean isAvailable; // Flag for whether or not transcription is
+    // available.
+    private String clientId;
+    private String clientSecret; // Keys used for the API calls.
+    private AWSStaticCredentialsProvider credsProvider;
+
+    /**
+     * Create a new AmazonTranscribe instance with the client keys specified in
+     * <code>transcribe.amazon.properties</code> which needs to be available on
+     * the Java Classpath.
+     * Silently becomes unavailable when client keys are unavailable.
+     * <code>transcribe.AWS_ACCESS_KEY</code>,
+     * <code>transcribe.AWS_SECRET_KEY</code>,
+     * <code>transcribe.BUCKET_NAME</code> and 
+     * <code>transcribe.REGION</code> must be set in
+     * <code>transcribe.amazon.properties</code>.
+     * <b>N.B.</b> it is not necessary to create the bucket beforehand.
+     * This implementation will automatically create the bucket if one
+     * does not already exist, per the name defined above.
+     *
+     * @since Tika 2.0
+     */
+    public AmazonTranscribe() {
+        Properties config = new Properties();
+        try {
+            config.load(AmazonTranscribe.class
+                    .getResourceAsStream(PROPERTIES_FILE));
+            this.clientId = config.getProperty(ID_PROPERTY);
+            this.clientSecret = config.getProperty(SECRET_PROPERTY);
+            this.bucketName = config.getProperty(BUCKET_NAME);
+            this.region = config.getProperty(REGION);
+            BasicAWSCredentials creds = new BasicAWSCredentials(this.clientId,
+                    this.clientSecret);
+            this.credsProvider = new AWSStaticCredentialsProvider(creds);
+            amazonS3 = AmazonS3ClientBuilder.standard()
+                    .withCredentials(credsProvider).withRegion(this.region)
+                    .build();
+            this.isAvailable = checkAvailable();
+            if (!this.amazonS3.doesBucketExistV2(this.bucketName)) {
+                try {
+                    amazonS3.createBucket(this.bucketName);
+                } catch (AmazonS3Exception e) {
+                    throw new RuntimeException(e.getErrorMessage());
+                }
+            }
+            this.amazonTranscribeAsync = AmazonTranscribeAsyncClientBuilder
+                    .standard().withCredentials(credsProvider)
+                    .withRegion(this.region).build();
+        } catch (Exception e) {
+            LOG.warn("Exception reading config file", e);
+            isAvailable = false;
+        }
+    }
+
+    /**
+     * private method to get a unique job key.
+     *
+     * @return unique job key.
+     */
+    private String getJobKey() {
+        return UUID.randomUUID().toString();
+    }
+
+    /**
+     * Constructs a new {@link PutObjectRequest} object to upload a file to the
+     * specified bucket and jobName. After constructing the request, users may
+     * optionally specify object metadata or a canned ACL as well.
+     *
+     * @param inputStream
+     *            The content to upload to Amazon S3.
+     * @param jobName
+     *            The unique job name for each job(UUID).
+     */
+    private void uploadFileToBucket(InputStream inputStream, String jobName)
+            throws TikaException {
+        PutObjectRequest request = new PutObjectRequest(this.bucketName,
+                jobName, inputStream, null);
+        try {
+            @SuppressWarnings("unused")
+            PutObjectResult response = amazonS3.putObject(request);
+        } catch (SdkClientException e) {
+            throw (new TikaException("File Upload to AWS Failed"));
+        }
+    }
+
+    /**
+     * Starts AWS Transcribe Job without language specification.
+     *
+     * @param inputStream
+     *            the source input stream.
+     * @return The transcribed string result, NULL if the job failed.
+     * @throws TikaException
+     *             When there is an error transcribing.
+     * @throws IOException
+     *             If an I/O exception of some sort has occurred.
+     */
+    @Override
+    public String transcribe(InputStream inputStream)
+            throws TikaException, IOException {
+        if (!isAvailable())
+            return null;
+        String jobName = getJobKey();
+        uploadFileToBucket(inputStream, jobName);
+        StartTranscriptionJobRequest startTranscriptionJobRequest = new 
StartTranscriptionJobRequest();
+        Media media = new Media();
+        media.setMediaFileUri(amazonS3.getUrl(bucketName, jobName).toString());
+        
startTranscriptionJobRequest.withIdentifyLanguage(true).withMedia(media)
+        .withOutputBucketName(this.bucketName)
+        .withTranscriptionJobName(jobName)
+        .setRequestCredentialsProvider(credsProvider);
+        amazonTranscribeAsync
+        .startTranscriptionJob(startTranscriptionJobRequest);
+        return getTranscriptText(jobName);
+    }
+
+    /**
+     * Starts AWS Transcribe Job with language specification.
+     *
+     * @param inputStream
+     *            the source input stream.
+     * @param sourceLanguage
+     *            <a href=
+     *            
"https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/transcribe/model/LanguageCode.html">AWS
+     *            Language Code</a> for the language used in the input media
+     *            file.
+     * @return The transcribed string result, NULL if the job failed.
+     * @throws TikaException
+     *             When there is an error transcribing.
+     * @throws IOException
+     *             If an I/O exception of some sort has occurred.
+     * @see <a href=
+     *      
"https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/transcribe/model/LanguageCode.html">AWS
+     *      Language Code</a>
+     */
+    @Override
+    public String transcribe(InputStream inputStream, String sourceLanguage)
+            throws TikaException, IOException {
+        if (!isAvailable())
+            return null;
+        String jobName = getJobKey();
+        uploadFileToBucket(inputStream, jobName);
+        StartTranscriptionJobRequest startTranscriptionJobRequest = new 
StartTranscriptionJobRequest();
+        Media media = new Media();
+        media.setMediaFileUri(amazonS3.getUrl(bucketName, jobName).toString());
+        ((StartTranscriptionJobRequest) startTranscriptionJobRequest
+                .withMedia(media).withOutputBucketName(this.bucketName)
+                .withTranscriptionJobName(jobName)
+                .withRequestCredentialsProvider(credsProvider))
+        .withLanguageCode(
+                LanguageCode.fromValue(sourceLanguage));
+        amazonTranscribeAsync
+        .startTranscriptionJob(startTranscriptionJobRequest);
+        return getTranscriptText(jobName);
+    }
+
+    /**
+     * @return true if this Transcriber is probably able to transcribe right
+     *         now.
+     * @since Tika 2.1
+     */
+    @Override
+    public boolean isAvailable() {
+        return this.isAvailable;
+    }
+
+    /**
+     * Sets the client Id for the transcriber API.
+     *
+     * @param id
+     *            The ID to set.
+     */
+    public void setId(String id) {
+        this.clientId = id;
+        this.isAvailable = checkAvailable();
+    }
+
+    /**
+     * Sets the client secret for the transcriber API.
+     *
+     * @param secret
+     *            The secret to set.
+     */
+    public void setSecret(String secret) {
+        this.clientSecret = secret;
+        this.isAvailable = checkAvailable();
+    }
+
+    /**
+     * Sets the S3 bucket name used by the transcriber.
+     *
+     * @param bucket
+     *            The bucket to set.
+     */
+    public void setBucket(String bucket) {
+        this.bucketName = bucket;
+        this.isAvailable = checkAvailable();
+    }
+
+    /**
+     * Private method check if the service is available.
+     *
+     * @return if the service is available
+     */
+    private boolean checkAvailable() {
+        return clientId != null && !clientId.equals(DEFAULT_ID)
+                && clientSecret != null && !clientSecret.equals(DEFAULT_SECRET)
+                && bucketName != null && !bucketName.equals(DEFAULT_BUCKET);
+    }
+
+    /**
+     * Gets Transcription result from AWS S3 bucket given the jobName.
+     *
+     * @param fileNameS3
+     *            The job name; the transcript is read from S3 key jobName + ".json".
+     * @return The transcribed string result, NULL if the job failed.
+     * @throws IOException possible reasons include (i) an End Event is not 
received
+     * from AWS S3 SelectObjectContentResult operation and (ii) a parse 
exception
+     * whilst processing JSON from the AWS S3 SelectObjectContentResult 
operation.
+     * @throws SdkClientException a AWS-specific exception related to 
SelectObjectContentResult
+     * operation.
+     * @throws AmazonServiceException possibly thrown if there is an issue 
selecting object content
+     * from AWS S3 objects.
+     */
+    private String getTranscriptText(String fileNameS3) throws 
AmazonServiceException, SdkClientException, IOException {
+        TranscriptionJob transcriptionJob = retrieveObjectWhenJobCompleted(
+                fileNameS3);
+        String text = null;
+        if (transcriptionJob != null && !TranscriptionJobStatus.FAILED.name()
+                .equals(transcriptionJob.getTranscriptionJobStatus())) {
+            InputSerialization inputSerialization = new 
InputSerialization().withJson(new JSONInput().withType(JSONType.DOCUMENT))
+                    .withCompressionType(CompressionType.NONE);
+            OutputSerialization outputSerialization = new 
OutputSerialization().withJson(new JSONOutput());
+            SelectObjectContentRequest request = new 
SelectObjectContentRequest()
+                    .withBucketName(this.bucketName).withKey(fileNameS3 + 
".json")
+                    .withExpression("Select 
s.results.transcripts[0].transcript from S3Object s")//WHERE transcript IS NOT 
MISSING
+                    
.withExpressionType(ExpressionType.SQL).withRequestCredentialsProvider(credsProvider);
+            request.setInputSerialization(inputSerialization);
+            request.setOutputSerialization(outputSerialization);
+
+            final AtomicBoolean isResultComplete = new AtomicBoolean(false);
+
+            try (SelectObjectContentResult result = amazonS3
+                    .selectObjectContent(request)) {
+                InputStream resultInputStream = result.getPayload()
+                        .getRecordsInputStream(
+                                new SelectObjectContentEventVisitor() {
+                                    @Override
+                                    public void visit(
+                                            
SelectObjectContentEvent.StatsEvent event) {
+                                        LOG.debug(
+                                                "Received Stats, Bytes 
Scanned: "
+                                                        + event.getDetails()
+                                                        .getBytesScanned()
+                                                        + " Bytes Processed: "
+                                                        + event.getDetails()
+                                                        .getBytesProcessed());
+                                    }
+
+                                    /*
+                                     * An End Event informs that the request 
has
+                                     * finished successfully.
+                                     */
+                                    @Override
+                                    public void visit(
+                                            SelectObjectContentEvent.EndEvent 
event) {
+                                        isResultComplete.set(true);
+                                        LOG.debug(
+                                                "Received End Event. Result is 
complete.");
+                                    }
+                                });
+                text = new BufferedReader(
+                        new InputStreamReader(resultInputStream, 
StandardCharsets.UTF_8))
+                        .lines()
+                        .collect(Collectors.joining("\n"));
+            }
+            /*
+             * The End Event indicates all matching records have been
+             * transmitted. If the End Event is not received, the results
+             * may be incomplete.
+             */
+            if (!isResultComplete.get()) {
+                throw new IOException(
+                        "S3 Select request was incomplete as End Event was not 
received.");
+            }
+        }
+        JSONParser parser = new JSONParser();
+        JSONObject obj = null;
+        try {
+            obj = (JSONObject) parser.parse(text);
+        } catch (ParseException e) {
+            throw new IOException(e.getMessage(), e);
+        }
+        return obj.get("transcript").toString();
+    }
+
+    /**
+     * Private helper function that polls Amazon Transcribe until the job
+     * with the given name has reached a terminal status.
+     *
+     * @param jobName
+     *            The unique job name for each job(UUID).
+     * @return TranscriptionJob object whose status is COMPLETED or FAILED
+     * @throws IllegalStateException if the calling thread is interrupted
+     *             while waiting between two status checks
+     */
+    private TranscriptionJob retrieveObjectWhenJobCompleted(String jobName) {
+        // Pause between status checks so the loop neither spins the CPU nor
+        // floods the service with back-to-back requests.
+        final long pollIntervalMillis = 1000L;
+
+        GetTranscriptionJobRequest getTranscriptionJobRequest =
+                new GetTranscriptionJobRequest();
+        getTranscriptionJobRequest
+                .withRequestCredentialsProvider(credsProvider);
+        getTranscriptionJobRequest.setTranscriptionJobName(jobName);
+        while (true) {
+            TranscriptionJob job = amazonTranscribeAsync
+                    .getTranscriptionJob(getTranscriptionJobRequest)
+                    .getTranscriptionJob();
+            String status = job.getTranscriptionJobStatus();
+            if (TranscriptionJobStatus.COMPLETED.name().equals(status)
+                    || TranscriptionJobStatus.FAILED.name().equals(status)) {
+                return job;
+            }
+            try {
+                Thread.sleep(pollIntervalMillis);
+            } catch (InterruptedException e) {
+                // Restore the interrupt flag for callers further up the stack.
+                Thread.currentThread().interrupt();
+                throw new IllegalStateException(
+                        "Interrupted while waiting for transcription job "
+                                + jobName, e);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git 
a/tika-transcribe/src/main/resources/META-INF.services/org.apache.tika.language.translate.Translator
 
b/tika-transcribe/src/main/resources/META-INF.services/org.apache.tika.language.translate.Translator
new file mode 100644
index 0000000..1256ab6
--- /dev/null
+++ 
b/tika-transcribe/src/main/resources/META-INF.services/org.apache.tika.language.translate.Translator
@@ -0,0 +1,16 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+org.apache.tika.language.translate.amazontranscribe
diff --git 
a/tika-transcribe/src/main/resources/org.apache.tika.transcribe/transcribe.amazon.properties
 
b/tika-transcribe/src/main/resources/org.apache.tika.transcribe/transcribe.amazon.properties
new file mode 100644
index 0000000..043a66f
--- /dev/null
+++ 
b/tika-transcribe/src/main/resources/org.apache.tika.transcribe/transcribe.amazon.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+transcribe.AWS_ACCESS_KEY=dummy_key
+transcribe.AWS_SECRET_KEY=dummy_key
+transcribe.BUCKET_NAME=dummy_name
diff --git 
a/tika-transcribe/src/test/java/org/apache/tika/transcribe/AmazonTranscribeTest.java
 
b/tika-transcribe/src/test/java/org/apache/tika/transcribe/AmazonTranscribeTest.java
new file mode 100644
index 0000000..3b424f9
--- /dev/null
+++ 
b/tika-transcribe/src/test/java/org/apache/tika/transcribe/AmazonTranscribeTest.java
@@ -0,0 +1,527 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.transcribe;
+
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.io.FileInputStream;
+
+import static junit.framework.TestCase.assertNotNull;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+//TODO: Check the ACTUAL output of Amazon Transcribe.
+
+/**
+ * Tests tika-transcribe by creating an AmazonTranscribe() object.
+ * 1) Tests that transcribe functions properly when it is given an input
+ *    stream, both with and without an explicit source language.
+ * 2) Both audio (mp3) and video (mp4) files are used in these tests.
+ * <p>
+ * Each test does nothing when the transcriber reports that it is not
+ * available.
+ */
+@Ignore("Ignore until finalize AmazonTransribe Interface & build Tika")
+public class AmazonTranscribeTest {
+    /** Directory, relative to the module root, that holds the media files. */
+    private static final String RESOURCE_DIR = "src/test/resources/";
+
+    AmazonTranscribe transcriber;
+
+    @Before
+    public void setUp() {
+        transcriber = new AmazonTranscribe();
+    }
+
+    /**
+     * Transcribes one test resource and compares the result with the
+     * expected text. Returns silently when the transcriber is unavailable.
+     *
+     * @param fileName     name of the media file inside the test resources
+     * @param languageCode source language passed to the transcriber, or
+     *                     {@code null} to call the overload without a language
+     * @param expected     the transcript the service is expected to return
+     */
+    private void assertTranscription(String fileName, String languageCode,
+                                     String expected) {
+        if (!transcriber.isAvailable()) {
+            return;
+        }
+        // try-with-resources makes sure the file handle is always released.
+        try (FileInputStream stream = new FileInputStream(
+                RESOURCE_DIR + fileName)) {
+            String result = languageCode == null
+                    ? transcriber.transcribe(stream)
+                    : transcriber.transcribe(stream, languageCode);
+            assertNotNull(result);
+            assertEquals("Result: [" + result
+                            + "]: not equal to expected: [" + expected + "]",
+                    expected, result);
+        } catch (Exception e) {
+            e.printStackTrace();
+            fail(e.getMessage());
+        }
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is en-US (English - United States)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_enUS() {
+        assertTranscription("en-US_(A_Little_Bottle_Of_Water).mp3", "en-US",
+                "a little bottle of water.");
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is en-US (English - United States)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_enUS() {
+        assertTranscription("en-US_(A_Little_Bottle_Of_Water).mp3", null,
+                "a little bottle of water.");
+    }
+
+    /**
+     * Tests transcribe with a video file given the source language
+     * The source language of the file is en-US (English - United States)
+     */
+    @Test
+    public void testAmazonTranscribeVideo_enUS() {
+        assertTranscription("en-US_(Hi).mp4", "en-US", "Hi");
+    }
+
+    /**
+     * Tests transcribe with a video file without passing in the source language.
+     * The source language of the file is en-US (English - United States)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownVideo_enUS() {
+        assertTranscription("en-US_(Hi).mp4", null, "Hi");
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is en-GB (English - Great Britain)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_enGB() {
+        assertTranscription("en-GB_(A_Little_Bottle_Of_Water).mp3", "en-GB",
+                "a little bottle of water.");
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is en-GB (English - Great Britain)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_enGB() {
+        assertTranscription("en-GB_(A_Little_Bottle_Of_Water).mp3", null,
+                "a little bottle of water.");
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is en-AU (English - Australia)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_enAU() {
+        assertTranscription("en-AU_(A_Little_Bottle_Of_Water).mp3", "en-AU",
+                "a little bottle of water.");
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is en-AU (English - Australian)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_enAU() {
+        assertTranscription("en-AU_(A_Little_Bottle_Of_Water).mp3", null,
+                "a little bottle of water.");
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is de-DE (German)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_deDE() {
+        assertTranscription("de-DE_(We_Are_At_School_x2).mp3", "de-DE",
+                "Wir sind in der Schule. Wir sind in der Schule.");
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is de-DE (German)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_deDE() {
+        assertTranscription("de-DE_(We_Are_At_School_x2).mp3", null,
+                "Wir sind in der Schule. Wir sind in der Schule.");
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is it-IT (Italian)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_itIT() {
+        assertTranscription("it-IT_(We_Are_Having_Class_x2).mp3", "it-IT",
+                "stiamo facendo lezione. stiamo facendo lezione.");
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is it-IT (Italian)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_itIT() {
+        assertTranscription("it-IT_(We_Are_Having_Class_x2).mp3", null,
+                "stiamo facendo lezione. stiamo facendo lezione.");
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is ja-JP (Japanese)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_jaJP() {
+        //TODO or Watashitachi wa gakkō ni imasu
+        assertTranscription("ja-JP_(We_Are_At_School).mp3", "ja-JP",
+                "私達は学校にいます");
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is ja-JP (Japanese)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_jaJP() {
+        //TODO or Watashitachi wa gakkō ni imasu
+        assertTranscription("ja-JP_(We_Are_At_School).mp3", null,
+                "私達は学校にいます");
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is ko-KR (Korean)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_koKR() {
+        //TODO or ulineun sueob-eulhagoissda
+        assertTranscription("ko-KR_(We_Are_Having_Class_x2).mp3", "ko-KR",
+                "우리는 수업을하고있다");
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is ko-KR (Korean)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_koKR() {
+        //TODO or ulineun sueob-eulhagoissda
+        assertTranscription("ko-KR_(We_Are_Having_Class_x2).mp3", null,
+                "우리는 수업을하고있다");
+    }
+
+    /**
+     * Tests transcribe with a video file given the source language
+     * The source language of the file is ko-KR (Korean)
+     */
+    @Test
+    public void testAmazonTranscribeVideo_koKR() {
+        //TODO: Check whether output is Annyeonghaseyo or 안녕하세요
+        assertTranscription("ko-KR_(Annyeonghaseyo).mp4", "ko-KR",
+                "Annyeonghaseyo");
+    }
+
+    /**
+     * Tests transcribe with a video file without passing in the source language.
+     * The source language of the file is ko-KR (Korean)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownVideo_koKR() {
+        //TODO: Check whether output is Annyeonghaseyo or 안녕하세요
+        assertTranscription("ko-KR_(Annyeonghaseyo).mp4", null,
+                "Annyeonghaseyo");
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is pt-BR (Portuguese - Brazil)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_ptBR() {
+        assertTranscription("pt-BR_(We_Are_At_School).mp3", "pt-BR",
+                "nós estamos na escola.");
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is pt-BR (Portuguese - Brazil)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_ptBR() {
+        assertTranscription("pt-BR_(We_Are_At_School).mp3", null,
+                "nós estamos na escola.");
+    }
+}
diff --git 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/ShortAudioSampleFrench.mp3
 b/tika-transcribe/src/test/resources/ShortAudioSampleFrench.mp3
similarity index 100%
rename from 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/ShortAudioSampleFrench.mp3
rename to tika-transcribe/src/test/resources/ShortAudioSampleFrench.mp3
diff --git 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/de-DE_(We_Are_At_School_x2).mp3
 b/tika-transcribe/src/test/resources/de-DE_(We_Are_At_School_x2).mp3
similarity index 100%
rename from 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/de-DE_(We_Are_At_School_x2).mp3
rename to tika-transcribe/src/test/resources/de-DE_(We_Are_At_School_x2).mp3
diff --git 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/en-AU_(A_Little_Bottle_Of_Water).mp3
 b/tika-transcribe/src/test/resources/en-AU_(A_Little_Bottle_Of_Water).mp3
similarity index 100%
rename from 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/en-AU_(A_Little_Bottle_Of_Water).mp3
rename to 
tika-transcribe/src/test/resources/en-AU_(A_Little_Bottle_Of_Water).mp3
diff --git 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/en-GB_(A_Little_Bottle_Of_Water).mp3
 b/tika-transcribe/src/test/resources/en-GB_(A_Little_Bottle_Of_Water).mp3
similarity index 100%
rename from 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/en-GB_(A_Little_Bottle_Of_Water).mp3
rename to 
tika-transcribe/src/test/resources/en-GB_(A_Little_Bottle_Of_Water).mp3
diff --git 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/en-US_(A_Little_Bottle_Of_Water).mp3
 b/tika-transcribe/src/test/resources/en-US_(A_Little_Bottle_Of_Water).mp3
similarity index 100%
rename from 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/en-US_(A_Little_Bottle_Of_Water).mp3
rename to 
tika-transcribe/src/test/resources/en-US_(A_Little_Bottle_Of_Water).mp3
diff --git 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/en-US_(Hi).mp4
 b/tika-transcribe/src/test/resources/en-US_(Hi).mp4
similarity index 100%
rename from 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/en-US_(Hi).mp4
rename to tika-transcribe/src/test/resources/en-US_(Hi).mp4
diff --git 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/it-IT_(We_Are_Having_Class_x2).mp3
 b/tika-transcribe/src/test/resources/it-IT_(We_Are_Having_Class_x2).mp3
similarity index 100%
rename from 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/it-IT_(We_Are_Having_Class_x2).mp3
rename to tika-transcribe/src/test/resources/it-IT_(We_Are_Having_Class_x2).mp3
diff --git 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/ja-JP_(We_Are_At_School).mp3
 b/tika-transcribe/src/test/resources/ja-JP_(We_Are_At_School).mp3
similarity index 100%
rename from 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/ja-JP_(We_Are_At_School).mp3
rename to tika-transcribe/src/test/resources/ja-JP_(We_Are_At_School).mp3
diff --git 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/ko-KR_(Annyeonghaseyo).mp4
 b/tika-transcribe/src/test/resources/ko-KR_(Annyeonghaseyo).mp4
similarity index 100%
rename from 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/ko-KR_(Annyeonghaseyo).mp4
rename to tika-transcribe/src/test/resources/ko-KR_(Annyeonghaseyo).mp4
diff --git 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/ko-KR_(We_Are_Having_Class_x2).mp3
 b/tika-transcribe/src/test/resources/ko-KR_(We_Are_Having_Class_x2).mp3
similarity index 100%
rename from 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/ko-KR_(We_Are_Having_Class_x2).mp3
rename to tika-transcribe/src/test/resources/ko-KR_(We_Are_Having_Class_x2).mp3
diff --git 
a/tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/pt-BR_(We_Are_At_School).mp3
 b/tika-transcribe/src/test/resources/pt-BR_(We_Are_At_School).mp3
similarity index 100%
rename from 
tika-parsers/tika-parsers-ml/tika-transcribe-aws/src/test/resources/test-documents/pt-BR_(We_Are_At_School).mp3
rename to tika-transcribe/src/test/resources/pt-BR_(We_Are_At_School).mp3

Reply via email to