This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 2e520e8  Revert "TIKA-3384 -- convert transcribe to a traditional 
parser"
2e520e8 is described below

commit 2e520e82d7c2d5088803af60cb44793abd852bea
Author: tallison <[email protected]>
AuthorDate: Tue May 18 05:49:11 2021 -0400

    Revert "TIKA-3384 -- convert transcribe to a traditional parser"
    
    This reverts commit 93d2211037b01ca237a51f83879ae35f3f76dca8.
---
 pom.xml                                            |   1 +
 .../org/apache/tika/transcribe/Transcriber.java    |  60 +++
 tika-example/pom.xml                               |   8 +-
 .../tika/example/TranscribeTranslateExample.java   |  71 ++-
 tika-parsers/tika-parsers-ml/pom.xml               |   1 -
 tika-transcribe/pom.xml                            | 159 +++++++
 .../apache/tika/transcribe/AmazonTranscribe.java   | 406 ++++++++++++++++
 .../org.apache.tika.language.translate.Translator  |  16 +
 .../transcribe.amazon.properties                   |  18 +
 .../tika/transcribe/AmazonTranscribeTest.java      | 527 +++++++++++++++++++++
 .../src/test/resources/ShortAudioSampleFrench.mp3  | Bin 0 -> 25861 bytes
 .../test/resources/de-DE_(We_Are_At_School_x2).mp3 | Bin 0 -> 38547 bytes
 .../resources/en-AU_(A_Little_Bottle_Of_Water).mp3 | Bin 0 -> 33365 bytes
 .../resources/en-GB_(A_Little_Bottle_Of_Water).mp3 | Bin 0 -> 35872 bytes
 .../resources/en-US_(A_Little_Bottle_Of_Water).mp3 | Bin 0 -> 29603 bytes
 tika-transcribe/src/test/resources/en-US_(Hi).mp4  | Bin 0 -> 21739 bytes
 .../resources/it-IT_(We_Are_Having_Class_x2).mp3   | Bin 0 -> 42219 bytes
 .../test/resources/ja-JP_(We_Are_At_School).mp3    | Bin 0 -> 21699 bytes
 .../src/test/resources/ko-KR_(Annyeonghaseyo).mp4  | Bin 0 -> 144151 bytes
 .../resources/ko-KR_(We_Are_Having_Class_x2).mp3   | Bin 0 -> 66843 bytes
 .../test/resources/pt-BR_(We_Are_At_School).mp3    | Bin 0 -> 29043 bytes
 21 files changed, 1220 insertions(+), 47 deletions(-)

diff --git a/pom.xml b/pom.xml
index d0e43d4..f8c6591 100644
--- a/pom.xml
+++ b/pom.xml
@@ -52,6 +52,7 @@
     <module>tika-translate</module>
     <module>tika-example</module>
     <module>tika-java7</module>
+    <module>tika-transcribe</module>
   </modules>
 
   <profiles>
diff --git 
a/tika-core/src/main/java/org/apache/tika/transcribe/Transcriber.java 
b/tika-core/src/main/java/org/apache/tika/transcribe/Transcriber.java
new file mode 100644
index 0000000..3546256
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/transcribe/Transcriber.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.transcribe;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.tika.exception.TikaException;
+
+/**
+ * Interface for Transcriber services.
+ *
+ * @see <a href="https://issues.apache.org/jira/browse/TIKA-94">TIKA-94</a>
+ * @since Tika 2.1
+ */
+public interface Transcriber {
+    /**
+     * Transcribe the given file.
+     *
+     * @param inputStream the source input stream.
+     * @return The transcribed string result, NULL if the job failed.
+     * @throws TikaException When there is an error transcribing.
+     * @throws IOException   If an I/O exception of some sort has occurred.
+     * @since 2.1
+     */
+    public String transcribe(InputStream inputStream) throws TikaException, 
IOException;
+
+    /**
+     * Transcribe the given file using the given source language.
+     *
+     * @param inputStream    the source input stream.
+     * @param sourceLanguage The language code for the language used in the 
input media file.
+     * @return The transcribed string result, NULL if the job failed.
+     * @throws TikaException When there is an error transcribing.
+     * @throws IOException   If an I/O exception of some sort has occurred.
+     * @since 2.1
+     */
+    public String transcribe(InputStream inputStream, String sourceLanguage) 
throws TikaException, IOException;
+
+    /**
+     * @return true if this Transcriber is probably able to transcribe right 
now.
+     * @since Tika 2.1
+     */
+    public boolean isAvailable();
+}
diff --git a/tika-example/pom.xml b/tika-example/pom.xml
index ce6a2b3..f12304e 100644
--- a/tika-example/pom.xml
+++ b/tika-example/pom.xml
@@ -64,13 +64,13 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
-      <groupId>${project.groupId}</groupId>
+      <groupId>org.apache.tika</groupId>
       <artifactId>tika-eval-core</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-transcribe-aws</artifactId>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-transcribe</artifactId>
       <version>${project.version}</version>
       <exclusions>
         <exclusion>
@@ -88,7 +88,7 @@
       </exclusions>
     </dependency>
     <dependency>
-      <groupId>${project.groupId}</groupId>
+      <groupId>org.apache.tika</groupId>
       <artifactId>tika-core</artifactId>
       <version>${project.version}</version>
       <type>test-jar</type>
diff --git 
a/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java
 
b/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java
index f77af72..12dd7e5 100644
--- 
a/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java
+++ 
b/tika-example/src/main/java/org/apache/tika/example/TranscribeTranslateExample.java
@@ -17,14 +17,12 @@
 
 package org.apache.tika.example;
 
-import java.nio.file.Path;
-import java.nio.file.Paths;
+import java.io.FileInputStream;
 
-import org.apache.tika.Tika;
-import org.apache.tika.config.TikaConfig;
 import org.apache.tika.language.translate.GoogleTranslator;
 import org.apache.tika.language.translate.Translator;
-import org.apache.tika.parser.transcribe.aws.AmazonTranscribe;
+import org.apache.tika.transcribe.AmazonTranscribe;
+import org.apache.tika.transcribe.Transcriber;
 
 /**
  * This example demonstrates primitive logic for
@@ -32,8 +30,8 @@ import org.apache.tika.parser.transcribe.aws.AmazonTranscribe;
  * could be considered as a downstream process to 
  * transcription.
  * We simply pass the output of
- * a call to {@link Tika#parseToString(Path)}
- * into {@link Translator#translate(String, String)}.
+ * a call to {@link Transcriber#transcribe(java.io.InputStream)}
+ * into {@link Translator#translate(String, String)}. 
  * The {@link GoogleTranslator} is configured with a target 
  * language of "en-US".
  * @author lewismc
@@ -64,53 +62,42 @@ public class TranscribeTranslateExample {
 
     /**
      * Use {@link AmazonTranscribe} to execute transcription on input data.
-     * This implementation needs to be configured as explained in the Javadoc.
+     * This implementation needs configured as explained in the Javadoc.
      * @param file the name of the file (which needs to be on the Java 
Classpath) to transcribe.
      * @return transcribed text.
      */
-    public static String amazonTranscribe(Path tikaConfig, Path file) throws 
Exception {
-        return new Tika(new TikaConfig(tikaConfig)).parseToString(file);
+    public static String amazonTranscribe(String file) {
+        String filePath = 
TranscribeTranslateExample.class.getClassLoader().getResource(file).getPath();
+        String result = null;
+        Transcriber transcriber = new AmazonTranscribe();
+        if (transcriber.isAvailable()) {
+            try {
+                result = transcriber.transcribe(new FileInputStream(filePath));
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+        }
+        return result;
     }
 
     /**
      * Main method to run this example. This program can be invoked as follows
      * <ol>
-     * <li><code>transcribe-translate ${tika-config.xml} ${file}</code>; which 
executes both
+     * <li><code>transcribe-translate ${file}</code>; which executes both 
      * transcription then translation on the given resource, or 
-     * <li><code>transcribe ${tika-config.xml} ${file}</code>; which executes 
only translation</li>
+     * <li><code>transcribe ${file}</code>; which executes only 
translation</li>
      * @param args either of the commands described above and the input file 
-     * (which needs to be on the Java Classpath).
-     *
-     *
-     *
-     * ${tika-config.xml} must include credentials for aws and a temporary 
storage bucket:
-     * <pre>
-     * {@code
-     *  <properties>
-     *   <parsers>
-     *     <parser class="org.apache.tika.parser.DefaultParser"/>
-     *     <parser 
class="org.apache.tika.parser.transcribe.aws.AmazonTranscribe">
-     *       <params>
-     *         <param name="bucket" type="string">bucket</param>
-     *         <param name="clientId" type="string">clientId</param>
-     *         <param name="clientSecret" type="string">clientSecret</param>
-     *       </params>
-     *     </parser>
-     *   </parsers>
-     * </properties>
-     * }
-     * </pre>
+     * (which needs to be on the Java Classpath). 
      */
-    public static void main (String[] args) throws Exception {
+    public static void main (String[] args) {
         String text = null;
-        if (args.length > 1) {
-            if ("transcribe-translate".equals(args[1])) {
-                text = 
googleTranslateToEnglish(amazonTranscribe(Paths.get(args[0]),
-                        Paths.get(args[1])));
-                System.out.print("Transcription and translation 
successful!\nEXTRACTED TEXT: " + text);
-            } else if ("transcribe".equals(args[1])) {
-                text = amazonTranscribe(Paths.get(args[0]), 
Paths.get(args[1]));
-                System.out.print("Transcription successful!\nEXTRACTED TEXT: " 
+ text);
+        if (args.length != 0) {
+            if ("transcribe-translate".equals(args[0])) {
+                text = googleTranslateToEnglish(amazonTranscribe(args[1]));
+                System.out.print("Transcription and translation 
successful!\nEXTRAXCTED TEXT: " + text);
+            } else if ("transcribe".equals(args[0])) {
+                text = amazonTranscribe(args[1]);
+                System.out.print("Transcription successful!\nEXTRAXCTED TEXT: 
" + text);
             } else {
                 System.out.print("Incorrect invocation, see Javadoc.");
             }
diff --git a/tika-parsers/tika-parsers-ml/pom.xml 
b/tika-parsers/tika-parsers-ml/pom.xml
index 2dcde9e..ba9bd38 100644
--- a/tika-parsers/tika-parsers-ml/pom.xml
+++ b/tika-parsers/tika-parsers-ml/pom.xml
@@ -40,7 +40,6 @@
     <module>tika-age-recogniser</module>
     <module>tika-parser-advancedmedia-module</module>
     <module>tika-dl</module>
-    <module>tika-transcribe-aws</module>
   </modules>
 
   <build>
diff --git a/tika-transcribe/pom.xml b/tika-transcribe/pom.xml
new file mode 100644
index 0000000..aadb137
--- /dev/null
+++ b/tika-transcribe/pom.xml
@@ -0,0 +1,159 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>org.apache.tika</groupId>
+        <artifactId>tika-parent</artifactId>
+        <version>2.0.0-SNAPSHOT</version>
+        <relativePath>../tika-parent/pom.xml</relativePath>
+    </parent>
+
+    <artifactId>tika-transcribe</artifactId>
+    <packaging>bundle</packaging>
+    <name>Apache Tika transcribe</name>
+    <url>http://tika.apache.org/</url>
+    <!--TODO use latest aws version or the one defined in the tika-parent-->
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.tika</groupId>
+            <artifactId>tika-core</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-java-sdk-transcribe</artifactId>
+            <version>${aws.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>commons-logging</groupId>
+                    <artifactId>commons-logging</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>commons-codec</groupId>
+                    <artifactId>commons-codec</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-java-sdk-s3</artifactId>
+            <version>${aws.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.googlecode.json-simple</groupId>
+            <artifactId>json-simple</artifactId>
+            <version>${json.simple.version}</version>
+        </dependency>
+        <!-- Test dependencies -->
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+        </dependency>
+    </dependencies>
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.felix</groupId>
+                <artifactId>maven-bundle-plugin</artifactId>
+                <version>${maven.bundle.version}</version>
+                <extensions>true</extensions>
+                <configuration>
+                    <instructions>
+                        <Bundle-DocURL>${project.url}</Bundle-DocURL>
+                        <Bundle-Activator>
+                            org.apache.tika.parser.internal.Activator
+                        </Bundle-Activator>
+                        <Import-Package>
+                            org.w3c.dom,
+                            org.apache.tika.*,
+                            *;resolution:=optional
+                        </Import-Package>
+                    </instructions>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.rat</groupId>
+                <artifactId>apache-rat-plugin</artifactId>
+                <version>${rat.version}</version>
+                <configuration>
+                    <excludes>
+                        
<exclude>src/main/java/org/apache/tika/parser/txt/Charset*.java</exclude>
+                        <exclude>src/test/resources/test-documents/**</exclude>
+                    </excludes>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-jar-plugin</artifactId>
+                <configuration>
+                    <archive>
+                        <manifestEntries>
+                            
<Automatic-Module-Name>org.apache.tika.translate</Automatic-Module-Name>
+                        </manifestEntries>
+                    </archive>
+                </configuration>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>test-jar</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+
+        <pluginManagement>
+            <plugins>
+                <!-- This plugin's configuration is used to store Eclipse m2e  
    -->
+                <!-- settings only. It has no influence on the Maven build 
itself. -->
+                <plugin>
+                    <groupId>org.eclipse.m2e</groupId>
+                    <artifactId>lifecycle-mapping</artifactId>
+                    <version>1.0.0</version>
+                    <configuration>
+                        <lifecycleMappingMetadata>
+                            <pluginExecutions>
+                                <pluginExecution>
+                                    <pluginExecutionFilter>
+                                        <groupId>org.apache.felix</groupId>
+                                        
<artifactId>maven-scr-plugin</artifactId>
+                                        <version>${maven.scr.version}</version>
+                                        <goals>
+                                            <goal>scr</goal>
+                                        </goals>
+                                    </pluginExecutionFilter>
+                                    <action>
+                                        <execute/>
+                                    </action>
+                                </pluginExecution>
+                            </pluginExecutions>
+                        </lifecycleMappingMetadata>
+                    </configuration>
+                </plugin>
+            </plugins>
+        </pluginManagement>
+    </build>
+</project>
\ No newline at end of file
diff --git 
a/tika-transcribe/src/main/java/org/apache/tika/transcribe/AmazonTranscribe.java
 
b/tika-transcribe/src/main/java/org/apache/tika/transcribe/AmazonTranscribe.java
new file mode 100644
index 0000000..5b50491
--- /dev/null
+++ 
b/tika-transcribe/src/main/java/org/apache/tika/transcribe/AmazonTranscribe.java
@@ -0,0 +1,406 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.transcribe;
+
+import com.amazonaws.AmazonServiceException;
+import com.amazonaws.SdkClientException;
+import com.amazonaws.auth.AWSStaticCredentialsProvider;
+import com.amazonaws.auth.BasicAWSCredentials;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+import com.amazonaws.services.s3.model.AmazonS3Exception;
+import com.amazonaws.services.s3.model.CompressionType;
+import com.amazonaws.services.s3.model.ExpressionType;
+import com.amazonaws.services.s3.model.InputSerialization;
+import com.amazonaws.services.s3.model.JSONInput;
+import com.amazonaws.services.s3.model.JSONOutput;
+import com.amazonaws.services.s3.model.JSONType;
+import com.amazonaws.services.s3.model.OutputSerialization;
+import com.amazonaws.services.s3.model.PutObjectRequest;
+import com.amazonaws.services.s3.model.PutObjectResult;
+import com.amazonaws.services.s3.model.SelectObjectContentEvent;
+import com.amazonaws.services.s3.model.SelectObjectContentEventVisitor;
+import com.amazonaws.services.s3.model.SelectObjectContentRequest;
+import com.amazonaws.services.s3.model.SelectObjectContentResult;
+import com.amazonaws.services.transcribe.AmazonTranscribeAsync;
+import com.amazonaws.services.transcribe.AmazonTranscribeAsyncClientBuilder;
+import com.amazonaws.services.transcribe.model.Media;
+import com.amazonaws.services.transcribe.model.StartTranscriptionJobRequest;
+import com.amazonaws.services.transcribe.model.TranscriptionJob;
+import com.amazonaws.services.transcribe.model.TranscriptionJobStatus;
+import com.amazonaws.services.transcribe.model.GetTranscriptionJobRequest;
+import com.amazonaws.services.transcribe.model.GetTranscriptionJobResult;
+import com.amazonaws.services.transcribe.model.LanguageCode;
+import org.apache.tika.exception.TikaException;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.Properties;
+import java.util.UUID;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.stream.Collectors;
+
+/**
+ * <a href="https://aws.amazon.com/transcribe/">Amazon Transcribe</a> 
+ * {@link Transcriber} implementation. See Javadoc for configuration options.
+ *
+ * @since Tika 2.1
+ */
+public class AmazonTranscribe implements Transcriber {
+
+    public static final String PROPERTIES_FILE = 
"transcribe.amazon.properties";
+    public static final String ID_PROPERTY = "transcribe.AWS_ACCESS_KEY";
+    public static final String SECRET_PROPERTY = "transcribe.AWS_SECRET_KEY";
+    public static final String DEFAULT_ID = "dummy-id";
+    public static final String DEFAULT_SECRET = "dummy-secret";
+    public static final String DEFAULT_BUCKET = "dummy-bucket";
+    public static final String BUCKET_NAME = "transcribe.BUCKET_NAME";
+    public static final String REGION = "transcribe.REGION";
+    private static final Logger LOG = LoggerFactory
+            .getLogger(AmazonTranscribe.class);
+    private AmazonTranscribeAsync amazonTranscribeAsync;
+    private AmazonS3 amazonS3;
+    private String bucketName;
+    private String region;
+    private boolean isAvailable; // Flag for whether or not transcription is
+    // available.
+    private String clientId;
+    private String clientSecret; // Keys used for the API calls.
+    private AWSStaticCredentialsProvider credsProvider;
+
+    /**
+     * Create a new AmazonTranscribe instance with the client keys specified in
+     * <code>transcribe.amazon.properties</code> which needs to be available on
+     * the Java Classpath.
+     * Silently becomes unavailable when client keys are unavailable.
+     * <code>transcribe.AWS_ACCESS_KEY</code>,
+     * <code>transcribe.AWS_SECRET_KEY</code>,
+     * <code>transcribe.BUCKET_NAME</code> and 
+     * <code>transcribe.REGION</code> must be set in
+     * <code>transcribe.amazon.properties</code>.
+     * <b>N.B.</b> it is not necessary to create the bucket beforehand. 
+     * This implementation will automatically create the bucket if one
+     * does not already exist, per the name defined above.
+     *
+     * @since Tika 2.0
+     */
+    public AmazonTranscribe() {
+        Properties config = new Properties();
+        try {
+            config.load(AmazonTranscribe.class
+                    .getResourceAsStream(PROPERTIES_FILE));
+            this.clientId = config.getProperty(ID_PROPERTY);
+            this.clientSecret = config.getProperty(SECRET_PROPERTY);
+            this.bucketName = config.getProperty(BUCKET_NAME);
+            this.region = config.getProperty(REGION);
+            BasicAWSCredentials creds = new BasicAWSCredentials(this.clientId,
+                    this.clientSecret);
+            this.credsProvider = new AWSStaticCredentialsProvider(creds);
+            amazonS3 = AmazonS3ClientBuilder.standard()
+                    .withCredentials(credsProvider).withRegion(this.region)
+                    .build();
+            this.isAvailable = checkAvailable();
+            if (!this.amazonS3.doesBucketExistV2(this.bucketName)) {
+                try {
+                    amazonS3.createBucket(this.bucketName);
+                } catch (AmazonS3Exception e) {
+                    throw new RuntimeException(e.getErrorMessage());
+                }
+            }
+            this.amazonTranscribeAsync = AmazonTranscribeAsyncClientBuilder
+                    .standard().withCredentials(credsProvider)
+                    .withRegion(this.region).build();
+        } catch (Exception e) {
+            LOG.warn("Exception reading config file", e);
+            isAvailable = false;
+        }
+    }
+
+    /**
+     * private method to get a unique job key.
+     *
+     * @return unique job key.
+     */
+    private String getJobKey() {
+        return UUID.randomUUID().toString();
+    }
+
+    /**
+     * Constructs a new {@link PutObjectRequest} object to upload a file to the
+     * specified bucket and jobName. After constructing the request, users may
+     * optionally specify object metadata or a canned ACL as well.
+     *
+     * @param inputStream, null
+     *            The file to upload to Amazon S3.
+     * @param jobName
+     *            The unique job name for each job(UUID).
+     */
+    private void uploadFileToBucket(InputStream inputStream, String jobName)
+            throws TikaException {
+        PutObjectRequest request = new PutObjectRequest(this.bucketName,
+                jobName, inputStream, null);
+        try {
+            @SuppressWarnings("unused")
+            PutObjectResult response = amazonS3.putObject(request);
+        } catch (SdkClientException e) {
+            throw (new TikaException("File Upload to AWS Failed"));
+        }
+    }
+
+    /**
+     * Starts AWS Transcribe Job without language specification.
+     *
+     * @param inputStream
+     *            the source input stream.
+     * @return The transcribed string result, NULL if the job failed.
+     * @throws TikaException
+     *             When there is an error transcribing.
+     * @throws IOException
+     *             If an I/O exception of some sort has occurred.
+     */
+    @Override
+    public String transcribe(InputStream inputStream)
+            throws TikaException, IOException {
+        if (!isAvailable())
+            return null;
+        String jobName = getJobKey();
+        uploadFileToBucket(inputStream, jobName);
+        StartTranscriptionJobRequest startTranscriptionJobRequest = new 
StartTranscriptionJobRequest();
+        Media media = new Media();
+        media.setMediaFileUri(amazonS3.getUrl(bucketName, jobName).toString());
+        
startTranscriptionJobRequest.withIdentifyLanguage(true).withMedia(media)
+        .withOutputBucketName(this.bucketName)
+        .withTranscriptionJobName(jobName)
+        .setRequestCredentialsProvider(credsProvider);
+        amazonTranscribeAsync
+        .startTranscriptionJob(startTranscriptionJobRequest);
+        return getTranscriptText(jobName);
+    }
+
+    /**
+     * Starts AWS Transcribe Job with language specification.
+     *
+     * @param inputStream
+     *            the source input stream.
+     * @param sourceLanguage
+     *            <a href=
+     *            
"https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/transcribe/model/LanguageCode.html">AWS
+     *            Language Code</a> for the language used in the input media
+     *            file.
+     * @return The transcribed string result, NULL if the job failed.
+     * @throws TikaException
+     *             When there is an error transcribing.
+     * @throws IOException
+     *             If an I/O exception of some sort has occurred.
+     * @see <a href=
+     *      
"https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/transcribe/model/LanguageCode.html">AWS
+     *      Language Code</a>
+     */
+    @Override
+    public String transcribe(InputStream inputStream, String sourceLanguage)
+            throws TikaException, IOException {
+        if (!isAvailable())
+            return null;
+        String jobName = getJobKey();
+        uploadFileToBucket(inputStream, jobName);
+        StartTranscriptionJobRequest startTranscriptionJobRequest = new 
StartTranscriptionJobRequest();
+        Media media = new Media();
+        media.setMediaFileUri(amazonS3.getUrl(bucketName, jobName).toString());
+        ((StartTranscriptionJobRequest) startTranscriptionJobRequest
+                .withMedia(media).withOutputBucketName(this.bucketName)
+                .withTranscriptionJobName(jobName)
+                .withRequestCredentialsProvider(credsProvider))
+        .withLanguageCode(
+                LanguageCode.fromValue(sourceLanguage));
+        amazonTranscribeAsync
+        .startTranscriptionJob(startTranscriptionJobRequest);
+        return getTranscriptText(jobName);
+    }
+
+    /**
+     * @return true if this Transcriber is probably able to transcribe right
+     *         now.
+     * @since Tika 2.1
+     */
+    @Override
+    public boolean isAvailable() {
+        return this.isAvailable;
+    }
+
+    /**
+     * Sets the client Id for the transcriber API.
+     *
+     * @param id
+     *            The ID to set.
+     */
+    public void setId(String id) {
+        this.clientId = id;
+        this.isAvailable = checkAvailable();
+    }
+
+    /**
+     * Sets the client secret for the transcriber API.
+     *
+     * @param secret
+     *            The secret to set.
+     */
+    public void setSecret(String secret) {
+        this.clientSecret = secret;
+        this.isAvailable = checkAvailable();
+    }
+
+    /**
+     * Sets the AWS S3 bucket name for the transcriber API.
+     *
+     * @param bucket
+     *            The bucket to set.
+     */
+    public void setBucket(String bucket) {
+        this.bucketName = bucket;
+        this.isAvailable = checkAvailable();
+    }
+
+    /**
+     * Private method to check whether the service is available.
+     *
+     * @return if the service is available
+     */
+    private boolean checkAvailable() {
+        return clientId != null && !clientId.equals(DEFAULT_ID)
+                && clientSecret != null && !clientSecret.equals(DEFAULT_SECRET)
+                && bucketName != null && !bucketName.equals(DEFAULT_BUCKET);
+    }
+
+    /**
+     * Gets the transcription result from the AWS S3 bucket given the job name.
+     * <p>
+     * Waits until the job has finished, then runs an S3 Select query against the
+     * object {@code <fileNameS3>.json} in the configured bucket and extracts the
+     * {@code transcript} field of the selected record.
+     *
+     * @param fileNameS3
+     *            The transcription job name; with a ".json" suffix it is also the
+     *            S3 key of the result object that is queried.
+     * @return The transcribed string result, NULL if the job failed.
+     * @throws IOException possible reasons include (i) an End Event is not received
+     * from AWS S3 SelectObjectContentResult operation, (ii) a parse exception
+     * whilst processing JSON from the AWS S3 SelectObjectContentResult operation
+     * and (iii) a selected record that carries no transcript field.
+     * @throws SdkClientException a AWS-specific exception related to
+     * SelectObjectContentResult operation.
+     * @throws AmazonServiceException possibly thrown if there is an issue selecting
+     * object content from AWS S3 objects.
+     */
+    private String getTranscriptText(String fileNameS3)
+            throws AmazonServiceException, SdkClientException, IOException {
+        TranscriptionJob transcriptionJob = retrieveObjectWhenJobCompleted(fileNameS3);
+        if (transcriptionJob == null || TranscriptionJobStatus.FAILED.name()
+                .equals(transcriptionJob.getTranscriptionJobStatus())) {
+            // No transcript is read for a failed (or missing) job: return null as
+            // documented instead of handing a null string to the JSON parser.
+            return null;
+        }
+
+        InputSerialization inputSerialization = new InputSerialization()
+                .withJson(new JSONInput().withType(JSONType.DOCUMENT))
+                .withCompressionType(CompressionType.NONE);
+        OutputSerialization outputSerialization = new OutputSerialization()
+                .withJson(new JSONOutput());
+        SelectObjectContentRequest request = new SelectObjectContentRequest()
+                .withBucketName(this.bucketName)
+                .withKey(fileNameS3 + ".json")
+                .withExpression("Select s.results.transcripts[0].transcript from S3Object s")
+                .withExpressionType(ExpressionType.SQL)
+                .withRequestCredentialsProvider(credsProvider);
+        request.setInputSerialization(inputSerialization);
+        request.setOutputSerialization(outputSerialization);
+
+        final AtomicBoolean isResultComplete = new AtomicBoolean(false);
+        SelectObjectContentEventVisitor visitor = new SelectObjectContentEventVisitor() {
+            @Override
+            public void visit(SelectObjectContentEvent.StatsEvent event) {
+                LOG.debug("Received Stats, Bytes Scanned: "
+                        + event.getDetails().getBytesScanned()
+                        + " Bytes Processed: "
+                        + event.getDetails().getBytesProcessed());
+            }
+
+            /*
+             * An End Event informs that the request has finished successfully.
+             */
+            @Override
+            public void visit(SelectObjectContentEvent.EndEvent event) {
+                isResultComplete.set(true);
+                LOG.debug("Received End Event. Result is complete.");
+            }
+        };
+
+        String text;
+        // Both the S3 result and the reader wrapped around its payload are closed here.
+        try (SelectObjectContentResult result = amazonS3.selectObjectContent(request);
+             BufferedReader reader = new BufferedReader(new InputStreamReader(
+                     result.getPayload().getRecordsInputStream(visitor),
+                     StandardCharsets.UTF_8))) {
+            text = reader.lines().collect(Collectors.joining("\n"));
+        }
+        /*
+         * The End Event indicates all matching records have been
+         * transmitted. If the End Event is not received, the results
+         * may be incomplete.
+         */
+        if (!isResultComplete.get()) {
+            throw new IOException(
+                    "S3 Select request was incomplete as End Event was not received.");
+        }
+
+        Object parsed;
+        try {
+            parsed = new JSONParser().parse(text);
+        } catch (ParseException e) {
+            throw new IOException(e.getMessage(), e);
+        }
+        // Report an unexpected record shape as an IOException rather than letting a
+        // ClassCastException or NullPointerException escape.
+        if (!(parsed instanceof JSONObject)) {
+            throw new IOException("S3 Select result for " + fileNameS3
+                    + ".json is not a JSON object.");
+        }
+        Object transcript = ((JSONObject) parsed).get("transcript");
+        if (transcript == null) {
+            throw new IOException("S3 Select result for " + fileNameS3
+                    + ".json does not contain a transcript.");
+        }
+        return transcript.toString();
+    }
+
+    /**
+     * Private helper function that polls the transcription service until the
+     * given job reports either COMPLETED or FAILED.
+     * <p>
+     * The method waits between polls so that a long-running job does not turn
+     * into a tight loop of service requests.
+     *
+     * @param jobName
+     *            The unique job name for each job(UUID).
+     * @return TranscriptionJob object in its final (COMPLETED or FAILED) state
+     * @throws IllegalStateException if the calling thread is interrupted while
+     *             waiting; the interrupt flag is restored before throwing.
+     */
+    private TranscriptionJob retrieveObjectWhenJobCompleted(String jobName) {
+        // Pause between two status requests for the same job.
+        final long pollIntervalMillis = 1000L;
+
+        GetTranscriptionJobRequest getTranscriptionJobRequest =
+                new GetTranscriptionJobRequest();
+        getTranscriptionJobRequest.withRequestCredentialsProvider(credsProvider);
+        getTranscriptionJobRequest.setTranscriptionJobName(jobName);
+        while (true) {
+            GetTranscriptionJobResult innerResult = amazonTranscribeAsync
+                    .getTranscriptionJob(getTranscriptionJobRequest);
+            String status = innerResult.getTranscriptionJob()
+                    .getTranscriptionJobStatus();
+            if (TranscriptionJobStatus.COMPLETED.name().equals(status)
+                    || TranscriptionJobStatus.FAILED.name().equals(status)) {
+                return innerResult.getTranscriptionJob();
+            }
+            try {
+                Thread.sleep(pollIntervalMillis);
+            } catch (InterruptedException e) {
+                // Keep the interrupt visible to callers and stop waiting.
+                Thread.currentThread().interrupt();
+                throw new IllegalStateException(
+                        "Interrupted while waiting for transcription job " + jobName, e);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git 
a/tika-transcribe/src/main/resources/META-INF.services/org.apache.tika.language.translate.Translator
 
b/tika-transcribe/src/main/resources/META-INF.services/org.apache.tika.language.translate.Translator
new file mode 100644
index 0000000..1256ab6
--- /dev/null
+++ 
b/tika-transcribe/src/main/resources/META-INF.services/org.apache.tika.language.translate.Translator
@@ -0,0 +1,16 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+org.apache.tika.language.translate.amazontranscribe
diff --git 
a/tika-transcribe/src/main/resources/org.apache.tika.transcribe/transcribe.amazon.properties
 
b/tika-transcribe/src/main/resources/org.apache.tika.transcribe/transcribe.amazon.properties
new file mode 100644
index 0000000..043a66f
--- /dev/null
+++ 
b/tika-transcribe/src/main/resources/org.apache.tika.transcribe/transcribe.amazon.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+transcribe.AWS_ACCESS_KEY=dummy_key
+transcribe.AWS_SECRET_KEY=dummy_key
+transcribe.BUCKET_NAME=dummy_name
diff --git 
a/tika-transcribe/src/test/java/org/apache/tika/transcribe/AmazonTranscribeTest.java
 
b/tika-transcribe/src/test/java/org/apache/tika/transcribe/AmazonTranscribeTest.java
new file mode 100644
index 0000000..3b424f9
--- /dev/null
+++ 
b/tika-transcribe/src/test/java/org/apache/tika/transcribe/AmazonTranscribeTest.java
@@ -0,0 +1,527 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.transcribe;
+
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.io.FileInputStream;
+
+import static junit.framework.TestCase.assertNotNull;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+//TODO: Check the ACTUAL output of Amazon Transcribe.
+
+/**
+ * Tests tika-transcribe by creating an AmazonTranscribe() object.
+ * 1) Tests that transcribe functions properly when it is given just an input stream,
+ *    with and without an explicit source language.
+ * 2) Both audio (mp3) and video (mp4) files are used in these tests.
+ * <p>
+ * Every test is a no-op when the transcriber reports that it is not available.
+ */
+@Ignore("Ignore until finalize AmazonTransribe Interface & build Tika")
+public class AmazonTranscribeTest {
+    /** Directory, relative to the module root, that holds the media samples. */
+    private static final String RESOURCE_DIR = "src/test/resources/";
+
+    AmazonTranscribe transcriber;
+
+    @Before
+    public void setUp() {
+        transcriber = new AmazonTranscribe();
+    }
+
+    /**
+     * Transcribes one sample file and compares the result with the expected text.
+     * Does nothing when the transcriber is not available.
+     *
+     * @param fileName     name of the sample inside {@link #RESOURCE_DIR}
+     * @param languageCode source language to pass to the transcriber, or null to call
+     *                     the overload that takes no language
+     * @param expected     the text the transcriber is expected to return
+     */
+    private void assertTranscription(String fileName, String languageCode,
+                                     String expected) {
+        if (!transcriber.isAvailable()) {
+            return;
+        }
+        String path = RESOURCE_DIR + fileName;
+        // try-with-resources: the sample stream is closed even when an assertion fails.
+        try (FileInputStream input = new FileInputStream(path)) {
+            String result = languageCode == null
+                    ? transcriber.transcribe(input)
+                    : transcriber.transcribe(input, languageCode);
+            assertNotNull(result);
+            assertEquals("Result: [" + result
+                    + "]: not equal to expected: [" + expected + "]",
+                    expected, result);
+        } catch (Exception e) {
+            // fail() cannot carry a cause, so print the trace before failing.
+            e.printStackTrace();
+            fail(e.getMessage());
+        }
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is en-US (English - United States)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_enUS() {
+        assertTranscription("en-US_(A_Little_Bottle_Of_Water).mp3", "en-US",
+                "a little bottle of water.");
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is en-US (English - United States)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_enUS() {
+        assertTranscription("en-US_(A_Little_Bottle_Of_Water).mp3", null,
+                "a little bottle of water.");
+    }
+
+    /**
+     * Tests transcribe with a video file given the source language
+     * The source language of the file is en-US (English - United States)
+     */
+    @Test
+    public void testAmazonTranscribeVideo_enUS() {
+        assertTranscription("en-US_(Hi).mp4", "en-US", "Hi");
+    }
+
+    /**
+     * Tests transcribe with a video file without passing in the source language.
+     * The source language of the file is en-US (English - United States)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownVideo_enUS() {
+        assertTranscription("en-US_(Hi).mp4", null, "Hi");
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is en-GB (English - Great Britain)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_enGB() {
+        assertTranscription("en-GB_(A_Little_Bottle_Of_Water).mp3", "en-GB",
+                "a little bottle of water.");
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is en-GB (English - Great Britain)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_enGB() {
+        assertTranscription("en-GB_(A_Little_Bottle_Of_Water).mp3", null,
+                "a little bottle of water.");
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is en-AU (English - Australia)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_enAU() {
+        assertTranscription("en-AU_(A_Little_Bottle_Of_Water).mp3", "en-AU",
+                "a little bottle of water.");
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is en-AU (English - Australian)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_enAU() {
+        assertTranscription("en-AU_(A_Little_Bottle_Of_Water).mp3", null,
+                "a little bottle of water.");
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is de-DE (German)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_deDE() {
+        assertTranscription("de-DE_(We_Are_At_School_x2).mp3", "de-DE",
+                "Wir sind in der Schule. Wir sind in der Schule.");
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is de-DE (German)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_deDE() {
+        assertTranscription("de-DE_(We_Are_At_School_x2).mp3", null,
+                "Wir sind in der Schule. Wir sind in der Schule.");
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is it-IT (Italian)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_itIT() {
+        assertTranscription("it-IT_(We_Are_Having_Class_x2).mp3", "it-IT",
+                "stiamo facendo lezione. stiamo facendo lezione.");
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is it-IT (Italian)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_itIT() {
+        assertTranscription("it-IT_(We_Are_Having_Class_x2).mp3", null,
+                "stiamo facendo lezione. stiamo facendo lezione.");
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is ja-JP (Japanese)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_jaJP() {
+        //TODO or Watashitachi wa gakkō ni imasu
+        assertTranscription("ja-JP_(We_Are_At_School).mp3", "ja-JP", "私達は学校にいます");
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is ja-JP (Japanese)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_jaJP() {
+        //TODO or Watashitachi wa gakkō ni imasu
+        assertTranscription("ja-JP_(We_Are_At_School).mp3", null, "私達は学校にいます");
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is ko-KR (Korean)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_koKR() {
+        //TODO or ulineun sueob-eulhagoissda
+        assertTranscription("ko-KR_(We_Are_Having_Class_x2).mp3", "ko-KR",
+                "우리는 수업을하고있다");
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is ko-KR (Korean)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_koKR() {
+        //TODO or ulineun sueob-eulhagoissda
+        assertTranscription("ko-KR_(We_Are_Having_Class_x2).mp3", null,
+                "우리는 수업을하고있다");
+    }
+
+    /**
+     * Tests transcribe with a video file given the source language
+     * The source language of the file is ko-KR (Korean)
+     */
+    @Test
+    public void testAmazonTranscribeVideo_koKR() {
+        //TODO: Check whether output is Annyeonghaseyo or 안녕하세요
+        assertTranscription("ko-KR_(Annyeonghaseyo).mp4", "ko-KR", "Annyeonghaseyo");
+    }
+
+    /**
+     * Tests transcribe with a video file without passing in the source language.
+     * The source language of the file is ko-KR (Korean)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownVideo_koKR() {
+        //TODO: Check whether output is Annyeonghaseyo or 안녕하세요
+        assertTranscription("ko-KR_(Annyeonghaseyo).mp4", null, "Annyeonghaseyo");
+    }
+
+    /**
+     * Tests transcribe with an audio file given the source language
+     * The source language of the file is pt-BR (Portuguese - Brazil)
+     */
+    @Test
+    public void testAmazonTranscribeAudio_ptBR() {
+        assertTranscription("pt-BR_(We_Are_At_School).mp3", "pt-BR",
+                "nós estamos na escola.");
+    }
+
+    /**
+     * Tests transcribe with an audio file without passing in the source language.
+     * The source language of the file is pt-BR (Portuguese - Brazil)
+     */
+    @Test
+    public void testAmazonTranscribeUnknownAudio_ptBR() {
+        assertTranscription("pt-BR_(We_Are_At_School).mp3", null,
+                "nós estamos na escola.");
+    }
+
+}
diff --git a/tika-transcribe/src/test/resources/ShortAudioSampleFrench.mp3 
b/tika-transcribe/src/test/resources/ShortAudioSampleFrench.mp3
new file mode 100644
index 0000000..a718047
Binary files /dev/null and 
b/tika-transcribe/src/test/resources/ShortAudioSampleFrench.mp3 differ
diff --git a/tika-transcribe/src/test/resources/de-DE_(We_Are_At_School_x2).mp3 
b/tika-transcribe/src/test/resources/de-DE_(We_Are_At_School_x2).mp3
new file mode 100644
index 0000000..9d4df04
Binary files /dev/null and 
b/tika-transcribe/src/test/resources/de-DE_(We_Are_At_School_x2).mp3 differ
diff --git 
a/tika-transcribe/src/test/resources/en-AU_(A_Little_Bottle_Of_Water).mp3 
b/tika-transcribe/src/test/resources/en-AU_(A_Little_Bottle_Of_Water).mp3
new file mode 100644
index 0000000..16f840d
Binary files /dev/null and 
b/tika-transcribe/src/test/resources/en-AU_(A_Little_Bottle_Of_Water).mp3 differ
diff --git 
a/tika-transcribe/src/test/resources/en-GB_(A_Little_Bottle_Of_Water).mp3 
b/tika-transcribe/src/test/resources/en-GB_(A_Little_Bottle_Of_Water).mp3
new file mode 100644
index 0000000..2c6ae35
Binary files /dev/null and 
b/tika-transcribe/src/test/resources/en-GB_(A_Little_Bottle_Of_Water).mp3 differ
diff --git 
a/tika-transcribe/src/test/resources/en-US_(A_Little_Bottle_Of_Water).mp3 
b/tika-transcribe/src/test/resources/en-US_(A_Little_Bottle_Of_Water).mp3
new file mode 100644
index 0000000..3d69b68
Binary files /dev/null and 
b/tika-transcribe/src/test/resources/en-US_(A_Little_Bottle_Of_Water).mp3 differ
diff --git a/tika-transcribe/src/test/resources/en-US_(Hi).mp4 
b/tika-transcribe/src/test/resources/en-US_(Hi).mp4
new file mode 100644
index 0000000..d697b13
Binary files /dev/null and b/tika-transcribe/src/test/resources/en-US_(Hi).mp4 
differ
diff --git 
a/tika-transcribe/src/test/resources/it-IT_(We_Are_Having_Class_x2).mp3 
b/tika-transcribe/src/test/resources/it-IT_(We_Are_Having_Class_x2).mp3
new file mode 100644
index 0000000..5fa69c3
Binary files /dev/null and 
b/tika-transcribe/src/test/resources/it-IT_(We_Are_Having_Class_x2).mp3 differ
diff --git a/tika-transcribe/src/test/resources/ja-JP_(We_Are_At_School).mp3 
b/tika-transcribe/src/test/resources/ja-JP_(We_Are_At_School).mp3
new file mode 100644
index 0000000..5ddf6e5
Binary files /dev/null and 
b/tika-transcribe/src/test/resources/ja-JP_(We_Are_At_School).mp3 differ
diff --git a/tika-transcribe/src/test/resources/ko-KR_(Annyeonghaseyo).mp4 
b/tika-transcribe/src/test/resources/ko-KR_(Annyeonghaseyo).mp4
new file mode 100644
index 0000000..d757d42
Binary files /dev/null and 
b/tika-transcribe/src/test/resources/ko-KR_(Annyeonghaseyo).mp4 differ
diff --git 
a/tika-transcribe/src/test/resources/ko-KR_(We_Are_Having_Class_x2).mp3 
b/tika-transcribe/src/test/resources/ko-KR_(We_Are_Having_Class_x2).mp3
new file mode 100644
index 0000000..444098c
Binary files /dev/null and 
b/tika-transcribe/src/test/resources/ko-KR_(We_Are_Having_Class_x2).mp3 differ
diff --git a/tika-transcribe/src/test/resources/pt-BR_(We_Are_At_School).mp3 
b/tika-transcribe/src/test/resources/pt-BR_(We_Are_At_School).mp3
new file mode 100644
index 0000000..7dfc811
Binary files /dev/null and 
b/tika-transcribe/src/test/resources/pt-BR_(We_Are_At_School).mp3 differ

Reply via email to