[
https://issues.apache.org/jira/browse/NIFI-615?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15343010#comment-15343010
]
ASF GitHub Bot commented on NIFI-615:
-------------------------------------
Github user JPercivall commented on a diff in the pull request:
https://github.com/apache/nifi/pull/556#discussion_r67969885
--- Diff:
nifi-nar-bundles/nifi-media-bundle/nifi-media-processors/src/test/java/org/apache/nifi/processors/media/TestExtractMediaMetadata.java
---
@@ -0,0 +1,450 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.media;
+
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.flowfile.attributes.CoreAttributes;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.util.MockFlowFile;
+import org.apache.nifi.util.TestRunner;
+import org.apache.nifi.util.TestRunners;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class TestExtractMediaMetadata {
+
+ @Test
+ public void testProperties() {
+ final TestRunner runner = TestRunners.newTestRunner(new
ExtractMediaMetadata());
+ ProcessContext context = runner.getProcessContext();
+ Map<PropertyDescriptor, String> propertyValues =
context.getProperties();
+ assertEquals(6, propertyValues.size());
+ }
+
+ @Test
+ public void testRelationships() {
+ final TestRunner runner = TestRunners.newTestRunner(new
ExtractMediaMetadata());
+ ProcessContext context = runner.getProcessContext();
+ Set<Relationship> relationships =
context.getAvailableRelationships();
+ assertEquals(2, relationships.size());
+ assertTrue(relationships.contains(ExtractMediaMetadata.SUCCESS));
+ assertTrue(relationships.contains(ExtractMediaMetadata.FAILURE));
+ }
+
+ @Test
+ public void testTextBytes() throws IOException {
+ final TestRunner runner = TestRunners.newTestRunner(new
ExtractMediaMetadata());
+ runner.setProperty(ExtractMediaMetadata.MIME_TYPE_FILTER,
"text/.*");
+ runner.setProperty(ExtractMediaMetadata.METADATA_KEY_FILTER, "");
+ runner.setProperty(ExtractMediaMetadata.METADATA_KEY_PREFIX,
"txt.");
+ runner.assertValid();
+
+ final Map<String, String> attrs = new HashMap<>();
+ attrs.put("filename", "test1.txt");
+ runner.enqueue("test1".getBytes(), attrs);
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ExtractMediaMetadata.SUCCESS,
1);
+ runner.assertTransferCount(ExtractMediaMetadata.FAILURE, 0);
+
+ final List<MockFlowFile> successFiles =
runner.getFlowFilesForRelationship(ExtractMediaMetadata.SUCCESS);
+ MockFlowFile flowFile0 = successFiles.get(0);
+ flowFile0.assertAttributeExists("filename");
+ flowFile0.assertAttributeEquals("filename", "test1.txt");
+ flowFile0.assertAttributeExists("txt.Content-Type");
+
assertTrue(flowFile0.getAttribute("txt.Content-Type").startsWith("text/plain"));
+ flowFile0.assertAttributeExists("txt.X-Parsed-By");
+
assertTrue(flowFile0.getAttribute("txt.X-Parsed-By").contains("org.apache.tika.parser.DefaultParser"));
+
assertTrue(flowFile0.getAttribute("txt.X-Parsed-By").contains("org.apache.tika.parser.txt.TXTParser"));
+ flowFile0.assertAttributeExists("txt.Content-Encoding");
+ flowFile0.assertAttributeEquals("txt.Content-Encoding",
"ISO-8859-1");
+ flowFile0.assertContentEquals("test1".getBytes("UTF-8"));
+ }
+
+ @Test
+ public void testNoFlowFile() throws IOException {
+ final TestRunner runner = TestRunners.newTestRunner(new
ExtractMediaMetadata());
+ runner.setProperty(ExtractMediaMetadata.MIME_TYPE_FILTER,
"text/.*");
+ runner.setProperty(ExtractMediaMetadata.METADATA_KEY_FILTER, "");
+ runner.setProperty(ExtractMediaMetadata.METADATA_KEY_PREFIX,
"txt.");
+ runner.assertValid();
+
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ExtractMediaMetadata.SUCCESS,
0);
+ runner.assertTransferCount(ExtractMediaMetadata.FAILURE, 0);
+ }
+
+ @Test
+ public void testTextFile() throws IOException {
+ final TestRunner runner = TestRunners.newTestRunner(new
ExtractMediaMetadata());
+ runner.setProperty(ExtractMediaMetadata.MIME_TYPE_FILTER,
"text/.*");
+ runner.setProperty(ExtractMediaMetadata.METADATA_KEY_FILTER, "");
+ runner.setProperty(ExtractMediaMetadata.METADATA_KEY_PREFIX,
"txt.");
+ runner.assertValid();
+
+ runner.enqueue(new
File("target/test-classes/textFile.txt").toPath());
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ExtractMediaMetadata.SUCCESS,
1);
+ runner.assertTransferCount(ExtractMediaMetadata.FAILURE, 0);
+
+ final List<MockFlowFile> successFiles =
runner.getFlowFilesForRelationship(ExtractMediaMetadata.SUCCESS);
+ MockFlowFile flowFile0 = successFiles.get(0);
+ flowFile0.assertAttributeExists("filename");
+ flowFile0.assertAttributeEquals("filename", "textFile.txt");
+ flowFile0.assertAttributeExists("txt.Content-Type");
+
assertTrue(flowFile0.getAttribute("txt.Content-Type").startsWith("text/plain"));
+ flowFile0.assertAttributeExists("txt.X-Parsed-By");
+
assertTrue(flowFile0.getAttribute("txt.X-Parsed-By").contains("org.apache.tika.parser.DefaultParser"));
+
assertTrue(flowFile0.getAttribute("txt.X-Parsed-By").contains("org.apache.tika.parser.txt.TXTParser"));
+ flowFile0.assertAttributeExists("txt.Content-Encoding");
+ flowFile0.assertAttributeEquals("txt.Content-Encoding",
"ISO-8859-1");
+ flowFile0.assertContentEquals("This file is not an image and is
used for testing the image metadata extractor.".getBytes("UTF-8"));
+ }
+
+ @Test
+ public void testBigTextFileFailsWithSmallBuffer() throws IOException {
+ final TestRunner runner = TestRunners.newTestRunner(new
ExtractMediaMetadata());
+ runner.setProperty(ExtractMediaMetadata.METADATA_KEY_PREFIX,
"txt.");
+ runner.setProperty(ExtractMediaMetadata.CONTENT_BUFFER_SIZE,
"100000");
+ runner.assertValid();
+
+ runner.enqueue(new
File("target/test-classes/textFileBig.txt").toPath());
+ runner.run(2);
+
+ runner.assertAllFlowFilesTransferred(ExtractMediaMetadata.FAILURE,
1);
+ }
+
+ @Test
+ public void testBigTextFile() throws IOException {
+ File textFile = new File("target/test-classes/textFileBig.txt");
+
+ final TestRunner runner = TestRunners.newTestRunner(new
ExtractMediaMetadata());
+ runner.setProperty(ExtractMediaMetadata.METADATA_KEY_PREFIX,
"txt.");
+ runner.setProperty(ExtractMediaMetadata.CONTENT_BUFFER_SIZE,
Long.toString(textFile.length() + 1000L));
+ runner.assertValid();
+
+ runner.enqueue(textFile.toPath());
+ runner.run(2);
+
+ runner.assertAllFlowFilesTransferred(ExtractMediaMetadata.SUCCESS,
1);
+ runner.assertTransferCount(ExtractMediaMetadata.FAILURE, 0);
+
+ final List<MockFlowFile> successFiles =
runner.getFlowFilesForRelationship(ExtractMediaMetadata.SUCCESS);
+ MockFlowFile flowFile0 = successFiles.get(0);
+ flowFile0.assertAttributeExists("filename");
+ flowFile0.assertAttributeEquals("filename", "textFileBig.txt");
+ flowFile0.assertAttributeExists("txt.Content-Type");
+
assertTrue(flowFile0.getAttribute("txt.Content-Type").startsWith("text/plain"));
+ flowFile0.assertAttributeExists("txt.X-Parsed-By");
+
assertTrue(flowFile0.getAttribute("txt.X-Parsed-By").contains("org.apache.tika.parser.DefaultParser"));
+
assertTrue(flowFile0.getAttribute("txt.X-Parsed-By").contains("org.apache.tika.parser.txt.TXTParser"));
+ flowFile0.assertAttributeExists("txt.Content-Encoding");
+ flowFile0.assertAttributeEquals("txt.Content-Encoding",
"ISO-8859-1");
+ assertEquals(flowFile0.getSize(), textFile.length());
+ }
+
+ @Test
+ public void testJunkBytes() throws IOException {
+ final TestRunner runner = TestRunners.newTestRunner(new
ExtractMediaMetadata());
+ runner.setProperty(ExtractMediaMetadata.METADATA_KEY_FILTER, "");
+ runner.setProperty(ExtractMediaMetadata.METADATA_KEY_PREFIX,
"junk.");
+ runner.assertValid();
+
+ final Map<String, String> attrs = new HashMap<>();
+ attrs.put("filename", "junk");
+ Random random = new Random();
+ byte[] bytes = new byte[2048];
+ random.nextBytes(bytes);
+ runner.enqueue(bytes, attrs);
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ExtractMediaMetadata.SUCCESS,
1);
+ runner.assertTransferCount(ExtractMediaMetadata.FAILURE, 0);
+
+ final List<MockFlowFile> successFiles =
runner.getFlowFilesForRelationship(ExtractMediaMetadata.SUCCESS);
+ MockFlowFile flowFile0 = successFiles.get(0);
+ flowFile0.assertAttributeExists("filename");
+ flowFile0.assertAttributeEquals("filename", "junk");
+ flowFile0.assertAttributeExists("junk.Content-Type");
+
assertTrue(flowFile0.getAttribute("junk.Content-Type").startsWith("application/octet-stream"));
+ flowFile0.assertAttributeExists("junk.X-Parsed-By");
+
assertTrue(flowFile0.getAttribute("junk.X-Parsed-By").contains("org.apache.tika.parser.EmptyParser"));
+ flowFile0.assertContentEquals(bytes);
+ }
+
+ @Test
+ public void testMimeTypeFilter() throws IOException {
+ final TestRunner runner = TestRunners.newTestRunner(new
ExtractMediaMetadata());
+ runner.setProperty(ExtractMediaMetadata.MIME_TYPE_FILTER,
"audio.*");
+ runner.setProperty(ExtractMediaMetadata.METADATA_KEY_PREFIX,
"txt.");
+ runner.assertValid();
+
+ runner.enqueue(new
File("target/test-classes/textFile.txt").toPath());
+ runner.run(2);
+
+ runner.assertAllFlowFilesTransferred(ExtractMediaMetadata.SUCCESS,
1);
+ runner.assertTransferCount(ExtractMediaMetadata.FAILURE, 0);
+
+ final List<MockFlowFile> successFiles =
runner.getFlowFilesForRelationship(ExtractMediaMetadata.SUCCESS);
+ MockFlowFile flowFile0 = successFiles.get(0);
+ flowFile0.assertAttributeExists(CoreAttributes.FILENAME.key());
+ flowFile0.assertAttributeNotExists("txt.Content-Type");
+ flowFile0.assertAttributeNotExists("txt.X-Parsed-By");
+ }
+
+ @Test
+ public void testMetadataKeyFilter() throws IOException {
+ final TestRunner runner = TestRunners.newTestRunner(new
ExtractMediaMetadata());
+ runner.setProperty(ExtractMediaMetadata.METADATA_KEY_FILTER,
"(X-Parsed.*)");
+ runner.setProperty(ExtractMediaMetadata.METADATA_KEY_PREFIX,
"txt.");
+ runner.assertValid();
+
+ runner.enqueue(new
File("target/test-classes/textFile.txt").toPath());
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ExtractMediaMetadata.SUCCESS,
1);
+ runner.assertTransferCount(ExtractMediaMetadata.FAILURE, 0);
+
+ final List<MockFlowFile> successFiles =
runner.getFlowFilesForRelationship(ExtractMediaMetadata.SUCCESS);
+ MockFlowFile flowFile0 = successFiles.get(0);
+ flowFile0.assertAttributeExists("filename");
+ flowFile0.assertAttributeEquals("filename", "textFile.txt");
+ flowFile0.assertAttributeExists("txt.X-Parsed-By");
+
assertTrue(flowFile0.getAttribute("txt.X-Parsed-By").contains("org.apache.tika.parser.DefaultParser"));
+
assertTrue(flowFile0.getAttribute("txt.X-Parsed-By").contains("org.apache.tika.parser.txt.TXTParser"));
+ flowFile0.assertAttributeNotExists("txt.Content-Encoding");
+ }
+
+ @Test
+ public void testMetadataKeyPrefix() throws IOException {
+ TestRunner runner = TestRunners.newTestRunner(new
ExtractMediaMetadata());
+ runner.assertValid();
+
+ runner.enqueue(new
File("target/test-classes/textFile.txt").toPath());
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ExtractMediaMetadata.SUCCESS,
1);
+ runner.assertTransferCount(ExtractMediaMetadata.FAILURE, 0);
+
+ List<MockFlowFile> successFiles =
runner.getFlowFilesForRelationship(ExtractMediaMetadata.SUCCESS);
+ MockFlowFile flowFile0 = successFiles.get(0);
+ flowFile0.assertAttributeExists("filename");
+ flowFile0.assertAttributeExists("X-Parsed-By");
+
+ runner = TestRunners.newTestRunner(new ExtractMediaMetadata());
+ runner.setProperty(ExtractMediaMetadata.METADATA_KEY_PREFIX,
"txt.");
+ runner.assertValid();
+
+ runner.enqueue(new
File("target/test-classes/textFile.txt").toPath());
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ExtractMediaMetadata.SUCCESS,
1);
+ runner.assertTransferCount(ExtractMediaMetadata.FAILURE, 0);
+
+ successFiles =
runner.getFlowFilesForRelationship(ExtractMediaMetadata.SUCCESS);
+ flowFile0 = successFiles.get(0);
+ flowFile0.assertAttributeExists("filename");
+ flowFile0.assertAttributeExists("txt.X-Parsed-By");
+ }
+
+ @Test
+ public void testMaxAttributes() throws IOException {
+ TestRunner runner = TestRunners.newTestRunner(new
ExtractMediaMetadata());
+ runner.setProperty(ExtractMediaMetadata.METADATA_KEY_PREFIX,
"txt.");
+ runner.assertValid();
+
+ runner.enqueue(new
File("target/test-classes/textFile.txt").toPath());
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ExtractMediaMetadata.SUCCESS,
1);
+ runner.assertTransferCount(ExtractMediaMetadata.FAILURE, 0);
+
+ List<MockFlowFile> successFiles0 =
runner.getFlowFilesForRelationship(ExtractMediaMetadata.SUCCESS);
+ MockFlowFile flowFile0 = successFiles0.get(0);
+ int fileAttrCount0 = 0;
+ for (Map.Entry attr : flowFile0.getAttributes().entrySet()) {
+ if (attr.getKey().toString().startsWith("txt.")) {
+ fileAttrCount0++;
+ }
+ }
+ assertTrue(fileAttrCount0 > 1);
+
+ runner = TestRunners.newTestRunner(new ExtractMediaMetadata());
+ runner.setProperty(ExtractMediaMetadata.MAX_NUMBER_OF_ATTRIBUTES,
Integer.toString(fileAttrCount0 - 1));
+ runner.setProperty(ExtractMediaMetadata.METADATA_KEY_PREFIX,
"txt.");
+ runner.assertValid();
+
+ runner.enqueue(new
File("target/test-classes/textFile.txt").toPath());
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ExtractMediaMetadata.SUCCESS,
1);
+ runner.assertTransferCount(ExtractMediaMetadata.FAILURE, 0);
+
+ List<MockFlowFile> successFiles =
runner.getFlowFilesForRelationship(ExtractMediaMetadata.SUCCESS);
+ MockFlowFile flowFile1 = successFiles.get(0);
+ int fileAttrCount1 = 0;
+ for (Map.Entry attr : flowFile1.getAttributes().entrySet()) {
+ if (attr.getKey().toString().startsWith("txt.")) {
+ fileAttrCount1++;
+ }
+ }
+ assertEquals(fileAttrCount0, fileAttrCount1 + 1);
+ }
+
+ @Test
+ public void testBmp() throws IOException {
+ final TestRunner runner = TestRunners.newTestRunner(new
ExtractMediaMetadata());
+ runner.setProperty(ExtractMediaMetadata.METADATA_KEY_PREFIX,
"bmp.");
+ runner.assertValid();
+
+ runner.enqueue(new
File("target/test-classes/16color-10x10.bmp").toPath());
+ runner.run(2);
+
+ runner.assertAllFlowFilesTransferred(ExtractMediaMetadata.SUCCESS,
1);
+ runner.assertTransferCount(ExtractMediaMetadata.FAILURE, 0);
+
+ final List<MockFlowFile> successFiles =
runner.getFlowFilesForRelationship(ExtractMediaMetadata.SUCCESS);
+ MockFlowFile flowFile0 = successFiles.get(0);
+ flowFile0.assertAttributeExists("filename");
+ flowFile0.assertAttributeEquals("filename", "16color-10x10.bmp");
+ flowFile0.assertAttributeExists("bmp.Content-Type");
+ flowFile0.assertAttributeEquals("bmp.Content-Type",
"image/x-ms-bmp");
+ flowFile0.assertAttributeExists("bmp.X-Parsed-By");
+
assertTrue(flowFile0.getAttribute("bmp.X-Parsed-By").contains("org.apache.tika.parser.DefaultParser"));
+
assertTrue(flowFile0.getAttribute("bmp.X-Parsed-By").contains("org.apache.tika.parser.image.ImageParser"));
+ flowFile0.assertAttributeExists("bmp.height");
+ flowFile0.assertAttributeEquals("bmp.height", "10");
+ flowFile0.assertAttributeExists("bmp.width");
+ flowFile0.assertAttributeEquals("bmp.width", "10");
+ }
+
+ @Test
+ public void testJpg() throws IOException {
+ final TestRunner runner = TestRunners.newTestRunner(new
ExtractMediaMetadata());
+ runner.setProperty(ExtractMediaMetadata.METADATA_KEY_PREFIX,
"jpg.");
+ runner.assertValid();
+
+ runner.enqueue(new
File("target/test-classes/simple.jpg").toPath());
+ runner.run(2);
--- End diff --
Is there a specific reason this runs twice (`runner.run(2)`) when only one flow file is enqueued?
> Create a processor to extract WAV file characteristics
> ------------------------------------------------------
>
> Key: NIFI-615
> URL: https://issues.apache.org/jira/browse/NIFI-615
> Project: Apache NiFi
> Issue Type: Improvement
> Reporter: Brandon DeVries
> Assignee: Joe Skora
> Priority: Minor
> Fix For: 1.0.0, 0.7.0
>
>
> Create a processor to extract information from a WAV file, including
> encoding, bit rate, metadata, etc.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)