[
https://issues.apache.org/jira/browse/NIFI-1899?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15370835#comment-15370835
]
ASF GitHub Bot commented on NIFI-1899:
--------------------------------------
Github user JPercivall commented on a diff in the pull request:
https://github.com/apache/nifi/pull/483#discussion_r70264008
--- Diff:
nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/main/java/org/apache/nifi/processors/email/ExtractEmailAttachments.java
---
@@ -0,0 +1,209 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.email;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.Date;
+
+import javax.activation.DataSource;
+import javax.mail.Address;
+import javax.mail.MessagingException;
+import javax.mail.Session;
+import javax.mail.internet.MimeMessage;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.mail.util.MimeMessageParser;
+
+
+import org.apache.nifi.annotation.behavior.EventDriven;
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
+import org.apache.nifi.annotation.behavior.SideEffectFree;
+import org.apache.nifi.annotation.behavior.SupportsBatching;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.behavior.WritesAttributes;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.flowfile.attributes.CoreAttributes;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.AbstractProcessor;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.ProcessorInitializationContext;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.FlowFileHandlingException;
+import org.apache.nifi.processor.io.InputStreamCallback;
+import org.apache.nifi.processor.io.OutputStreamCallback;
+import org.apache.nifi.stream.io.BufferedInputStream;
+
+
+
+@SupportsBatching
+@EventDriven
+@SideEffectFree
+@Tags({"split", "email"})
+@InputRequirement(Requirement.INPUT_REQUIRED)
+@CapabilityDescription("Extract attachments from a mime formatted email
file, splitting them into individual flowfiles.")
+@WritesAttributes({
+ @WritesAttribute(attribute = "filename ", description = "The
filename of the attachment"),
+ @WritesAttribute(attribute = "email.attachment.parent.filename ",
description = "The filename of the parent FlowFile"),
+ @WritesAttribute(attribute = "email.attachment.parent.uuid",
description = "The UUID of the original FlowFile."),
+ @WritesAttribute(attribute = "mime.type", description = "The mime
type of the attachment.")})
+
+public class ExtractEmailAttachments extends AbstractProcessor {
+ public static final String ATTACHMENT_ORIGINAL_FILENAME =
"email.attachment.parent.filename";
+ public static final String ATTACHMENT_ORIGINAL_UUID =
"email.attachment.parent.uuid";
+
+ public static final Relationship REL_ATTACHMENTS = new
Relationship.Builder()
+ .name("attachments")
+ .description("Each individual attachment will be routed to the
attachments relationship")
+ .build();
+ public static final Relationship REL_ORIGINAL = new
Relationship.Builder()
+ .name("original")
+ .description("The original file")
+ .build();
+ public static final Relationship REL_FAILURE = new
Relationship.Builder()
+ .name("failure")
+ .description("Flowfiles that could not be parsed")
+ .build();
+ private Set<Relationship> relationships;
+ private List<PropertyDescriptor> descriptors;
+
+
+ @Override
+ protected void init(final ProcessorInitializationContext context) {
+ final Set<Relationship> relationships = new HashSet<>();
+ relationships.add(REL_ATTACHMENTS);
+ relationships.add(REL_ORIGINAL);
+ relationships.add(REL_FAILURE);
+ this.relationships = Collections.unmodifiableSet(relationships);
+
+ final List<PropertyDescriptor> descriptors = new ArrayList<>();
+
+ this.descriptors = Collections.unmodifiableList(descriptors);
+ }
+
+ @Override
+ public void onTrigger(final ProcessContext context, final
ProcessSession session) {
+ final ComponentLog logger = getLogger();
+ final FlowFile originalFlowFile = session.get();
+ if (originalFlowFile == null) {
+ return;
+ }
+ final List<FlowFile> attachmentsList = new ArrayList<>();
+ final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
+ final List<FlowFile> originalFlowFilesList = new ArrayList<>();
+
+ session.read(originalFlowFile, new InputStreamCallback() {
+ @Override
+ public void process(final InputStream rawIn) throws
IOException {
+ try (final InputStream in = new
BufferedInputStream(rawIn)) {
+ Properties props = new Properties();
+ Session mailSession =
Session.getDefaultInstance(props, null);
+ MimeMessage originalMessage = new
MimeMessage(mailSession, in);
+ MimeMessageParser parser = new
MimeMessageParser(originalMessage).parse();
+ // RFC-2822 determines that a message must have a
"From:" header
+ // if a message lacks the field, it is flagged as
invalid
+ Address[] from = originalMessage.getFrom();
+ Date sentDate = originalMessage.getSentDate();
+ if (from == null || sentDate == null ) {
+ // Throws MessageException due to lack of
minimum required headers
+ throw new MessagingException("Message failed
RFC2822 validation");
+ }
+ if (parser.hasAttachments()) {
+ final String originalFlowFileName =
originalFlowFile.getAttribute(CoreAttributes.FILENAME.key());
+ for (final DataSource data :
parser.getAttachmentList()) {
+ try {
+ FlowFile split =
session.create(originalFlowFile);
+ final Map<String, String> attributes = new
HashMap<>();
+ if
(StringUtils.isNotBlank(data.getName())) {
+
attributes.put(CoreAttributes.FILENAME.key(), data.getName());
+ }
+ if
(StringUtils.isNotBlank(data.getContentType())) {
+
attributes.put(CoreAttributes.MIME_TYPE.key(), data.getContentType());
+ }
+ String parentUuid =
originalFlowFile.getAttribute(CoreAttributes.UUID.key());
+ attributes.put(ATTACHMENT_ORIGINAL_UUID,
parentUuid);
+
attributes.put(ATTACHMENT_ORIGINAL_FILENAME, originalFlowFileName);
+ split = session.append(split, new
OutputStreamCallback() {
+ @Override
+ public void process(OutputStream out)
throws IOException {
+
IOUtils.copy(data.getInputStream(),out);
+ }
+ });
+ split = session.putAllAttributes(split,
attributes);
+
originalFlowFilesList.add(originalFlowFile);
+ attachmentsList.add(split);
+ } catch (FlowFileHandlingException e) {
+ session.remove(attachmentsList);
+ logger.error("Flowfile {} triggered
error {} while processing message removing generated FlowFiles from sessions",
new Object[] {originalFlowFile, e});
+
invalidFlowFilesList.add(originalFlowFile);
+ }
--- End diff --
If there are multiple attachments and one fails, the originalFlowFile will
get added to both the invalid list and the success list. In the interest of
simplifying the processor a bit, it is probably best to put this try/catch
around the for loop so you don't have to handle the cases where some
attachments fail and others succeed.
If you decide to handle that, you need a way to convey to the user which
attachments failed, which succeeded, where the original is transferred, etc.
> Create ListenSMTP & ExtractEmailAttachment processors
> -----------------------------------------------------
>
> Key: NIFI-1899
> URL: https://issues.apache.org/jira/browse/NIFI-1899
> Project: Apache NiFi
> Issue Type: New Feature
> Reporter: Andre
>
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)