[
https://issues.apache.org/jira/browse/NIFI-2380?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15467159#comment-15467159
]
ASF GitHub Bot commented on NIFI-2380:
--------------------------------------
Github user olegz commented on a diff in the pull request:
https://github.com/apache/nifi/pull/817#discussion_r77615322
--- Diff:
nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/main/java/org/apache/nifi/processors/email/ExtractTNEFAttachments.java
---
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.email;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.nifi.annotation.behavior.EventDriven;
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
+import org.apache.nifi.annotation.behavior.SideEffectFree;
+import org.apache.nifi.annotation.behavior.SupportsBatching;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.behavior.WritesAttributes;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.flowfile.attributes.CoreAttributes;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.AbstractProcessor;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.ProcessorInitializationContext;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.FlowFileHandlingException;
+import org.apache.nifi.processor.io.InputStreamCallback;
+import org.apache.nifi.processor.io.OutputStreamCallback;
+import org.apache.nifi.stream.io.BufferedInputStream;
+import org.apache.poi.hmef.Attachment;
+import org.apache.poi.hmef.HMEFMessage;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+
+
+@SupportsBatching
+@EventDriven
+@SideEffectFree
+@Tags({"split", "email"})
+@InputRequirement(Requirement.INPUT_REQUIRED)
+@CapabilityDescription("Extract attachments from a mime formatted email
file, splitting them into individual flowfiles.")
+@WritesAttributes({
+ @WritesAttribute(attribute = "filename ", description = "The
filename of the attachment"),
+ @WritesAttribute(attribute =
"email.tnef.attachment.parent.filename ", description = "The filename of the
parent FlowFile"),
+ @WritesAttribute(attribute = "email.tnef.attachment.parent.uuid",
description = "The UUID of the original FlowFile.")})
+
+public class ExtractTNEFAttachments extends AbstractProcessor {
+ public static final String ATTACHMENT_ORIGINAL_FILENAME =
"email.tnef.attachment.parent.filename";
+ public static final String ATTACHMENT_ORIGINAL_UUID =
"email.tnef.attachment.parent.uuid";
+
+ public static final Relationship REL_ATTACHMENTS = new
Relationship.Builder()
+ .name("attachments")
+ .description("Each individual attachment will be routed to the
attachments relationship")
+ .build();
+ public static final Relationship REL_ORIGINAL = new
Relationship.Builder()
+ .name("original")
+ .description("The original file")
+ .build();
+ public static final Relationship REL_FAILURE = new
Relationship.Builder()
+ .name("failure")
+ .description("Flowfiles that could not be parsed")
+ .build();
+ private Set<Relationship> relationships;
+ private List<PropertyDescriptor> descriptors;
+
+
+ @Override
+ protected void init(final ProcessorInitializationContext context) {
+ final Set<Relationship> relationships = new HashSet<>();
+ relationships.add(REL_ATTACHMENTS);
+ relationships.add(REL_ORIGINAL);
+ relationships.add(REL_FAILURE);
+ this.relationships = Collections.unmodifiableSet(relationships);
+
+ final List<PropertyDescriptor> descriptors = new ArrayList<>();
+
+ this.descriptors = Collections.unmodifiableList(descriptors);
+ }
+
+ @Override
+ public void onTrigger(final ProcessContext context, final
ProcessSession session) {
+ final ComponentLog logger = getLogger();
+ final FlowFile originalFlowFile = session.get();
+ if (originalFlowFile == null) {
+ return;
+ }
+ final List<FlowFile> attachmentsList = new ArrayList<>();
+ final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
+ final List<FlowFile> originalFlowFilesList = new ArrayList<>();
+
+ session.read(originalFlowFile, new InputStreamCallback() {
+ @Override
+ public void process(final InputStream rawIn) throws
IOException {
+ try (final InputStream in = new
BufferedInputStream(rawIn)) {
+ Properties props = new Properties();
+
+ HMEFMessage hmefMessage = null;
+
+ // This will trigger an exception in case
content is not a TNEF.
+ hmefMessage = new HMEFMessage(in);
+
+ // Add otiginal flowfile (may revert later on in
case of errors) //
+ originalFlowFilesList.add(originalFlowFile);
+
+ if (hmefMessage != null) {
+ // Attachments isn empty, proceeding.
+ if (! hmefMessage.getAttachments().isEmpty()) {
+ final String originalFlowFileName =
originalFlowFile.getAttribute(CoreAttributes.FILENAME.key());
+ try {
+ for (final Attachment attachment :
hmefMessage.getAttachments()) {
+ FlowFile split =
session.create(originalFlowFile);
+ final Map<String, String>
attributes = new HashMap<>();
+ if
(StringUtils.isNotBlank(attachment.getLongFilename())) {
+
attributes.put(CoreAttributes.FILENAME.key(), attachment.getFilename());
+ }
+
+ String parentUuid =
originalFlowFile.getAttribute(CoreAttributes.UUID.key());
+
attributes.put(ATTACHMENT_ORIGINAL_UUID, parentUuid);
+
attributes.put(ATTACHMENT_ORIGINAL_FILENAME, originalFlowFileName);
+
+ // TODO: Extract Mime Type (HMEF
doesn't seem to be able to get this info.
+
+ split = session.append(split, new
OutputStreamCallback() {
+ @Override
+ public void
process(OutputStream out) throws IOException {
+
out.write(attachment.getContents());
+ }
+ });
+ split =
session.putAllAttributes(split, attributes);
+ attachmentsList.add(split);
+ }
+ } catch (FlowFileHandlingException e) {
+ // Something went wrong
+ // Removing splits that may have been
created
+ session.remove(attachmentsList);
+ // Removing the original flow from its
list
+
originalFlowFilesList.remove(originalFlowFile);
+ logger.error("Flowfile {} triggered
error {} while processing message removing generated FlowFiles from sessions",
new Object[]{originalFlowFile, e});
+
invalidFlowFilesList.add(originalFlowFile);
+ }
+ }
+ }
+ }catch (Exception e) {
+ // Another error hit...
+ // Removing the original flow from its list
+ originalFlowFilesList.remove(originalFlowFile);
+ logger.error("Could not parse the flowfile {} as
an email, treating as failure", new Object[]{originalFlowFile, e});
+ // Message is invalid or triggered an error during
parsing
+ invalidFlowFilesList.add(originalFlowFile);
+ }
+ }
+ });
+
+ session.transfer(attachmentsList, REL_ATTACHMENTS);
+
+ // As per above code, originalFlowfile may be routed to invalid or
+ // original depending on RFC2822 compliance.
+ session.transfer(invalidFlowFilesList, REL_FAILURE);
+ session.transfer(originalFlowFilesList, REL_ORIGINAL);
+
+ if (attachmentsList.size() > 10) {
+ logger.info("Split {} into {} files", new
Object[]{originalFlowFile, attachmentsList.size()});
+ } else if (attachmentsList.size() > 1){
+ logger.info("Split {} into {} files: {}", new
Object[]{originalFlowFile, attachmentsList.size(), attachmentsList});
+ }
--- End diff --
I am not sure I understand this IF block. It appears you want to display
the 'attachmentsList' if its size is > 1, but you don't want to display it
when it is > 10, which is also > 1. Could you please clarify?
> ExtractEmailAttachments processor should support TNEF files (aka winmail.dat)
> -----------------------------------------------------------------------------
>
> Key: NIFI-2380
> URL: https://issues.apache.org/jira/browse/NIFI-2380
> Project: Apache NiFi
> Issue Type: Improvement
> Affects Versions: 1.0.0
> Reporter: Andre
> Assignee: Andre
> Fix For: 1.1.0
>
>
> During the review of NIFI-1899, Dan Marshall highlighted some use cases for
> email processing that were not addressed as part of the initial
> development cycle.
> One of these use cases was the decoding of Microsoft Transport Neutral
> Encapsulation Format (TNEF) files.
> This type of attachment is popularly known as winmail.dat and uses a
> non-RFC-compliant structure to transfer attachments across different Microsoft
> Outlook clients.
> Given the prevalence of Outlook and the issues with winmail.dat files, it
> would be nice to be able to decode TNEF as we currently do with MIME
> attachments.
> Permalink to Dan's comments
> http://mail-archives.apache.org/mod_mbox/nifi-dev/201607.mbox/%3C1468716836729-12827.post%40n7.nabble.com%3E
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)