[ https://issues.apache.org/jira/browse/NIFI-1899?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15368794#comment-15368794 ]

ASF GitHub Bot commented on NIFI-1899:
--------------------------------------

Github user trixpan commented on a diff in the pull request:

    https://github.com/apache/nifi/pull/483#discussion_r70157622
  
    --- Diff: nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/main/java/org/apache/nifi/processors/email/ExtractEmailHeaders.java ---
    @@ -0,0 +1,233 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.nifi.processors.email;
    +
    +
    +import org.apache.commons.lang3.ArrayUtils;
    +import org.apache.commons.lang3.StringUtils;
    +import org.apache.commons.mail.util.MimeMessageParser;
    +import org.apache.nifi.annotation.behavior.EventDriven;
    +import org.apache.nifi.annotation.behavior.InputRequirement;
    +import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
    +import org.apache.nifi.annotation.behavior.SideEffectFree;
    +import org.apache.nifi.annotation.behavior.WritesAttribute;
    +import org.apache.nifi.annotation.behavior.WritesAttributes;
    +import org.apache.nifi.annotation.documentation.CapabilityDescription;
    +import org.apache.nifi.annotation.documentation.Tags;
    +import org.apache.nifi.components.PropertyDescriptor;
    +import org.apache.nifi.flowfile.FlowFile;
    +import org.apache.nifi.logging.ComponentLog;
    +import org.apache.nifi.processor.AbstractProcessor;
    +import org.apache.nifi.processor.ProcessContext;
    +import org.apache.nifi.processor.ProcessSession;
    +import org.apache.nifi.processor.ProcessorInitializationContext;
    +import org.apache.nifi.processor.Relationship;
    +import org.apache.nifi.processor.io.InputStreamCallback;
    +import org.apache.nifi.processor.util.StandardValidators;
    +import org.apache.nifi.stream.io.BufferedInputStream;
    +
    +import javax.mail.Address;
    +import javax.mail.Header;
    +import javax.mail.Message;
    +import javax.mail.MessagingException;
    +import javax.mail.Session;
    +import javax.mail.internet.MimeMessage;
    +import java.io.IOException;
    +import java.io.InputStream;
    +import java.lang.reflect.Array;
    +import java.util.ArrayList;
    +import java.util.Arrays;
    +import java.util.Collections;
    +import java.util.Date;
    +import java.util.Enumeration;
    +import java.util.HashMap;
    +import java.util.HashSet;
    +import java.util.List;
    +import java.util.Map;
    +import java.util.Properties;
    +import java.util.Set;
    +
    +
    +@EventDriven
    +@SideEffectFree
    +@Tags({"split", "email"})
    +@InputRequirement(Requirement.INPUT_REQUIRED)
    +@CapabilityDescription("Using the flowfile content as source of data, 
extract header from an RFC  compliant  email file adding the relevant 
attributes to the flowfile. " +
    +        "This processor does not perform extensive RFC validation but 
still requires a bare minimum compliance with RFC 2822")
    +@WritesAttributes({
    +        @WritesAttribute(attribute = "email.headers.bcc.*", description = 
"Each individual BCC recipient (if available)"),
    +        @WritesAttribute(attribute = "email.headers.cc.*", description = 
"Each individual CC recipient (if available)"),
    +        @WritesAttribute(attribute = "email.headers.from.*", description = 
"Each individual mailbox contained in the From  of the Email (array as per 
RFC-2822)"),
    +        @WritesAttribute(attribute = "email.headers.message-id", 
description = "The value of the Message-ID header (if available)"),
    +        @WritesAttribute(attribute = "email.headers.received_date", 
description = "The Received-Date of the message (if available)"),
    +        @WritesAttribute(attribute = "email.headers.sent_date", 
description = "Date the message was sent"),
    +        @WritesAttribute(attribute = "email.headers.subject", description 
= "Subject of the message (if available)"),
    +        @WritesAttribute(attribute = "email.headers.to.*", description = 
"Each individual TO recipient (if available)"),
    +        @WritesAttribute(attribute = "email.attachment_count", description 
= "Number of attachments of the message" )})
    +
    +public class ExtractEmailHeaders extends AbstractProcessor {
    +    public static final String EMAIL_HEADER_BCC = "email.headers.bcc";
    +    public static final String EMAIL_HEADER_CC = "email.headers.cc";
    +    public static final String EMAIL_HEADER_FROM = "email.headers.from";
    +    public static final String EMAIL_HEADER_MESSAGE_ID = 
"email.headers.message-id";
    +    public static final String EMAIL_HEADER_RECV_DATE = 
"email.headers.received_date";
    +    public static final String EMAIL_HEADER_SENT_DATE = 
"email.headers.sent_date";
    +    public static final String EMAIL_HEADER_SUBJECT = 
"email.headers.subject";
    +    public static final String EMAIL_HEADER_TO = "email.headers.to";
    +    public static final String EMAIL_ATTACHMENT_COUNT = 
"email.attachment_count";
    +
    +    public static final PropertyDescriptor CAPTURED_HEADERS = new 
PropertyDescriptor.Builder()
    +            .name("Header List")
    +            .description("COLON separated list of headers to be extracted 
from the flowfile  content. " +
    +                    "NOTE the header key is case insensitive and will be 
matched as lower-case." +
    +                    " Values will respect email contents.")
    +            .required(false)
    +            .expressionLanguageSupported(false)
    +            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
    +            .defaultValue("x-mailer")
    +            .build();
    +
    +    public static final Relationship REL_SUCCESS = new 
Relationship.Builder()
    +            .name("success")
    +            .description("Extraction was successful")
    +            .build();
    +    public static final Relationship REL_FAILURE = new 
Relationship.Builder()
    +            .name("failure")
    +            .description("Flowfiles that could not be parsed as a RFC-2822 
compliant message")
    +            .build();
    +
    +    private Set<Relationship> relationships;
    +    private List<PropertyDescriptor> descriptors;
    +
    +    @Override
    +    protected void init(final ProcessorInitializationContext context) {
    +        final Set<Relationship> relationships = new HashSet<>();
    +        relationships.add(REL_SUCCESS);
    +        relationships.add(REL_FAILURE);
    +        this.relationships = Collections.unmodifiableSet(relationships);
    +
    +        final List<PropertyDescriptor> descriptors = new ArrayList<>();
    +
    +        descriptors.add(CAPTURED_HEADERS);
    +        this.descriptors = Collections.unmodifiableList(descriptors);
    +    }
    +
    +    @Override
    +    public void onTrigger(final ProcessContext context, final 
ProcessSession session) {
    +        final ComponentLog logger = getLogger();
    +
    +        final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
    +        final List<FlowFile> processedFlowFilesList = new ArrayList<>();
    +
    +        final FlowFile originalFlowFile = session.get();
    +        if (originalFlowFile == null) {
    +            return;
    +        }
    +
    +        final List<String> capturedHeadersList = 
Arrays.asList(context.getProperty(CAPTURED_HEADERS).getValue().toLowerCase().split(":"));
    +
    +        final Map<String, String> attributes = new HashMap<>();
    +        session.read(originalFlowFile, new InputStreamCallback() {
    +            @Override
    +            public void process(final InputStream rawIn) throws 
IOException {
    +                try (final InputStream in = new 
BufferedInputStream(rawIn)) {
    +                    Properties props = new Properties();
    +                    Session mailSession = 
Session.getDefaultInstance(props, null);
    +                    MimeMessage originalMessage = new 
MimeMessage(mailSession, in);
    +                    MimeMessageParser parser = new 
MimeMessageParser(originalMessage).parse();
    +                    // RFC-2822 determines that a message must have a 
"From:" header
    +                    // if a message lacks the field, it is flagged as 
invalid
    +                    Address[] from = originalMessage.getFrom();
    +                    Date sentDate = originalMessage.getSentDate();
    +                    if (from == null || sentDate == null ) {
    +                        // See ya later my friend...
    --- End diff --
    
    addressed


> Create ListenSMTP & ExtractEmailAttachment processors
> -----------------------------------------------------
>
>                 Key: NIFI-1899
>                 URL: https://issues.apache.org/jira/browse/NIFI-1899
>             Project: Apache NiFi
>          Issue Type: New Feature
>            Reporter: Andre
>




--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to