[jira] [Commented] (NIFI-2068) Add Elasticsearch processors that use the REST API

ASF GitHub Bot (JIRA) Fri, 01 Jul 2016 14:04:21 -0700

    [ 
https://issues.apache.org/jira/browse/NIFI-2068?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15359627#comment-15359627
 ]


ASF GitHub Bot commented on NIFI-2068:
--------------------------------------

Github user mattyb149 commented on a diff in the pull request:

    https://github.com/apache/nifi/pull/576#discussion_r69355562
  
    --- Diff: 
nifi-nar-bundles/nifi-elasticsearch-bundle/nifi-elasticsearch-processors/src/main/java/org/apache/nifi/processors/elasticsearch/PutElasticsearchHttp.java
 ---
    @@ -0,0 +1,330 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.nifi.processors.elasticsearch;
    +
    +import com.squareup.okhttp.MediaType;
    +import com.squareup.okhttp.OkHttpClient;
    +import com.squareup.okhttp.RequestBody;
    +import com.squareup.okhttp.Response;
    +import com.squareup.okhttp.ResponseBody;
    +import org.apache.commons.io.IOUtils;
    +import org.apache.nifi.annotation.behavior.EventDriven;
    +import org.apache.nifi.annotation.behavior.InputRequirement;
    +import org.apache.nifi.annotation.behavior.SupportsBatching;
    +import org.apache.nifi.annotation.documentation.CapabilityDescription;
    +import org.apache.nifi.annotation.documentation.Tags;
    +import org.apache.nifi.annotation.lifecycle.OnScheduled;
    +import org.apache.nifi.components.PropertyDescriptor;
    +import org.apache.nifi.components.ValidationContext;
    +import org.apache.nifi.components.ValidationResult;
    +import org.apache.nifi.expression.AttributeExpression;
    +import org.apache.nifi.flowfile.FlowFile;
    +import org.apache.nifi.logging.ComponentLog;
    +import org.apache.nifi.processor.ProcessContext;
    +import org.apache.nifi.processor.ProcessSession;
    +import org.apache.nifi.processor.Relationship;
    +import org.apache.nifi.processor.exception.ProcessException;
    +import org.apache.nifi.processor.util.StandardValidators;
    +import org.apache.nifi.stream.io.ByteArrayInputStream;
    +import org.apache.nifi.util.StringUtils;
    +import org.codehaus.jackson.JsonNode;
    +import org.codehaus.jackson.node.ArrayNode;
    +
    +import java.io.IOException;
    +import java.net.URL;
    +import java.nio.charset.Charset;
    +import java.util.ArrayList;
    +import java.util.Collection;
    +import java.util.Collections;
    +import java.util.HashSet;
    +import java.util.LinkedList;
    +import java.util.List;
    +import java.util.Set;
    +
    +import static org.apache.commons.lang3.StringUtils.trimToEmpty;
    +
    +
    +@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
    +@EventDriven
    +@SupportsBatching
    +@Tags({"elasticsearch", "insert", "update", "upsert", "delete", "write", 
"put", "http"})
    +@CapabilityDescription("Writes the contents of a FlowFile to 
Elasticsearch, using the specified parameters such as "
    +        + "the index to insert into and the type of the document.")
    +public class PutElasticsearchHttp extends 
AbstractElasticsearchHttpProcessor {
    +
    +    /** Relationship named "success": destination for FlowFiles that were written to Elasticsearch. */
    +    public static final Relationship REL_SUCCESS = new 
Relationship.Builder().name("success")
    +            .description("All FlowFiles that are written to Elasticsearch 
are routed to this relationship").build();
    +
    +    /** Relationship named "failure": destination for FlowFiles that could not be written to Elasticsearch. */
    +    public static final Relationship REL_FAILURE = new 
Relationship.Builder().name("failure")
    +            .description("All FlowFiles that cannot be written to 
Elasticsearch are routed to this relationship").build();
    +
    +    /** Relationship named "retry": destination for FlowFiles whose write failed but may succeed if attempted again. */
    +    public static final Relationship REL_RETRY = new 
Relationship.Builder().name("retry")
    +            .description("A FlowFile is routed to this relationship if the 
database cannot be updated but attempting the operation again may succeed")
    +            .build();
    +
    +    /**
    +     * Optional property naming the FlowFile attribute that holds the document identifier.
    +     * Expression Language is not supported; the value is checked with the attribute-key validator.
    +     */
    +    public static final PropertyDescriptor ID_ATTRIBUTE = new 
PropertyDescriptor.Builder()
    +            .name("Identifier Attribute")
    +            .description("The name of the FlowFile attribute containing 
the identifier for the document. If the Index Operation is \"index\", "
    +                    + "this property may be left empty or evaluate to an 
empty value, in which case the document's identifier will be "
    +                    + "auto-generated by Elasticsearch. For all other 
Index Operations, the attribute must evaluate to a non-empty value.")
    +            .required(false)
    +            .expressionLanguageSupported(false)
    +            .addValidator(StandardValidators.ATTRIBUTE_KEY_VALIDATOR)
    +            .build();
    +
    +    /**
    +     * Required property giving the name of the target index. Expression Language is supported
    +     * and the expression must produce a string.
    +     */
    +    public static final PropertyDescriptor INDEX = new 
PropertyDescriptor.Builder()
    +            .name("Index")
    +            .description("The name of the index to insert into")
    +            .required(true)
    +            .expressionLanguageSupported(true)
    +            
.addValidator(StandardValidators.createAttributeExpressionLanguageValidator(
    +                    AttributeExpression.ResultType.STRING, true))
    +            .build();
    +
    +    /**
    +     * Required property giving the document type. Expression Language is supported and the
    +     * expression must produce a string.
    +     */
    +    public static final PropertyDescriptor TYPE = new 
PropertyDescriptor.Builder()
    +            .name("Type")
    +            .description("The type of this document (used by Elasticsearch 
for indexing and searching)")
    +            .required(true)
    +            .expressionLanguageSupported(true)
    +            
.addValidator(StandardValidators.createAttributeExpressionLanguageValidator(
    +                    AttributeExpression.ResultType.STRING, true))
    +            .build();
    +
    +    /**
    +     * Required property selecting the operation to perform (index, update, upsert or delete);
    +     * defaults to "index". Expression Language is supported, so the effective value may only be
    +     * known once it is evaluated against a FlowFile.
    +     */
    +    public static final PropertyDescriptor INDEX_OP = new 
PropertyDescriptor.Builder()
    +            .name("Index Operation")
    +            .description("The type of the operation used to index (index, 
update, upsert, delete)")
    +            .required(true)
    +            .expressionLanguageSupported(true)
    +            
.addValidator(StandardValidators.createAttributeExpressionLanguageValidator(
    +                    AttributeExpression.ResultType.STRING, true))
    +            .defaultValue("index")
    +            .build();
    +
    +    /**
    +     * Required property giving the preferred number of FlowFiles to handle per invocation;
    +     * must be a positive integer and defaults to 100.
    +     */
    +    public static final PropertyDescriptor BATCH_SIZE = new 
PropertyDescriptor.Builder()
    +            .name("Batch Size")
    +            .description("The preferred number of FlowFiles to put to the 
database in a single transaction")
    +            .required(true)
    +            .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
    +            .defaultValue("100")
    +            .build();
    +
    +    /**
    +     * Lists the relationships this processor can route FlowFiles to:
    +     * success, failure and retry.
    +     *
    +     * @return an unmodifiable set holding the three relationships
    +     */
    +    @Override
    +    public Set<Relationship> getRelationships() {
    +        final Set<Relationship> routes = new HashSet<>();
    +        Collections.addAll(routes, REL_SUCCESS, REL_FAILURE, REL_RETRY);
    +        return Collections.unmodifiableSet(routes);
    +    }
    +
    +    /**
    +     * Lists the properties this processor exposes, in the order they are added below.
    +     *
    +     * @return an unmodifiable, ordered list of the supported property descriptors
    +     */
    +    @Override
    +    public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
    +        final List<PropertyDescriptor> supported = new ArrayList<>();
    +        // Two calls only to keep the lines short; the overall order is unchanged.
    +        Collections.addAll(supported, ES_URL, PROP_SSL_CONTEXT_SERVICE, CONNECT_TIMEOUT, RESPONSE_TIMEOUT);
    +        Collections.addAll(supported, ID_ATTRIBUTE, INDEX, TYPE, CHARSET, BATCH_SIZE, INDEX_OP);
    +        return Collections.unmodifiableList(supported);
    +    }
    +
    +    @Override
    +    protected Collection<ValidationResult> 
customValidate(ValidationContext validationContext) {
    +        final List<ValidationResult> problems = new 
ArrayList<>(super.customValidate(validationContext));
    +        // Since Expression Language is allowed for index operation, we 
can't guarantee that we can catch
    +        // all invalid configurations, but we should catch them as soon as 
we can. For example, if the
    +        // Identifier Attribute property is empty, the Index Operation 
must evaluate to "index".
    +        String idAttribute = 
validationContext.getProperty(ID_ATTRIBUTE).getValue();
    +        String indexOp = 
validationContext.getProperty(INDEX_OP).getValue();
    +
    +        if(StringUtils.isEmpty(idAttribute)) {
    +            if(StringUtils.isEmpty(indexOp)) {
    +                problems.add(new ValidationResult.Builder()
    +                        .valid(false)
    +                        .subject(INDEX_OP.getDisplayName())
    +                        .explanation("If Identifier Attribute is not set, 
Index Operation must evaluate to \"index\"")
    +                        .build());
    +            }
    +        }
    +        return problems;
    +    }
    +
    +    /**
    +     * Scheduling hook (annotated with {@code @OnScheduled}) that only forwards the context
    +     * to the superclass {@code setup}.
    +     * NOTE(review): the superclass implementation is not visible here; presumably it prepares
    +     * the HTTP client later obtained through getClient() - confirm.
    +     *
    +     * @param context the process context handed through to the superclass
    +     */
    +    @OnScheduled
    +    public void setup(ProcessContext context) {
    +        super.setup(context);
    +    }
    +
    +    @Override
    +    public void onTrigger(final ProcessContext context, final 
ProcessSession session) throws ProcessException {
    +        final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
    +        final String id_attribute = 
context.getProperty(ID_ATTRIBUTE).getValue();
    +        final Charset charset = 
Charset.forName(context.getProperty(CHARSET).getValue());
    +
    +        final List<FlowFile> flowFiles = session.get(batchSize);
    +        if (flowFiles.isEmpty()) {
    +            return;
    +        }
    +
    +        OkHttpClient okHttpClient = getClient();
    +        final ComponentLog logger = getLogger();
    +
    +        // Keep track of the list of flow files that need to be 
transferred. As they are transferred, remove them from the list.
    +        List<FlowFile> flowFilesToTransfer = new LinkedList<>(flowFiles);
    +        try {
    +            final StringBuilder sb = new StringBuilder();
    +            final String baseUrl = 
trimToEmpty(context.getProperty(ES_URL).getValue());
    +            final URL url = new URL((baseUrl.endsWith("/") ? baseUrl : 
baseUrl + "/") + "_bulk");
    +
    +            for (FlowFile file : flowFiles) {
    +                final String index = 
context.getProperty(INDEX).evaluateAttributeExpressions(file).getValue();
    +                if (StringUtils.isEmpty(index)) {
    +                    logger.error("No value for index in for {}, 
transferring to failure", new Object[]{id_attribute, file});
    +                    flowFilesToTransfer.remove(file);
    +                    session.transfer(file, REL_FAILURE);
    +                    continue;
    +                }
    +                final String docType = 
context.getProperty(TYPE).evaluateAttributeExpressions(file).getValue();
    +                String indexOp = 
context.getProperty(INDEX_OP).evaluateAttributeExpressions(file).getValue();
    +                if (StringUtils.isEmpty(indexOp)) {
    +                    logger.error("No Index operation specified for {}, 
transferring to failure.", new Object[]{file});
    +                    flowFilesToTransfer.remove(file);
    +                    file = session.penalize(file);
    +                    session.transfer(file, REL_FAILURE);
    +                    continue;
    +                }
    +
    +                switch (indexOp.toLowerCase()) {
    +                    case "index":
    +                    case "update":
    +                    case "upsert":
    +                    case "delete":
    +                        break;
    +                    default:
    +                        logger.error("Index operation {} not supported for 
{}, transferring to failure.", new Object[]{indexOp, file});
    +                        flowFilesToTransfer.remove(file);
    +                        file = session.penalize(file);
    +                        session.transfer(file, REL_FAILURE);
    +                        continue;
    +                }
    +
    +                final String id = (id_attribute != null) ? 
file.getAttribute(id_attribute) : null;
    +
    +                // The ID must be valid for all operations except "index". 
For that case,
    +                // a missing ID indicates one is to be auto-generated by 
Elasticsearch
    +                if (id == null && !indexOp.equalsIgnoreCase("index")) {
    +                    logger.error("Index operation {} requires a valid 
identifier value from a flow file attribute, transferring to failure.",
    +                            new Object[]{indexOp, file});
    +                    flowFilesToTransfer.remove(file);
    +                    file = session.penalize(file);
    +                    session.transfer(file, REL_FAILURE);
    +                    continue;
    +                }
    +
    +                final StringBuilder json = new StringBuilder();
    +                session.read(file, in -> {
    +                    json.append(IOUtils.toString(in, 
charset).replace("\r\n", " ").replace('\n', ' ').replace('\r', ' '));
    +                });
    +                if (indexOp.equalsIgnoreCase("index")) {
    +                    sb.append("{\"index\": { \"_index\": \"");
    +                    sb.append(index);
    +                    sb.append("\", \"_type\": \"");
    +                    sb.append(docType);
    +                    sb.append("\"");
    +                    if (!StringUtils.isEmpty(id)) {
    +                        sb.append(", \"_id\": \"");
    +                        sb.append(id);
    +                        sb.append("\"");
    +                    }
    +                    sb.append("}}\n");
    +                    sb.append(json);
    +                    sb.append("\n");
    +                } else if (indexOp.equalsIgnoreCase("upsert") || 
indexOp.equalsIgnoreCase("update")) {
    +                    sb.append("{\"update\": { \"_index\": \"");
    +                    sb.append(index);
    +                    sb.append("\", \"_type\": \"");
    +                    sb.append(docType);
    +                    sb.append("\", \"_id\": \"");
    +                    sb.append(id);
    +                    sb.append("\" }\n");
    +                    sb.append("{\"doc\": ");
    +                    sb.append(json);
    +                    sb.append(", \"doc_as_upsert\": ");
    +                    sb.append(indexOp.equalsIgnoreCase("upsert"));
    +                    sb.append(" }\n");
    +                } else if (indexOp.equalsIgnoreCase("delete")) {
    +                    sb.append("{\"delete\": { \"_index\": \"");
    +                    sb.append(index);
    +                    sb.append("\", \"_type\": \"");
    +                    sb.append(docType);
    +                    sb.append("\", \"_id\": \"");
    +                    sb.append(id);
    +                    sb.append("\" }\n");
    +                }
    +            }
    +            if (!flowFilesToTransfer.isEmpty()) {
    +                RequestBody requestBody = 
RequestBody.create(MediaType.parse("application/json"), sb.toString());
    +                final Response getResponse = 
sendRequestToElasticsearch(okHttpClient, url, "PUT", requestBody);
    +                final int statusCode = getResponse.code();
    +
    +                if (isSuccess(statusCode)) {
    +                    ResponseBody responseBody = getResponse.body();
    +                    final byte[] bodyBytes = responseBody.bytes();
    +                    JsonNode responseJson = parseJsonResponse(new 
ByteArrayInputStream(bodyBytes));
    +                    boolean errors = 
responseJson.get("errors").asBoolean(false);
    +                    if (errors) {
    +                        ArrayNode itemNodeArray = (ArrayNode) 
responseJson.get("items");
    +                        if (itemNodeArray.size() > 0) {
    +                            // All items are returned whether they 
succeeded or failed, so iterate through the item array
    +                            // at the same time as the flow file list, 
moving each to success or failure accordingly
    +                            for (int i = 0; i < itemNodeArray.size(); i++) 
{
    +                                JsonNode itemNode = itemNodeArray.get(i);
    +                                FlowFile flowFile = 
flowFilesToTransfer.remove(i);
    +                                int status = 
itemNode.findPath("status").asInt();
    +                                if (!isSuccess(status)) {
    +                                    String reason = 
itemNode.findPath("//error/reason").asText();
    +                                    logger.error("Failed to insert {} into 
Elasticsearch due to {}, transferring to failure",
    +                                            new Object[]{flowFile, 
reason});
    +                                    session.transfer(flowFile, 
REL_FAILURE);
    +
    +                                } else {
    +                                    session.transfer(flowFile, 
REL_SUCCESS);
    +                                    // Record provenance event
    +                                    
session.getProvenanceReporter().send(flowFile, url.toString());
    +                                }
    +                            }
    +                        }
    +                    }
    +                    // Transfer any remaining flowfiles to success
    +                    session.transfer(flowFilesToTransfer, REL_SUCCESS);
    --- End diff --
    
    Will add provenance events


> Add Elasticsearch processors that use the REST API
> --------------------------------------------------
>
>                 Key: NIFI-2068
>                 URL: https://issues.apache.org/jira/browse/NIFI-2068
>             Project: Apache NiFi
>          Issue Type: Improvement
>            Reporter: Matt Burgess
>            Assignee: Matt Burgess
>             Fix For: 1.0.0
>
>
> The current Elasticsearch processors use the Transport Client, and as a 
> result there can be some compatibility issues between multiple versions of ES 
> clusters. The REST API is much more standard between versions, so it would be 
> nice to have ES processors that use the REST API, to enable things like 
> migration from an Elasticsearch cluster with an older version to a cluster 
> with a newer version.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

[jira] [Commented] (NIFI-2068) Add Elasticsearch processors that use the REST API

Reply via email to