mattyb149 commented on a change in pull request #2861: NIFI-5248 Added new Elasticsearch json and record processors. URL: https://github.com/apache/nifi/pull/2861#discussion_r301308670
########## File path: nifi-nar-bundles/nifi-elasticsearch-bundle/nifi-elasticsearch-restapi-processors/src/main/java/org/apache/nifi/processors/elasticsearch/PutElasticsearchRecord.java ########## @@ -0,0 +1,264 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.processors.elasticsearch; + +import org.apache.nifi.annotation.behavior.InputRequirement; +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.annotation.lifecycle.OnScheduled; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.elasticsearch.ElasticSearchClientService; +import org.apache.nifi.elasticsearch.ElasticSearchError; +import org.apache.nifi.elasticsearch.IndexOperationRequest; +import org.apache.nifi.expression.ExpressionLanguageScope; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.processor.AbstractProcessor; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.processor.Relationship; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.record.path.FieldValue; +import org.apache.nifi.record.path.RecordPath; +import org.apache.nifi.record.path.RecordPathResult; +import org.apache.nifi.record.path.util.RecordPathCache; +import org.apache.nifi.record.path.validation.RecordPathValidator; +import org.apache.nifi.serialization.RecordReader; +import org.apache.nifi.serialization.RecordReaderFactory; +import org.apache.nifi.serialization.record.Record; +import org.apache.nifi.serialization.record.RecordFieldType; +import org.apache.nifi.serialization.record.util.DataTypeUtils; + +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; + +@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED) +@Tags({"json", "elasticsearch", "elasticsearch5", "elasticsearch6", "put", "index", "record"}) +@CapabilityDescription("A record-aware ElasticSearch put processor that uses the official Elastic REST client libraries.") +public class 
PutElasticsearchRecord extends AbstractProcessor implements ElasticSearchRestProcessor { + static final PropertyDescriptor RECORD_READER = new PropertyDescriptor.Builder() + .name("put-es-record-reader") + .displayName("Record Reader") + .description("The record reader to use for reading incoming records from flowfiles.") + .identifiesControllerService(RecordReaderFactory.class) + .required(true) + .build(); + + static final PropertyDescriptor OPERATION_RECORD_PATH = new PropertyDescriptor.Builder() Review comment: This still feels a little awkward to me for the case when you want a single operation. I like the idea of being able to specify a RecordPath for the case when you are doing a "bulk" operation but performing multiple types of operations inside it, but if I have a bunch of records I just want to update, I need to add a synthetic field to each with a value of `update`. If that field gets indexed/updated in Elasticsearch, then it ends up in the doc unnecessarily in the single-operation case. If instead it gets removed before index/update, and it wasn't synthetic (such as in a Change Data Capture case where the operation is an existing valid field that needs to be in the doc), then we are removing user data. I'm not sure of the best way to tackle this, but it seems like something the user should be able to choose between, like "use this RecordPath and keep the field in the doc" or "use this value after evaluating EL". If the incoming data is JSON, this might be a moot point since one of the other processors can probably handle the single-operation case, but if it can't or the data isn't JSON, you'd need an UpdateRecord first, to convert the data and/or add a synthetic operation field. What do you think? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: users@infra.apache.org With regards, Apache Git Services
