[
https://issues.apache.org/jira/browse/NIFI-5510?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16617076#comment-16617076
]
ASF GitHub Bot commented on NIFI-5510:
--------------------------------------
GitHub user zenfenan commented on a diff in the pull request:
https://github.com/apache/nifi/pull/2992#discussion_r217948182
--- Diff:
nifi-nar-bundles/nifi-cassandra-bundle/nifi-cassandra-processors/src/main/java/org/apache/nifi/processors/cassandra/PutCassandraRecord.java
---
@@ -0,0 +1,239 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.cassandra;
+
+import com.datastax.driver.core.BatchStatement;
+import com.datastax.driver.core.ConsistencyLevel;
+import com.datastax.driver.core.Session;
+import com.datastax.driver.core.exceptions.AuthenticationException;
+import com.datastax.driver.core.exceptions.NoHostAvailableException;
+import com.datastax.driver.core.querybuilder.Insert;
+import com.datastax.driver.core.querybuilder.QueryBuilder;
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.annotation.lifecycle.OnScheduled;
+import org.apache.nifi.annotation.lifecycle.OnShutdown;
+import org.apache.nifi.annotation.lifecycle.OnUnscheduled;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.components.ValidationContext;
+import org.apache.nifi.components.ValidationResult;
+import org.apache.nifi.expression.ExpressionLanguageScope;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.serialization.RecordReaderFactory;
+import org.apache.nifi.serialization.RecordReader;
+import org.apache.nifi.serialization.record.Record;
+import org.apache.nifi.serialization.record.RecordFieldType;
+import org.apache.nifi.serialization.record.RecordSchema;
+import org.apache.nifi.serialization.record.util.DataTypeUtils;
+import org.apache.nifi.util.StopWatch;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+@Tags({"cassandra", "cql", "put", "insert", "update", "set", "record"})
+@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
+@CapabilityDescription("Writes the content of the incoming FlowFile as
individual records to Apache Cassandra using native protocol version 3 or
higher.")
+public class PutCassandraRecord extends AbstractCassandraProcessor {
+
+ static final PropertyDescriptor RECORD_READER_FACTORY = new
PropertyDescriptor.Builder()
+ .name("put-cassandra-record-reader")
+ .displayName("Record Reader")
+ .description("Specifies the type of Record Reader controller
service to use for parsing the incoming data " +
+ "and determining the schema")
+ .identifiesControllerService(RecordReaderFactory.class)
+ .required(true)
+ .build();
+
+ static final PropertyDescriptor TABLE = new
PropertyDescriptor.Builder()
+ .name("put-cassandra-record-table")
+ .displayName("Table name")
+ .description("The name of the Cassandra table to which the
records have to be written.")
+ .required(true)
+ .addValidator(StandardValidators.NON_EMPTY_EL_VALIDATOR)
+
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
+ .build();
+
+ static final PropertyDescriptor BATCH_SIZE = new
PropertyDescriptor.Builder()
+ .name("put-cassandra-record-batch-size")
+ .displayName("Batch size")
+ .description("Specifies the number of 'Insert statements' to
be grouped together to execute as a batch (BatchStatement)")
+ .defaultValue("100")
+ .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
+ .expressionLanguageSupported(ExpressionLanguageScope.NONE)
+ .required(true)
+ .build();
+
+ static final PropertyDescriptor BATCH_STATEMENT_TYPE = new
PropertyDescriptor.Builder()
+ .name("put-cassandra-record-batch-statement-type")
+ .displayName("Batch Statement Type")
+ .description("Specifies the type of 'Batch Statement' to be
used.")
+ .allowableValues(BatchStatement.Type.values())
+ .defaultValue(BatchStatement.Type.LOGGED.toString())
+ .required(false)
+ .build();
+
+ private final static List<PropertyDescriptor> propertyDescriptors =
Collections.unmodifiableList(Arrays.asList(
+ CONTACT_POINTS, KEYSPACE, TABLE, CLIENT_AUTH, USERNAME,
PASSWORD, RECORD_READER_FACTORY,
+ BATCH_SIZE, CONSISTENCY_LEVEL, BATCH_STATEMENT_TYPE,
PROP_SSL_CONTEXT_SERVICE));
+
+ private final static Set<Relationship> relationships =
Collections.unmodifiableSet(
+ new HashSet<>(Arrays.asList(REL_SUCCESS, REL_FAILURE)));
+
+ @Override
+ protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+ return propertyDescriptors;
+ }
+
+ @Override
+ public Set<Relationship> getRelationships() {
+ return relationships;
+ }
+
+ @OnScheduled
+ public void onScheduled(ProcessContext context) {
+ try {
+ connectToCassandra(context);
+ } catch (NoHostAvailableException nhae) {
+ getLogger().error("No host in the Cassandra cluster can be
contacted successfully to execute this statement", nhae);
+ getLogger().error(nhae.getCustomMessage(10, true, false));
+ throw new ProcessException(nhae);
+ } catch (AuthenticationException ae) {
+ getLogger().error("Invalid username/password combination", ae);
+ throw new ProcessException(ae);
+ }
+ }
+
+ @Override
+ public void onTrigger(ProcessContext context, ProcessSession session)
throws ProcessException {
+ FlowFile inputFlowFile = session.get();
+
+ if (inputFlowFile == null) {
+ return;
+ }
+
+ final String cassandraTable =
context.getProperty(TABLE).evaluateAttributeExpressions(inputFlowFile).getValue();
+ final RecordReaderFactory recordParserFactory =
context.getProperty(RECORD_READER_FACTORY).asControllerService(RecordReaderFactory.class);
+ final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
+ final String batchStatementType =
context.getProperty(BATCH_STATEMENT_TYPE).getValue();
+ final String serialConsistencyLevel =
context.getProperty(CONSISTENCY_LEVEL).getValue();
+
+ final BatchStatement batchStatement;
+ final Session connectionSession = cassandraSession.get();
+ final AtomicInteger recordsAdded = new AtomicInteger(0);
+ final StopWatch stopWatch = new StopWatch(true);
+
+ boolean error = false;
+
+ try (final InputStream inputStream = session.read(inputFlowFile);
+ final RecordReader reader =
recordParserFactory.createRecordReader(inputFlowFile, inputStream,
getLogger())){
+
+ final RecordSchema schema = reader.getSchema();
+ Record record;
+
+ batchStatement = new
BatchStatement(BatchStatement.Type.valueOf(batchStatementType));
+
batchStatement.setSerialConsistencyLevel(ConsistencyLevel.valueOf(serialConsistencyLevel));
+
+ while((record = reader.nextRecord()) != null) {
+ Map<String, Object> recordContentMap = (Map<String,
Object>) DataTypeUtils
+ .convertRecordFieldtoObject(record,
RecordFieldType.RECORD.getRecordDataType(record.getSchema()));
+ Insert insertQuery =
QueryBuilder.insertInto(cassandraTable);
+ for (String fieldName : schema.getFieldNames()) {
+ insertQuery.value(fieldName,
recordContentMap.get(fieldName));
+ }
+ batchStatement.add(insertQuery);
+
+ if (recordsAdded.incrementAndGet() == batchSize) {
+ connectionSession.execute(batchStatement);
+ batchStatement.clear();
+ }
+ }
+
+ if (batchStatement.size() != 0) {
+ connectionSession.execute(batchStatement);
+ batchStatement.clear();
+ }
+
+ } catch (Exception e) {
+ error = true;
+ getLogger().error("Unable to write the records into Cassandra
table due to {}", new Object[] {e});
+ session.transfer(inputFlowFile, REL_FAILURE);
+ }
+
+ if (!error) {
+ stopWatch.stop();
+ long duration = stopWatch.getDuration(TimeUnit.MILLISECONDS);
+ String transitUri = "cassandra://" +
connectionSession.getCluster().getMetadata().getClusterName();
--- End diff --
Fair point. Will update it.
> Allow records to be put directly into Cassandra
> -----------------------------------------------
>
> Key: NIFI-5510
> URL: https://issues.apache.org/jira/browse/NIFI-5510
> Project: Apache NiFi
> Issue Type: New Feature
> Components: Extensions
> Reporter: Matt Burgess
> Assignee: Sivaprasanna Sethuraman
> Priority: Major
>
> Currently the standard way of getting data into Cassandra through NiFi is to
> use PutCassandraQL, which often means raw data needs to be converted to CQL
> statements, usually done (with modifications) via ConvertJSONToSQL.
> It would be better to have something closer to PutDatabaseRecord, a processor
> called PutCassandraRecord perhaps, that would take the raw data and input it
> into Cassandra "directly", without the need for the user to convert the data
> into CQL statements.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)