Github user bdesert commented on a diff in the pull request:
https://github.com/apache/nifi/pull/2478#discussion_r170697126
--- Diff:
nifi-nar-bundles/nifi-hbase-bundle/nifi-hbase-processors/src/main/java/org/apache/nifi/hbase/ScanHBase.java
---
@@ -0,0 +1,564 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.hbase;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.behavior.WritesAttributes;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.annotation.lifecycle.OnScheduled;
+import org.apache.nifi.components.AllowableValue;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.components.ValidationContext;
+import org.apache.nifi.components.ValidationResult;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.flowfile.attributes.CoreAttributes;
+import org.apache.nifi.hbase.io.JsonFullRowSerializer;
+import org.apache.nifi.hbase.io.JsonQualifierAndValueRowSerializer;
+import org.apache.nifi.hbase.io.RowSerializer;
+import org.apache.nifi.hbase.scan.Column;
+import org.apache.nifi.hbase.scan.ResultCell;
+import org.apache.nifi.hbase.scan.ResultHandler;
+import org.apache.nifi.processor.AbstractProcessor;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.util.Tuple;
+
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.regex.Pattern;
+
+@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
+@Tags({"hbase", "scan", "fetch", "get"})
+@CapabilityDescription("Scans and fetches rows from an HBase table. This
processor may be used to fetch rows from hbase table by specifying a range of
rowkey values (start and/or end ),"
+ + "by time range, by filter expression, or any combination of
them. "
+ + "Order of records can be controlled by a property Reversed"
+ + "Number of rows retrieved by the processor can be limited.")
+@WritesAttributes({
+ @WritesAttribute(attribute = "hbase.table", description = "The
name of the HBase table that the row was fetched from"),
+ @WritesAttribute(attribute = "hbase.resultset", description = "A
JSON document/s representing the row/s. This property is only written when a
Destination of flowfile-attributes is selected."),
+ @WritesAttribute(attribute = "mime.type", description = "Set to
application/json when using a Destination of flowfile-content, not set or
modified otherwise"),
+ @WritesAttribute(attribute = "hbase.rows.count", description =
"Number of rows in the content of given flow file"),
+ @WritesAttribute(attribute = "scanhbase.results.found",
description = "Indicates whether at least one row has been found in given hbase
table with provided conditions. "
+ + "Could be null (not present) if transfered to FAILURE")
+})
+public class ScanHBase extends AbstractProcessor {
+ //enhanced regex for columns to allow "-" in column qualifier names
+ static final Pattern COLUMNS_PATTERN =
Pattern.compile("\\w+(:(\\w|-)+)?(?:,\\w+(:(\\w|-)+)?)*");
+ static final byte[] nl = System.lineSeparator().getBytes();
+
+ static final PropertyDescriptor HBASE_CLIENT_SERVICE = new
PropertyDescriptor.Builder()
+ .displayName("HBase Client Service")
+ .name("scanhbase-client-service")
+ .description("Specifies the Controller Service to use for
accessing HBase.")
+ .required(true)
+ .identifiesControllerService(HBaseClientService.class)
+ .build();
+
+ static final PropertyDescriptor TABLE_NAME = new
PropertyDescriptor.Builder()
+ .displayName("Table Name")
+ .name("scanhbase-table-name")
+ .description("The name of the HBase Table to fetch from.")
+ .required(true)
+ .expressionLanguageSupported(true)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor START_ROW = new
PropertyDescriptor.Builder()
+ .displayName("Start rowkey")
+ .name("scanhbase-start-rowkey")
+ .description("The rowkey to start scan from.")
+ .required(false)
+ .expressionLanguageSupported(true)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor END_ROW = new
PropertyDescriptor.Builder()
+ .displayName("End rowkey")
+ .name("scanhbase-end-rowkey")
+ .description("The row key to end scan by.")
+ .required(false)
+ .expressionLanguageSupported(true)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor TIME_RANGE_MIN = new
PropertyDescriptor.Builder()
+ .displayName("Time range min")
+ .name("scanhbase-time-range-min")
+ .description("Time range min value. Both min and max values
for time range should be either blank or provided.")
+ .required(false)
+ .expressionLanguageSupported(true)
+ .addValidator(StandardValidators.LONG_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor TIME_RANGE_MAX = new
PropertyDescriptor.Builder()
+ .displayName("Time range max")
+ .name("scanhbase-time-range-max")
+ .description("Time range max value. Both min and max values
for time range should be either blank or provided.")
+ .required(false)
+ .expressionLanguageSupported(true)
+ .addValidator(StandardValidators.LONG_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor LIMIT_ROWS = new
PropertyDescriptor.Builder()
+ .displayName("Limit rows")
+ .name("scanhbase-limit")
+ .description("Limit number of rows retrieved by scan.")
+ .required(false)
+ .expressionLanguageSupported(true)
+ .addValidator(StandardValidators.INTEGER_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor BULK_SIZE = new
PropertyDescriptor.Builder()
+ .displayName("Max rows per flow file")
+ .name("scanhbase-bulk-size")
+ .description("Limits number of rows in single flow file
content. Set to 0 to avoid multiple flow files.")
+ .required(false)
+ .expressionLanguageSupported(true)
+ .defaultValue("0")
+ .addValidator(StandardValidators.INTEGER_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor REVERSED_SCAN = new
PropertyDescriptor.Builder()
+ .displayName("Reversed order")
+ .name("scanhbase-reversed-order")
+ .description("Set whether this scan is a reversed one. This is
false by default which means forward(normal) scan.")
+ .expressionLanguageSupported(false)
+ .allowableValues("true", "false")
+ .required(false)
+ .defaultValue("false")
+ .addValidator(StandardValidators.BOOLEAN_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor FILTER_EXPRESSION = new
PropertyDescriptor.Builder()
+ .displayName("Filter expression")
+ .name("scanhbase-filter-expression")
+ .description("An HBase filter expression that will be applied
to the scan. This property can not be used when also using the Columns
property. "
+ + "Example: \"ValueFilter( =, 'binaryprefix:commit'
)\"")
+ .required(false)
+ .expressionLanguageSupported(true)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor COLUMNS = new
PropertyDescriptor.Builder()
+ .displayName("Columns")
+ .name("scanhbase-columns")
+ .description("An optional comma-separated list of
\"<colFamily>:<colQualifier>\" pairs to fetch. To return all columns " +
+ "for a given family, leave off the qualifier such as
\"<colFamily1>,<colFamily2>\".")
+ .required(false)
+ .expressionLanguageSupported(true)
+
.addValidator(StandardValidators.createRegexMatchingValidator(COLUMNS_PATTERN))
+ .build();
+
+ static final AllowableValue JSON_FORMAT_FULL_ROW = new
AllowableValue("full-row", "full-row",
+ "Creates a JSON document with the format: {\"row\":<row-id>,
\"cells\":[{\"fam\":<col-fam>, \"qual\":<col-val>, \"val\":<value>,
\"ts\":<timestamp>}]}.");
+ static final AllowableValue JSON_FORMAT_QUALIFIER_AND_VALUE = new
AllowableValue("col-qual-and-val", "col-qual-and-val",
+ "Creates a JSON document with the format:
{\"<col-qual>\":\"<value>\", \"<col-qual>\":\"<value>\".");
+
+ static final PropertyDescriptor JSON_FORMAT = new
PropertyDescriptor.Builder()
+ .displayName("JSON Format")
+ .name("scanhbase-json-format")
+ .description("Specifies how to represent the HBase row as a
JSON document.")
+ .required(true)
+ .allowableValues(JSON_FORMAT_FULL_ROW,
JSON_FORMAT_QUALIFIER_AND_VALUE)
+ .defaultValue(JSON_FORMAT_FULL_ROW.getValue())
+ .build();
+
+ static final PropertyDescriptor DECODE_CHARSET = new
PropertyDescriptor.Builder()
+ .displayName("Decode Character Set")
+ .name("scanhbase-decode-charset")
+ .description("The character set used to decode data from
HBase.")
+ .required(true)
+ .defaultValue("UTF-8")
+ .addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor ENCODE_CHARSET = new
PropertyDescriptor.Builder()
+ .displayName("Encode Character Set")
+ .name("scanhbase-encode-charset")
+ .description("The character set used to encode the JSON
representation of the row.")
+ .required(true)
+ .defaultValue("UTF-8")
+ .addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
+ .build();
+
+ public static final Relationship REL_ORIGINAL = new
Relationship.Builder()
+ .name("original")
+ .description("The original input file will be routed to this
destination, even if no rows are retrieved based on provided conditions.")
+ .build();
+
+ static final Relationship REL_SUCCESS = new Relationship.Builder()
+ .name("success")
+ .description("All successful fetches are routed to this
relationship.")
+ .build();
+ static final Relationship REL_FAILURE = new Relationship.Builder()
+ .name("failure")
+ .description("All failed fetches are routed to this
relationship.")
+ .build();
+
+ static final String HBASE_TABLE_ATTR = "hbase.table";
+ static final String HBASE_ROWS_COUNT_ATTR = "hbase.rows.count";
+
+ static final List<PropertyDescriptor> properties;
+ static {
+ List<PropertyDescriptor> props = new ArrayList<>();
+ props.add(HBASE_CLIENT_SERVICE);
+ props.add(TABLE_NAME);
+ props.add(START_ROW);
+ props.add(END_ROW);
+ props.add(TIME_RANGE_MIN);
+ props.add(TIME_RANGE_MAX);
+ props.add(LIMIT_ROWS);
+ props.add(REVERSED_SCAN);
+ props.add(BULK_SIZE);
+ props.add(FILTER_EXPRESSION);
+ props.add(COLUMNS);
+ props.add(JSON_FORMAT);
+ props.add(ENCODE_CHARSET);
+ props.add(DECODE_CHARSET);
+ properties = Collections.unmodifiableList(props);
+ }
+
+ static final Set<Relationship> relationships;
+ static {
+ Set<Relationship> rels = new HashSet<>();
+ rels.add(REL_SUCCESS);
+ rels.add(REL_ORIGINAL);
+ rels.add(REL_FAILURE);
+ relationships = Collections.unmodifiableSet(rels);
+ }
+
+ private volatile Charset decodeCharset;
+ private volatile Charset encodeCharset;
+ private RowSerializer serializer = null;
+
+ @OnScheduled
+ public void onScheduled(ProcessContext context) {
+ this.decodeCharset =
Charset.forName(context.getProperty(DECODE_CHARSET).getValue());
+ this.encodeCharset =
Charset.forName(context.getProperty(ENCODE_CHARSET).getValue());
+
+ final String jsonFormat =
context.getProperty(JSON_FORMAT).getValue();
+ if (jsonFormat.equals(JSON_FORMAT_FULL_ROW.getValue())) {
+ this.serializer = new JsonFullRowSerializer(decodeCharset,
encodeCharset);
+ } else {
+ this.serializer = new
JsonQualifierAndValueRowSerializer(decodeCharset, encodeCharset);
+ }
+ }
+
+ @Override
+ protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+ return properties;
+ }
+
+ @Override
+ public Set<Relationship> getRelationships() {
+ return relationships;
+ }
+
+ @Override
+ protected Collection<ValidationResult>
customValidate(ValidationContext validationContext) {
+
+ final List<ValidationResult> problems = new ArrayList<>();
+
+ final String columns =
validationContext.getProperty(COLUMNS).getValue();
+ final String filter =
validationContext.getProperty(FILTER_EXPRESSION).getValue();
+
+ if (!StringUtils.isBlank(columns) && !StringUtils.isBlank(filter))
{
+ problems.add(new ValidationResult.Builder()
+ .subject(FILTER_EXPRESSION.getDisplayName())
+ .input(filter).valid(false)
+ .explanation("A filter expression can not be used in
conjunction with the Columns property")
+ .build());
+ }
+
--- End diff --
Since these properties support Expression Language, I validate this condition in onTrigger (lines
348-356). If you think we should also add it to customValidate, I can do that.
---