[
https://issues.apache.org/jira/browse/NIFI-3644?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15955644#comment-15955644
]
ASF GitHub Bot commented on NIFI-3644:
--------------------------------------
Github user bbende commented on a diff in the pull request:
https://github.com/apache/nifi/pull/1645#discussion_r109727737
--- Diff:
nifi-nar-bundles/nifi-standard-services/nifi-hbase_1_1_2-client-service-bundle/nifi-hbase_1_1_2-client-service/src/main/java/org/apache/nifi/hbase/HBase_1_1_2_ClientMapCacheService.java
---
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.hbase;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.SeeAlso;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.annotation.lifecycle.OnEnabled;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.controller.AbstractControllerService;
+import org.apache.nifi.controller.ConfigurationContext;
+
+import org.apache.nifi.distributed.cache.client.DistributedMapCacheClient;
+import org.apache.nifi.distributed.cache.client.Serializer;
+import org.apache.nifi.distributed.cache.client.Deserializer;
+import java.io.ByteArrayOutputStream;
+import org.apache.nifi.reporting.InitializationException;
+
+import java.nio.charset.StandardCharsets;
+import org.apache.nifi.hbase.scan.ResultCell;
+import org.apache.nifi.hbase.scan.ResultHandler;
+import org.apache.nifi.hbase.scan.Column;
+import org.apache.nifi.hbase.put.PutColumn;
+
+
+import org.apache.nifi.processor.util.StandardValidators;
+
+@Tags({"distributed", "cache", "state", "map", "cluster","hbase"})
+@SeeAlso(classNames =
{"org.apache.nifi.distributed.cache.server.map.DistributedMapCacheClient",
"org.apache.nifi.hbase.HBase_1_1_2_ClientService"})
+@CapabilityDescription("Provides the ability to use an HBase table as a
cache, in place of a DistributedMapCache."
+ + " Uses a HBase_1_1_2_ClientService controller to communicate with
HBase.")
+
+public class HBase_1_1_2_ClientMapCacheService extends
AbstractControllerService implements DistributedMapCacheClient {
+
+ static final PropertyDescriptor HBASE_CLIENT_SERVICE = new
PropertyDescriptor.Builder()
+ .name("HBase Client Service")
+ .description("Specifies the HBase Client Controller Service to use
for accessing HBase.")
+ .required(true)
+ .identifiesControllerService(HBaseClientService.class)
+ .build();
+
+ public static final PropertyDescriptor HBASE_CACHE_TABLE_NAME = new
PropertyDescriptor.Builder()
+ .name("HBase Cache Table Name")
+ .description("Name of the table on HBase to use for the cache.")
+ .required(true)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .build();
+
+ public static final PropertyDescriptor HBASE_COLUMN_FAMILY = new
PropertyDescriptor.Builder()
+ .name("HBase Column Family")
+ .description("Name of the column family on HBase to use for the
cache.")
+ .required(true)
+ .defaultValue("f")
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .build();
+
+ public static final PropertyDescriptor HBASE_COLUMN_QUALIFIER = new
PropertyDescriptor.Builder()
+ .name("HBase Column Qualifier")
+ .description("Name of the column qualifier on HBase to use for the
cache")
+ .defaultValue("q")
+ .required(true)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .build();
+
+ @Override
+ protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+ final List<PropertyDescriptor> descriptors = new ArrayList<>();
+ descriptors.add(HBASE_CACHE_TABLE_NAME);
+ descriptors.add(HBASE_CLIENT_SERVICE);
+ descriptors.add(HBASE_COLUMN_FAMILY);
+ descriptors.add(HBASE_COLUMN_QUALIFIER);
+ return descriptors;
+ }
+
+ private String hBaseCacheTableName;
--- End diff --
Since all of these member variables are set in @OnEnabled, they should all be
marked as volatile. Different threads can call the @OnEnabled method and the
service's other methods; volatile forces each variable to be read fresh and
ensures that the other thread sees the correct value.
> Add DetectDuplicateUsingHBase processor
> ---------------------------------------
>
> Key: NIFI-3644
> URL: https://issues.apache.org/jira/browse/NIFI-3644
> Project: Apache NiFi
> Issue Type: Improvement
> Components: Extensions
> Reporter: Bjorn Olsen
> Priority: Minor
>
> The DetectDuplicate processor makes use of a distributed map cache for
> maintaining a list of unique file identifiers (such as hashes).
> The distributed map cache functionality could be provided by an HBase table,
> which then allows for reliably storing a huge volume of file identifiers and
> auditing information. The downside of this approach is of course that HBase
> is required.
> Storing the unique file identifiers in a reliable, query-able manner along
> with some audit information is of benefit to several use cases.
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)