granthenke commented on a change in pull request #5020:
URL: https://github.com/apache/nifi/pull/5020#discussion_r618059261
##########
File path:
nifi-nar-bundles/nifi-kudu-bundle/nifi-kudu-processors/src/main/java/org/apache/nifi/processors/kudu/AbstractKuduProcessor.java
##########
@@ -184,10 +200,25 @@ protected KuduClient buildClient(final ProcessContext
context) {
final String masters =
context.getProperty(KUDU_MASTERS).evaluateAttributeExpressions().getValue();
final int operationTimeout =
context.getProperty(KUDU_OPERATION_TIMEOUT_MS).evaluateAttributeExpressions().asTimePeriod(TimeUnit.MILLISECONDS).intValue();
final int adminOperationTimeout =
context.getProperty(KUDU_KEEP_ALIVE_PERIOD_TIMEOUT_MS).evaluateAttributeExpressions().asTimePeriod(TimeUnit.MILLISECONDS).intValue();
+ final int workerCount = context.getProperty(WORKER_COUNT).asInteger();
+
+ // Create Executor following approach of
Executors.newCachedThreadPool() using worker count as maximum pool size
+ final int corePoolSize = 0;
+ final long threadKeepAliveTime = 60;
+ final Executor nioExecutor = new ThreadPoolExecutor(
Review comment:
Is this required, versus just setting the workerCount on the client? IIUC the
client uses an unbounded cached thread pool, but the NioEventLoopGroup will never
use more than workerCount threads.
##########
File path:
nifi-nar-bundles/nifi-kudu-bundle/nifi-kudu-processors/src/main/java/org/apache/nifi/processors/kudu/AbstractKuduProcessor.java
##########
@@ -184,10 +200,25 @@ protected KuduClient buildClient(final ProcessContext
context) {
final String masters =
context.getProperty(KUDU_MASTERS).evaluateAttributeExpressions().getValue();
final int operationTimeout =
context.getProperty(KUDU_OPERATION_TIMEOUT_MS).evaluateAttributeExpressions().asTimePeriod(TimeUnit.MILLISECONDS).intValue();
final int adminOperationTimeout =
context.getProperty(KUDU_KEEP_ALIVE_PERIOD_TIMEOUT_MS).evaluateAttributeExpressions().asTimePeriod(TimeUnit.MILLISECONDS).intValue();
+ final int workerCount = context.getProperty(WORKER_COUNT).asInteger();
+
+ // Create Executor following approach of
Executors.newCachedThreadPool() using worker count as maximum pool size
+ final int corePoolSize = 0;
+ final long threadKeepAliveTime = 60;
+ final Executor nioExecutor = new ThreadPoolExecutor(
+ corePoolSize,
+ workerCount,
+ threadKeepAliveTime,
+ TimeUnit.SECONDS,
+ new SynchronousQueue<>(),
+ new ClientThreadFactory(getIdentifier())
+ );
return new KuduClient.KuduClientBuilder(masters)
.defaultOperationTimeoutMs(operationTimeout)
- .defaultSocketReadTimeoutMs(adminOperationTimeout)
+ .defaultAdminOperationTimeoutMs(adminOperationTimeout)
Review comment:
This code looks suspect to me: the variable is called adminOperationTimeout, but
the property is called KUDU_KEEP_ALIVE_PERIOD_TIMEOUT_MS. I'm not sure what this
is; it probably should be cleaned up, but maybe in a separate change.
##########
File path:
nifi-nar-bundles/nifi-kudu-bundle/nifi-kudu-processors/src/main/java/org/apache/nifi/processors/kudu/PutKudu.java
##########
@@ -342,38 +346,73 @@ public void onTrigger(final ProcessContext context, final
ProcessSession session
final KerberosUser user = getKerberosUser();
if (user == null) {
- executeOnKuduClient(kuduClient -> trigger(context, session,
flowFiles, kuduClient));
+ executeOnKuduClient(kuduClient -> processFlowFiles(context,
session, flowFiles, kuduClient));
return;
}
final PrivilegedExceptionAction<Void> privilegedAction = () -> {
- executeOnKuduClient(kuduClient -> trigger(context, session,
flowFiles, kuduClient));
+ executeOnKuduClient(kuduClient -> processFlowFiles(context,
session, flowFiles, kuduClient));
return null;
};
final KerberosAction<Void> action = new KerberosAction<>(user,
privilegedAction, getLogger());
action.execute();
}
- private void trigger(final ProcessContext context, final ProcessSession
session, final List<FlowFile> flowFiles, KuduClient kuduClient) throws
ProcessException {
- final RecordReaderFactory recordReaderFactory =
context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
+ private void processFlowFiles(final ProcessContext context, final
ProcessSession session, final List<FlowFile> flowFiles, final KuduClient
kuduClient) {
+ final Map<FlowFile, Integer> processedRecords = new HashMap<>();
+ final Map<FlowFile, Object> flowFileFailures = new HashMap<>();
+ final Map<Operation, FlowFile> operationFlowFileMap = new HashMap<>();
+ final List<RowError> pendingRowErrors = new ArrayList<>();
final KuduSession kuduSession = createKuduSession(kuduClient);
+ try {
+ processFlowFiles(flowFiles,
+ processedRecords,
+ flowFileFailures,
+ operationFlowFileMap,
+ pendingRowErrors,
+ session,
+ context,
+ kuduClient,
+ kuduSession);
+ } finally {
+ try {
+ flushKuduSession(kuduSession, true, pendingRowErrors);
+ } catch (final KuduException|RuntimeException e) {
+ getLogger().error("KuduSession.close() Failed", e);
+ }
+ }
- final Map<FlowFile, Integer> numRecords = new HashMap<>();
- final Map<FlowFile, Object> flowFileFailures = new HashMap<>();
- final Map<Operation, FlowFile> operationFlowFileMap = new HashMap<>();
+ if (isRollbackOnFailure() && (!pendingRowErrors.isEmpty() ||
!flowFileFailures.isEmpty())) {
+ logFailures(pendingRowErrors, operationFlowFileMap);
+ session.rollback();
+ context.yield();
+ } else {
+ transferFlowFiles(flowFiles, processedRecords, flowFileFailures,
operationFlowFileMap, pendingRowErrors, session);
+ }
+ }
- int numBuffered = 0;
+ private void processFlowFiles(final List<FlowFile> flowFiles,
Review comment:
Maybe use a different name for this method than processFlowFiles, or rename
the processFlowFiles method above?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]