Zookeeper is from the docker hub zookeeper:3.5.7 image.

Below is our nifi.properties (with secrets and hostnames modified).

thanks!
 - Wyllys



nifi.flow.configuration.file=/opt/nifi/nifi-current/latest_flow/nifi-0/flow.xml.gz

nifi.flow.configuration.archive.enabled=true

nifi.flow.configuration.archive.dir=/opt/nifi/nifi-current/archives

nifi.flow.configuration.archive.max.time=30 days

nifi.flow.configuration.archive.max.storage=500 MB

nifi.flow.configuration.archive.max.count=

nifi.flowcontroller.autoResumeState=false

nifi.flowcontroller.graceful.shutdown.period=10 sec

nifi.flowservice.writedelay.interval=500 ms

nifi.administrative.yield.duration=30 sec


nifi.bored.yield.duration=10 millis

nifi.queue.backpressure.count=10000

nifi.queue.backpressure.size=1 GB


nifi.authorizer.configuration.file=./conf/authorizers.xml

nifi.login.identity.provider.configuration.file=./conf/login-identity-providers.xml

nifi.templates.directory=/opt/nifi/nifi-current/templates

nifi.ui.banner.text=KI Nifi Cluster

nifi.ui.autorefresh.interval=30 sec

nifi.nar.library.directory=./lib

nifi.nar.library.autoload.directory=./extensions

nifi.nar.working.directory=./work/nar/

nifi.documentation.working.directory=./work/docs/components


nifi.state.management.configuration.file=./conf/state-management.xml

nifi.state.management.provider.local=local-provider

nifi.state.management.provider.cluster=zk-provider

nifi.state.management.embedded.zookeeper.start=false

nifi.state.management.embedded.zookeeper.properties=./conf/zookeeper.properties


nifi.database.directory=./database_repository

nifi.h2.url.append=;LOCK_TIMEOUT=25000;WRITE_DELAY=0;AUTO_SERVER=FALSE


nifi.flowfile.repository.implementation=org.apache.nifi.controller.repository.WriteAheadFlowFileRepository

nifi.flowfile.repository.wal.implementation=org.apache.nifi.wali.SequentialAccessWriteAheadLog

nifi.flowfile.repository.directory=./flowfile_repository

nifi.flowfile.repository.partitions=256

nifi.flowfile.repository.checkpoint.interval=2 mins

nifi.flowfile.repository.always.sync=false

nifi.flowfile.repository.encryption.key.provider.implementation=

nifi.flowfile.repository.encryption.key.provider.location=

nifi.flowfile.repository.encryption.key.id=

nifi.flowfile.repository.encryption.key=


nifi.swap.manager.implementation=org.apache.nifi.controller.FileSystemSwapManager

nifi.queue.swap.threshold=20000

nifi.swap.in.period=5 sec

nifi.swap.in.threads=1

nifi.swap.out.period=5 sec

nifi.swap.out.threads=4


nifi.content.repository.implementation=org.apache.nifi.controller.repository.FileSystemRepository

nifi.content.claim.max.appendable.size=1 MB

nifi.content.claim.max.flow.files=100

nifi.content.repository.directory.default=./content_repository

nifi.content.repository.archive.max.retention.period=12 hours

nifi.content.repository.archive.max.usage.percentage=50%

nifi.content.repository.archive.enabled=true

nifi.content.repository.always.sync=false

nifi.content.viewer.url=../nifi-content-viewer/

nifi.content.repository.encryption.key.provider.implementation=

nifi.content.repository.encryption.key.provider.location=

nifi.content.repository.encryption.key.id=

nifi.content.repository.encryption.key=


nifi.provenance.repository.implementation=org.apache.nifi.provenance.WriteAheadProvenanceRepository

nifi.provenance.repository.debug.frequency=1_000_000

nifi.provenance.repository.encryption.key.provider.implementation=

nifi.provenance.repository.encryption.key.provider.location=

nifi.provenance.repository.encryption.key.id=

nifi.provenance.repository.encryption.key=


nifi.provenance.repository.directory.default=./provenance_repository

nifi.provenance.repository.max.storage.time=7 days

nifi.provenance.repository.max.storage.size=100 GB

nifi.provenance.repository.rollover.time=120 secs

nifi.provenance.repository.rollover.size=100 MB

nifi.provenance.repository.query.threads=2

nifi.provenance.repository.index.threads=2

nifi.provenance.repository.compress.on.rollover=true

nifi.provenance.repository.always.sync=false

nifi.provenance.repository.indexed.fields=EventType, FlowFileUUID, Filename, ProcessorID, Relationship

nifi.provenance.repository.indexed.attributes=

nifi.provenance.repository.index.shard.size=4 GB

nifi.provenance.repository.max.attribute.length=65536

nifi.provenance.repository.concurrent.merge.threads=2

nifi.provenance.repository.buffer.size=100000


nifi.components.status.repository.implementation=org.apache.nifi.controller.status.history.VolatileComponentStatusRepository

nifi.components.status.repository.buffer.size=1440

nifi.components.status.snapshot.frequency=1 min


nifi.remote.input.host=nifi-0.nifi.ki.svc.cluster.local

nifi.remote.input.secure=true

nifi.remote.input.socket.port=10000

nifi.remote.input.http.enabled=true

nifi.remote.input.http.transaction.ttl=30 sec

nifi.remote.contents.cache.expiration=30 secs


nifi.web.war.directory=./lib

nifi.web.http.host=

nifi.web.http.port=

nifi.web.http.network.interface.default=

nifi.web.https.host=nifi-0.nifi.ki.svc.cluster.local

nifi.web.https.port=8080

nifi.web.https.network.interface.default=

nifi.web.jetty.working.directory=./work/jetty

nifi.web.jetty.threads=200

nifi.web.max.header.size=16 KB

nifi.web.proxy.context.path=/nifi-api,/nifi

nifi.web.proxy.host=ingress.ourdomain.com


nifi.sensitive.props.key=

nifi.sensitive.props.key.protected=

nifi.sensitive.props.algorithm=PBEWITHMD5AND256BITAES-CBC-OPENSSL

nifi.sensitive.props.provider=BC

nifi.sensitive.props.additional.keys=


nifi.security.keystore=/opt/nifi/nifi-current/security/nifi-0.keystore.jks

nifi.security.keystoreType=jks

nifi.security.keystorePasswd=XXXXXXXXXXXXXXXX

nifi.security.keyPasswd=XXXXXXXXXXXXXXXXX

nifi.security.truststore=/opt/nifi/nifi-current/security/nifi-0.truststore.jks

nifi.security.truststoreType=jks

nifi.security.truststorePasswd=XXXXXXXXXXXXXXXXXXXXXXXXXXX

nifi.security.user.authorizer=managed-authorizer

nifi.security.user.login.identity.provider=

nifi.security.ocsp.responder.url=

nifi.security.ocsp.responder.certificate=


nifi.security.user.oidc.discovery.url=https://keycloak-server-address/auth/realms/Test/.well-known/openid-configuration

nifi.security.user.oidc.connect.timeout=15 secs

nifi.security.user.oidc.read.timeout=15 secs

nifi.security.user.oidc.client.id=nifi

nifi.security.user.oidc.client.secret=XXXXXXXXXXXXXXXXXXXXX

nifi.security.user.oidc.preferred.jwsalgorithm=RS512

nifi.security.user.oidc.additional.scopes=

nifi.security.user.oidc.claim.identifying.user=


nifi.security.user.knox.url=

nifi.security.user.knox.publicKey=

nifi.security.user.knox.cookieName=hadoop-jwt

nifi.security.user.knox.audiences=


nifi.cluster.protocol.heartbeat.interval=30 secs

nifi.cluster.protocol.is.secure=true


nifi.cluster.is.node=true

nifi.cluster.node.address=nifi-0.nifi.ki.svc.cluster.local

nifi.cluster.node.protocol.port=2882

nifi.cluster.node.protocol.threads=40

nifi.cluster.node.protocol.max.threads=50

nifi.cluster.node.event.history.size=25

nifi.cluster.node.connection.timeout=120 secs

nifi.cluster.node.read.timeout=120 secs

nifi.cluster.node.max.concurrent.requests=100

nifi.cluster.firewall.file=

nifi.cluster.flow.election.max.wait.time=5 mins

nifi.cluster.flow.election.max.candidates=


nifi.cluster.load.balance.host=nifi-0.nifi.ki.svc.cluster.local

nifi.cluster.load.balance.port=6342

nifi.cluster.load.balance.connections.per.node=4

nifi.cluster.load.balance.max.thread.count=8

nifi.cluster.load.balance.comms.timeout=30 sec


nifi.zookeeper.connect.string=zk-0.zk-hs.ki.svc.cluster.local:2181,zk-1.zk-hs.ki.svc.cluster.local:2181,zk-2.zk-hs.ki.svc.cluster.local:2181

nifi.zookeeper.connect.timeout=30 secs

nifi.zookeeper.session.timeout=30 secs

nifi.zookeeper.root.node=/nifi

nifi.zookeeper.auth.type=

nifi.zookeeper.kerberos.removeHostFromPrincipal=

nifi.zookeeper.kerberos.removeRealmFromPrincipal=


nifi.kerberos.krb5.file=


nifi.kerberos.service.principal=

nifi.kerberos.service.keytab.location=


nifi.kerberos.spnego.principal=

nifi.kerberos.spnego.keytab.location=

nifi.kerberos.spnego.authentication.expiration=12 hours


nifi.variable.registry.properties=


nifi.analytics.predict.enabled=false

nifi.analytics.predict.interval=3 mins

nifi.analytics.query.interval=5 mins

nifi.analytics.connection.model.implementation=org.apache.nifi.controller.status.analytics.models.OrdinaryLeastSquares

nifi.analytics.connection.model.score.name=rSquared

nifi.analytics.connection.model.score.threshold=.90

________________________________
From: Chris Sampson <chris.samp...@naimuri.com>
Sent: Tuesday, September 29, 2020 12:41 PM
To: users@nifi.apache.org <users@nifi.apache.org>
Subject: Re: Clustered nifi issues

Also, which version of zookeeper and what image (I've found different versions 
and images provided better stability)?


Cheers,

Chris Sampson

On Tue, 29 Sep 2020, 17:34 Sushil Kumar, 
<skm....@gmail.com<mailto:skm....@gmail.com>> wrote:
Hello Wyll

It may be helpful if you can send nifi.properties.

Thanks
Sushil Kumar

On Tue, Sep 29, 2020 at 7:58 AM Wyll Ingersoll 
<wyllys.ingers...@keepertech.com<mailto:wyllys.ingers...@keepertech.com>> wrote:

I have a 3-node Nifi (1.11.4) cluster in a Kubernetes environment (as a 
StatefulSet) using external zookeeper (3 nodes also) to manage state.

Whenever even 1 node (pod/container) goes down or is restarted, it can throw 
the whole cluster into a bad state that forces me to restart ALL of the pods in 
order to recover.  This seems wrong.  The problem seems to be that when the 
primary node goes away, the remaining 2 nodes don't ever try to take over.  
Instead, I have to restart all of them individually until one of them becomes the 
primary, then the other 2 eventually join and sync up.

When one of the nodes is refusing to sync up, I often see these errors in the 
log and the only way to get it back into the cluster is to restart it.  The 
node showing the errors below never seems to be able to rejoin or resync with 
the other 2 nodes.



2020-09-29 10:18:53,324 ERROR [Reconnect to Cluster] 
o.a.nifi.controller.StandardFlowService Handling reconnection request failed 
due to: org.apache.nifi.cluster.ConnectionException: Failed to connect node to 
cluster due to: java.lang.NullPointerException

org.apache.nifi.cluster.ConnectionException: Failed to connect node to cluster 
due to: java.lang.NullPointerException

at 
org.apache.nifi.controller.StandardFlowService.loadFromConnectionResponse(StandardFlowService.java:1035)

at 
org.apache.nifi.controller.StandardFlowService.handleReconnectionRequest(StandardFlowService.java:668)

at 
org.apache.nifi.controller.StandardFlowService.access$200(StandardFlowService.java:109)

at 
org.apache.nifi.controller.StandardFlowService$1.run(StandardFlowService.java:415)

at java.lang.Thread.run(Thread.java:748)

Caused by: java.lang.NullPointerException: null

at 
org.apache.nifi.controller.StandardFlowService.loadFromConnectionResponse(StandardFlowService.java:989)

... 4 common frames omitted

2020-09-29 10:18:53,326 INFO [Reconnect to Cluster] 
o.a.c.f.imps.CuratorFrameworkImpl Starting

2020-09-29 10:18:53,327 INFO [Reconnect to Cluster] 
org.apache.zookeeper.ClientCnxnSocket jute.maxbuffer value is 4194304 Bytes

2020-09-29 10:18:53,328 INFO [Reconnect to Cluster] 
o.a.c.f.imps.CuratorFrameworkImpl Default schema

2020-09-29 10:18:53,807 INFO [Reconnect to Cluster-EventThread] 
o.a.c.f.state.ConnectionStateManager State change: CONNECTED

2020-09-29 10:18:53,809 INFO [Reconnect to Cluster-EventThread] 
o.a.c.framework.imps.EnsembleTracker New config event received: 
{server.1=zk-0.zk-hs.ki.svc.cluster.local:2888:3888:participant;0.0.0.0:2181<http://0.0.0.0:2181>,
 version=0, 
server.3=zk-2.zk-hs.ki.svc.cluster.local:2888:3888:participant;0.0.0.0:2181<http://0.0.0.0:2181>,
 
server.2=zk-1.zk-hs.ki.svc.cluster.local:2888:3888:participant;0.0.0.0:2181<http://0.0.0.0:2181>}

2020-09-29 10:18:53,810 INFO [Curator-Framework-0] 
o.a.c.f.imps.CuratorFrameworkImpl backgroundOperationsLoop exiting

2020-09-29 10:18:53,813 INFO [Reconnect to Cluster-EventThread] 
o.a.c.framework.imps.EnsembleTracker New config event received: 
{server.1=zk-0.zk-hs.ki.svc.cluster.local:2888:3888:participant;0.0.0.0:2181<http://0.0.0.0:2181>,
 version=0, 
server.3=zk-2.zk-hs.ki.svc.cluster.local:2888:3888:participant;0.0.0.0:2181<http://0.0.0.0:2181>,
 
server.2=zk-1.zk-hs.ki.svc.cluster.local:2888:3888:participant;0.0.0.0:2181<http://0.0.0.0:2181>}

2020-09-29 10:18:54,323 INFO [Reconnect to Cluster] 
o.a.n.c.l.e.CuratorLeaderElectionManager Cannot unregister Leader Election Role 
'Primary Node' becuase that role is not registered

2020-09-29 10:18:54,324 INFO [Reconnect to Cluster] 
o.a.n.c.l.e.CuratorLeaderElectionManager Cannot unregister Leader Election Role 
'Cluster Coordinator' becuase that role is not registered

Reply via email to