YanshuoH commented on issue #12716:
URL: https://github.com/apache/pulsar/issues/12716#issuecomment-973693802
Sure.
A somewhat strange (and annoying) thing is that I've enabled leak detection up to
paranoid, but didn't find any leak report.
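For context on why paranoid mode can stay silent: Netty only logs a `LEAK:` record once a leaked buffer has actually been garbage-collected and a later allocation gives the detector a chance to poll its reference queue, so buffers that are still reachable somewhere never produce a report. A minimal sketch of that mechanism (the `LeakDemo` class name is hypothetical; it assumes `netty-buffer` is on the classpath):

```java
import io.netty.buffer.ByteBuf;
import io.netty.buffer.PooledByteBufAllocator;
import io.netty.util.ResourceLeakDetector;

// Minimal sketch of how Netty leak detection reports: a "LEAK:" log line is
// emitted only after a leaked buffer becomes unreachable and is collected.
// Buffers that are still referenced (e.g. parked in a queue) never show up,
// which is one way direct memory can grow with no leak report at paranoid.
public class LeakDemo {
    public static void main(String[] args) throws InterruptedException {
        ResourceLeakDetector.setLevel(ResourceLeakDetector.Level.PARANOID);
        ByteBuf buf = PooledByteBufAllocator.DEFAULT.directBuffer(256);
        buf = null;        // drop the reference without calling release()
        System.gc();       // hint: make the leaked buffer collectable
        Thread.sleep(1000);
        // A subsequent allocation gives the detector a chance to report.
        PooledByteBufAllocator.DEFAULT.directBuffer(256).release();
    }
}
```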
<details>
<summary>Broker Conf</summary>
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: pulsar-broker
  namespace: pulsar
  labels:
    app: pulsar
    release: 2.6.1
    cluster: 2.6.1-pulsar
    component: broker
data:
  # Metadata settings
  zookeeperServers: "zookeeper.pulsar:2181"
  configurationStoreServers: "zookeeper.pulsar:2181"
  # Broker settings
  clusterName: 2.6.1-pulsar
  exposeTopicLevelMetricsInPrometheus: "false"
  exposeConsumerLevelMetricsInPrometheus: "false"
  numHttpServerThreads: "8"
  zooKeeperSessionTimeoutMillis: "30000"
  statusFilePath: "/pulsar/status"
  defaultRetentionTimeInMinutes: "10080" # 7 days
  defaultRetentionSizeInMB: "1000" # ~1 GB
  brokerDeleteInactiveTopicsEnabled: "false"
  brokerDeduplicationEnabled: "false"
  subscriptionExpirationTimeMinutes: "60"
  # Topic auto-creation: this is crucial for geo-replication;
  # partitioning must be the same between clusters.
  allowAutoTopicCreation: "false"
  allowAutoTopicCreationType: "partitioned"
  defaultNumPartitions: "1"
  # bundles
  defaultNumberOfNamespaceBundles: "8"
  # Disable bundle split to avoid thrashing during peak time.
  loadBalancerAutoBundleSplitEnabled: "false"
  loadBalancerAutoUnloadSplitBundlesEnabled: "true"
  loadBalancerNamespaceBundleMaxTopics: "1000"
  loadBalancerNamespaceBundleMaxSessions: "1000"
  loadBalancerNamespaceBundleMaxMsgRate: "20000"
  loadBalancerNamespaceMaximumBundles: "128"
  # throttle
  # Max memory size for the broker to use when handling messages sent from producers.
  # If the size of in-flight messages exceeds this value, the broker stops reading data
  # from the connection. "In-flight" messages are messages that have been sent to the
  # broker but not yet acknowledged to the client, usually waiting to be written to bookies.
  # The limit is shared across all topics running on the same broker.
  # Use -1 to disable the memory limitation. Default is 1/2 of direct memory.
  maxMessagePublishBufferSizeInMB: ""
  # Interval between checks of whether the message publish buffer size exceeds
  # the max message publish buffer size.
  # Use 0 or a negative number to disable the max publish buffer limiting.
  messagePublishBufferCheckIntervalInMillis: "100"
  # shedding
  loadBalancerSheddingEnabled: "true"
  loadBalancerLoadSheddingStrategy: "org.apache.pulsar.broker.loadbalance.impl.ThresholdShedder"
  loadBalancerSheddingIntervalMinutes: "1"
  loadBalancerSheddingGracePeriodMinutes: "10"
  loadBalancerBrokerOverloadedThresholdPercentage: "50"
  # The broker resource usage threshold.
  # When the broker resource usage exceeds the Pulsar cluster average resource usage
  # by this percentage, the threshold shedder is triggered to offload bundles from the broker.
  # It only takes effect with the ThresholdShedder strategy.
  loadBalancerBrokerThresholdShedderPercentage: "10"
  # How heavily historical usage is weighted when calculating new resource usage.
  # It only takes effect with the ThresholdShedder strategy.
  loadBalancerHistoryResourcePercentage: "0.9"
  # The bandwidth-in usage weight when calculating new resource usage.
  # It only takes effect with the ThresholdShedder strategy.
  loadBalancerBandwithInResourceWeight: "1.0"
  # The bandwidth-out usage weight when calculating new resource usage.
  # It only takes effect with the ThresholdShedder strategy.
  loadBalancerBandwithOutResourceWeight: "1.0"
  # The CPU usage weight when calculating new resource usage.
  # It only takes effect with the ThresholdShedder strategy.
  loadBalancerCPUResourceWeight: "1.0"
  # The heap memory usage weight when calculating new resource usage.
  # It only takes effect with the ThresholdShedder strategy.
  loadBalancerMemoryResourceWeight: "1.0"
  # The direct memory usage weight when calculating new resource usage.
  # It only takes effect with the ThresholdShedder strategy.
  loadBalancerDirectMemoryResourceWeight: "1.0"
  # Bundle unload minimum throughput threshold (MB), to avoid unloading bundles too frequently.
  # It only takes effect with the ThresholdShedder strategy.
  loadBalancerBundleUnloadMinThroughputThreshold: "10"
  # Consumer
  # Precise dispatcher flow control according to the historical message count of each entry.
  preciseDispatcherFlowControl: "true"
  # Authorization & Authentication
  authenticationEnabled: "true"
  authenticationProviders: "org.apache.pulsar.broker.authentication.AuthenticationProviderToken"
  authorizationEnabled: "true"
  authorizationProvider: "org.apache.pulsar.broker.authorization.PulsarAuthorizationProvider"
  superUserRoles: "admin,ops,pulsar"
  proxyRoles: "pulsar-proxy"
  # This is not very important since traffic will come from pulsar-proxy,
  # but enabling it allows us to enable RBAC on authorization.
  tokenSecretKey: "data:;base64,redacted" # redacted
  brokerClientAuthenticationPlugin: "org.apache.pulsar.client.impl.auth.AuthenticationToken"
  brokerClientAuthenticationParameters: "token:redacted"
  # Ledger offload
  managedLedgerOffloadDriver: "aws-s3"
  # this is the actual bucket
  s3ManagedLedgerOffloadServiceEndpoint: "https://pulsar-cluster.s3.cn-northwest-1.amazonaws.com.cn"
  # this is the directory
  s3ManagedLedgerOffloadBucket: "pulsar-primary"
  s3ManagedLedgerOffloadRegion: "cn-northwest-1"
  # s3ManagedLedgerOffloadReadBufferSizeInBytes: "1000000" # 1 MB by default
  # s3ManagedLedgerOffloadMaxBlockSizeInBytes: "64000000" # 64 MB by default
  # currently WebIdentity is not working
  # s3ManagedLedgerOffloadRole: "arn:aws-cn:iam::651844176281:role/pulsar-broker-sa"
  # s3ManagedLedgerOffloadRoleSessionName: "pulsar-s3-offload"
  managedLedgerOffloadAutoTriggerSizeThresholdBytes: "10000000" # 10 MB
  # managedLedgerOffloadAutoTriggerSizeThresholdBytes: "1000000000" # 1 GB
  # Function Worker Settings
  # function worker configuration
  functionsWorkerEnabled: "false"
  PF_functionRuntimeFactoryClassName: "org.apache.pulsar.functions.runtime.kubernetes.KubernetesRuntimeFactory"
  PF_pulsarFunctionsCluster: 2.6.1-pulsar
  PF_connectorsDirectory: ./connectors
  PF_containerFactory: k8s
  PF_numFunctionPackageReplicas: "2"
  # supported in version >= 2.5.0
  PF_functionRuntimeFactoryConfigs_pulsarRootDir: /pulsar
  PF_kubernetesContainerFactory_pulsarRootDir: /pulsar
  PF_functionRuntimeFactoryConfigs_pulsarDockerImageName: "651844176281.dkr.ecr.cn-northwest-1.amazonaws.com.cn/apachepulsar/pulsar:2.8.1"
  PF_functionRuntimeFactoryConfigs_submittingInsidePod: "true"
  PF_functionRuntimeFactoryConfigs_installUserCodeDependencies: "true"
  PF_functionRuntimeFactoryConfigs_jobNamespace: pulsar
  PF_functionRuntimeFactoryConfigs_expectedMetricsCollectionInterval: "30"
  PF_functionRuntimeFactoryConfigs_pulsarAdminUrl: "http://pulsar-broker:8080/"
  PF_functionRuntimeFactoryConfigs_pulsarServiceUrl: "pulsar://pulsar-broker:6650/"
  PF_functionRuntimeFactoryConfigs_changeConfigMap: "pulsar-functions-worker"
  PF_functionRuntimeFactoryConfigs_changeConfigMapNamespace: pulsar
  # supported in version < 2.5.0
  PF_kubernetesContainerFactory_pulsarDockerImageName: "651844176281.dkr.ecr.cn-northwest-1.amazonaws.com.cn/apachepulsar/pulsar:2.8.1"
  PF_kubernetesContainerFactory_submittingInsidePod: "true"
  PF_kubernetesContainerFactory_installUserCodeDependencies: "true"
  PF_kubernetesContainerFactory_jobNamespace: pulsar
  PF_kubernetesContainerFactory_expectedMetricsCollectionInterval: "30"
  PF_kubernetesContainerFactory_pulsarAdminUrl: "http://pulsar-broker:8080/"
  PF_kubernetesContainerFactory_pulsarServiceUrl: "pulsar://pulsar-broker:6650/"
  PF_kubernetesContainerFactory_changeConfigMap: "pulsar-functions-worker"
  PF_kubernetesContainerFactory_changeConfigMapNamespace: pulsar
  # prometheus needs to access the /metrics endpoint
  webServicePort: "8080"
  brokerServicePort: "6650"
  # Java Env
  PULSAR_GC: >
    -XX:+UseG1GC
    -XX:MaxGCPauseMillis=10
    -XX:+ParallelRefProcEnabled
    -XX:+UnlockExperimentalVMOptions
    -XX:+DoEscapeAnalysis
    -XX:ParallelGCThreads=4
    -XX:ConcGCThreads=4
    -XX:G1NewSizePercent=50
    -XX:+DisableExplicitGC
    -XX:-ResizePLAB
    -XX:+ExitOnOutOfMemoryError
    -XX:+PerfDisableSharedMem
  PULSAR_MEM: |
    -Xms512m -Xmx2048m -XX:MaxDirectMemorySize=3072m
  PULSAR_EXTRA_OPTS: >
    -Dio.netty.leakDetectionLevel=paranoid
    -Dio.netty.recycler.linkCapacity=1024
    -Dio.netty.leakDetection.targetRecords=100
    -XX:+HeapDumpOnOutOfMemoryError
    -XX:NativeMemoryTracking=detail
    -Dpulsar.allocator.leak_detection=Paranoid
    -XX:+UnlockDiagnosticVMOptions
  # Ledger Quorum
  managedLedgerDefaultAckQuorum: "2"
  managedLedgerDefaultEnsembleSize: "2"
  managedLedgerDefaultWriteQuorum: "2"
  # quotas
  backlogQuotaCheckEnabled: "false"
  backlogQuotaDefaultRetentionPolicy: "consumer_backlog_eviction"
```
</details>
<details>
<summary>Bookie Conf</summary>
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: bookie
namespace: pulsar
labels:
app: pulsar
release: 2.6.3
component: bookie
cluster: 2.6.1-pulsar
data:
  # common config
  zkServers: "zookeeper.pulsar:2181"
  zkLedgersRootPath: "/ledgers"
  # enable the bookkeeper http server
  httpServerEnabled: "true"
  httpServerPort: "8000"
  # configure the stats provider
  statsProviderClass: org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider
  # use the hostname as the bookie id
  useHostNameAsBookieID: "true"
  # Disable auto-recovery on bookies since we start AutoRecovery in separate pods.
  autoRecoveryDaemonEnabled: "false"
  # Do not retain journal files, as they increase disk utilization.
  journalMaxBackups: "0"
  journalDirectories: "/pulsar/data/bookkeeper/journal"
  PULSAR_PREFIX_journalDirectories: "/pulsar/data/bookkeeper/journal"
  ledgerDirectories: "/pulsar/data/bookkeeper/ledgers"
  allowStorageExpansion: "true"
  BOOKIE_GC: >
    -XX:+UseG1GC
    -XX:MaxGCPauseMillis=10
    -XX:+ParallelRefProcEnabled
    -XX:+UnlockExperimentalVMOptions
    -XX:+DoEscapeAnalysis
    -XX:ParallelGCThreads=4
    -XX:ConcGCThreads=4
    -XX:G1NewSizePercent=50
    -XX:+DisableExplicitGC
    -XX:-ResizePLAB
    -XX:+ExitOnOutOfMemoryError
    -XX:+PerfDisableSharedMem
    -XX:+PrintGCDetails
    -verbosegc
    -Xlog:gc:/var/log/bookie-gc.log
  BOOKIE_MEM: >
    -Xms512m -Xmx2048m -XX:MaxDirectMemorySize=1024m
  # db storage
  # Controls the maximum entry read-ahead cache size.
  # default: 25% of total direct memory
  dbStorage_readAheadCacheMaxSizeMb: ""
  # Specifies the size of the RocksDB block cache.
  # default: 10% of total direct memory
  dbStorage_rocksDB_blockCacheSize: ""
  dbStorage_rocksDB_writeBufferSizeMB: "64"
  # Specifies the max size of the write cache (in MB).
  # default: 25% of total direct memory
  dbStorage_writeCacheMaxSizeMb: ""
```
</details>
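Given the `-XX:NativeMemoryTracking=detail` and `-XX:MaxDirectMemorySize` settings above, one quick way to see how much direct memory the JVM itself accounts for is the platform `BufferPoolMXBean`s. A minimal sketch (the `DirectMemoryCheck` class name is hypothetical); note that Netty may allocate direct memory through `Unsafe`, which does not appear in these pools, so the broker's own direct-memory metrics remain the more complete view:

```java
import java.lang.management.BufferPoolMXBean;
import java.lang.management.ManagementFactory;

// Minimal sketch: dump the JVM's NIO buffer-pool accounting.
// This covers only NIO-managed direct/mapped buffers, so treat it as a
// lower bound on the process's real direct-memory footprint.
public class DirectMemoryCheck {
    public static void main(String[] args) {
        for (BufferPoolMXBean pool
                : ManagementFactory.getPlatformMXBeans(BufferPoolMXBean.class)) {
            System.out.printf("%s: used=%,d bytes, capacity=%,d bytes, buffers=%d%n",
                    pool.getName(), pool.getMemoryUsed(),
                    pool.getTotalCapacity(), pool.getCount());
        }
    }
}
```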