All,
This week we lost 23,000 messages in a few days' time on our production
cluster running Artemis 2.26.0; see our settings below. We've reverted
to Artemis 2.20.0 just in case.
A few observations:
* In versions 2.24.0, 2.25.0 and 2.26.0 running on ZGC we noticed
messages that were produced to a queue without errors but that we
could not find in that queue afterwards. At the same time we saw
incorrect counters. We restarted nodes to resolve this, but on one
occasion the error continued for some time after that, and we never
found the messages again, not even when exporting the journal files.
The errors showed up after running for a few days.
* In version 2.20.0 running on G1GC and on ZGC we did not lose any
messages. We did experience memory issues resulting in (too) long
garbage collection times every other week, maybe due to a lack of JVM
tuning on our side. We were running 2.20 on G1GC for several months.
We're running a symmetric cluster of 3 live/backup pairs in Docker JRE
(Temurin) containers on VMware CentOS 7 hosts. Each live node has around
1,000 producers & consumers continuously.
I hope the Artemis community can advise us on this.
Best Regards,
Walter
Our setup:
**docker-compose.yaml:**
version: "3.8"
services:
artemis:
container_name: 'artemis'
network_mode: "host"
image: "cdplatform/activemq-artemis:2.26.0"
restart: 'always'
hostname: cjiblx8408.ato.cjib.minjus.nl
volumes:
- "/data/artemis/data:/var/lib/artemis/data"
- "/data/artemis/plugins:/var/lib/artemis/lib"
- "/data/artemis/etc:/var/lib/artemis/etc"
- "/data/artemis/etc-override:/var/lib/artemis/etc-override"
- "/logging/artemis:/var/lib/artemis/log"
environment:
ARTEMIS_MIN_MEMORY: "14051615047"
ARTEMIS_MAX_MEMORY: "14051615047"
JAVA_XTRA_ARGS: "-XX:ActiveProcessorCount=4 -XX:+UseZGC
-XX:+UseDynamicNumberOfGCThreads -XX:+UseStringDeduplication "
BROKER_SETTINGS_FILE: "broker-settings.xml"
ENABLE_JMX: "true"
JMX_PORT: "3333"
ENABLE_JMX_EXPORTER: "true"
JMX_RMI_PORT: "1098"
mem_swappiness: 0
memswap_limit: 20073735782
deploy:
resources:
limits:
memory: "20073735782"
reservations:
memory: "20073735782"
*Command line options:*
/opt/java/openjdk/bin/java
-javaagent:/opt/jmx-exporter/jmx_prometheus_javaagent.jar=9404:/opt/jmx-exporter/etc/jmx-exporter-config.yaml
-Xmx17564518809
-Xms17564518809
-Dcom.sun.management.jmxremote.authenticate=true
-Dcom.sun.management.jmxremote.password.file=/var/lib/artemis/etc/jmxremote.password
-Dcom.sun.management.jmxremote.access.file=/var/lib/artemis/etc/jmxremote.access
-Dcom.sun.management.jmxremote.port=3333
-Dcom.sun.management.jmxremote.rmi.port=1098
-Dcom.sun.management.jmxremote.ssl=false
-Djava.net.preferIPv4Addresses=true
-Djava.net.preferIPv4Stack=true
-XX:ActiveProcessorCount=4
-XX:+UseZGC
-XX:+UseDynamicNumberOfGCThreads
-XX:+UseStringDeduplication
-Dhawtio.realm=activemq
-Dhawtio.offline=true
-Dhawtio.role=gs-auth-Artemis_Admin,gs-auth-Artemis_User
-DPrincipalClasses=org.apache.activemq.artemis.spi.core.security.jaas.RolePrincipal
-Djolokia.policyLocation=file:/var/lib/artemis/etc/jolokia-access.xml
-Dcom.sun.management.jmxremote.ssl=false
-Xbootclasspath/a:/var/lib/artemis/lib/javax.json-1.1.4.jar
-Dhawtio.role=gs-auth-Artemis_Admin,gs-auth-Artemis_User
-Xbootclasspath/a:/opt/apache-artemis/lib/jboss-logmanager-2.1.18.Final.jar:/opt/apache-artemis/lib/wildfly-common-1.5.2.Final.jar:/opt/apache-artemis/lib/javax.json-1.1.4.jar
-Djava.security.auth.login.config=/var/lib/artemis/etc/login.config
-classpath /opt/apache-artemis/lib/artemis-boot.jar
-Dartemis.home=/opt/apache-artemis
-Dartemis.instance=/var/lib/artemis
-Djava.library.path=/opt/apache-artemis/bin/lib/linux-x86_64
-Djava.io.tmpdir=/var/lib/artemis/tmp
-Ddata.dir=/var/lib/artemis/data
-Dartemis.instance.etc=/var/lib/artemis/etc
-Djava.util.logging.manager=org.jboss.logmanager.LogManager
-Dlogging.configuration=file:/var/lib/artemis/etc//logging.properties
-Dartemis.default.sensitive.string.codec.key=
org.apache.activemq.artemis.boot.Artemis
run
**broker-settings.xml:**
<core xmlns="urn:activemq:core">
<global-max-size>2810323009</global-max-size>
<name>xxxxxxx.xxxxxx.xx</name>
<graceful-shutdown-enabled
xmlns="urn:activemq:core">true</graceful-shutdown-enabled>
<graceful-shutdown-timeout
xmlns="urn:activemq:core">10000</graceful-shutdown-timeout>
<management-address
xmlns="urn:activemq:core">activemq.management</management-address>
<persistence-enabled
xmlns="urn:activemq:core">true</persistence-enabled>
<id-cache-size xmlns="urn:activemq:core">20000</id-cache-size>
<persist-id-cache xmlns="urn:activemq:core">true</persist-id-cache>
<paging-directory
xmlns="urn:activemq:core">data/paging</paging-directory>
<bindings-directory
xmlns="urn:activemq:core">data/bindings</bindings-directory>
<large-messages-directory
xmlns="urn:activemq:core">data/large-messages</large-messages-directory>
<journal-directory
xmlns="urn:activemq:core">data/journal</journal-directory>
<journal-type xmlns="urn:activemq:core">ASYNCIO</journal-type>
<journal-datasync xmlns="urn:activemq:core">true</journal-datasync>
<journal-min-files xmlns="urn:activemq:core">2</journal-min-files>
<journal-pool-files xmlns="urn:activemq:core">10</journal-pool-files>
<journal-device-block-size
xmlns="urn:activemq:core">4096</journal-device-block-size>
<journal-file-size xmlns="urn:activemq:core">10MB</journal-file-size>
<journal-buffer-size
xmlns="urn:activemq:core">490KB</journal-buffer-size>
<journal-compact-min-files
xmlns="urn:activemq:core">10</journal-compact-min-files>
<journal-compact-percentage
xmlns="urn:activemq:core">30</journal-compact-percentage>
<journal-lock-acquisition-timeout
xmlns="urn:activemq:core">-1</journal-lock-acquisition-timeout>
<journal-file-open-timeout
xmlns="urn:activemq:core">5</journal-file-open-timeout>
<journal-sync-non-transactional
xmlns="urn:activemq:core">true</journal-sync-non-transactional>
<journal-sync-transactional
xmlns="urn:activemq:core">true</journal-sync-transactional>
<disk-scan-period xmlns="urn:activemq:core">5000</disk-scan-period>
<max-disk-usage xmlns="urn:activemq:core">90</max-disk-usage>
<critical-analyzer xmlns="urn:activemq:core">true</critical-analyzer>
<critical-analyzer-timeout
xmlns="urn:activemq:core">120000</critical-analyzer-timeout>
<critical-analyzer-check-period
xmlns="urn:activemq:core">60000</critical-analyzer-check-period>
<critical-analyzer-policy
xmlns="urn:activemq:core">LOG</critical-analyzer-policy>
<page-sync-timeout
xmlns="urn:activemq:core">548000</page-sync-timeout>
<acceptors xmlns="urn:activemq:core">
<acceptor
name="artemis">tcp://0.0.0.0:61616?tcpSendBufferSize=1048576;tcpReceiveBufferSize=1048576;amqpMinLargeMessageSize=102400;connectionsAllowed=1536;directDeliver=false;useEpoll=true;amqpCredits=1000;amqpLowCredits=300;amqpDuplicateDetection=true;protocols=CORE,AMQP,STOMP,HORNETQ,OPENWIRE;</acceptor>
</acceptors>
<connectors xmlns="urn:activemq:core">
<connector name="artemis">tcp://xxxxxxx.xxxxxx.xx:61616</connector>
</connectors>
<cluster-user xmlns="urn:activemq:core">artemis</cluster-user>
<cluster-password
xmlns="urn:activemq:core">xxxxxxxx</cluster-password>
<broadcast-groups xmlns="urn:activemq:core">
<broadcast-group name="bg-group1">
<group-address>231.7.7.10</group-address>
<group-port>9876</group-port>
<broadcast-period>5000</broadcast-period>
<connector-ref>artemis</connector-ref>
</broadcast-group>
</broadcast-groups>
<discovery-groups xmlns="urn:activemq:core">
<discovery-group name="dg-group1">
<group-address>231.7.7.10</group-address>
<group-port>9876</group-port>
<refresh-timeout>10000</refresh-timeout>
</discovery-group>
</discovery-groups>
<cluster-connections xmlns="urn:activemq:core">
<cluster-connection name="artemis-ato">
<connector-ref>artemis</connector-ref>
<retry-interval>2000</retry-interval>
<initial-connect-attempts>1000</initial-connect-attempts>
<reconnect-attempts>1000</reconnect-attempts>
<message-load-balancing>ON_DEMAND</message-load-balancing>
<max-hops>1</max-hops>
<discovery-group-ref discovery-group-name="dg-group1"/>
</cluster-connection>
</cluster-connections>
<ha-policy xmlns="urn:activemq:core">
<replication>
<master>
<check-for-live-server>true</check-for-live-server>
<vote-on-replication-failure>true</vote-on-replication-failure>
<group-name>ato-hapair-1</group-name>
</master>
</replication>
</ha-policy>
<metrics xmlns="urn:activemq:core">
<jvm-memory>true</jvm-memory>
<jvm-gc>true</jvm-gc>
<jvm-threads>true</jvm-threads>
<plugin
class-name="org.apache.activemq.artemis.core.server.metrics.plugins.ArtemisPrometheusMetricsPlugin"/>
</metrics>
<security-settings xmlns="urn:activemq:core">
<security-setting match="activemq.management">
<permission type="manage" roles="amq,service"/>
</security-setting>
<security-setting match="#">
<permission type="manage" roles="amq,service"/>
<permission type="send" roles="amq,service,b2bi"/>
<permission type="consume" roles="amq,service,b2bi"/>
<permission type="browse" roles="amq,service"/>
<permission type="createAddress" roles="amq,service"/>
<permission type="deleteAddress" roles="amq,service"/>
<permission type="createDurableQueue" roles="amq,service"/>
<permission type="deleteDurableQueue" roles="amq,service"/>
<permission type="createNonDurableQueue" roles="amq,service"/>
<permission type="deleteNonDurableQueue" roles="amq,service"/>
</security-setting>
<role-mapping from="gs-auth-Artemis_Admin" to="amq"/>
<role-mapping from="gs-auth-Artemis_User" to="service"/>
</security-settings>
<address-settings xmlns="urn:activemq:core">
<address-setting match="activemq.management#">
<dead-letter-address>DLQ</dead-letter-address>
<expiry-address>ExpiryQueue</expiry-address>
<redelivery-delay>0</redelivery-delay>
<message-counter-history-day-limit>10</message-counter-history-day-limit>
<max-size-bytes>-1</max-size-bytes>
<max-size-messages>-1</max-size-messages>
<address-full-policy>PAGE</address-full-policy>
<auto-create-queues>true</auto-create-queues>
<auto-create-addresses>true</auto-create-addresses>
<auto-create-jms-queues>true</auto-create-jms-queues>
<auto-create-jms-topics>true</auto-create-jms-topics>
</address-setting>
<address-setting match="#">
<dead-letter-address>DLQ</dead-letter-address>
<expiry-address>ExpiryQueue</expiry-address>
<redelivery-delay>0</redelivery-delay>
<message-counter-history-day-limit>10</message-counter-history-day-limit>
<max-size-bytes>-1</max-size-bytes>
<max-size-messages>-1</max-size-messages>
<address-full-policy>PAGE</address-full-policy>
<auto-create-queues>true</auto-create-queues>
<auto-create-addresses>true</auto-create-addresses>
<auto-create-jms-queues>true</auto-create-jms-queues>
<auto-create-jms-topics>true</auto-create-jms-topics>
</address-setting>
<address-setting match="jms.#">
<dead-letter-address>DLQ</dead-letter-address>
<expiry-address>ExpiryQueue</expiry-address>
<max-delivery-attempts>5</max-delivery-attempts>
<redelivery-delay>500</redelivery-delay>
<redelivery-delay-multiplier>1.5</redelivery-delay-multiplier>
<redelivery-collision-avoidance-factor>0.5</redelivery-collision-avoidance-factor>
<redistribution-delay>30000</redistribution-delay>
<send-to-dla-on-no-route>true</send-to-dla-on-no-route>
<max-size-bytes>-1</max-size-bytes>
<max-size-messages>-1</max-size-messages>
<address-full-policy>PAGE</address-full-policy>
<message-counter-history-day-limit>10</message-counter-history-day-limit>
<auto-create-queues>false</auto-create-queues>
<auto-delete-queues>false</auto-delete-queues>
<auto-delete-created-queues>false</auto-delete-created-queues>
<auto-delete-queues-delay>30000</auto-delete-queues-delay>
<config-delete-queues>OFF</config-delete-queues>
<auto-create-addresses>false</auto-create-addresses>
<auto-delete-addresses>false</auto-delete-addresses>
<auto-delete-addresses-delay>30000</auto-delete-addresses-delay>
<config-delete-addresses>OFF</config-delete-addresses>
</address-setting>
<address-setting match="activemq.notifications">
<max-size-bytes>-1</max-size-bytes>
<max-size-messages>-1</max-size-messages>
<address-full-policy>PAGE</address-full-policy>
</address-setting>
<address-setting match="jms.queue.#">
<default-address-routing-type>ANYCAST</default-address-routing-type>
<default-queue-routing-type>ANYCAST</default-queue-routing-type>
</address-setting>
<address-setting match="jms.topic.#">
<default-address-routing-type>MULTICAST</default-address-routing-type>
<default-queue-routing-type>MULTICAST</default-queue-routing-type>
</address-setting>
</address-settings>
<addresses xmlns="urn:activemq:core">
<address name="DLQ">
<anycast>
<queue name="DLQ"/>
</anycast>
</address>
<address name="ExpiryQueue">
<anycast>
<queue name="ExpiryQueue"/>
</anycast>
</address>
</addresses>
<broker-plugins xmlns="urn:activemq:core">
<broker-plugin
class-name="org.apache.activemq.artemis.core.server.plugin.impl.LoggingActiveMQServerPlugin">
<property key="LOG_ALL_EVENTS" value="false"/>
<property key="LOG_CONNECTION_EVENTS" value="false"/>
<property key="LOG_SESSION_EVENTS" value="false"/>
<property key="LOG_CONSUMER_EVENTS" value="false"/>
<property key="LOG_DELIVERING_EVENTS" value="false"/>
<property key="LOG_SENDING_EVENTS" value="false"/>
<property key="LOG_INTERNAL_EVENTS" value="false"/>
</broker-plugin>
</broker-plugins>
</core>