All,

This week we lost 23,000 messages within a few days on our production cluster running Artemis 2.26.0; see our settings below. We've reverted to Artemis 2.20.0 just in case.

A few observations:

 * In versions 2.24.0, 2.25.0 and 2.26.0 running on ZGC, we noticed
   messages being produced to a queue without errors that we then could
   not find in that queue. At the same time we saw incorrect counters.
   We restarted nodes to resolve this, but on one occasion the error
   continued for some time after that, and we never found the messages
   again, not even when exporting the journal files. The errors showed
   up after the brokers had been running for a few days.
 * In version 2.20.0 running on G1GC and on ZGC we did not lose any
   messages. We did experience memory issues resulting in (too) long
   garbage collection times every other week, maybe due to a lack of JVM
   tuning on our side. We ran 2.20 on G1GC for several months.

We're running a symmetric cluster of 3 live/backup pairs in Docker JRE (Temurin) containers on VMware CentOS 7 hosts. Each live node continuously serves around 1,000 producers and consumers.

I hope the Artemis community can advise us on this.

Best Regards,

Walter


Our setup:

*docker-compose.yaml:*
     version: "3.8"

     services:
       artemis:
         container_name: 'artemis'
         network_mode: "host"
         image: "cdplatform/activemq-artemis:2.26.0"
         restart: 'always'
         hostname: cjiblx8408.ato.cjib.minjus.nl
         volumes:
           - "/data/artemis/data:/var/lib/artemis/data"
           - "/data/artemis/plugins:/var/lib/artemis/lib"
           - "/data/artemis/etc:/var/lib/artemis/etc"
           - "/data/artemis/etc-override:/var/lib/artemis/etc-override"
           - "/logging/artemis:/var/lib/artemis/log"
         environment:
           ARTEMIS_MIN_MEMORY:  "14051615047"
           ARTEMIS_MAX_MEMORY:  "14051615047"
           JAVA_XTRA_ARGS: "-XX:ActiveProcessorCount=4 -XX:+UseZGC -XX:+UseDynamicNumberOfGCThreads -XX:+UseStringDeduplication "
           BROKER_SETTINGS_FILE: "broker-settings.xml"
           ENABLE_JMX:          "true"
           JMX_PORT:            "3333"
           ENABLE_JMX_EXPORTER: "true"
           JMX_RMI_PORT:        "1098"
         mem_swappiness: 0
         memswap_limit: 20073735782
         deploy:
           resources:
             limits:
               memory: "20073735782"
             reservations:
               memory: "20073735782"

*Command line options:*

   /opt/java/openjdk/bin/java
   
-javaagent:/opt/jmx-exporter/jmx_prometheus_javaagent.jar=9404:/opt/jmx-exporter/etc/jmx-exporter-config.yaml
      -Xmx17564518809
      -Xms17564518809
      -Dcom.sun.management.jmxremote.authenticate=true
   
-Dcom.sun.management.jmxremote.password.file=/var/lib/artemis/etc/jmxremote.password
   
-Dcom.sun.management.jmxremote.access.file=/var/lib/artemis/etc/jmxremote.access
      -Dcom.sun.management.jmxremote.port=3333
      -Dcom.sun.management.jmxremote.rmi.port=1098
      -Dcom.sun.management.jmxremote.ssl=false
      -Djava.net.preferIPv4Addresses=true
      -Djava.net.preferIPv4Stack=true
      -XX:ActiveProcessorCount=4
      -XX:+UseZGC
      -XX:+UseDynamicNumberOfGCThreads
      -XX:+UseStringDeduplication
      -Dhawtio.realm=activemq
      -Dhawtio.offline=true
      -Dhawtio.role=gs-auth-Artemis_Admin,gs-auth-Artemis_User
   
-DPrincipalClasses=org.apache.activemq.artemis.spi.core.security.jaas.RolePrincipal
   -Djolokia.policyLocation=file:/var/lib/artemis/etc/jolokia-access.xml
      -Dcom.sun.management.jmxremote.ssl=false
      -Xbootclasspath/a:/var/lib/artemis/lib/javax.json-1.1.4.jar
      -Dhawtio.role=gs-auth-Artemis_Admin,gs-auth-Artemis_User
   
-Xbootclasspath/a:/opt/apache-artemis/lib/jboss-logmanager-2.1.18.Final.jar:/opt/apache-artemis/lib/wildfly-common-1.5.2.Final.jar:/opt/apache-artemis/lib/javax.json-1.1.4.jar
   -Djava.security.auth.login.config=/var/lib/artemis/etc/login.config
      -classpath /opt/apache-artemis/lib/artemis-boot.jar
      -Dartemis.home=/opt/apache-artemis
      -Dartemis.instance=/var/lib/artemis
      -Djava.library.path=/opt/apache-artemis/bin/lib/linux-x86_64
      -Djava.io.tmpdir=/var/lib/artemis/tmp
      -Ddata.dir=/var/lib/artemis/data
      -Dartemis.instance.etc=/var/lib/artemis/etc
      -Djava.util.logging.manager=org.jboss.logmanager.LogManager
   -Dlogging.configuration=file:/var/lib/artemis/etc//logging.properties
      -Dartemis.default.sensitive.string.codec.key=
   org.apache.activemq.artemis.boot.Artemis
      run

*broker-settings.xml:*

   <core xmlns="urn:activemq:core">
      <global-max-size>2810323009</global-max-size>
      <name>xxxxxxx.xxxxxx.xx</name>
      <graceful-shutdown-enabled
   xmlns="urn:activemq:core">true</graceful-shutdown-enabled>
      <graceful-shutdown-timeout
   xmlns="urn:activemq:core">10000</graceful-shutdown-timeout>
      <management-address
   xmlns="urn:activemq:core">activemq.management</management-address>
      <persistence-enabled
   xmlns="urn:activemq:core">true</persistence-enabled>
      <id-cache-size xmlns="urn:activemq:core">20000</id-cache-size>
      <persist-id-cache xmlns="urn:activemq:core">true</persist-id-cache>
      <paging-directory
   xmlns="urn:activemq:core">data/paging</paging-directory>
      <bindings-directory
   xmlns="urn:activemq:core">data/bindings</bindings-directory>
      <large-messages-directory
   xmlns="urn:activemq:core">data/large-messages</large-messages-directory>
      <journal-directory
   xmlns="urn:activemq:core">data/journal</journal-directory>
      <journal-type xmlns="urn:activemq:core">ASYNCIO</journal-type>
      <journal-datasync xmlns="urn:activemq:core">true</journal-datasync>
      <journal-min-files xmlns="urn:activemq:core">2</journal-min-files>
      <journal-pool-files xmlns="urn:activemq:core">10</journal-pool-files>
      <journal-device-block-size
   xmlns="urn:activemq:core">4096</journal-device-block-size>
      <journal-file-size xmlns="urn:activemq:core">10MB</journal-file-size>
      <journal-buffer-size
   xmlns="urn:activemq:core">490KB</journal-buffer-size>
      <journal-compact-min-files
   xmlns="urn:activemq:core">10</journal-compact-min-files>
      <journal-compact-percentage
   xmlns="urn:activemq:core">30</journal-compact-percentage>
      <journal-lock-acquisition-timeout
   xmlns="urn:activemq:core">-1</journal-lock-acquisition-timeout>
      <journal-file-open-timeout
   xmlns="urn:activemq:core">5</journal-file-open-timeout>
      <journal-sync-non-transactional
   xmlns="urn:activemq:core">true</journal-sync-non-transactional>
      <journal-sync-transactional
   xmlns="urn:activemq:core">true</journal-sync-transactional>
      <disk-scan-period xmlns="urn:activemq:core">5000</disk-scan-period>
      <max-disk-usage xmlns="urn:activemq:core">90</max-disk-usage>
      <critical-analyzer xmlns="urn:activemq:core">true</critical-analyzer>
      <critical-analyzer-timeout
   xmlns="urn:activemq:core">120000</critical-analyzer-timeout>
      <critical-analyzer-check-period
   xmlns="urn:activemq:core">60000</critical-analyzer-check-period>
      <critical-analyzer-policy
   xmlns="urn:activemq:core">LOG</critical-analyzer-policy>
      <page-sync-timeout
   xmlns="urn:activemq:core">548000</page-sync-timeout>
      <acceptors xmlns="urn:activemq:core">
        <acceptor
   
name="artemis">tcp://0.0.0.0:61616?tcpSendBufferSize=1048576;tcpReceiveBufferSize=1048576;amqpMinLargeMessageSize=102400;connectionsAllowed=1536;directDeliver=false;useEpoll=true;amqpCredits=1000;amqpLowCredits=300;amqpDuplicateDetection=true;protocols=CORE,AMQP,STOMP,HORNETQ,OPENWIRE;</acceptor>
      </acceptors>
      <connectors xmlns="urn:activemq:core">
        <connector name="artemis">tcp://xxxxxxx.xxxxxx.xx:61616</connector>
      </connectors>
      <cluster-user xmlns="urn:activemq:core">artemis</cluster-user>
      <cluster-password
   xmlns="urn:activemq:core">xxxxxxxx</cluster-password>
      <broadcast-groups xmlns="urn:activemq:core">
        <broadcast-group name="bg-group1">
          <group-address>231.7.7.10</group-address>
          <group-port>9876</group-port>
          <broadcast-period>5000</broadcast-period>
          <connector-ref>artemis</connector-ref>
        </broadcast-group>
      </broadcast-groups>
      <discovery-groups xmlns="urn:activemq:core">
        <discovery-group name="dg-group1">
          <group-address>231.7.7.10</group-address>
          <group-port>9876</group-port>
          <refresh-timeout>10000</refresh-timeout>
        </discovery-group>
      </discovery-groups>
      <cluster-connections xmlns="urn:activemq:core">
        <cluster-connection name="artemis-ato">
          <connector-ref>artemis</connector-ref>
          <retry-interval>2000</retry-interval>
   <initial-connect-attempts>1000</initial-connect-attempts>
          <reconnect-attempts>1000</reconnect-attempts>
   <message-load-balancing>ON_DEMAND</message-load-balancing>
          <max-hops>1</max-hops>
          <discovery-group-ref discovery-group-name="dg-group1"/>
        </cluster-connection>
      </cluster-connections>
      <ha-policy xmlns="urn:activemq:core">
        <replication>
          <master>
   <check-for-live-server>true</check-for-live-server>
   <vote-on-replication-failure>true</vote-on-replication-failure>
            <group-name>ato-hapair-1</group-name>
          </master>
        </replication>
      </ha-policy>
      <metrics xmlns="urn:activemq:core">
        <jvm-memory>true</jvm-memory>
        <jvm-gc>true</jvm-gc>
        <jvm-threads>true</jvm-threads>
        <plugin
   
class-name="org.apache.activemq.artemis.core.server.metrics.plugins.ArtemisPrometheusMetricsPlugin"/>
      </metrics>
      <security-settings xmlns="urn:activemq:core">
        <security-setting match="activemq.management">
          <permission type="manage" roles="amq,service"/>
        </security-setting>
        <security-setting match="#">
          <permission type="manage" roles="amq,service"/>
          <permission type="send" roles="amq,service,b2bi"/>
          <permission type="consume" roles="amq,service,b2bi"/>
          <permission type="browse" roles="amq,service"/>
          <permission type="createAddress" roles="amq,service"/>
          <permission type="deleteAddress" roles="amq,service"/>
          <permission type="createDurableQueue" roles="amq,service"/>
          <permission type="deleteDurableQueue" roles="amq,service"/>
          <permission type="createNonDurableQueue" roles="amq,service"/>
          <permission type="deleteNonDurableQueue" roles="amq,service"/>
        </security-setting>
        <role-mapping from="gs-auth-Artemis_Admin" to="amq"/>
        <role-mapping from="gs-auth-Artemis_User" to="service"/>
      </security-settings>
      <address-settings xmlns="urn:activemq:core">
        <address-setting match="activemq.management#">
          <dead-letter-address>DLQ</dead-letter-address>
          <expiry-address>ExpiryQueue</expiry-address>
          <redelivery-delay>0</redelivery-delay>
   <message-counter-history-day-limit>10</message-counter-history-day-limit>
          <max-size-bytes>-1</max-size-bytes>
          <max-size-messages>-1</max-size-messages>
   <address-full-policy>PAGE</address-full-policy>
          <auto-create-queues>true</auto-create-queues>
   <auto-create-addresses>true</auto-create-addresses>
   <auto-create-jms-queues>true</auto-create-jms-queues>
   <auto-create-jms-topics>true</auto-create-jms-topics>
        </address-setting>
        <address-setting match="#">
          <dead-letter-address>DLQ</dead-letter-address>
          <expiry-address>ExpiryQueue</expiry-address>
          <redelivery-delay>0</redelivery-delay>
   <message-counter-history-day-limit>10</message-counter-history-day-limit>
          <max-size-bytes>-1</max-size-bytes>
          <max-size-messages>-1</max-size-messages>
   <address-full-policy>PAGE</address-full-policy>
          <auto-create-queues>true</auto-create-queues>
   <auto-create-addresses>true</auto-create-addresses>
   <auto-create-jms-queues>true</auto-create-jms-queues>
   <auto-create-jms-topics>true</auto-create-jms-topics>
        </address-setting>
        <address-setting match="jms.#">
          <dead-letter-address>DLQ</dead-letter-address>
          <expiry-address>ExpiryQueue</expiry-address>
   <max-delivery-attempts>5</max-delivery-attempts>
          <redelivery-delay>500</redelivery-delay>
   <redelivery-delay-multiplier>1.5</redelivery-delay-multiplier>
   
<redelivery-collision-avoidance-factor>0.5</redelivery-collision-avoidance-factor>
   <redistribution-delay>30000</redistribution-delay>
   <send-to-dla-on-no-route>true</send-to-dla-on-no-route>
          <max-size-bytes>-1</max-size-bytes>
          <max-size-messages>-1</max-size-messages>
   <address-full-policy>PAGE</address-full-policy>
   <message-counter-history-day-limit>10</message-counter-history-day-limit>
          <auto-create-queues>false</auto-create-queues>
          <auto-delete-queues>false</auto-delete-queues>
   <auto-delete-created-queues>false</auto-delete-created-queues>
   <auto-delete-queues-delay>30000</auto-delete-queues-delay>
   <config-delete-queues>OFF</config-delete-queues>
   <auto-create-addresses>false</auto-create-addresses>
   <auto-delete-addresses>false</auto-delete-addresses>
   <auto-delete-addresses-delay>30000</auto-delete-addresses-delay>
   <config-delete-addresses>OFF</config-delete-addresses>
        </address-setting>
        <address-setting match="activemq.notifications">
          <max-size-bytes>-1</max-size-bytes>
          <max-size-messages>-1</max-size-messages>
   <address-full-policy>PAGE</address-full-policy>
        </address-setting>
        <address-setting match="jms.queue.#">
   <default-address-routing-type>ANYCAST</default-address-routing-type>
   <default-queue-routing-type>ANYCAST</default-queue-routing-type>
        </address-setting>
        <address-setting match="jms.topic.#">
   <default-address-routing-type>MULTICAST</default-address-routing-type>
   <default-queue-routing-type>MULTICAST</default-queue-routing-type>
        </address-setting>
      </address-settings>
      <addresses xmlns="urn:activemq:core">
        <address name="DLQ">
          <anycast>
            <queue name="DLQ"/>
          </anycast>
        </address>
        <address name="ExpiryQueue">
          <anycast>
            <queue name="ExpiryQueue"/>
          </anycast>
        </address>
      </addresses>
      <broker-plugins xmlns="urn:activemq:core">
        <broker-plugin
   
class-name="org.apache.activemq.artemis.core.server.plugin.impl.LoggingActiveMQServerPlugin">
          <property key="LOG_ALL_EVENTS" value="false"/>
          <property key="LOG_CONNECTION_EVENTS" value="false"/>
          <property key="LOG_SESSION_EVENTS" value="false"/>
          <property key="LOG_CONSUMER_EVENTS" value="false"/>
          <property key="LOG_DELIVERING_EVENTS" value="false"/>
          <property key="LOG_SENDING_EVENTS" value="false"/>
          <property key="LOG_INTERNAL_EVENTS" value="false"/>
        </broker-plugin>
      </broker-plugins>
   </core>



Reply via email to