Hello!

No, I have a directly attached NVMe disk, and there are no IO or network issues.

пт, 25 февр. 2022 г. в 12:50, Bowen Song <bo...@bso.ng>:
>
> Do you have any network-based mountpoint, such as NFS or Samba? I have
> seen similar behaviour in other Java-based applications at GC safepoints
> when the network-based filesystem loses its connection and reconnects.
>
> On 25/02/2022 06:09, Azamat Hackimov wrote:
> > Hello!
> >
> > I recently migrated Cassandra from 3.11.x to 4.0 and got strange
> > freezes under heavy load. It looks like some nodes in the DC stopped
> > responding and got DN status.
> > I cannot check the status directly on the node via nodetool status or
> > even restart Cassandra with the `systemctl restart cassandra` command.
> > The only viable method is to `kill -9` the hanging process and restart
> > Cassandra again. On 3.11.x there were no such problems.
> >
> > I have 2 DCs with 8 nodes each, deployed on good hardware servers in
> > CentOS 7 and Java 11 environments, with slightly changed default
> > settings inherited from the 3.11.x installation.
> >
> > The problem shows up randomly and I can't determine its source; in
> > system.log and debug.log the last event that I could trace does not
> > have anything to do with the hang. The service just stops responding
> > and freezes. I can reproduce this with a huge load using dsbulk, but
> > still can't determine the cause of the problem.
> >
> > Has anyone encountered a similar problem, and is there any way other
> > than rolling back to the previous version?
> >
> > Here is my config:
> >
> > cluster_name: 'mycluster'
> > num_tokens: '256'
> > allocate_tokens_for_local_replication_factor: 3
> > hinted_handoff_enabled: true
> > max_hint_window_in_ms: 10800000
> > hinted_handoff_throttle_in_kb: 1024
> > max_hints_delivery_threads: 2
> > hints_directory: /data/cassandra/hints
> > hints_flush_period_in_ms: 10000
> > max_hints_file_size_in_mb: 128
> > batchlog_replay_throttle_in_kb: 1024
> > authenticator: PasswordAuthenticator
> > authorizer: AllowAllAuthorizer
> > role_manager: CassandraRoleManager
> > network_authorizer: AllowAllNetworkAuthorizer
> > roles_validity_in_ms: 2000
> > permissions_validity_in_ms: 2000
> > credentials_validity_in_ms: 2000
> > partitioner: org.apache.cassandra.dht.Murmur3Partitioner
> > data_file_directories:
> >      - /data/cassandra/data
> > commitlog_directory: /data/cassandra/commitlog
> > cdc_enabled: false
> > disk_failure_policy: stop
> > commit_failure_policy: stop
> > prepared_statements_cache_size_mb:
> > key_cache_size_in_mb:
> > key_cache_save_period: 14400
> > row_cache_size_in_mb: 0
> > row_cache_save_period: 0
> > counter_cache_size_in_mb:
> > counter_cache_save_period: 7200
> > saved_caches_directory: /data/cassandra/saved_caches
> > commitlog_sync: periodic
> > commitlog_sync_period_in_ms: 10000
> > commitlog_segment_size_in_mb: 32
> > seed_provider:
> >      - class_name: org.apache.cassandra.locator.SimpleSeedProvider
> >        parameters:
> >            - seeds: 'node1-1,node1-4,node2-1,node2-4'
> > concurrent_reads: 32
> > concurrent_writes: 32
> > concurrent_counter_writes: 32
> > concurrent_materialized_view_writes: 32
> > file_cache_size_in_mb: '1024'
> > memtable_allocation_type: heap_buffers
> > index_summary_capacity_in_mb:
> > index_summary_resize_interval_in_minutes: 60
> > trickle_fsync: false
> > trickle_fsync_interval_in_kb: 10240
> > storage_port: 7000
> > ssl_storage_port: 7001
> > listen_address:
> > start_native_transport: true
> > native_transport_port: 9042
> > native_transport_allow_older_protocols: true
> > rpc_address:
> > rpc_keepalive: true
> > incremental_backups: false
> > snapshot_before_compaction: false
> > auto_snapshot: true
> > snapshot_links_per_second: 0
> > column_index_size_in_kb: 64
> > column_index_cache_size_in_kb: 2
> > concurrent_compactors: 5
> > concurrent_materialized_view_builders: 1
> > compaction_throughput_mb_per_sec: 200
> > sstable_preemptive_open_interval_in_mb: 50
> > read_request_timeout_in_ms: 5000
> > range_request_timeout_in_ms: 10000
> > write_request_timeout_in_ms: 2000
> > counter_write_request_timeout_in_ms: 5000
> > cas_contention_timeout_in_ms: 1000
> > truncate_request_timeout_in_ms: 60000
> > request_timeout_in_ms: 10000
> > slow_query_log_timeout_in_ms: 500
> > endpoint_snitch: GossipingPropertyFileSnitch
> > dynamic_snitch_update_interval_in_ms: 100
> > dynamic_snitch_reset_interval_in_ms: 600000
> > dynamic_snitch_badness_threshold: 1.0
> > server_encryption_options:
> >      internode_encryption: none
> >      enable_legacy_ssl_storage_port: false
> >      keystore: conf/.keystore
> >      keystore_password: cassandra
> >      require_client_auth: false
> >      truststore: conf/.truststore
> >      truststore_password: cassandra
> >      require_endpoint_verification: false
> > client_encryption_options:
> >      enabled: false
> >      keystore: conf/.keystore
> >      keystore_password: cassandra
> >      require_client_auth: false
> > internode_compression: dc
> > inter_dc_tcp_nodelay: false
> > tracetype_query_ttl: 86400
> > tracetype_repair_ttl: 604800
> > enable_user_defined_functions: false
> > enable_scripted_user_defined_functions: false
> > windows_timer_interval: 1
> > transparent_data_encryption_options:
> >      enabled: false
> >      chunk_length_kb: 64
> >      cipher: AES/CBC/PKCS5Padding
> >      key_alias: testing:1
> >      key_provider:
> >        - class_name: org.apache.cassandra.security.JKSKeyProvider
> >          parameters:
> >            - keystore: conf/.keystore
> >              keystore_password: cassandra
> >              store_type: JCEKS
> >              key_password: cassandra
> > tombstone_warn_threshold: 1000
> > tombstone_failure_threshold: 100000
> > replica_filtering_protection:
> >      cached_rows_warn_threshold: 2000
> >      cached_rows_fail_threshold: 32000
> > batch_size_warn_threshold_in_kb: 5
> > batch_size_fail_threshold_in_kb: 50
> > unlogged_batch_across_partitions_warn_threshold: 10
> > compaction_large_partition_warning_threshold_mb: 100
> >
> > audit_logging_options:
> >      enabled: true
> >      logger:
> >        - class_name: BinAuditLogger
> >      excluded_categories: DML,QUERY,PREPARE
> >      max_log_size: 1073741824
> >
> > diagnostic_events_enabled: false
> > repaired_data_tracking_for_range_reads_enabled: false
> > repaired_data_tracking_for_partition_reads_enabled: false
> > report_unconfirmed_repaired_data_mismatches: false
> >
> > enable_materialized_views: true
> > enable_sasi_indexes: false
> > enable_transient_replication: false
> > enable_drop_compact_storage: false
> >



-- 
From Siberia with Love!

Reply via email to