Hiya,

I've noticed the log messages below a few times per day.  These resources were 
created a few days ago.  I'm running DRBD 8.3.7 on Ubuntu.  Nothing is really 
under load, especially not the sync links.

Interestingly: the integrity check failure is happening on drbd5 and drbd3, and 
not on the other 7 resources.  drbd5 and drbd3 are both holding raw disk images 
for VMs that run a particular bit of software (which has a postgres database 
inside) that the others don't run.  The software isn't in production yet, so 
it's not doing very much disk thrashing.

My question for the list is: any suggestions where to look for trouble first?  
Any known bugs when using data-integrity-alg md5?  I am reasonably sure the 
network is ok - it's a link via a cisco 2960G, and the interfaces are not being 
shared with anything except heartbeat.  Syncer rate is 30M for both sides, and 
there are no errors on the "show int" stats on the cisco.  The interfaces are 
running at a gigabit.

Config for drbd5 is below.  It was carefully and lovingly handcrafted by 
me, and then obliterated and roughly reformatted by lcmc.

Replies like "do some basic troubleshooting, n00b" warmly accepted.


Nov  3 09:57:23 kvm-host-02 kernel: [144099.170565] block drbd5: Digest 
integrity check FAILED.
Nov  3 09:57:23 kvm-host-02 kernel: [144099.177014] block drbd5: error 
receiving Data, l: 4136!
Nov  3 09:57:23 kvm-host-02 kernel: [144099.183660] block drbd5: peer( Primary 
-> Unknown ) conn( Connected -> ProtocolError ) pdsk( UpToDate -> DUnknown ) 
Nov  3 09:57:23 kvm-host-02 kernel: [144099.183679] block drbd5: asender 
terminated
Nov  3 09:57:23 kvm-host-02 kernel: [144099.183683] block drbd5: Terminating 
asender thread
Nov  3 09:57:23 kvm-host-02 kernel: [144099.184007] block drbd5: Connection 
closed
Nov  3 09:57:23 kvm-host-02 kernel: [144099.184013] block drbd5: conn( 
ProtocolError -> Unconnected ) 
Nov  3 09:57:23 kvm-host-02 kernel: [144099.184019] block drbd5: receiver 
terminated
Nov  3 09:57:23 kvm-host-02 kernel: [144099.184021] block drbd5: Restarting 
receiver thread
Nov  3 09:57:23 kvm-host-02 kernel: [144099.184024] block drbd5: receiver 
(re)started
Nov  3 09:57:23 kvm-host-02 kernel: [144099.184028] block drbd5: conn( 
Unconnected -> WFConnection ) 
Nov  3 09:57:23 kvm-host-02 kernel: [144099.599546] block drbd5: Handshake 
successful: Agreed network protocol version 91
Nov  3 09:57:23 kvm-host-02 kernel: [144099.599860] block drbd5: Peer 
authenticated using 20 bytes of 'sha1' HMAC
Nov  3 09:57:23 kvm-host-02 kernel: [144099.599869] block drbd5: conn( 
WFConnection -> WFReportParams ) 
Nov  3 09:57:23 kvm-host-02 kernel: [144099.599981] block drbd5: Starting 
asender thread (from drbd5_receiver [26654])
Nov  3 09:57:23 kvm-host-02 kernel: [144099.702240] block drbd5: 
data-integrity-alg: md5
Nov  3 09:57:23 kvm-host-02 kernel: [144099.702263] block drbd5: 
drbd_sync_handshake:
Nov  3 09:57:23 kvm-host-02 kernel: [144099.702268] block drbd5: self 
36C22CBC3A57C4A0:0000000000000000:3793513D2683E058:F690ADE1C6F15903 bits:0 
flags:0
Nov  3 09:57:23 kvm-host-02 kernel: [144099.702272] block drbd5: peer 
2206AABC864DF1F9:36C22CBC3A57C4A1:3793513D2683E058:F690ADE1C6F15903 bits:2 
flags:0
Nov  3 09:57:23 kvm-host-02 kernel: [144099.702275] block drbd5: 
uuid_compare()=-1 by rule 50
Nov  3 09:57:23 kvm-host-02 kernel: [144099.702281] block drbd5: peer( Unknown 
-> Primary ) conn( WFReportParams -> WFBitMapT ) pdsk( DUnknown -> UpToDate ) 
Nov  3 09:57:23 kvm-host-02 kernel: [144099.769861] block drbd5: conn( 
WFBitMapT -> WFSyncUUID ) 
Nov  3 09:57:23 kvm-host-02 kernel: [144099.776607] block drbd5: helper 
command: /sbin/drbdadm before-resync-target minor-5
Nov  3 09:57:23 kvm-host-02 kernel: [144099.780176] block drbd5: helper 
command: /sbin/drbdadm before-resync-target minor-5 exit code 0 (0x0)
Nov  3 09:57:23 kvm-host-02 kernel: [144099.780184] block drbd5: conn( 
WFSyncUUID -> SyncTarget ) disk( UpToDate -> Inconsistent ) 
Nov  3 09:57:23 kvm-host-02 kernel: [144099.780191] block drbd5: Began resync 
as SyncTarget (will sync 8 KB [2 bits set]).
Nov  3 09:57:23 kvm-host-02 kernel: [144099.809790] block drbd5: Resync done 
(total 1 sec; paused 0 sec; 8 K/sec)
Nov  3 09:57:23 kvm-host-02 kernel: [144099.809800] block drbd5: conn( 
SyncTarget -> Connected ) disk( Inconsistent -> UpToDate ) 
Nov  3 09:57:23 kvm-host-02 kernel: [144099.809808] block drbd5: helper 
command: /sbin/drbdadm after-resync-target minor-5


resource host {
 protocol C;

 handlers {
  pri-on-incon-degr     "/usr/lib/drbd/notify-pri-on-incon-degr.sh; 
/usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot 
-f";
  pri-lost-after-sb     "/usr/lib/drbd/notify-pri-lost-after-sb.sh; 
/usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot 
-f";
  fence-peer    /usr/lib/drbd/crm-fence-peer.sh;
  local-io-error        "/usr/lib/drbd/notify-io-error.sh; 
/usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt 
-f";
  after-resync-target   /usr/lib/drbd/crm-unfence-peer.sh;
 }

 startup {
  wfc-timeout   15;
  degr-wfc-timeout      60;
 }

 net {
  cram-hmac-alg sha1;
  shared-secret longscrambledascii;
  after-sb-0pri discard-zero-changes;
  after-sb-1pri discard-secondary;
  data-integrity-alg    md5;
 }

 disk {
  fencing       resource-only;
 }

 syncer {
  rate  30M;
 }

 on kvm-host-01 {
  device                /dev/drbd5;
  disk          /dev/kvm-host-01/host;
  flexible-meta-disk    internal;
  address               172.16.1.1:7793;
 }
 on kvm-host-02 {
  device                /dev/drbd5;
  disk          /dev/kvm-host-02/host;
  flexible-meta-disk    internal;
  address               172.16.1.2:7793;
 }
}

_______________________________________________
drbd-user mailing list
drbd-user@lists.linbit.com
http://lists.linbit.com/mailman/listinfo/drbd-user

Reply via email to