Hi Mr. Tomas, You need the same version of the OCFS2 drivers on both nodes to mount the volume. The OCFS2 versions are on-disk compatible, but the network protocol can change between versions. That seems to be your problem.
Regards, Sérgio Em Mon, 1 Jun 2009 11:06:33 +0200 Tomas Lavicky <tomas.lavi...@racom.eu> escreveu: > Hi, > we have two node HP Proliant DL140 / Ubuntu 8.04 LTS / Heartbeat > 2.1.3-2 cluster with bunch of KVM virtual guests. Images for virtual > servers are stored in two DRBD 8.0.11 / OCFS2 1.3.9 dual-primary mode > resources. I've run this cluster for one year yet and I need to > upgrade it now. I can't stop cluster and upgrade both nodes at once. > So I moved all virtual servers to one node, stopped Heartbeat, OCFS2 > and DRBD and upgrade another node to Ubuntu 8.10. All seems to be > well after reboot except I'm not able mount OCFS2/DRBD partitions on > upgraded node: > > ~# mount -t ocfs2 /dev/drbd1 /drbd1/ > mount.ocfs2: Transport endpoint is not connected while > mounting /dev/drbd1 on /drbd1/. Check 'dmesg' for more information on > this error. > > Tail of dmesg on upgraded node: > > [329491.075605] (4849,0):o2net_check_handshake:1227 node > rrhb-gandalfb (num 0) at 10.123.45.18:7777 advertised net protocol > version 8 but 11 is required, disconnecting > [329495.106007] (7751,0):dlm_request_join:1033 ERROR: status = -107 > [329495.106055] (7751,0):dlm_try_to_join_domain:1207 ERROR: status = > -107 [329495.106086] (7751,0):dlm_join_domain:1485 ERROR: status = > -107 [329495.106164] (7751,0):dlm_register_domain:1732 ERROR: status > = -107 [329495.106198] (7751,0):o2cb_cluster_connect:302 ERROR: > status = -107 [329495.106229] (7751,0):ocfs2_dlm_init:2756 ERROR: > status = -107 [329495.106268] (7751,0):ocfs2_mount_volume:1208 ERROR: > status = -107 [329495.106323] ocfs2: Unmounting device (147,1) on > (node 0) > > And on non-upgraded node: > > [1626792.088584] (4888,0):o2net_check_handshake:1144 node > rrhb-gandalfs (num 1) at 10.123.45.19:7777 advertised net protocol > version 11 but 8 is required, disconnecting > > Configuration files are the same on both nodes: > > #/etc/ocfs2/cluster.conf > node: > ip_port = 7777 > ip_address = 10.123.45.18 > number = 0 > 
name = rrhb-gandalfb > cluster = ocfs2 > node: > ip_port = 7777 > ip_address = 10.123.45.19 > number = 1 > name = rrhb-gandalfs > cluster = ocfs2 > cluster: > node_count = 2 > name = ocfs2 > > Along to this thread > http://oss.oracle.com/pipermail/ocfs2-users/2008-April/002769.html it > could be kernel incompatibility problem. Is it chance to solve it > anyhow with different kernels? I need to keep one node running > continually so I'm afraid to upgrade it while the other one won't be > OK. Thanks for your advice. > > Tomas > > > Some other investigations: > > ~# /etc/init.d/o2cb status > Module "configfs": Loaded > Filesystem "configfs": Mounted > Module "ocfs2_nodemanager": Loaded > Module "ocfs2_dlm": Loaded > Module "ocfs2_dlmfs": Loaded > Module "ocfs2_stackglue": Loaded > Filesystem "ocfs2_dlmfs": Mounted > Checking O2CB cluster ocfs2: Online > Heartbeat dead threshold = 61 > Network idle timeout: 120000 > Network keepalive delay: 5000 > Network reconnect delay: 5000 > Checking O2CB heartbeat: Not active > > ~# /etc/init.d/o2cb load > > ~# strace -ff -o /tmp/out o2cb_ctl -H -n ocfs2 -t cluster -a > online=yes > > ~# cat /tmp/out.7794 > execve("/sbin/o2cb_ctl", > ["o2cb_ctl", "-H", "-n", "ocfs2", "-t", "cluster", "-a", > "online=yes"], [/* 17 vars */]) = 0 > brk(0) = 0xecd000 > mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, > 0) = 0x7f3739f06000 > access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or > directory) > mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, > 0) = 0x7f3739f04000 > access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or > directory) > open("/etc/ld.so.cache", O_RDONLY) = 3 > fstat(3, {st_mode=S_IFREG|0644, st_size=17967, ...}) = 0 > mmap(NULL, 17967, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f3739eff000 > close(3) = 0 > access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or > directory) > open("/usr/lib/libglib-2.0.so.0", O_RDONLY) = 3 > read(3, 
"\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0PQ\1\0\0"..., > 832) = 832 > fstat(3, {st_mode=S_IFREG|0644, st_size=802936, ...}) = 0 > mmap(NULL, 2900104, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, > 3, 0) = 0x7f3739a24000 > mprotect(0x7f3739ae7000, 2093056, PROT_NONE) = 0 > mmap(0x7f3739ce6000, 8192, PROT_READ|PROT_WRITE, > MAP_PRIVATE|MAP_FIXED| MAP_DENYWRITE, 3, 0xc2000) = 0x7f3739ce6000 > mmap(0x7f3739ce8000, 136, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED| > MAP_ANONYMOUS, -1, 0) = 0x7f3739ce8000 > close(3) = 0 > access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or > directory) > open("/lib/libcom_err.so.2", O_RDONLY) = 3 > read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220\22\0"..., > 832) = 832 > fstat(3, {st_mode=S_IFREG|0644, st_size=14496, ...}) = 0 > mmap(NULL, 2109808, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, > 3, 0) = 0x7f3739820000 > mprotect(0x7f3739823000, 2093056, PROT_NONE) = 0 > mmap(0x7f3739a22000, 8192, PROT_READ|PROT_WRITE, > MAP_PRIVATE|MAP_FIXED| MAP_DENYWRITE, 3, 0x2000) = 0x7f3739a22000 > close(3) = 0 > access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or > directory) > open("/lib/libc.so.6", O_RDONLY) = 3 > read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220\345"..., > 832) = 832 > fstat(3, {st_mode=S_IFREG|0755, st_size=1502520, ...}) = 0 > mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, > 0) = 0x7f3739efe000 > mmap(NULL, 3609304, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, > 3, 0) = 0x7f37394ae000 > mprotect(0x7f3739617000, 2093056, PROT_NONE) = 0 > mmap(0x7f3739816000, 20480, PROT_READ|PROT_WRITE, > MAP_PRIVATE|MAP_FIXED| MAP_DENYWRITE, 3, 0x168000) = 0x7f3739816000 > mmap(0x7f373981b000, 17112, PROT_READ|PROT_WRITE, > MAP_PRIVATE|MAP_FIXED| MAP_ANONYMOUS, -1, 0) = 0x7f373981b000 > close(3) = 0 > access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or > directory) > open("/lib/libpcre.so.3", O_RDONLY) = 3 > read(3, 
"\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\340\23\0"..., > 832) = 832 > fstat(3, {st_mode=S_IFREG|0644, st_size=165872, ...}) = 0 > mmap(NULL, 2261152, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, > 3, 0) = 0x7f3739285000 > mprotect(0x7f37392ad000, 2093056, PROT_NONE) = 0 > mmap(0x7f37394ac000, 8192, PROT_READ|PROT_WRITE, > MAP_PRIVATE|MAP_FIXED| MAP_DENYWRITE, 3, 0x27000) = 0x7f37394ac000 > close(3) = 0 > access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or > directory) > open("/lib/libpthread.so.0", O_RDONLY) = 3 > read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\320W\0\0"..., > 832) = 832 > fstat(3, {st_mode=S_IFREG|0755, st_size=130214, ...}) = 0 > mmap(NULL, 2208624, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, > 3, 0) = 0x7f3739069000 > mprotect(0x7f3739080000, 2093056, PROT_NONE) = 0 > mmap(0x7f373927f000, 8192, PROT_READ|PROT_WRITE, > MAP_PRIVATE|MAP_FIXED| MAP_DENYWRITE, 3, 0x16000) = 0x7f373927f000 > mmap(0x7f3739281000, 13168, PROT_READ|PROT_WRITE, > MAP_PRIVATE|MAP_FIXED| MAP_ANONYMOUS, -1, 0) = 0x7f3739281000 > close(3) = 0 > mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, > 0) = 0x7f3739efd000 > mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, > 0) = 0x7f3739efc000 > arch_prctl(ARCH_SET_FS, 0x7f3739efc700) = 0 > mprotect(0x7f373927f000, 4096, PROT_READ) = 0 > mprotect(0x7f37394ac000, 4096, PROT_READ) = 0 > mprotect(0x7f3739816000, 16384, PROT_READ) = 0 > mprotect(0x7f3739a22000, 4096, PROT_READ) = 0 > mprotect(0x7f3739ce6000, 4096, PROT_READ) = 0 > mprotect(0x7f3739f07000, 4096, PROT_READ) = 0 > munmap(0x7f3739eff000, 17967) = 0 > set_tid_address(0x7f3739efc790) = 7794 > set_robust_list(0x7f3739efc7a0, 0x18) = 0 > futex(0x7fff41f0866c, 0x81 /* FUTEX_??? 
*/, 1) = 0 > rt_sigaction(SIGRTMIN, {0x7f373906e660, [], SA_RESTORER|SA_SIGINFO, > 0x7f37390780f0}, NULL, 8) = 0 > rt_sigaction(SIGRT_1, {0x7f373906e6f0, [], > SA_RESTORER|SA_RESTART|SA_SIGINFO, 0x7f37390780f0}, NULL, 8) = 0 > rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0 > getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM_INFINITY}) > = 0 brk(0) = 0xecd000 > brk(0xeee000) = 0xeee000 > open("/sys/o2cb/interface_revision", O_RDONLY) = 3 > read(3, "5\n", 15) = 2 > read(3, "", 13) = 0 > close(3) = 0 > stat("/sys/kernel/config", {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0 > statfs("/sys/kernel/config", {f_type=0x62656570, f_bsize=4096, > f_blocks=0, f_bfree=0, f_bavail=0, f_files=0, f_ffree=0, f_fsid={0, > 0}, f_namelen=255, f_frsize=4096}) = 0 > stat("/etc/ocfs2/cluster.conf", {st_mode=S_IFREG|0644, > st_size=348, ...}) = 0 open("/etc/ocfs2/cluster.conf", O_RDONLY) = 3 > read(3, "#/etc/ocfs2/cluster.conf\nnode:\n "..., 4000) = 348 > read(3, "", 4000) = 0 > close(3) = 0 > mkdir("/sys/kernel/config/cluster/ocfs2", 0755) = -1 EEXIST (File > exists) uname({sys="Linux", node="rrhb-gandalfs", ...}) = 0 > mkdir("/sys/kernel/config/cluster/ocfs2/node/rrhb-gandalfb", 0755) = > -1 EEXIST (File exists) > uname({sys="Linux", node="rrhb-gandalfs", ...}) = 0 > mkdir("/sys/kernel/config/cluster/ocfs2/node/rrhb-gandalfs", 0755) = > -1 EEXIST (File exists) > exit_group(0) = ? > > _______________________________________________ > Ocfs2-users mailing list > Ocfs2-users@oss.oracle.com > http://oss.oracle.com/mailman/listinfo/ocfs2-users -- .:''''':. .:' ` Sérgio Surkamp | Gerente de Rede :: ........ ser...@gruposinternet.com.br `:. .:' `:, ,.:' *Grupos Internet S.A.* `: :' R. Lauro Linhares, 2123 Torre B - Sala 201 : : Trindade - Florianópolis - SC :.' :: +55 48 3234-4109 : ' http://www.gruposinternet.com.br _______________________________________________ Ocfs2-users mailing list Ocfs2-users@oss.oracle.com http://oss.oracle.com/mailman/listinfo/ocfs2-users