Hi Russell,
Yes, oddly they were actually kickstarted (and installed) via xCAT
provisioning. This problem is happening on already-provisioned metal nodes
as well as on ones not yet installed. I'm testing with the following scenario:
xclustn4 (already provisioned with centos63, boots up, looks good
except that it kernel panics while trying to re-install)
xclustn5 (never provisioned, no OS, kernel panics while trying to
install)
I couldn't find any useful information by googling the panic problem either.
Here's lsdef for these two nodes:
[root@xcat ~]# lsdef xclustn4
Object name: xclustn4
arch=x86_64
bmc=10.32.1.154
bmcpassword=PASSW0RD
bmcport=0
bmcusername=USERID
chain=runcmd=bmcsetup,install
currchain=boot
currstate=install centos63-x86_64-compute
groups=compute,ipmi,all,xclustn,metal
initrd=xcat/centos63/x86_64/initrd.img
installnic=eth0
ip=10.32.0.154
kcmdline=quiet repo=http://10.32.0.6/install/centos63/x86_64/
ks=http://10.32.0.6/install/autoinst/xclustn4 ksdevice=eth0 cmdline
console=tty0 console=ttyS1,115200n8r
kernel=xcat/centos63/x86_64/vmlinuz
mac=00:1b:24:93:78:16
mgt=ipmi
netboot=xnba
nfsserver=10.32.0.6
os=centos63
postbootscripts=otherpkgs
postscripts=syslog,remoteshell,syncfiles
primarynic=eth0
profile=compute
provmethod=install
serial=To Be Filled By O.E.M.
serialflow=hard
serialport=1
serialspeed=115200
status=booting
statustime=01-21-2013 09:49:47
supportedarchs=x86,x86_64
switch=switch1
switchport=25
tftpserver=10.32.0.6
[root@xcat ~]# lsdef xclustn5
Object name: xclustn5
arch=x86_64
bmc=10.32.1.155
bmcpassword=<some-passwd>
bmcport=0
bmcusername=<some-user>
chain=runcmd=bmcsetup,install
currchain=boot
currstate=install centos63-x86_64-compute
groups=compute,ipmi,all,xclustn,metal
initrd=xcat/centos63/x86_64/initrd.img
installnic=eth0
ip=10.32.0.155
kcmdline=quiet repo=http://10.32.0.6/install/centos63/x86_64/
ks=http://10.32.0.6/install/autoinst/xclustn5 ksdevice=eth0 cmdline
console=tty0 console=ttyS1,115200n8r
kernel=xcat/centos63/x86_64/vmlinuz
mac=00:1b:24:93:b3:c0
mgt=ipmi
netboot=xnba
nfsserver=10.32.0.6
os=centos63
postbootscripts=otherpkgs
postscripts=syslog,remoteshell,syncfiles
primarynic=eth0
profile=compute
provmethod=install
serial=To Be Filled By O.E.M.
serialflow=hard
serialport=1
serialspeed=115200
status=installing
statustime=01-20-2013 15:32:54
supportedarchs=x86_64
switch=switch1
switchport=28
tftpserver=10.32.0.6
And here are some tabdumps; let me know if I've missed any important ones.
[root@xcat ~]# tabdump networks
#netname,net,mask,mgtifname,gateway,dhcpserver,tftpserver,nameservers,ntpservers,logservers,dynamicrange,nodehostname,ddnsdomain,vlanid,domain,comments,disable
"10_32_0_0-255_255_0_0","10.32.0.0","255.255.0.0","eth0","10.32.0.1","10.32.0.6","10.32.0.6","10.32.0.6",,,"10.32.0.60-10.32.0.80",,,,,,
[root@xcat ~]# tabdump nodelist
#node,groups,status,statustime,appstatus,appstatustime,primarysn,hidden,comments,disable
"xclustn0","compute,ipmi,all,xclustn,metal",,,,,,,,
"xclustn1","compute,ipmi,all,xclustn,metal",,,,,,,,
"xclustn2","compute,ipmi,all,xclustn,metal",,,,,,,,
"xclustn3","compute,ipmi,all,xclustn,metal",,,,,,,,
"xclustn4","compute,ipmi,all,xclustn,metal","booting","01-21-2013
09:49:47",,,,,,
"xclustn5","compute,ipmi,all,xclustn,metal","installing","01-20-2013
15:32:54",,,,,,
"switch1","sw",,,,,,,,
"dumpster","storage",,,,,,,,
[root@xcat ~]# tabdump nodetype
#node,os,arch,profile,provmethod,supportedarchs,nodetype,comments,disable
"xclustn4","centos63","x86_64","compute","install","x86,x86_64",,,
"xclustn5","centos63","x86_64","compute","install","x86_64",,,
[root@xcat ~]# tabdump noderes
#node,servicenode,netboot,tftpserver,tftpdir,nfsserver,monserver,nfsdir,installnic,primarynic,discoverynics,cmdinterface,xcatmaster,current_osimage,next_osimage,nimserver,routenames,comments,disable
"compute",,"xnba","10.32.0.6",,"10.32.0.6",,,"eth0","eth0",,,,,,,,,
"xclustn4",,,,,,,,,,,,,,,,,,
"xclustn0",,,,,,,,,,,,,,,,,,
"xclustn5",,,,,,,,,,,,,,,,,,
"xclustn1",,,,,,,,,,,,,,,,,,
"xclustn3",,,,,,,,,,,,,,,,,,
"xclustn2",,,,,,,,,,,,,,,,,,
Thanks!
On 01/21/2013 08:08 PM, Russell Jones wrote:
> Can you provide an lsdef of a node that is failing to boot?
>
> Can you also provide a tabdump of the standard xcat tables? (networks,
> nodelist, nodetype, etc.). A single line from each that shows the
> configuration tied to what a failing node is using is fine.
>
> Also, I know you said this happened after rebooting, but humor me -
> these nodes were running 6.3 successfully prior to your MN being
> rebooted? Everything I have run across in my Google searches for the
> error messages you are seeing is tied to either hardware issues or
> missing drivers.
>
>
>
> On 1/21/2013 3:37 PM, Jesus R. Camou wrote:
>> Reinstalled the management node on a different machine, imported back
>> same xcat database and config. Still same behaviour, kernel panics
>> before triggering installation (after loading initrd.img), so it must
>> be something else. The weird part is that this was working and
>> then stopped working all of a sudden. I don't remember changing
>> anything significant either.
>>
>> On Mon, Jan 21, 2013 at 12:04 PM, Jesus R. Camou <[email protected]
>> <mailto:[email protected]>> wrote:
>>
>> Made sure no other interfaces are using this ip, cleared the dhcpd
>> leases and rediscovered the node. Same results, here's the mn's
>> /var/log/messages:
>>
>> Jan 21 12:01:20 xcat xCAT: xCAT: Allowing makedhcp to xclustn5 -d
>> for root from localhost
>> Jan 21 12:03:23 xcat dhcpd: DHCPDISCOVER from 00:1b:24:93:b3:c0
>> via eth0
>> Jan 21 12:03:24 xcat dhcpd: DHCPOFFER on 10.32.0.70 to
>> 00:1b:24:93:b3:c0 via eth0
>> Jan 21 12:03:25 xcat dhcpd: DHCPREQUEST for 10.32.0.70 (10.32.0.5)
>> from 00:1b:24:93:b3:c0 via eth0
>> Jan 21 12:03:25 xcat dhcpd: DHCPACK on 10.32.0.70 to
>> 00:1b:24:93:b3:c0 via eth0
>> Jan 21 12:03:25 xcat in.tftpd[9282]: RRQ from 10.32.0.70 filename
>> xcat/xnba.kpxe
>> Jan 21 12:03:25 xcat in.tftpd[9282]: tftp: client does not accept
>> options
>> Jan 21 12:03:25 xcat in.tftpd[9283]: RRQ from 10.32.0.70 filename
>> xcat/xnba.kpxe
>> Jan 21 12:03:25 xcat dhcpd: DHCPDISCOVER from 00:1b:24:93:b3:c0
>> via eth0
>> Jan 21 12:03:26 xcat dhcpd: DHCPOFFER on 10.32.0.66 to
>> 00:1b:24:93:b3:c0 via eth0
>> Jan 21 12:03:26 xcat dhcpd: DHCPREQUEST for 10.32.0.66 (10.32.0.5)
>> from 00:1b:24:93:b3:c0 via eth0
>> Jan 21 12:03:26 xcat dhcpd: DHCPACK on 10.32.0.66 to
>> 00:1b:24:93:b3:c0 via eth0
>> Jan 21 12:03:51 xcat dhcpd: DHCPDISCOVER from 00:1b:24:93:b3:c0
>> via eth0
>> Jan 21 12:03:51 xcat dhcpd: DHCPOFFER on 10.32.0.70 to
>> 00:1b:24:93:b3:c0 via eth0
>> Jan 21 12:03:51 xcat dhcpd: DHCPREQUEST for 10.32.0.70 (10.32.0.5)
>> from 00:1b:24:93:b3:c0 via eth0
>> Jan 21 12:03:51 xcat dhcpd: DHCPACK on 10.32.0.70 to
>> 00:1b:24:93:b3:c0 via eth0
>> Jan 21 12:03:51 xcat xCAT: xCAT: Allowing getcredentials x509cert
>> Jan 21 12:04:02 xcat xCAT: xcatd: Processing discovery request
>> from 10.32.0.70
>> Jan 21 12:04:04 xcat xCAT node discovery: xclustn5 has been
>> discovered
>> Jan 21 12:04:07 xcat dhcpd: DHCPRELEASE of 10.32.0.70 from
>> 00:1b:24:93:b3:c0 via eth0 (found)
>> Jan 21 12:04:09 xcat dhcpd: DHCPDISCOVER from 00:1b:24:93:b3:c0
>> via eth0
>> Jan 21 12:04:09 xcat dhcpd: DHCPOFFER on 10.32.0.155 to
>> 00:1b:24:93:b3:c0 via eth0
>> Jan 21 12:04:09 xcat dhcpd: DHCPREQUEST for 10.32.0.155
>> (10.32.0.5) from 00:1b:24:93:b3:c0 via eth0
>> Jan 21 12:04:09 xcat dhcpd: DHCPACK on 10.32.0.155 to
>> 00:1b:24:93:b3:c0 via eth0
>> Jan 21 12:04:27 xcat xCAT: xCAT: Allowing getcredentials x509cert
>> from xclustn5
>> Jan 21 12:04:28 xcat xCAT: xCAT: Allowing nextdestiny for xclustn5
>> from xclustn5
>> Jan 21 12:04:36 xcat xCAT: xCAT: Allowing getbmcconfig for
>> xclustn5 from xclustn5
>> Jan 21 12:07:19 xcat dhcpd: DHCPDISCOVER from 00:1b:24:93:b3:c0
>> via eth0
>> Jan 21 12:07:19 xcat dhcpd: DHCPOFFER on 10.32.0.155 to
>> 00:1b:24:93:b3:c0 via eth0
>> Jan 21 12:07:21 xcat dhcpd: DHCPREQUEST for 10.32.0.155
>> (10.32.0.5) from 00:1b:24:93:b3:c0 via eth0
>> Jan 21 12:07:21 xcat dhcpd: DHCPACK on 10.32.0.155 to
>> 00:1b:24:93:b3:c0 via eth0
>> Jan 21 12:07:21 xcat in.tftpd[9391]: RRQ from 10.32.0.155 filename
>> xcat/xnba.kpxe
>> Jan 21 12:07:21 xcat in.tftpd[9391]: tftp: client does not accept
>> options
>> Jan 21 12:07:21 xcat in.tftpd[9392]: RRQ from 10.32.0.155 filename
>> xcat/xnba.kpxe
>> Jan 21 12:07:21 xcat dhcpd: uid lease 10.32.0.66 for client
>> 00:1b:24:93:b3:c0 is duplicate on eth0
>> Jan 21 12:07:21 xcat dhcpd: DHCPDISCOVER from 00:1b:24:93:b3:c0
>> via eth0
>> Jan 21 12:07:21 xcat dhcpd: DHCPOFFER on 10.32.0.155 to
>> 00:1b:24:93:b3:c0 via eth0
>> Jan 21 12:07:21 xcat dhcpd: uid lease 10.32.0.66 for client
>> 00:1b:24:93:b3:c0 is duplicate on eth0
>> Jan 21 12:07:21 xcat dhcpd: DHCPREQUEST for 10.32.0.155
>> (10.32.0.5) from 00:1b:24:93:b3:c0 via eth0
>> Jan 21 12:07:21 xcat dhcpd: DHCPACK on 10.32.0.155 to
>> 00:1b:24:93:b3:c0 via eth0
>>
>>
>>
>> On Mon, Jan 21, 2013 at 8:27 AM, Russell Jones
>> <[email protected] <mailto:[email protected]>> wrote:
>>
>> Jan 21 01:16:09 xcat dhcpd: uid lease 10.32.0.66 for client
>> 00:1b:24:93:b3:c0 is duplicate on eth0
>>
>> I haven't seen that before. What's in your leases file? Any
>> duplicates for different macs?
>>
>> Anything else responding to pings on that IP that shouldn't be?
>>
>>
>>
>>
>> On 1/21/2013 3:10 AM, Jesus R. Camou wrote:
>>> Jan 21 01:16:09 xcat dhcpd: uid lease 10.32.0.66 for client
>>> 00:1b:24:93:b3:c0 is duplicate on eth0
>>
>>
>> ------------------------------------------------------------------------------
>> Master Visual Studio, SharePoint, SQL, ASP.NET
>> <http://ASP.NET>, C# 2012, HTML5, CSS,
>> MVC, Windows 8 Apps, JavaScript and much more. Keep your
>> skills current
>> with LearnDevNow - 3,200 step-by-step video tutorials by
>> Microsoft
>> MVPs and experts. SALE $99.99 this month only -- learn more at:
>> http://p.sf.net/sfu/learnmore_122412
>> _______________________________________________
>> xCAT-user mailing list
>> [email protected]
>> <mailto:[email protected]>
>> https://lists.sourceforge.net/lists/listinfo/xcat-user
>>
>>
>>
>>
>> -- Jesus R. Camou @ nearnix
>> UNIX Engineering - [email protected] <mailto:[email protected]>
>> US:(310)945-5360 MEX:+52(662)299-7503
>>
>>
>>
>>
>> --
>> Jesus R. Camou @ nearnix
>> UNIX Engineering - [email protected] <mailto:[email protected]>
>> US:(310)945-5360 MEX:+52(662)299-7503
>>
>>
>>
>> ------------------------------------------------------------------------------
>>
>>
>> Master Visual Studio, SharePoint, SQL, ASP.NET, C# 2012, HTML5, CSS,
>> MVC, Windows 8 Apps, JavaScript and much more. Keep your skills current
>> with LearnDevNow - 3,200 step-by-step video tutorials by Microsoft
>> MVPs and experts. SALE $99.99 this month only -- learn more at:
>> http://p.sf.net/sfu/learnmore_122412
>>
>>
>> _______________________________________________
>> xCAT-user mailing list
>> [email protected]
>> https://lists.sourceforge.net/lists/listinfo/xcat-user
>
>
>
>
> ------------------------------------------------------------------------------
> Master SQL Server Development, Administration, T-SQL, SSAS, SSIS, SSRS
> and more. Get SQL Server skills now (including 2012) with LearnDevNow -
> 200+ hours of step-by-step video tutorials by Microsoft MVPs and experts.
> SALE $99.99 this month only - learn more at:
> http://p.sf.net/sfu/learnmore_122512
>
>
> _______________________________________________
> xCAT-user mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/xcat-user
------------------------------------------------------------------------------
Master Visual Studio, SharePoint, SQL, ASP.NET, C# 2012, HTML5, CSS,
MVC, Windows 8 Apps, JavaScript and much more. Keep your skills current
with LearnDevNow - 3,200 step-by-step video tutorials by Microsoft
MVPs and experts. ON SALE this month only -- learn more at:
http://p.sf.net/sfu/learnnow-d2d
_______________________________________________
xCAT-user mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/xcat-user