Hi Chad,
Looking at the first core dump, this is what I found:
[root@headnode (coal) /var/tmp]# mdb core.metadata.3535
Loading modules: [ libumem.so.1 libc.so.1 libnvpair.so.1 ld.so.1 ]
> ::load v8
mdb_v8 version: 1.2.1 (release, from d31211b)
V8 version: 3.14.5.9
Autoconfigured V8 support from target
C++ symbol demangling enabled
> ::jsstack -vn0
native: libc.so.1`_lwp_kill+0x15
native: libc.so.1`raise+0x2b
native: libc.so.1`abort+0x10e
(1 internal frame elided)
native: v8::internal::Isolate::DoThrow+0x34c
native: v8::internal::Isolate::Throw+0x14
native: v8::internal::IC::TypeError+0x5a
native: v8::internal::LoadIC::Load+0x14c
native: v8::internal::LoadIC_Miss+0xb3
(1 internal frame elided)
js: <anonymous> (as async.each)
file: /usr/node/0.10/node_modules/async.js
posn: line 111
this: b9c68a35 (JSObject: Object)
arg1: 9ed08091 (Oddball: "undefined")
arg2: bffecb35 (JSFunction)
arg3: bffecb59 (JSFunction)
js: <anonymous> (as <anon>)
file: /usr/vm/lib/metadata/agent.js
posn: line 430
this: 9ed096c1 (<unknown>)
arg1: bffcc1ad (JSObject: SyntaxError)
arg2: 9ed08091 (Oddball: "undefined")
(1 internal frame elided)
js: gotVMs
file: /usr/vm/node_modules/VM.js
posn: line 1561
this: 9ed096c1 (<unknown>)
arg1: bffcc1ad (JSObject: SyntaxError)
arg2: 9ed08091 (Oddball: "undefined")
(1 internal frame elided)
js: <anonymous> (as <anon>)
file: /usr/vm/node_modules/vmload/index.js
posn: line 988
this: 9ed096c1 (<unknown>)
arg1: bffcc1ad (JSObject: SyntaxError)
(1 internal frame elided)
js: <anonymous> (as <anon>)
file: /usr/node/0.10/node_modules/async.js
posn: line 239
this: 9ed096c1 (<unknown>)
arg1: bffcc1ad (JSObject: SyntaxError)
js: <anonymous> (as <anon>)
file: /usr/node/0.10/node_modules/async.js
posn: line 142
this: 9ed096c1 (<unknown>)
arg1: bffcc1ad (JSObject: SyntaxError)
js: <anonymous> (as <anon>)
file: /usr/node/0.10/node_modules/async.js
posn: line 235
this: 9ed096c1 (<unknown>)
arg1: bffcc1ad (JSObject: SyntaxError)
arg2: 9ed08091 (Oddball: "undefined")
js: <anonymous> (as <anon>)
file: /usr/node/0.10/node_modules/async.js
posn: line 595
this: 9ed096c1 (<unknown>)
arg1: bffcc1ad (JSObject: SyntaxError)
js: <anonymous> (as <anon>)
file: /usr/vm/node_modules/vmload/index.js
posn: line 949
this: 9ed096c1 (<unknown>)
arg1: bffcc1ad (JSObject: SyntaxError)
arg2: 9ed08091 (Oddball: "undefined")
(1 internal frame elided)
js: <anonymous> (as <anon>)
file: /usr/vm/node_modules/vmload/index.js
posn: line 1112
this: 9ed096c1 (<unknown>)
arg1: bffcc1ad (JSObject: SyntaxError)
js: <anonymous> (as <anon>)
file: /usr/node/0.10/node_modules/async.js
posn: line 142
this: 9ed096c1 (<unknown>)
arg1: bffcc1ad (JSObject: SyntaxError)
js: <anonymous> (as <anon>)
file: /usr/vm/node_modules/vmload/index.js
posn: line 1379
this: 9ed096c1 (<unknown>)
arg1: bffcc1ad (JSObject: SyntaxError)
arg2: 9ed08091 (Oddball: "undefined")
(1 internal frame elided)
js: <anonymous> (as <anon>)
file: /usr/vm/node_modules/vmload/index.js
posn: line 546
this: 9ed096c1 (<unknown>)
arg1: 9ed08081 (Oddball: "null")
arg2: bffcb69d (JSObject: Buffer)
js: <anonymous> (as <anon>)
file: fs.js
posn: line 257
this: 9ed096c1 (<unknown>)
arg1: 9ed08081 (Oddball: "null")
(1 internal frame elided)
js: <anonymous> (as <anon>)
file: fs.js
posn: line 106
this: bffcb865 (JSObject: Object)
(1 internal frame elided)
(1 internal frame elided)
(1 internal frame elided)
native: _ZN2v88internalL6InvokeEbNS0_6HandleINS0_10JSFunctionEEENS1_INS0...
native: v8::internal::Execution::Call+0x5c
native: v8::Function::Call+0xf0
native: node::MakeCallback+0x49
native: node::MakeCallback+0x49
native: _ZN4nodeL5AfterEP7uv_fs_s+0x10f
native: uv__work_done+0x6a
native: uv__async_event+0x51
native: uv__async_io+0x6c
native: uv__io_poll+0x244
native: uv_run+0xbd
native: node::Start+0x124
native: main+0x1b
native: _start_crt+0x97
native: _start+0x1a
> bffcb69d::nodebuffer
8e1a008
> 8e1a008/s
0x8e1a008: {
"Live Image": "20180118T013028Z",
"System Type": "SunOS",
"Boot Time": "1516727655",
"SDC Version": "7.0",
"Manufacturer": "HP",
"Product": "ProLiant DL360 G5",
"Serial Number": "",
"SKU Number": "",
"HW Version": "",
"HW Family": "ProLiant",
"Setup": "false",
"VM Capable": true,
"CPU Type": "Unknown",
"CPU Virtualization": "vmx",
"CPU Physical Cores": 0,
"UUID": "00000000-0000-1100-0000-000000000000",
"Hostname": "articulate",
"CPU Total Cores": 8,
"MiB of Memory": "32765",
"Zpool": "zones",
"Zpool Disks": "c1t0d0,c1t1d0,c1t2d0,c1t3d0,c1t4d0,c1t5d0",
"Zpool Profile": "raidz2",
"Zpool Creation": 1516707618,
"Zpool Size in GiB": 526,
"Disks": {
"c1t0d0": {"Size in GB": 146},
"c1t1d0": {"Size in GB": 146},
"c1t2d0": {"Size in GB": 146},
"c1t3d0": {"Size in GB": 146},
"c1t4d0": {"Size in GB": 146},
"c1t5d0": {"Size in GB": 146}
},
"Boot Parameters": {
"console": "vga",
"vga_mode": "115200,8,n,1,-",
"root_shadow": "$5$2HOHRnK3$NvLlm.1KQBbB0WjoP7xcIwGnllhzp2HnT.mDO7DpxYA",
"smartos": "true",
"boot_args": "",
"bootargs": ""
},
"Network Interfaces": {
"bnx1": {"MAC Address": "00:22:64:0d:68:22", "ip4addr": "", "Link Status":
"down", "NIC Names": []},
"bnx0": {"MAC Address": "00:22:64:0d:68:28", "ip4addr": "10.0.0.8", "Link
Status": "up", "NIC Names": ["admin"]},
"e1000g0": {"MAC Address": "00:1b:78:5c:46:98", "ip4addr": "", "Link
Status": "unknown", "NIC Names": []},
"e1000g1": {"MAC Address": "00:1b:78:5c:46:99", "ip4addr": "", "Link
Status": "unknown", "NIC Names": []}
},
"Virtual Network Interfaces": {
},
"Link Aggregations": {
}
}
e}1
>
So it seems that the JSON representation of sysinfo generated by
“getSysinfo” at
https://github.com/joyent/smartos-live/blob/7f2f38edaf7f2a1e9428ce842081e097c9b3b342/src/vm/node_modules/vmload/index.js#L1379
is somehow malformed: it contains a trailing “e}1” string after the closing
brace, which makes JSON.parse throw an error and causes the process to abort.
It seems that this JSON payload actually comes from “/tmp/.sysinfo.json” in the
GZ of that server. Would you mind pasting the content of that file somewhere?
In the meantime, I’ll check if there’s any known ticket about that.
Cheers,
Julien
On Jan 24, 2018, at 3:43 AM, Chad M Stewart <[email protected]> wrote:
> Hi Julien,
>
> I’ve put a couple of cores at https://play.ourspace.io/s/yoP76pDXgYKcFoj
>
> If you want ssh access to the GZ I can make that happen.
>
>
> -Chad
>
>
>> On Jan 23, 2018, at 1:57 PM, Julien Gilli <[email protected]> wrote:
>>
>> Given the sysinfo output you pasted, I’m not sure what caused JSON.parse to
>> throw an exception that wasn’t caught and that thus made the process abort.
>> Would you be able to upload the generated imgadm core file (which should be
>> in /zones/global/cores/) somewhere we can download it so that we can look at
>> the actual sysinfo output that was parsed?
>>
>> On Jan 23, 2018, at 10:42 AM, Chad M Stewart <[email protected]> wrote:
>>
>>>
>>>
>>>> On Jan 23, 2018, at 12:36 PM, Julien Gilli <[email protected]> wrote:
>>>>
>>>> What is the output of /usr/bin/sysinfo on that machine?
>>>
>>>
>>> See below for output. After starting this thread I kept poking around and
>>> discovered that smartdc/metadata is in maintenance mode, while it is online
>>> on the other nodes I have that are working. log output below
>>>
>>>
>>> # /usr/bin/sysinfo
>>> {
>>> "Live Image": "20180118T013028Z",
>>> "System Type": "SunOS",
>>> "Boot Time": "1516727655",
>>> "SDC Version": "7.0",
>>> "Manufacturer": "HP",
>>> "Product": "ProLiant DL360 G5",
>>> "Serial Number": "",
>>> "SKU Number": "",
>>> "HW Version": "",
>>> "HW Family": "ProLiant",
>>> "Setup": "false",
>>> "VM Capable": true,
>>> "CPU Type": "Unknown",
>>> "CPU Virtualization": "vmx",
>>> "CPU Physical Cores": 0,
>>> "UUID": "00000000-0000-1100-0000-000000000000",
>>> "Hostname": "articulate",
>>> "CPU Total Cores": 8,
>>> "MiB of Memory": "32765",
>>> "Zpool": "zones",
>>> "Zpool Disks": "c1t0d0,c1t1d0,c1t2d0,c1t3d0,c1t4d0,c1t5d0",
>>> "Zpool Profile": "raidz2",
>>> "Zpool Creation": 1516707618,
>>> "Zpool Size in GiB": 526,
>>> "Disks": {
>>> "c1t0d0": {"Size in GB": 146},
>>> "c1t1d0": {"Size in GB": 146},
>>> "c1t2d0": {"Size in GB": 146},
>>> "c1t3d0": {"Size in GB": 146},
>>> "c1t4d0": {"Size in GB": 146},
>>> "c1t5d0": {"Size in GB": 146}
>>> },
>>> "Boot Parameters": {
>>> "console": "vga",
>>> "vga_mode": "115200,8,n,1,-",
>>> "root_shadow":
>>> "$5$2HOHRnK3$NvLlm.1KQBbB0WjoP7xcIwGnllhzp2HnT.mDO7DpxYA",
>>> "smartos": "true",
>>> "boot_args": "",
>>> "bootargs": ""
>>> },
>>> "Network Interfaces": {
>>> "bnx1": {"MAC Address": "00:22:64:0d:68:22", "ip4addr": "", "Link
>>> Status": "down", "NIC Names": []},
>>> "bnx0": {"MAC Address": "00:22:64:0d:68:28", "ip4addr": "10.0.0.8",
>>> "Link Status": "up", "NIC Names": ["admin"]},
>>> "e1000g0": {"MAC Address": "00:1b:78:5c:46:98", "ip4addr": "", "Link
>>> Status": "down", "NIC Names": []},
>>> "e1000g1": {"MAC Address": "00:1b:78:5c:46:99", "ip4addr": "", "Link
>>> Status": "down", "NIC Names": []}
>>> },
>>> "Virtual Network Interfaces": {
>>> },
>>> "Link Aggregations": {
>>> }
>>> }
>>>
>>>
>>> smf log output for metadata
>>>
>>> [ Jan 23 17:18:58 Executing start method ("/usr/vm/smf/system-metadata
>>> start"). ]
>>> + . /lib/svc/share/smf_include.sh
>>> ++ SMF_EXIT_OK=0
>>> ++ SMF_EXIT_NODAEMON=94
>>> ++ SMF_EXIT_ERR_FATAL=95
>>> ++ SMF_EXIT_ERR_CONFIG=96
>>> ++ SMF_EXIT_MON_DEGRADE=97
>>> ++ SMF_EXIT_MON_OFFLINE=98
>>> ++ SMF_EXIT_ERR_NOSMF=99
>>> ++ SMF_EXIT_ERR_PERM=100
>>> + PATH=/usr/sbin:/usr/bin
>>> + export PATH
>>> + case "$1" in
>>> + exit 0
>>> + /usr/bin/ctrun -l child -o noorphan /usr/vm/sbin/metadata
>>> [ Jan 23 17:18:58 Method "start" exited with status 0. ]
>>> {"name":"metadata","hostname":"articulate","pid":3545,"class":"status","channel":"com.sun:zones:status","level":30,"args":["-rj","-c","com.sun:zones:status","status"],"msg":"calling
>>> /usr/sbin/sysevent -rj -c com.sun:zones:status
>>> status","time":"2018-01-23T17:18:58.807Z","v":0}
>>> {"name":"metadata","hostname":"articulate","pid":3545,"level":30,"msg":"Setup
>>> timer to purge deleted zones every 300000
>>> ms","time":"2018-01-23T17:18:58.869Z","v":0}
>>> {"name":"metadata","hostname":"articulate","pid":3545,"level":30,"msg":"Setup
>>> timer to detect (missed) new zones every 60000
>>> ms","time":"2018-01-23T17:18:58.869Z","v":0}
>>> {"name":"metadata","hostname":"articulate","pid":3545,"class":"status","channel":"com.sun:zones:status","level":20,"msg":"sysevent
>>> stream ready, took 0.067055355s
>>> (67.06ms)","time":"2018-01-23T17:18:58.874Z","v":0}
>>> Uncaught TypeError: Cannot read property 'length' of undefined
>>>
>>> FROM
>>> Object.async.each (/usr/node/0.10/node_modules/async.js:113:17)
>>> /usr/vm/lib/metadata/agent.js:431:15
>>> gotVMs (/usr/vm/node_modules/VM.js:1565:13)
>>> /usr/vm/node_modules/vmload/index.js:990:13
>>> /usr/node/0.10/node_modules/async.js:240:13
>>> /usr/node/0.10/node_modules/async.js:144:21
>>> /usr/node/0.10/node_modules/async.js:237:17
>>> /usr/node/0.10/node_modules/async.js:600:34
>>> /usr/vm/node_modules/vmload/index.js:951:21
>>> /usr/vm/node_modules/vmload/index.js:1114:13
>>> /usr/node/0.10/node_modules/async.js:144:21
>>> /usr/vm/node_modules/vmload/index.js:1386:13
>>> /usr/vm/node_modules/vmload/index.js:556:13
>>> fs.js:266:14
>>> Object.oncomplete (fs.js:107:15)
>>> [ Jan 23 17:18:58 Stopping because all processes in service exited. ]
>>> [ Jan 23 17:18:58 Executing stop method (:kill). ]
>>> [ Jan 23 17:18:58 Executing start method ("/usr/vm/smf/system-metadata
>>> start"). ]
>>> + . /lib/svc/share/smf_include.sh
>>> ++ SMF_EXIT_OK=0
>>> ++ SMF_EXIT_NODAEMON=94
>>> ++ SMF_EXIT_ERR_FATAL=95
>>> ++ SMF_EXIT_ERR_CONFIG=96
>>> ++ SMF_EXIT_MON_DEGRADE=97
>>> ++ SMF_EXIT_MON_OFFLINE=98
>>> ++ SMF_EXIT_ERR_NOSMF=99
>>> ++ SMF_EXIT_ERR_PERM=100
>>> + PATH=/usr/sbin:/usr/bin
>>> + export PATH
>>> + case "$1" in
>>> + exit 0
>>> + /usr/bin/ctrun -l child -o noorphan /usr/vm/sbin/metadata
>>> [ Jan 23 17:18:59 Method "start" exited with status 0. ]
>>> {"name":"metadata","hostname":"articulate","pid":3550,"class":"status","channel":"com.sun:zones:status","level":30,"args":["-rj","-c","com.sun:zones:status","status"],"msg":"calling
>>> /usr/sbin/sysevent -rj -c com.sun:zones:status
>>> status","time":"2018-01-23T17:18:59.265Z","v":0}
>>> {"name":"metadata","hostname":"articulate","pid":3550,"level":30,"msg":"Setup
>>> timer to purge deleted zones every 300000
>>> ms","time":"2018-01-23T17:18:59.329Z","v":0}
>>> {"name":"metadata","hostname":"articulate","pid":3550,"level":30,"msg":"Setup
>>> timer to detect (missed) new zones every 60000
>>> ms","time":"2018-01-23T17:18:59.330Z","v":0}
>>> {"name":"metadata","hostname":"articulate","pid":3550,"class":"status","channel":"com.sun:zones:status","level":20,"msg":"sysevent
>>> stream ready, took 0.069465188s
>>> (69.47ms)","time":"2018-01-23T17:18:59.333Z","v":0}
>>> Uncaught TypeError: Cannot read property 'length' of undefined
>>>
>>> FROM
>>> Object.async.each (/usr/node/0.10/node_modules/async.js:113:17)
>>> /usr/vm/lib/metadata/agent.js:431:15
>>> gotVMs (/usr/vm/node_modules/VM.js:1565:13)
>>> /usr/vm/node_modules/vmload/index.js:990:13
>>> /usr/node/0.10/node_modules/async.js:240:13
>>> /usr/node/0.10/node_modules/async.js:144:21
>>> /usr/node/0.10/node_modules/async.js:237:17
>>> /usr/node/0.10/node_modules/async.js:600:34
>>> /usr/vm/node_modules/vmload/index.js:951:21
>>> /usr/vm/node_modules/vmload/index.js:1114:13
>>> /usr/node/0.10/node_modules/async.js:144:21
>>> /usr/vm/node_modules/vmload/index.js:1386:13
>>> /usr/vm/node_modules/vmload/index.js:556:13
>>> fs.js:266:14
>>> Object.oncomplete (fs.js:107:15)
>>> [ Jan 23 17:18:59 Stopping because all processes in service exited. ]
>>> [ Jan 23 17:18:59 Executing stop method (:kill). ]
>>> [ Jan 23 17:18:59 Restarting too quickly, changing state to maintenance. ]
>>>
>>> A reboot doesn’t fix metadata. I ran a scrub given the age of this system,
>>> but no errors were found.
>>>
>>> History for 'zones':
>>> 2018-01-23.11:40:18 zpool create -f zones raidz2 c1t0d0 c1t1d0 c1t2d0
>>> c1t3d0 c1t4d0 c1t5d0
>>> 2018-01-23.11:40:23 zfs set atime=off zones
>>> 2018-01-23.11:40:29 zfs create -V 1526mb -o checksum=noparity zones/dump
>>> 2018-01-23.11:42:00 zfs create zones/config
>>> 2018-01-23.11:42:01 zfs set mountpoint=legacy zones/config
>>> 2018-01-23.11:42:01 zfs create -o mountpoint=legacy zones/usbkey
>>> 2018-01-23.11:42:02 zfs create -o quota=10g -o
>>> mountpoint=/zones/global/cores -o compression=gzip zones/cores
>>> 2018-01-23.11:42:03 zfs create -o mountpoint=legacy zones/opt
>>> 2018-01-23.11:42:03 zfs create zones/var
>>> 2018-01-23.11:42:04 zfs set mountpoint=legacy zones/var
>>> 2018-01-23.11:42:09 zfs create -V 32765mb zones/swap
>>> 2018-01-23.11:54:54 zpool import -f zones
>>> 2018-01-23.11:54:55 zpool set feature@extensible_dataset=enabled zones
>>> 2018-01-23.11:54:55 zfs set checksum=noparity zones/dump
>>> 2018-01-23.11:54:56 zpool set feature@multi_vdev_crash_dump=enabled zones
>>> 2018-01-23.11:54:56 zfs destroy -r zones/cores
>>> 2018-01-23.11:54:57 zfs create -o compression=gzip -o mountpoint=none
>>> zones/cores
>>> 2018-01-23.11:55:03 zfs create -o quota=10g -o
>>> mountpoint=/zones/global/cores zones/cores/global
>>> 2018-01-23.14:32:26 zpool import -f zones
>>> 2018-01-23.14:32:27 zpool set feature@extensible_dataset=enabled zones
>>> 2018-01-23.14:32:28 zfs set checksum=noparity zones/dump
>>> 2018-01-23.14:32:28 zpool set feature@multi_vdev_crash_dump=enabled zones
>>> 2018-01-23.14:39:08 zpool upgrade zones
>>> 2018-01-23.17:04:08 zfs create -o compression=lzjb -o
>>> mountpoint=/zones/archive zones/archive
>>> 2018-01-23.17:08:37 zpool scrub zones
>>> 2018-01-23.17:16:42 zpool import -f zones
>>> 2018-01-23.17:16:43 zpool set feature@extensible_dataset=enabled zones
>>> 2018-01-23.17:16:43 zfs set checksum=noparity zones/dump
>>> 2018-01-23.17:16:43 zpool set feature@multi_vdev_crash_dump=enabled zones
>>>
>>>
>>>
>>>
>>> -Chad
>>
>
> smartos-discuss | Archives | Modify Your Subscription
-------------------------------------------
smartos-discuss
Archives: https://www.listbox.com/member/archive/184463/=now
RSS Feed: https://www.listbox.com/member/archive/rss/184463/25769125-55cfbc00
Modify Your Subscription:
https://www.listbox.com/member/?member_id=25769125&id_secret=25769125-7688e9fb
Powered by Listbox: http://www.listbox.com