------- Comment From pavsu...@in.ibm.com 2019-11-06 01:44 EDT-------
root@ltc-wspoon11:~# add-apt-repository ppa:ubuntu-power-triage/lp1848127

More info: https://launchpad.net/~ubuntu-power-triage/+archive/ubuntu/lp1848127
Press [ENTER] to continue or Ctrl-c to cancel adding it.

Get:1 file:/var/cuda-repo-10-1-local-10.1.152-418.67  InRelease
Ign:1 file:/var/cuda-repo-10-1-local-10.1.152-418.67  InRelease
Get:2 file:/var/cuda-repo-10-1-local-10.1.152-418.67  Release [574 B]
Get:2 file:/var/cuda-repo-10-1-local-10.1.152-418.67  Release [574 B]
Hit:4 http://us.ports.ubuntu.com/ubuntu-ports bionic InRelease
Hit:5 http://us.ports.ubuntu.com/ubuntu-ports bionic-updates InRelease
Hit:6 http://ports.ubuntu.com/ubuntu-ports bionic-security InRelease
Ign:7 http://ddebs.ubuntu.com bionic InRelease
Hit:8 http://us.ports.ubuntu.com/ubuntu-ports bionic-backports InRelease
Ign:9 http://ddebs.ubuntu.com bionic-updates InRelease
Hit:10 http://ppa.launchpad.net/ubuntu-power-triage/lp1848127/ubuntu bionic 
InRelease
Ign:11 http://ddebs.ubuntu.com bionic-proposed InRelease
Hit:12 http://ddebs.ubuntu.com bionic Release
Hit:14 http://ddebs.ubuntu.com bionic-updates Release
Hit:16 http://ddebs.ubuntu.com bionic-proposed Release
Reading package lists... Done
root@ltc-wspoon11:~# apt-get update
Get:1 file:/var/cuda-repo-10-1-local-10.1.152-418.67  InRelease
Ign:1 file:/var/cuda-repo-10-1-local-10.1.152-418.67  InRelease
Get:2 file:/var/cuda-repo-10-1-local-10.1.152-418.67  Release [574 B]
Get:2 file:/var/cuda-repo-10-1-local-10.1.152-418.67  Release [574 B]
Hit:4 http://us.ports.ubuntu.com/ubuntu-ports bionic InRelease
Hit:5 http://us.ports.ubuntu.com/ubuntu-ports bionic-updates InRelease
Ign:6 http://ddebs.ubuntu.com bionic InRelease
Hit:7 http://ports.ubuntu.com/ubuntu-ports bionic-security InRelease
Hit:8 http://us.ports.ubuntu.com/ubuntu-ports bionic-backports InRelease
Hit:9 http://ppa.launchpad.net/ubuntu-power-triage/lp1848127/ubuntu bionic 
InRelease
Ign:10 http://ddebs.ubuntu.com bionic-updates InRelease
Ign:11 http://ddebs.ubuntu.com bionic-proposed InRelease
Hit:12 http://ddebs.ubuntu.com bionic Release
Hit:14 http://ddebs.ubuntu.com bionic-updates Release
Hit:16 http://ddebs.ubuntu.com bionic-proposed Release
Reading package lists... Done

root@ltc-wspoon11:~# apt-get install linux-image-unsigned-4.15.0-68-generic/bionic
Reading package lists... Done
Building dependency tree
Reading state information... Done
Selected version '4.15.0-68.77~lp1848127+build.1' (lp1848127:18.04/bionic 
[ppc64el]) for 'linux-image-unsigned-4.15.0-68-generic'
The following additional packages will be installed:
linux-modules-4.15.0-68-generic
Suggested packages:
fdutils linux-doc-4.15.0 | linux-source-4.15.0 linux-headers-4.15.0-68-generic
The following NEW packages will be installed:
linux-image-unsigned-4.15.0-68-generic linux-modules-4.15.0-68-generic
0 upgraded, 2 newly installed, 0 to remove and 0 not upgraded.
Need to get 18.6 MB of archives.
After this operation, 92.8 MB of additional disk space will be used.
Do you want to continue? [Y/n] Y
Get:1 http://ppa.launchpad.net/ubuntu-power-triage/lp1848127/ubuntu bionic/main 
ppc64el linux-modules-4.15.0-68-generic ppc64el 4.15.0-68.77~lp1848127+build.1 
[12.1 MB]
Get:2 http://ppa.launchpad.net/ubuntu-power-triage/lp1848127/ubuntu bionic/main 
ppc64el linux-image-unsigned-4.15.0-68-generic ppc64el 
4.15.0-68.77~lp1848127+build.1 [6,532 kB]
Fetched 18.6 MB in 6s (3,277 kB/s)
Selecting previously unselected package linux-modules-4.15.0-68-generic.
(Reading database ... 183240 files and directories currently installed.)
Preparing to unpack 
.../linux-modules-4.15.0-68-generic_4.15.0-68.77~lp1848127+build.1_ppc64el.deb 
...
Unpacking linux-modules-4.15.0-68-generic (4.15.0-68.77~lp1848127+build.1) ...
Selecting previously unselected package linux-image-unsigned-4.15.0-68-generic.
Preparing to unpack 
.../linux-image-unsigned-4.15.0-68-generic_4.15.0-68.77~lp1848127+build.1_ppc64el.deb
 ...
Unpacking linux-image-unsigned-4.15.0-68-generic 
(4.15.0-68.77~lp1848127+build.1) ...
Setting up linux-modules-4.15.0-68-generic (4.15.0-68.77~lp1848127+build.1) ...
Setting up linux-image-unsigned-4.15.0-68-generic 
(4.15.0-68.77~lp1848127+build.1) ...
I: /boot/vmlinux.old is now a symlink to vmlinux-4.15.0-66-generic
I: /boot/initrd.img.old is now a symlink to initrd.img-4.15.0-66-generic
I: /boot/vmlinux is now a symlink to vmlinux-4.15.0-68-generic
I: /boot/initrd.img is now a symlink to initrd.img-4.15.0-68-generic
Processing triggers for linux-image-unsigned-4.15.0-68-generic 
(4.15.0-68.77~lp1848127+build.1) ...
/etc/kernel/postinst.d/dkms:
* dkms: running auto installation service for kernel 4.15.0-68-generic
Error! Your kernel headers for kernel 4.15.0-68-generic cannot be found.
Please install the linux-headers-4.15.0-68-generic package,
or use the --kernelsourcedir option to tell DKMS where it's located
...done.
/etc/kernel/postinst.d/initramfs-tools:
update-initramfs: Generating /boot/initrd.img-4.15.0-68-generic
/etc/kernel/postinst.d/kdump-tools:
kdump-tools: Generating /var/lib/kdump/initrd.img-4.15.0-68-generic
/etc/kernel/postinst.d/zz-update-grub:
Sourcing file `/etc/default/grub'
Sourcing file `/etc/default/grub.d/kdump-tools.cfg'
Generating grub configuration file ...
Found linux image: /boot/vmlinux-5.3.0-18-generic
Found initrd image: /boot/initrd.img-5.3.0-18-generic
Found linux image: /boot/vmlinux-4.15.0-68-generic
Found initrd image: /boot/initrd.img-4.15.0-68-generic
Found linux image: /boot/vmlinux-4.15.0-66-generic
Found initrd image: /boot/initrd.img-4.15.0-66-generic
Found linux image: /boot/vmlinux-4.15.0-65-generic
Found initrd image: /boot/initrd.img-4.15.0-65-generic
done

root@ltc-wspoon11:~# apt-get install linux-headers-4.15.0-68-generic
Reading package lists... Done
Building dependency tree
Reading state information... Done
The following additional packages will be installed:
linux-headers-4.15.0-68
The following NEW packages will be installed:
linux-headers-4.15.0-68 linux-headers-4.15.0-68-generic
0 upgraded, 2 newly installed, 0 to remove and 0 not upgraded.
Need to get 12.6 MB of archives.
After this operation, 86.2 MB of additional disk space will be used.
Do you want to continue? [Y/n] Y
Get:1 http://ppa.launchpad.net/ubuntu-power-triage/lp1848127/ubuntu bionic/main 
ppc64el linux-headers-4.15.0-68 all 4.15.0-68.77~lp1848127+build.1 [11.3 MB]
Get:2 http://ppa.launchpad.net/ubuntu-power-triage/lp1848127/ubuntu bionic/main 
ppc64el linux-headers-4.15.0-68-generic ppc64el 4.15.0-68.77~lp1848127+build.1 
[1,357 kB]
Fetched 12.6 MB in 6s (2,150 kB/s)
Selecting previously unselected package linux-headers-4.15.0-68.
(Reading database ... 184392 files and directories currently installed.)
Preparing to unpack 
.../linux-headers-4.15.0-68_4.15.0-68.77~lp1848127+build.1_all.deb ...
Unpacking linux-headers-4.15.0-68 (4.15.0-68.77~lp1848127+build.1) ...
Selecting previously unselected package linux-headers-4.15.0-68-generic.
Preparing to unpack 
.../linux-headers-4.15.0-68-generic_4.15.0-68.77~lp1848127+build.1_ppc64el.deb 
...
Unpacking linux-headers-4.15.0-68-generic (4.15.0-68.77~lp1848127+build.1) ...
Setting up linux-headers-4.15.0-68 (4.15.0-68.77~lp1848127+build.1) ...
Setting up linux-headers-4.15.0-68-generic (4.15.0-68.77~lp1848127+build.1) ...
/etc/kernel/header_postinst.d/dkms:
* dkms: running auto installation service for kernel 4.15.0-68-generic

Kernel preparation unnecessary for this kernel.  Skipping...

Building module:
cleaning build area...
unset ARCH; env NV_VERBOSE=1 'make' -j16 NV_EXCLUDE_BUILD_MODULES='' 
KERNEL_UNAME=4.15.0-68-generic IGNORE_XEN_PRESENCE=1 IGNORE_CC_MISMATCH=1 
SYSSRC=/lib/modules/4.15.0-68-generic/build LD=/usr/bin/ld.bfd modules......
cleaning build area...

DKMS: build completed.

nvidia.ko:
Running module version sanity check.
- Original module
- No original module exists within this kernel
- Installation
- Installing to /lib/modules/4.15.0-68-generic/updates/dkms/

nvidia-modeset.ko:
Running module version sanity check.
- Original module
- No original module exists within this kernel
- Installation
- Installing to /lib/modules/4.15.0-68-generic/updates/dkms/

nvidia-drm.ko:
Running module version sanity check.
- Original module
- No original module exists within this kernel
- Installation
- Installing to /lib/modules/4.15.0-68-generic/updates/dkms/

nvidia-uvm.ko:
Running module version sanity check.
- Original module
- No original module exists within this kernel
- Installation
- Installing to /lib/modules/4.15.0-68-generic/updates/dkms/

depmod...

DKMS: install completed.
...done.

root@ltc-wspoon4:~# uname -a
Linux ltc-wspoon4 4.15.0-68-generic #77~lp1848127+build.1-Ubuntu SMP Mon Oct 28 
19:57:54 UTC 2019 ppc64le ppc64le ppc64le GNU/Linux
root@ltc-wspoon4:~# cat /etc/os-release
NAME="Ubuntu"
VERSION="18.04.3 LTS (Bionic Beaver)"
ID=ubuntu
ID_LIKE=debian
PRETTY_NAME="Ubuntu 18.04.3 LTS"
VERSION_ID="18.04"
HOME_URL="https://www.ubuntu.com/";
SUPPORT_URL="https://help.ubuntu.com/";
BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/";
PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy";
VERSION_CODENAME=bionic
UBUNTU_CODENAME=bionic

root@ltc-wspoon4:~# ./statedisable.sh
./statedisable.sh: line 10: /sys/devices/system/cpu/cpu*/cpuidle/state7/disable: No such file or directory
./statedisable.sh: line 11: /sys/devices/system/cpu/cpu*/cpuidle/state8/disable: No such file or directory

root@ltc-wspoon4:~# ./run_workload.sh

root@ltc-wspoon4:~# ./scom_addr_p9.sh 0x1001080c 22
EQ[ 5]: 0x1501080c
EX[11]: 0x15010c0c
C[22]: 0x3601080c
root@ltc-wspoon4:~# getscom -c 0x8 0x15010c0c
0000000000000000

ltc-wspoon4 login: [  442.228985]   NIP [c00000000019ae5c]: osq_lock+0x15c/0x230
[  442.228985]   Initiator: CPU
[  442.228986]   Error type: UE [Load/Store]
[  442.228987]     Effective address: c000201cc76a9600
[  442.228988]     Physical address:  0000201cc76a0000
[  442.228988] opal: Hardware platform error: Unrecoverable Machine Check 
exception
[  442.228989] CPU: 109 PID: 9095 Comm: find Tainted: G   M              
4.15.0-68-generic #77~lp1848127+build.1-Ubuntu
[  442.228990] NIP:  c00000000019ae5c LR: c000000000e000a0 CTR: c000000000446e30
[  442.228991] REGS: c000201fff24bd70 TRAP: 0200   Tainted: G   M               
(5.0.0-33-generic)
[  442.228992] MSR:  9000000000209033 <SF,HV,EE,ME,IR,DR,RI,LE>  CR: 48002222  
XER: 00000000
[  442.228996] CFAR: c00000000019ae34 DAR: c000201cc76a9600 DSISR: 00008000 
IRQMASK: 0
[  442.228998] GPR00: c000000000e000a0 c000201c87babc30 c00000000184cb00 
c000000001731abc
[  442.229001] GPR04: 0000000000000000 0000000000000000 c000000001885c78 
0000000000000000
[  442.229003] GPR08: c000201cc76a9600 c000201cc7b69600 0000000000000004 
ffffffffffffffea
[  442.229005] GPR12: 0000000088002228 c000201fff686d80 00000ed7ab1e2b80 
00000ed7ab1e2b80
[  442.229008] GPR16: 00000ed7ab1f0e30 00000ed7ab1eec30 0000000000000101 
00007fffc662d8b8
[  442.229010] GPR20: 0000000000000000 0000000000030000 000000000001a9b7 
0000000000000018
[  442.229012] GPR24: c000001fc28a9dc8 c000201c7710c500 0000000000000000 
c000000001731ab0
[  442.229014] GPR28: 0000000000000002 c000000001731abc c000201c87babdb0 
c000000001731ab0
[  442.229017] NIP [c00000000019ae5c] osq_lock+0x15c/0x230
[  442.229018] LR [c000000000e000a0] __mutex_lock.isra.1+0x90/0x710
[  442.229018] Call Trace:
[  442.229019] [c000201c87babc30] [c000000000e00054] 
__mutex_lock.isra.1+0x44/0x710 (unreliable)
[  442.229020] [c000201c87babcd0] [c000000[  577.498732581,0] OPAL: Reboot 
requested due to Platform error.
[  577.498806187,3] OPAL: Reboot requested due to Platform error.0004facd0] 
kernfs_fop_readdir+0x200/0x3b0
[  442.229022] [c000201c87babd40] [c000000000446300] iterate_dir+0x200/0x280
[  442.229023] [c000201c87babd90] [c0000000004472a0] ksys_getdents64+0xa0/0x1a0
[  442.229024] [c000201c87babe00] [c0000000004473c8] sys_getdents64+0x28/0x110
[  442.229025] [c000201c87babe20] [c00000000000b288] system_call+0x5c/0x70
[  442.229026] Instruction dump:
[  442.229027] 60000000 38e00000 48000028 60000000 60000000 81490010 7c2004ac 
2faa0000
[  442.229030] 409effd4 7c210b78 7c421378 e9090008 <e9480000> 7faa4800 409effdc 
7c0004ac
[  443.416541] Disabling lock debugging due to kernel taint
[  443.416543] Severe Machine check interrupt [Not recovered]
[  443.416544]   NIP [c00000000019ad88]: osq_lock+0x88/0x230
[  443.416544]   Initiator: CPU
[  443.416545]   Error type: UE [Load/Store]
[  443.416545]     Effective address: c000201cc76a9610
[  443.416546]     Physical address:  0000201cc76a0000
[  443.416547] opal: Hardware platform error: Unrecoverable Machine Check 
exception
[  443.416548] CPU: 90 PID: 9020 Comm: find Tainted: G   M           
4.15.0-68-generic #77~lp1848127+build.1-Ubuntu
[  443.416549] NIP:  c00000000019ad88 LR: c000000000e000a0 CTR: c0000000004f8d60
[  443.416550] REGS: c000201fff32fd70 TRAP: 0200   Tainted: G   M               
(5.0.0-33-generic)
[  443.416551] MSR:  9000000000209033 <SF,HV,EE,ME,IR,DR,RI,LE>  CR: 24002224  
XER: 00000000
[  443.416555] CFAR: c000000000e0009c DAR: 0000201cc76a9610 DSISR: 00008000 
IRQMASK: 0
[  443.416557] GPR00: c000000000e000a0 c000201c8f81fbc0 c00000000184cb00 
c000000001731abc
[  443.416559] GPR04: 0000000000000000 0000000000000000 0000201cc6370000 
c000000001339600
[  443.416561] GPR08: c000001ffec29600 c000201cc76a9600 0000001ffd8f0000 
c000001f96936300
[  443.416564] GPR12: 0000000084002228 c000201fff69ba00 00000334527e2b80 
0000000000000000
[  443.416566] GPR16: 0000000000000000 000003345280d440 0000000000000101 
00007fffcaffe858
[  443.416568] GPR20: 0000000000000000 00007fffcaffe7c8 0000000000000000 
0000000000000006
[  443.416570] GPR24: 000077194c155308 00000000000007ff c000201c8f81fd80 
000003345280d548
[  443.416572] GPR28: 0000000000000002 c000000001731abc c000000001731ab0 
c000000001731ab0
[  443.416575] NIP [c00000000019ad88] osq_lock+0x88/0x230
[  443.416576] LR [c000000000e000a0] __mutex_lock.isra.1+0x90/0x710
[  443.416576] Call Trace:
[  443.416577] [c000201c8f81fbc0] [c000000000e00054] 
__mutex_lock.isra.1+0x44/0x710 (unreliable)
[  443.416578] [c000201c8f81fc60] [c0000000004f8dac] 
kernfs_iop_getattr+0x4c/0xa0
[  443.416579] [c000201c8f81fca0] [c00000000042eac0] vfs_getattr_nosec+0x90/0xf0
[  443.416581] [c000201c8f81fce0] [c00000000042ed68] vfs_statx+0xc8/0x190
[  443.416582] [c000201c8f81fd60] [c00000000042f128] sys_newfstatat+0x48/0x90
[  443.416583] [c000201c8f81fe20] [c00000000000b288] system_call+0x5c/0x70
[  443.416584] Instruction dump:
[  443.416584] 2faa0000 419e00c4 394affff 3d020003 39085170 7d4a07b4 794a1f24 
7d48502a
[  443.416587] 7d075214 f9090008 7c2004ac 7d27512a <81490010> 2faa0000 409e0090 
782a0464
[  577.500377001,3]  ___________________________________________________________
[  577.500429242,3] <  Dangerous NVRAM option: opal-sw-xstop=enable
[  577.500480635,3]  -----------------------------------------------------------
[  577.500520165,3]                   \
[  577.500562271,3]                    \   WW
[  577.500614905,3]                       <^ \___/|
[  577.500657283,3]                        \      /
[  577.500704560,3]                         \_  _/
[  577.500743890,3]                           }{

The Linux host did not hang; it rebooted and came back up after the
error injection shown above.

-- 
You received this bug notification because you are a member of Ubuntu
Bugs, which is subscribed to Ubuntu.
https://bugs.launchpad.net/bugs/1848127

Title:
  [LTCTest][OPAL][OP930] Machine hangs after injecting the Machine Check
  Error

To manage notifications about this bug go to:
https://bugs.launchpad.net/ubuntu-power-systems/+bug/1848127/+subscriptions

-- 
ubuntu-bugs mailing list
ubuntu-bugs@lists.ubuntu.com
https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs

Reply via email to