Hello,

we have a problem with Oracle: they refuse to honor our service
contract because we have installed illumos/OI, and they attribute
the errors we see (on a Sun X4275) not to a hardware defect but to
a software problem with illumos FMA (and indeed the SP does not
report a problem; the newest firmware is installed). Is there in fact
a problem with FMA in illumos on this machine type, or do we have
a real hardware problem that is not being properly diagnosed? The
impact is rather bad: at boot, FMA disables every PCI device it can
(such as network cards) because it considers the PCI bus faulty.

# fmadm faulty
--------------- ------------------------------------  -------------- ---------
TIME            EVENT-ID                              MSG-ID         SEVERITY
--------------- ------------------------------------  -------------- ---------
May 27 19:04:47 e35fa799-9ea4-c042-a039-cef64ced402f  SUNOS-8000-J0  Major

Host        : imksung01
Platform    : SUN-FIRE-X4275-SERVER     Chassis_id  : 0943XFG026
Product_sn  :

Fault class : defect.sunos.eft.unexpected_telemetry 50%
              fault.sunos.eft.unexpected_telemetry 50%
Problem in  : dev:////pci@0,0
                  faulted and taken out of service

Description : The diagnosis engine encountered telemetry from the listed
              devices for which it was unable to perform a diagnosis -
              Refer to http://illumos.org/msg/SUNOS-8000-J0 for more
              information.  Refer to http://illumos.org/msg/SUNOS-8000-J0 for
              more information.

Response    : Error reports have been logged for examination by Sun.

Impact      : Automated diagnosis and response for these events will not occur.

Action      : Ensure that the latest Solaris Kernel and Predictive Self-Healing
              (PSH) patches are installed.

# fmdump -eV

May 27 2013 22:15:43.423088158 ereport.io.pci.fabric
nvlist version: 0
        class = ereport.io.pci.fabric
        ena = 0xc05896c893b01001
        detector = (embedded nvlist)
        nvlist version: 0
                version = 0x0
                scheme = dev
                device-path = /pci@0,0/pci8086,340c@5
        (end detector)

        bdf = 0x28
        device_id = 0x340c
        vendor_id = 0x8086
        rev_id = 0x13
        dev_type = 0x40
        pcie_off = 0x90
        pcix_off = 0x0
        aer_off = 0x100
        ecc_ver = 0x0
        pci_status = 0x10
        pci_command = 0x47
        pci_bdg_sec_status = 0x0
        pci_bdg_ctrl = 0x3
        pcie_status = 0x0
        pcie_command = 0x26
        pcie_dev_cap = 0x8021
        pcie_adv_ctl = 0x0
        pcie_ue_status = 0x0
        pcie_ue_mask = 0x100000
        pcie_ue_sev = 0x62030
        pcie_ue_hdr0 = 0x0
        pcie_ue_hdr1 = 0x0
        pcie_ue_hdr2 = 0x0
        pcie_ue_hdr3 = 0x0
        pcie_ce_status = 0x0
        pcie_ce_mask = 0x0
        pcie_rp_status = 0x0
        pcie_rp_control = 0x0
        pcie_adv_rp_status = 0x0
        pcie_adv_rp_command = 0x7
        pcie_adv_rp_ce_src_id = 0x0
        pcie_adv_rp_ue_src_id = 0x0
        remainder = 0x1
        severity = 0x1
        __ttl = 0x1
        __tod = 0x51a3beef 0x1937d01e

May 27 2013 22:15:43.423089201 ereport.io.pci.fabric
nvlist version: 0
        class = ereport.io.pci.fabric
        ena = 0xc05896c893b01001
        detector = (embedded nvlist)
        nvlist version: 0
                version = 0x0
                scheme = dev
                device-path = /pci@0,0/pci8086,340c@5/pci108e,286@0
        (end detector)

        bdf = 0x1300
        device_id = 0x285
        vendor_id = 0x9005
        rev_id = 0x9
        dev_type = 0x0
        pcie_off = 0xd0
        pcix_off = 0x0
        aer_off = 0x100
        ecc_ver = 0x0
        pci_status = 0x10
        pci_command = 0x47
        pcie_status = 0x0
        pcie_command = 0x2036
        pcie_dev_cap = 0x6481c2
        pcie_adv_ctl = 0xb4
        pcie_ue_status = 0x0
        pcie_ue_mask = 0x180000
        pcie_ue_sev = 0x62011
        pcie_ue_hdr0 = 0x5000001
        pcie_ue_hdr1 = 0x280003
        pcie_ue_hdr2 = 0x14000000
        pcie_ue_hdr3 = 0x14000000
        pcie_ce_status = 0x0
        pcie_ce_mask = 0x0
        remainder = 0x0
        severity = 0x1
        __ttl = 0x1
        __tod = 0x51a3beef 0x1937d431

May 28 2013 04:41:39.246379273 ereport.io.pci.fabric
nvlist version: 0
        class = ereport.io.pci.fabric
        ena = 0x114c70346f801001
        detector = (embedded nvlist)
        nvlist version: 0
                version = 0x0
                scheme = dev
                device-path = /pci@0,0/pci8086,340c@5
        (end detector)

        bdf = 0x28
        device_id = 0x340c
        vendor_id = 0x8086
        rev_id = 0x13
        dev_type = 0x40
        pcie_off = 0x90
        pcix_off = 0x0
        aer_off = 0x100
        ecc_ver = 0x0
        pci_status = 0x10
        pci_command = 0x47
        pci_bdg_sec_status = 0x0
        pci_bdg_ctrl = 0x3
        pcie_status = 0x0
        pcie_command = 0x26
        pcie_dev_cap = 0x8021
        pcie_adv_ctl = 0x0
        pcie_ue_status = 0x0
        pcie_ue_mask = 0x100000
        pcie_ue_sev = 0x62030
        pcie_ue_hdr0 = 0x0
        pcie_ue_hdr1 = 0x0
        pcie_ue_hdr2 = 0x0
        pcie_ue_hdr3 = 0x0
        pcie_ce_status = 0x0
        pcie_ce_mask = 0x0
        pcie_rp_status = 0x0
        pcie_rp_control = 0x0
        pcie_adv_rp_status = 0x0
        pcie_adv_rp_command = 0x7
        pcie_adv_rp_ce_src_id = 0x0
        pcie_adv_rp_ue_src_id = 0x0
        remainder = 0x1
        severity = 0x1
        __ttl = 0x1
        __tod = 0x51a41963 0xeaf7309

May 28 2013 04:41:39.246381160 ereport.io.pci.fabric
nvlist version: 0
        class = ereport.io.pci.fabric
        ena = 0x114c70346f801001
        detector = (embedded nvlist)
        nvlist version: 0
                version = 0x0
                scheme = dev
                device-path = /pci@0,0/pci8086,340c@5/pci108e,286@0
        (end detector)

        bdf = 0x1300
        device_id = 0x285
        vendor_id = 0x9005
        rev_id = 0x9
        dev_type = 0x0
        pcie_off = 0xd0
        pcix_off = 0x0
        aer_off = 0x100
        ecc_ver = 0x0
        pci_status = 0x10
        pci_command = 0x47
        pcie_status = 0x0
        pcie_command = 0x2036
        pcie_dev_cap = 0x6481c2
        pcie_adv_ctl = 0xb4
        pcie_ue_status = 0x0
        pcie_ue_mask = 0x180000
        pcie_ue_sev = 0x62011
        pcie_ue_hdr0 = 0x5000001
        pcie_ue_hdr1 = 0x280003
        pcie_ue_hdr2 = 0x14000000
        pcie_ue_hdr3 = 0x14000000
        pcie_ce_status = 0x0
        pcie_ce_mask = 0x0
        remainder = 0x0
        severity = 0x1
        __ttl = 0x1
        __tod = 0x51a41963 0xeaf7a68



--
Dr.Udo Grabowski    Inst.f.Meteorology a.Climate Research IMK-ASF-SAT
www.imk-asf.kit.edu/english/sat.php
KIT - Karlsruhe Institute of Technology            http://www.kit.edu
Postfach 3640,76021 Karlsruhe,Germany  T:(+49)721 608-26026 F:-926026




-------------------------------------------
illumos-discuss
Archives: https://www.listbox.com/member/archive/182180/=now
RSS Feed: https://www.listbox.com/member/archive/rss/182180/21175430-2e6923be
Modify Your Subscription: 
https://www.listbox.com/member/?member_id=21175430&id_secret=21175430-6a77cda4
Powered by Listbox: http://www.listbox.com

Attachment: smime.p7s
Description: S/MIME Cryptographic Signature

Reply via email to