crash-study.txt
/**
* megasas_make_sgl_fusion - Prepares 32-bit SGL
* @instance: Adapter soft state
* @scp: SCSI command from the mid-layer
* @sgl_ptr: SGL to be filled in
* @cmd: cmd we are working on
*
* If successful, this function returns the number of SG elements.
*/
static int
megasas_make_sgl_fusion(struct megasas_instance *instance,
struct scsi_cmnd *scp,
struct MPI25_IEEE_SGE_CHAIN64 *sgl_ptr,
struct megasas_cmd_fusion *cmd)
{
...
sge_count = scsi_dma_map(scp);
BUG_ON(sge_count < 0); ----> FAILS HERE
if (sge_count > instance->max_num_sge || !sge_count)
return sge_count;
----
/**
* scsi_dma_map - perform DMA mapping against command's sg lists
* @cmd: scsi command
*
* Returns the number of sg lists actually used, zero if the sg lists
* is NULL, or -ENOMEM if the mapping failed.
*/
int scsi_dma_map(struct scsi_cmnd *cmd)
{
int nseg = 0;
if (scsi_sg_count(cmd)) {
struct device *dev = cmd->device->host->dma_dev;
nseg = dma_map_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd),
cmd->sc_data_direction);
if (unlikely(!nseg))
return -ENOMEM;
}
return nseg;
}
----
The only possible way for the BUG_ON in megasas_make_sgl_fusion to be triggered
is if dma_map_sg returns 0 (nseg == 0), so that scsi_dma_map returns -ENOMEM
(-12). This means that dma_map_sg could NOT map the scatter gather buffers, from
scsi_cmnd, into the firmware?
----
#define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, NULL)
----
/*
* dma_maps_sg_attrs returns 0 on error and > 0 on success.
* It should never return a value < 0.
*/
static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
...
ents = ops->map_sg(dev, sg, nents, dir, attrs);
BUG_ON(ents < 0);
...
return ents;
}
----
Especially because dma_map_sg_attrs would have BUGed_ON if ents were negative.
So the only possible thing that could have happened is for ents to be zero, so
it BUGed_ON at megasas_make_sgl_fusion() instead.
----
ops->map_sg:
{init __mic_dma_ops}() : dma_map_ops
{init amd_iommu_dma_ops}() : dma_map_ops
{init calgary_dma_ops}() : dma_map_ops
{init gart_dma_ops}() : dma_map_ops
{init intel_dma_ops}() : dma_map_ops
{init nommu_dma_ops}() : dma_map_ops
{init sta2x11_dma_ops}() : dma_map_ops
{init swiotlb_dma_ops}() : dma_map_ops
{init xen_swiotlb_dma_ops}() : dma_map_ops
crash> dev -d
MAJOR GENDISK NAME REQUEST_QUEUE TOTAL ASYNC SYNC DRV
8 ffff881ff0142800 sdc ffff881fe92a9f50 0 0 0 1
11 ffff881ff06dd000 sr0 ffff881fe8ecb968 0 0 0 0
8 ffff881ff06de000 sdd ffff881fe8ecb430 0 0 0 0
8 ffff881ff0141800 sdb ffff881fe9c78a70 0 0 0 0
8 ffff881ff0140800 sda ffff881fe9c78538 0 0 0 0
8 ffff881ff06b2000 sde ffff881fe8dfbea0 0 0 0 0
crash> struct device.archdata ffff881fe8ecb968
archdata = {
dma_ops = 0xffff881ff06dc168,
iommu = 0x0
}
crash> struct device.archdata ffff881fe8ecb430
archdata = {
dma_ops = 0xffff881ff06dc968,
iommu = 0x0
}
crash> struct device.archdata ffff881fe9c78a70
archdata = {
dma_ops = 0xffff881ff06da968,
iommu = 0x0
}
crash> struct device.archdata ffff881fe9c78538
archdata = {
dma_ops = 0xffff881ff06da168,
iommu = 0x0
}
crash> struct device.archdata ffff881fe8dfbea0
archdata = {
dma_ops = 0xffff881fec86a968,
iommu = 0x0
}
crash> struct dma_map_ops 0xffff881ff06dc168
struct dma_map_ops {
alloc = 0xffff881fe942e028,
free = 0xffff881fe8a12a80,
mmap = 0xffff881fefd97010,
get_sgtable = 0xffff881fe942c018,
map_page = 0xffff881fe942e040,
unmap_page = 0xffff881fe942e038,
map_sg = 0xffff881fefcaed20,
unmap_sg = 0xffffffff81ecd980 <device_ktype>,
sync_single_for_cpu = 0xffff881feac667f8,
sync_single_for_device = 0x70000000d,
sync_sg_for_cpu = 0x0,
sync_sg_for_device = 0xffffffff81ee1440 <scsi_dev_type>,
mapping_error = 0x1,
dma_supported = 0xffff881ff06dc1d0,
set_dma_mask = 0xffff881ff06dc1d0,
is_phys = 0
}
dma_map_ops has no map_sg mapped because:
#ifndef CONFIG_X86_DEV_DMA_OPS == 1 in our case (auto generated)
SO
crash> dma_ops
dma_ops = $3 = (struct dma_map_ops *) 0xffffffff81ecd200 <intel_dma_ops>
----
So,
ents = ops->map_sg(dev, sg, nents, dir, attrs);
Where ents = 0
Means:
struct dma_map_ops intel_dma_ops = {
...
.map_sg = intel_map_sg,
...
};
----
READING intel_map_sg() now, in order to discover why it would return 0 because
of firmware.
iommu_no_mapping(dev):
static int identity_mapping(struct device *dev)
{
struct device_domain_info *info;
if (likely(!iommu_identity_mapping))
return 0;
info = dev->archdata.iommu;
if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
return (info->domain == si_domain);
return 0;
}
There is no identity mapping since:
crash> iommu_identity_mapping
iommu_identity_mapping = $6 = 1
But all archdata.iommu pointers are zeroed, so identity_mapping() returns 0.
The only possible paths in intel_map_sg() that return 0 are:
domain = get_valid_domain_for_dev(dev);
if (!domain)
return 0;
Meaning that no dmar domain could be created or found for the HBA
(improbable)
or
iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
*dev->dma_mask);
if (!iova) {
sglist->dma_length = 0;
return 0;
}
This is it! intel_alloc_iova has the message found in the logs.
or
ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
if (unlikely(ret)) {
dma_pte_free_pagetable(domain, start_vpfn,
start_vpfn + size - 1);
__free_iova(&domain->iovad, iova);
return 0;
}
We didn't reach the scatter gather mapping.
The log messages are:
[ 8650.749804] SLUB: Unable to allocate memory on node -1 (gfp=0x2080020)
[ 8650.749809] cache:
kmalloc-64(2958:318bd9ccdb762575c670d0f17241a655a4d31b0358a6017686497cf166ea647e),
object size: 64, buffer size: 64, default order: 0, min order: 0
[ 8650.749812] node 0: slabs: 83, objs: 5312, free: 0
[ 8650.749814] node 1: slabs: 59, objs: 3776, free: 0
For the first attempt at allocating the IOVA from DMA_BIT_MASK(32)
[ 8650.749817] SLUB: Unable to allocate memory on node -1 (gfp=0x2080020)
[ 8650.749819] cache:
kmalloc-64(2958:318bd9ccdb762575c670d0f17241a655a4d31b0358a6017686497cf166ea647e),
object size: 64, buffer size: 64, default order: 0, min order: 0
[ 8650.749821] node 0: slabs: 83, objs: 5312, free: 0
[ 8650.749823] node 1: slabs: 59, objs: 3776, free: 0
For the second attempt of allocating IOVA
[ 8650.749825] DMAR: Allocating 2-page iova for 0000:02:00.0 failed
Both attempts had:
size == sum of all scatter gather from scsi command in question.
So the allocation failed because of the lack of memory for the slab cache of
"struct iova" (new_iova) in alloc_iova. This means that something is, likely,
not freeing the IOVA slabs after they are allocated. IOVA structures are used
when mapping the VM to IOMMU in order for the device to DMA data into virtual
addresses (and guaranteeing DMA operation is secured).
The allocation logic for the failed message can be followed in here:
___slab_alloc() from slub (our slab algorithm choice):
...
freelist = new_slab_objects(s, gfpflags, node, &c);
if (unlikely(!freelist)) {
slab_out_of_memory(s, gfpflags, node);
return NULL;
}
...
And coming from the following stack trace:
slab_out_of_memory(kmem_cache *, ?, int) : void
___slab_alloc(kmem_cache *, ?, int, unsigned long int, kmem_cache_cpu *) :
void *
__slab_alloc(kmem_cache *, ?, int, unsigned long int, kmem_cache_cpu *) :
void *
slab_alloc_node(kmem_cache *, ?, int, unsigned long int) : void *
slab_alloc(kmem_cache *, ?, unsigned long int) : void *
kmem_cache_alloc(kmem_cache *, ?) : void *
alloc_iova_mem()
alloc_iova()
intel_alloc_iova() -> allocs IOVA structure for the IOMMU VM
mapping
intel_map_sg() -> maps scatterlist to dma memory
intel_dma_ops -> dma_map_ops() -> intel_map_sg()
dma_map_sg_attrs()
scsi_dma_map() --> BUG when returning from this.
performs the dma mapping for scsi cmd sg buffers.
megasas_make_sgl_fusion() -> maps scsi sgl (after dma
mapping) to low level 32 bit sgls ptrs
megasas_build_io_fusion() -> builds low level
driver command
megasas_build_and_issue_cmd_fusion()
megasas_instance_template ->
build_and_issue_cmd() -> megasas_build_and_issue_cmd_fusion()
megasas_queue_command() -> enqueues prepared
scsi cmd with scatter gather buffers
The result is a BUG() right after dma_map_ops(), because alloc_iova_mem()
couldn't get a new object out of kmem "iova", the slab object being used by the
Intel DMA function. This function would use the IOVA structure to perform the
scsi command scatter gather buffer list DMA mapping for the HBA transfer.
[ 8650.756414] ------------[ cut here ]------------
[ 8650.761768] kernel BUG at
/build/linux-HSAA8v/linux-4.4.0/drivers/scsi/megaraid/megaraid_sas_fusion.c:1452!
[ 8650.772638] invalid opcode: 0000 [#1] SMP
Now we have to check how the kmem_cache of type IOVA would be freed and why it
would reach its allocated limit, not allowing new IOVA allocations: In order to
unmap the IO APIC mappings, it is likely that the IO completion from each of the
issued scsi commands will unmap the scatter gather buffers and release the
IOVA structure from slab caching. This is exactly what is shown here:
The megaraid driver has a tasklet that is initialized during driver
initialization and it is responsible for the bottom half processing of the
driver. The tasklet is called "megaraid_mbox_dpc".
The "megaraid_mbox_dpc" gets a list of completed jobs - out of the HBA - and
issues completion based on their status. It also calls a function called
"megaraid_mbox_sync_scb". This is the function responsible for calling
the DMA sync and then calling DMA unmapping (which is the logic that would call
the Intel implementation for unmapping the scatter gather lists and free the
kmem_alloc slabs entitled as IOVA).
With that, it is possible to affirm that the HBA firmware was, indeed, not doing
the IRQ ack sequence in the right manner (since this is the logic that would
put the scsi command in the completed list to be processed by the tasklet
megaraid_mbox_dpc).
Note: megaraid_abort_handler could also have caused the command to fail to
upper layers (since it is the "eh_abort_handler" pointer, for the block device
layer) but it would only abort commands not owned by the firmware. This points
out that, indeed, firmware is holding all incomplete commands and abort handler
cannot do much.
End user upgraded to:
Dell HBA330 Adapter
Firmware Version 15.17.08.01, A04
Release date 06 Mar 2018
Last Updated 06 Mar 2018
And this issue was gone.
Rafael David Tinoco
[email protected]
--
You received this bug notification because you are a member of Ubuntu
Bugs, which is subscribed to Ubuntu.
https://bugs.launchpad.net/bugs/1755160
Title:
BUG() inside megaraid (megaraid_sas_fusion) driver
To manage notifications about this bug go to:
https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1755160/+subscriptions
--
ubuntu-bugs mailing list
[email protected]
https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs