Re: usercopy whitelist woe in scsi_sense_cache
On Tue, Apr 10, 2018 at 10:16 AM, Oleksandr Natalenkowrote: > Hi, Kees, Paolo et al. > > 10.04.2018 08:53, Kees Cook wrote: >> >> Unfortunately I only had a single hang with no dumps. I haven't been >> able to reproduce it since. :( > > > For your convenience I've prepared a VM that contains a reproducer. Awesome. :) > Under the /root folder there is a reproducer script (reproducer.sh). It does > trivial things like enabling sysrq, opening LUKS device, mounting a volume, > running a background I/O (this is an important part, actually, since I > wasn't able to trigger the issue without the background I/O) and, finally, > running the smartctl in a loop. If you are lucky, within a minute or two > you'll get the first warning followed shortly by subsequent bugs and I/O > stall (htop is pre-installed for your convenience too). Yup! [ 27.729498] Bad or missing usercopy whitelist? Kernel memory exposure attempt detected from SLUB object 'scsi_sense_cache' (offset 76, size 22)! I'll see about booting with my own kernels, etc, and try to narrow this down. :) -Kees -- Kees Cook Pixel Security
Re: [PATCH] scsi_debug: IMMED related delay adjustments
On Tue, Apr 10, 2018 at 01:00:36PM -0400, Douglas Gilbert wrote: > A patch titled: "[PATCH v2] scsi_debug: implement IMMED bit" > introduced long delays to the Start stop unit (SSU) and > Synchronize cache (SC) commands when the IMMED bit is clear. > This patch makes those delays more realistic. It causes SSU > to only delay when the start stop state is changed; SC only > delays when there's been a write since the previous SC. It > also reduced the SC delay from 1 second to 50 milliseconds. > > Signed-off-by: Douglas Gilbert> --- > This patch is in response to a report on the Linux scsi list > indicating a significant slowdown in benchmarks using the > original patch. The reporter seemed satisfield with an earlier > version of this patch (in which the only difference was that > the SC delay was 100 milliseconds). > > drivers/scsi/scsi_debug.c | 33 - > 1 file changed, 24 insertions(+), 9 deletions(-) > > diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c > index 26ce022dd6f4..9ab19285a3d3 100644 > --- a/drivers/scsi/scsi_debug.c > +++ b/drivers/scsi/scsi_debug.c > @@ -234,11 +234,13 @@ static const char *sdebug_version_date = "20180128"; > #define F_INV_OP 0x200 > #define F_FAKE_RW0x400 > #define F_M_ACCESS 0x800 /* media access */ > -#define F_LONG_DELAY 0x1000 > +#define F_SSU_DELAY 0x1000 > +#define F_SYNC_DELAY 0x2000 > > #define FF_RESPOND (F_RL_WLUN_OK | F_SKIP_UA | F_DELAY_OVERR) > #define FF_MEDIA_IO (F_M_ACCESS | F_FAKE_RW) > #define FF_SA (F_SA_HIGH | F_SA_LOW) > +#define F_LONG_DELAY (F_SSU_DELAY | F_SYNC_DELAY) > > #define SDEBUG_MAX_PARTS 4 > > @@ -510,7 +512,7 @@ static const struct opcode_info_t release_iarr[] = { > }; > > static const struct opcode_info_t sync_cache_iarr[] = { > - {0, 0x91, 0, F_LONG_DELAY | F_M_ACCESS, resp_sync_cache, NULL, > + {0, 0x91, 0, F_SYNC_DELAY | F_M_ACCESS, resp_sync_cache, NULL, > {16, 0x6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, >0xff, 0xff, 0xff, 0xff, 0x3f, 0xc7} }, /* SYNC_CACHE (16) */ > }; > @@ -553,7 +555,7 @@ static const struct opcode_info_t > opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = { > resp_write_dt0, write_iarr, /* WRITE(16) */ > {16, 0xfa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, >0xff, 0xff, 0xff, 0xff, 0xff, 0xc7} }, > - {0, 0x1b, 0, F_LONG_DELAY, resp_start_stop, NULL,/* START STOP UNIT */ > + {0, 0x1b, 0, F_SSU_DELAY, resp_start_stop, NULL,/* START STOP UNIT */ > {6, 0x1, 0, 0xf, 0xf7, 0xc7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, > {ARRAY_SIZE(sa_in_16_iarr), 0x9e, 0x10, F_SA_LOW | F_D_IN, > resp_readcap16, sa_in_16_iarr, /* SA_IN(16), READ CAPACITY(16) */ > @@ -606,7 +608,7 @@ static const struct opcode_info_t > opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = { > resp_write_same_10, write_same_iarr,/* WRITE SAME(10) */ > {10, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xc7, 0, >0, 0, 0, 0, 0} }, > - {ARRAY_SIZE(sync_cache_iarr), 0x35, 0, F_LONG_DELAY | F_M_ACCESS, > + {ARRAY_SIZE(sync_cache_iarr), 0x35, 0, F_SYNC_DELAY | F_M_ACCESS, > resp_sync_cache, sync_cache_iarr, > {10, 0x7, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xc7, 0, 0, >0, 0, 0, 0} }, /* SYNC_CACHE (10) */ > @@ -667,6 +669,7 @@ static bool sdebug_strict = DEF_STRICT; > static bool sdebug_any_injecting_opt; > static bool sdebug_verbose; > static bool have_dif_prot; > +static bool write_since_sync; > static bool sdebug_statistics = DEF_STATISTICS; > > static unsigned int sdebug_store_sectors; > @@ -1607,6 +1610,7 @@ static int resp_start_stop(struct scsi_cmnd *scp, > { > unsigned char *cmd = scp->cmnd; > int power_cond, stop; > + bool changing; > > power_cond = (cmd[4] & 0xf0) >> 4; > if (power_cond) { > @@ -1614,8 +1618,12 @@ static int resp_start_stop(struct scsi_cmnd *scp, > return check_condition_result; > } > stop = !(cmd[4] & 1); > + changing = atomic_read(>stopped) == !stop; > atomic_xchg(>stopped, stop); > - return (cmd[1] & 0x1) ? SDEG_RES_IMMED_MASK : 0; /* check IMMED bit */ > + if (!changing || cmd[1] & 0x1) /* state unchanged or IMMED set */ > + return SDEG_RES_IMMED_MASK; > + else > + return 0; > } > > static sector_t get_sdebug_capacity(void) > @@ -2473,6 +2481,7 @@ static int do_device_access(struct scsi_cmnd *scmd, u32 > sg_skip, u64 lba, > if (do_write) { > sdb = scsi_out(scmd); > dir = DMA_TO_DEVICE; > + write_since_sync = true; > } else { > sdb = scsi_in(scmd); > dir = DMA_FROM_DEVICE; > @@ -3583,6 +3592,7 @@ static int resp_get_lba_status(struct scsi_cmnd *scp,
Re: 4.15.14 crash with iscsi target and dvd
Ming Lei wrote: > Sure, thanks for your sharing. > > Wakko, could you test the following patch and see if there is any > difference? > > -- > diff --git a/drivers/target/target_core_pscsi.c > b/drivers/target/target_core_pscsi.c > index 0d99b242e82e..6147178f1f37 100644 > --- a/drivers/target/target_core_pscsi.c > +++ b/drivers/target/target_core_pscsi.c > @@ -888,7 +888,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, > u32 sgl_nents, > if (len > 0 && data_len > 0) { > bytes = min_t(unsigned int, len, PAGE_SIZE - off); > bytes = min(bytes, data_len); > - > + new_bio: > if (!bio) { > nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages); > nr_pages -= nr_vecs; > @@ -931,6 +931,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, > u32 sgl_nents, >* be allocated with pscsi_get_bio() above. >*/ > bio = NULL; > + goto new_bio; > } > > data_len -= bytes; Sorry for the delay. I reverted my change, added this one. I didn't reboot, I just unloaded and loaded this one. Note: /dev/sr1 as seen from the initiator is /dev/sr0 (physical disc) on the target. Doesn't crash, however on the initiator I see this: [9273849.70] ISO 9660 Extensions: RRIP_1991A [9273863.359718] scsi_io_completion: 13 callbacks suppressed [9273863.359788] sr 26:0:0:0: [sr1] tag#1 UNKNOWN(0x2003) Result: hostbyte=0x00 driverbyte=0x08 [9273863.359909] sr 26:0:0:0: [sr1] tag#1 Sense Key : 0x2 [current] [9273863.359974] sr 26:0:0:0: [sr1] tag#1 ASC=0x8 ASCQ=0x0 [9273863.360036] sr 26:0:0:0: [sr1] tag#1 CDB: opcode=0x28 28 00 00 22 f6 96 00 00 80 00 [9273863.360116] blk_update_request: 13 callbacks suppressed [9273863.360177] blk_update_request: I/O error, dev sr1, sector 9165400 [9273875.864648] sr 26:0:0:0: [sr1] tag#1 UNKNOWN(0x2003) Result: hostbyte=0x00 driverbyte=0x08 [9273875.864738] sr 26:0:0:0: [sr1] tag#1 Sense Key : 0x2 [current] [9273875.864801] sr 26:0:0:0: [sr1] tag#1 ASC=0x8 ASCQ=0x0 [9273875.864890] sr 26:0:0:0: [sr1] tag#1 CDB: opcode=0x28 28 00 00 22 f7 16 00 00 80 00 [9273875.864971] blk_update_request: I/O error, dev sr1, sector 9165912 To cause this, I mounted the dvd as seen in the first line and ran this command: find /cdrom2 -type f | xargs -tn1 cat > /dev/null I did some various tests. Each test was done after umount and mount to clear the cache. cat > /dev/null causes the message. dd if= of=/dev/null bs=2048 doesn't using bs=4096 doesn't using bs=64k doesn't using bs=128k does cat uses a blocksize of 128k. The following was done without being mounted. ddrescue -f -f /dev/sr1 /dev/null doesn't cause the message dd if=/dev/sr1 of=/dev/null bs=128k doesn't cause the message using bs=256k causes the message once: [9275916.857409] sr 27:0:0:0: [sr1] tag#0 UNKNOWN(0x2003) Result: hostbyte=0x00 driverbyte=0x08 [9275916.857482] sr 27:0:0:0: [sr1] tag#0 Sense Key : 0x2 [current] [9275916.857520] sr 27:0:0:0: [sr1] tag#0 ASC=0x8 ASCQ=0x0 [9275916.857556] sr 27:0:0:0: [sr1] tag#0 CDB: opcode=0x28 28 00 00 00 00 00 00 00 80 00 [9275916.857614] blk_update_request: I/O error, dev sr1, sector 0 If I access the disc from the target natively either by mounting and accessing files or working with the device directly (ie dd) no errors are logged on the target. -- Microsoft has beaten Volkswagen's world record. Volkswagen only created 22 million bugs.
Re: [PATCH v7] scsi: new zorro_esp.c for Amiga Zorro NCR53C9x boards
Hi Geert, now I see, thanks. I could change the device table to use ZORRO_ID(PHASE5, ...) style IDs instead of the longish defines if you're OK with that. The other changes have passed tests and I'd otherwise just send what I have now, adding Christoph's Reviewed-by. Cheers, Michael On Tue, Apr 10, 2018 at 8:18 PM, Geert Uytterhoevenwrote: > Hi Michael, > > On Tue, Apr 10, 2018 at 4:16 AM, Michael Schmitz wrote: >> On Mon, Apr 9, 2018 at 7:50 PM, Christoph Hellwig wrote: >>> On Sun, Apr 08, 2018 at 02:45:32PM +1200, Michael Schmitz wrote: New combined SCSI driver for all ESP based Zorro SCSI boards for m68k Amiga. + { + .id = ZORRO_PROD_PHASE5_BLIZZARD_1230_II_FASTLANE_Z3_CYBERSCSI_CYBERSTORM060, >>> >>> In PCI Land we've usually stopped using PCI IDs unless they are used >>> in multiple > > (missing "places"?) > >> Short of a complete rewrite of the Zorro driver support code to be >> closer to what PCI does, I don' see what can be done about the use of >> Zorro IDs. I don't think such a rewrite is planned in the near future, >> Geert? > > I think what Christoph means is the use of the define > ZORRO_PROD_PHASE5_BLIZZARD_1230_II_FASTLANE_Z3_CYBERSCSI_CYBERSTORM060 > versus hardcoded numbers, or ZORRO_ID(PHASE5, 0x0B, 0). > > We have a long list of ZORRO_PROD_* definitions in > include/uapi/linux/zorro_ids.h because of historical reasons. The list > isn't really changing (no new IDs in git history) due to almost no new > Zorro boards being made, unlike for PCI, where keeping an in-kernel list > is a lot of work, and not desirable. > > Gr{oetje,eeting}s, > > Geert > > -- > Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- > ge...@linux-m68k.org > > In personal conversations with technical people, I call myself a hacker. But > when I'm talking to journalists I just say "programmer" or something like > that. > -- Linus Torvalds
Hotplug
Hi, Been running some tests and I keep running into issues with hotplug. This looks similar to what Bart posted the other day, but it looks more deeply rooted than just having to protect the queue in generic_make_request_checks(). The test run is blktests, block/001. Current -git doesn't survive it. I've seen at least two different oopses, pasted below. [ 102.163442] NULL pointer dereference at 0010 [ 102.163444] PGD 0 P4D 0 [ 102.163447] Oops: [#1] PREEMPT SMP [ 102.163449] Modules linked in: [ 102.175540] sr 12:0:0:0: [sr2] scsi-1 drive [ 102.180112] scsi_debug crc_t10dif crct10dif_generic crct10dif_common nvme nvme_core sb_edac xl [ 102.186934] sr 12:0:0:0: Attached scsi CD-ROM sr2 [ 102.191896] sr_mod cdrom btrfs xor zstd_decompress zstd_compress xxhash lzo_compress zlib_defc [ 102.197169] sr 12:0:0:0: Attached scsi generic sg7 type 5 [ 102.203475] igb ahci libahci i2c_algo_bit libata dca [last unloaded: crc_t10dif] [ 102.203484] CPU: 43 PID: 4629 Comm: systemd-udevd Not tainted 4.16.0+ #650 [ 102.203487] Hardware name: Dell Inc. PowerEdge T630/0NT78X, BIOS 2.3.4 11/09/2016 [ 102.350882] RIP: 0010:sr_block_revalidate_disk+0x23/0x190 [sr_mod] [ 102.358299] RSP: 0018:883ff357bb58 EFLAGS: 00010292 [ 102.364734] RAX: a00b07d0 RBX: 883ff3058000 RCX: 883ff357bb66 [ 102.373220] RDX: 0003 RSI: 7530 RDI: 881fea631000 [ 102.381705] RBP: R08: 881fe4d38400 R09: [ 102.390185] R10: R11: 01b6 R12: 085d [ 102.398671] R13: 085d R14: 883ffd9b3790 R15: [ 102.407156] FS: 7f7dc8e6d8c0() GS:883fff34() knlGS: [ 102.417138] CS: 0010 DS: ES: CR0: 80050033 [ 102.424066] CR2: 0010 CR3: 003ffda98005 CR4: 003606e0 [ 102.432545] DR0: DR1: DR2: [ 102.441024] DR3: DR6: fffe0ff0 DR7: 0400 [ 102.449502] Call Trace: [ 102.452744] ? __invalidate_device+0x48/0x60 [ 102.458022] check_disk_change+0x4c/0x60 [ 102.462900] sr_block_open+0x16/0xd0 [sr_mod] [ 102.468270] __blkdev_get+0xb9/0x450 [ 102.472774] ? iget5_locked+0x1c0/0x1e0 [ 102.477568] blkdev_get+0x11e/0x320 [ 102.481969] ? bdget+0x11d/0x150 [ 102.486083] ? _raw_spin_unlock+0xa/0x20 [ 102.490968] ? bd_acquire+0xc0/0xc0 [ 102.495368] do_dentry_open+0x1b0/0x320 [ 102.500159] ? inode_permission+0x24/0xc0 [ 102.505140] path_openat+0x4e6/0x1420 [ 102.509741] ? cpumask_any_but+0x1f/0x40 [ 102.514630] ? flush_tlb_mm_range+0xa0/0x120 [ 102.519903] do_filp_open+0x8c/0xf0 [ 102.524305] ? __seccomp_filter+0x28/0x230 [ 102.529389] ? _raw_spin_unlock+0xa/0x20 [ 102.534283] ? __handle_mm_fault+0x7d6/0x9b0 [ 102.539559] ? list_lru_add+0xa8/0xc0 [ 102.544157] ? _raw_spin_unlock+0xa/0x20 [ 102.549047] ? __alloc_fd+0xaf/0x160 [ 102.553549] ? do_sys_open+0x1a6/0x230 [ 102.558244] do_sys_open+0x1a6/0x230 [ 102.562742] do_syscall_64+0x5a/0x100 [ 102.567336] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 and this one, similar to Barts: [ 4676.738069] NULL pointer dereference at 0154 [ 4676.738071] PGD 0 P4D 0 [ 4676.738073] Oops: [#1] PREEMPT SMP [ 4676.738075] Modules linked in: scsi_debug crc_t10dif nvme nvme_core loop configfs crct10dif_ge] [ 4676.859272] CPU: 10 PID: 16598 Comm: systemd-udevd Not tainted 4.16.0+ #651 [ 4676.867525] Hardware name: Dell Inc. PowerEdge T630/0NT78X, BIOS 2.3.4 11/09/2016 [ 4676.876765] RIP: 0010:blk_throtl_bio+0x45/0x9b0 [ 4676.882296] RSP: 0018:881ff0c8bb38 EFLAGS: 00010246 [ 4676.888610] RAX: RBX: 881ffa273a40 RCX: [ 4676.897059] RDX: 881ffa273a40 RSI: RDI: [ 4676.905507] RBP: R08: 881ffa273ac0 R09: 881ff123f458 [ 4676.913955] R10: 881ff0c8bc70 R11: 1000 R12: [ 4676.922402] R13: 82600980 R14: 88208113 R15: [ 4676.930856] FS: 7fe63e5228c0() GS:881fff74() knlGS: [ 4676.940773] CS: 0010 DS: ES: CR0: 80050033 [ 4676.947667] CR2: 0154 CR3: 001fed2a0003 CR4: 003606e0 [ 4676.956116] DR0: DR1: DR2: [ 4676.964568] DR3: DR6: fffe0ff0 DR7: 0400 [ 4676.973021] Call Trace: [ 4676.976229] generic_make_request_checks+0x640/0x660 [ 4676.982245] ? kmem_cache_alloc+0x37/0x190 [ 4676.987295] generic_make_request+0x29/0x2f0 [ 4676.992534] ? submit_bio+0x5c/0x110 [ 4676.996998] ? bio_alloc_bioset+0x99/0x1c0 [ 4677.002050] submit_bio+0x5c/0x110 [ 4677.006317] ? guard_bio_eod+0x42/0x120 [ 4677.011074] submit_bh_wbc.isra.49+0x132/0x160 [ 4677.016517] ? bh_uptodate_or_lock+0x90/0x90 [ 4677.021757]
Re: usercopy whitelist woe in scsi_sense_cache
Hi, Kees, Paolo et al. 10.04.2018 08:53, Kees Cook wrote: Unfortunately I only had a single hang with no dumps. I haven't been able to reproduce it since. :( For your convenience I've prepared a VM that contains a reproducer. It consists of 3 disk images (sda.img is for the system, it is Arch-based, sdb/sdc.img are for RAID). They are available (in a compressed form) to download here [1]. RAID is built as RAID10 with far2 layout, on top of it there is a LUKS container (can be opened either with keyfile under the /root or using "qwerty" password). There's one LVM PV, one VG and one volume on top of LUKS containing XFS. RAID is automatically assembled during the boot, so you don't have to worry about it. I run the VM like this: $ qemu-system-x86_64 -display gtk,gl=on -machine q35,accel=kvm -cpu host,+vmx -enable-kvm -netdev user,id=user.0 -device virtio-net,netdev=user.0 -usb -device nec-usb-xhci,id=xhci -device usb-tablet,bus=xhci.0 -serial stdio -smp 4 -m 512 -hda sda.img -hdb sdb.img -hdc sdc.img The system is accessible via both VGA and serial console. The user is "root", the password is "qwerty". Under the /root folder there is a reproducer script (reproducer.sh). It does trivial things like enabling sysrq, opening LUKS device, mounting a volume, running a background I/O (this is an important part, actually, since I wasn't able to trigger the issue without the background I/O) and, finally, running the smartctl in a loop. If you are lucky, within a minute or two you'll get the first warning followed shortly by subsequent bugs and I/O stall (htop is pre-installed for your convenience too). Notable changes in this VM comparing to generic defaults: 1) blk-mq is enabled via kernel cmdline (scsi_mod.use_blk_mq=1 is in /etc/default/grub) 2) BFQ is set via udev (check /etc/udev/rules.d/10-io-scheduler.rules file) Again, I wasn't able to reproduce the usercopy warning/bug and I/O hang without all these components being involved. Hope you enjoy it. P.S. I haven't tested Linus' master branch yet. For now, this VM runs v4.16 kernel. Regards, Oleksandr [1] https://natalenko.name/myfiles/usercopy_bfq_woe/
[PATCH] scsi_debug: IMMED related delay adjustments
A patch titled: "[PATCH v2] scsi_debug: implement IMMED bit" introduced long delays to the Start stop unit (SSU) and Synchronize cache (SC) commands when the IMMED bit is clear. This patch makes those delays more realistic. It causes SSU to only delay when the start stop state is changed; SC only delays when there's been a write since the previous SC. It also reduced the SC delay from 1 second to 50 milliseconds. Signed-off-by: Douglas Gilbert--- This patch is in response to a report on the Linux scsi list indicating a significant slowdown in benchmarks using the original patch. The reporter seemed satisfield with an earlier version of this patch (in which the only difference was that the SC delay was 100 milliseconds). drivers/scsi/scsi_debug.c | 33 - 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 26ce022dd6f4..9ab19285a3d3 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -234,11 +234,13 @@ static const char *sdebug_version_date = "20180128"; #define F_INV_OP 0x200 #define F_FAKE_RW 0x400 #define F_M_ACCESS 0x800 /* media access */ -#define F_LONG_DELAY 0x1000 +#define F_SSU_DELAY0x1000 +#define F_SYNC_DELAY 0x2000 #define FF_RESPOND (F_RL_WLUN_OK | F_SKIP_UA | F_DELAY_OVERR) #define FF_MEDIA_IO (F_M_ACCESS | F_FAKE_RW) #define FF_SA (F_SA_HIGH | F_SA_LOW) +#define F_LONG_DELAY (F_SSU_DELAY | F_SYNC_DELAY) #define SDEBUG_MAX_PARTS 4 @@ -510,7 +512,7 @@ static const struct opcode_info_t release_iarr[] = { }; static const struct opcode_info_t sync_cache_iarr[] = { - {0, 0x91, 0, F_LONG_DELAY | F_M_ACCESS, resp_sync_cache, NULL, + {0, 0x91, 0, F_SYNC_DELAY | F_M_ACCESS, resp_sync_cache, NULL, {16, 0x6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xc7} }, /* SYNC_CACHE (16) */ }; @@ -553,7 +555,7 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = { resp_write_dt0, write_iarr, /* WRITE(16) */ {16, 0xfa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc7} }, - {0, 0x1b, 0, F_LONG_DELAY, resp_start_stop, NULL,/* START STOP UNIT */ + {0, 0x1b, 0, F_SSU_DELAY, resp_start_stop, NULL,/* START STOP UNIT */ {6, 0x1, 0, 0xf, 0xf7, 0xc7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, {ARRAY_SIZE(sa_in_16_iarr), 0x9e, 0x10, F_SA_LOW | F_D_IN, resp_readcap16, sa_in_16_iarr, /* SA_IN(16), READ CAPACITY(16) */ @@ -606,7 +608,7 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = { resp_write_same_10, write_same_iarr,/* WRITE SAME(10) */ {10, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xc7, 0, 0, 0, 0, 0, 0} }, - {ARRAY_SIZE(sync_cache_iarr), 0x35, 0, F_LONG_DELAY | F_M_ACCESS, + {ARRAY_SIZE(sync_cache_iarr), 0x35, 0, F_SYNC_DELAY | F_M_ACCESS, resp_sync_cache, sync_cache_iarr, {10, 0x7, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xc7, 0, 0, 0, 0, 0, 0} }, /* SYNC_CACHE (10) */ @@ -667,6 +669,7 @@ static bool sdebug_strict = DEF_STRICT; static bool sdebug_any_injecting_opt; static bool sdebug_verbose; static bool have_dif_prot; +static bool write_since_sync; static bool sdebug_statistics = DEF_STATISTICS; static unsigned int sdebug_store_sectors; @@ -1607,6 +1610,7 @@ static int resp_start_stop(struct scsi_cmnd *scp, { unsigned char *cmd = scp->cmnd; int power_cond, stop; + bool changing; power_cond = (cmd[4] & 0xf0) >> 4; if (power_cond) { @@ -1614,8 +1618,12 @@ static int resp_start_stop(struct scsi_cmnd *scp, return check_condition_result; } stop = !(cmd[4] & 1); + changing = atomic_read(>stopped) == !stop; atomic_xchg(>stopped, stop); - return (cmd[1] & 0x1) ? SDEG_RES_IMMED_MASK : 0; /* check IMMED bit */ + if (!changing || cmd[1] & 0x1) /* state unchanged or IMMED set */ + return SDEG_RES_IMMED_MASK; + else + return 0; } static sector_t get_sdebug_capacity(void) @@ -2473,6 +2481,7 @@ static int do_device_access(struct scsi_cmnd *scmd, u32 sg_skip, u64 lba, if (do_write) { sdb = scsi_out(scmd); dir = DMA_TO_DEVICE; + write_since_sync = true; } else { sdb = scsi_in(scmd); dir = DMA_FROM_DEVICE; @@ -3583,6 +3592,7 @@ static int resp_get_lba_status(struct scsi_cmnd *scp, static int resp_sync_cache(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) { + int res = 0; u64 lba; u32 num_blocks;
RE: [PATCH v1 14/15] mpt3sas: fix possible memory leak.
Yes Sasha, We like to have this patch included in a stable tree. Thanks, Sreekanth -Original Message- From: Sasha Levin [mailto:alexander.le...@microsoft.com] Sent: Tuesday, April 10, 2018 7:19 PM To: Sasha Levin; Chaitra P B; linux-scsi@vger.kernel.org Cc: sathya.prak...@broadcom.com; sreekanth.re...@broadcom.com; sta...@vger.kernel.org Subject: Re: [PATCH v1 14/15] mpt3sas: fix possible memory leak. Hi, [This is an automated email] This commit has been processed by the -stable helper bot and determined to be a high probability candidate for -stable trees. (score: 12.9808) The bot has tested the following trees: v4.16.1, v4.15.16, v4.14.33, v4.9.93, v4.4.127. v4.16.1: Build OK! v4.15.16: Build OK! v4.14.33: Build OK! v4.9.93: Build OK! v4.4.127: Build OK! Please let us know if you'd like to have this patch included in a stable tree. -- Thanks, Sasha
RE: [PATCH v1 03/15] mpt3sas: Add sanity checks for scsi tracker before accessing it.
Yes Sasha, We like to have this patch included in a stable tree. Thanks, Sreekanth -Original Message- From: Sasha Levin [mailto:alexander.le...@microsoft.com] Sent: Tuesday, April 10, 2018 7:19 PM To: Sasha Levin; Chaitra P B; linux-scsi@vger.kernel.org Cc: sathya.prak...@broadcom.com; sreekanth.re...@broadcom.com; sta...@vger.kernel.org Subject: Re: [PATCH v1 03/15] mpt3sas: Add sanity checks for scsi tracker before accessing it. Hi, [This is an automated email] This commit has been processed by the -stable helper bot and determined to be a high probability candidate for -stable trees. (score: 28.7865) The bot has tested the following trees: v4.16.1, v4.15.16, v4.14.33, v4.9.93, v4.4.127. v4.16.1: Build OK! v4.15.16: Failed to apply! Possible dependencies: dbec4c9040ed ("scsi: mpt3sas: lockless command submission") v4.14.33: Failed to apply! Possible dependencies: dbec4c9040ed ("scsi: mpt3sas: lockless command submission") v4.9.93: Failed to apply! Possible dependencies: dbec4c9040ed ("scsi: mpt3sas: lockless command submission") v4.4.127: Failed to apply! Possible dependencies: dbec4c9040ed ("scsi: mpt3sas: lockless command submission") Please let us know if you'd like to have this patch included in a stable tree. -- Thanks, Sasha
Re: [PATCH v3 0/3] Report all request failures again to user space
On Tue, 2018-04-10 at 15:48 +0200, Johannes Thumshirn wrote: > On Mon, Apr 09, 2018 at 09:35:21PM -0400, Martin K. Petersen wrote: > > > > Johannes, > > > > > I did start a series [1] for this but than got distracted by more urgent > > > things. I can pick it up again I think. > > > > > > [1] > > > https://git.kernel.org/pub/scm/linux/kernel/git/jth/linux.git/log/?h=iscsi_DID_REQUEUE > > > > Definitely a step in the right direction. > > Does anyone object getting rid of the Scsi_Cmnd typedef while I'm at > it? > > This would ease my refactoring a lot, asI don't have to teach > coccinelle yet another case that multiplies the rules. I'm in favor of the removal. But maybe there is something I'm overlooking and that means that the typedef should not be removed? Bart.
Re: [PATCH v3 1/3] Revert "scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"
On 04/10/18 07:49, Sasha Levin wrote: [This is an automated email] This commit has been processed because it contains a "Fixes:" tag, fixing commit: e39a97353e53 scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte(). The bot has also determined it's probably a bug fixing patch. (score: 94.6757) The bot has tested the following trees: v4.16.1. v4.16.1: Build OK! Hello Sasha, Please fix your bot. Your bot replied to a patch that has a "Cc: stable" tag. Bart.
Re: [PATCH v3 1/3] Revert "scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"
Hi, [This is an automated email] This commit has been processed because it contains a "Fixes:" tag, fixing commit: e39a97353e53 scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte(). The bot has also determined it's probably a bug fixing patch. (score: 94.6757) The bot has tested the following trees: v4.16.1. v4.16.1: Build OK! -- Thanks, Sasha
Re: [PATCH 2/3] megaraid_sas: Increase timeout by 1 sec for non-RAID fastpath IOs
Hi, [This is an automated email] This commit has been processed by the -stable helper bot and determined to be a high probability candidate for -stable trees. (score: 8.0623) The bot has tested the following trees: v4.16.1, v4.15.16, v4.14.33, v4.9.93, v4.4.127. v4.16.1: Build OK! v4.15.16: Build OK! v4.14.33: Build OK! v4.9.93: Build OK! v4.4.127: Build OK! Please let us know if you'd like to have this patch included in a stable tree. -- Thanks, Sasha
Re: [PATCH v1 03/15] mpt3sas: Add sanity checks for scsi tracker before accessing it.
Hi, [This is an automated email] This commit has been processed by the -stable helper bot and determined to be a high probability candidate for -stable trees. (score: 28.7865) The bot has tested the following trees: v4.16.1, v4.15.16, v4.14.33, v4.9.93, v4.4.127. v4.16.1: Build OK! v4.15.16: Failed to apply! Possible dependencies: dbec4c9040ed ("scsi: mpt3sas: lockless command submission") v4.14.33: Failed to apply! Possible dependencies: dbec4c9040ed ("scsi: mpt3sas: lockless command submission") v4.9.93: Failed to apply! Possible dependencies: dbec4c9040ed ("scsi: mpt3sas: lockless command submission") v4.4.127: Failed to apply! Possible dependencies: dbec4c9040ed ("scsi: mpt3sas: lockless command submission") Please let us know if you'd like to have this patch included in a stable tree. -- Thanks, Sasha
Re: [PATCH v1 14/15] mpt3sas: fix possible memory leak.
Hi, [This is an automated email] This commit has been processed by the -stable helper bot and determined to be a high probability candidate for -stable trees. (score: 12.9808) The bot has tested the following trees: v4.16.1, v4.15.16, v4.14.33, v4.9.93, v4.4.127. v4.16.1: Build OK! v4.15.16: Build OK! v4.14.33: Build OK! v4.9.93: Build OK! v4.4.127: Build OK! Please let us know if you'd like to have this patch included in a stable tree. -- Thanks, Sasha
Re: [PATCH v3 0/3] Report all request failures again to user space
Hi, [This is an automated email] This commit has been processed by the -stable helper bot and determined to be a high probability candidate for -stable trees. (score: 19.8603) The bot has tested the following trees: v4.16.1, v4.15.16, v4.14.33, v4.9.93, v4.4.127. v4.16.1: Failed to apply! Possible dependencies: Unable to calculate v4.15.16: Build OK! v4.14.33: Build OK! v4.9.93: Failed to apply! Possible dependencies: 2a842acab109 ("block: introduce new block status code type") b6800ec54c74 ("Make scsi_result_to_blk_status() recognize CONDITION MET") v4.4.127: Failed to apply! Possible dependencies: 2a842acab109 ("block: introduce new block status code type") b6800ec54c74 ("Make scsi_result_to_blk_status() recognize CONDITION MET") Please let us know if you'd like to have this patch included in a stable tree. -- Thanks, Sasha
Re: usercopy whitelist woe in scsi_sense_cache
Hi. 10.04.2018 08:35, Oleksandr Natalenko wrote: - does it reproduce _without_ hardened usercopy? (I would assume yes, but you'd just not get any warning until the hangs started.) If it does reproduce without hardened usercopy, then a new bisect run could narrow the search even more. Looks like it cannot be disabled via kernel cmdline, so I have to re-compile the kernel, right? I can certainly do that anyway. Okay, I've recompiled the kernel without hardened usercopy: [root@archlinux ~]# zgrep USERCOPY /proc/config.gz CONFIG_X86_INTEL_USERCOPY=y CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y # CONFIG_HARDENED_USERCOPY is not set and I cannot reproduce the issue anymore. I/O doesn't hang regardless of how long I hammer it. Eeeh? Maybe, this is a matter of some cleanup code path once the warn/bug condition is hit with hardening enabled? I'm just guessing here again. Will work towards checking Linus' master branch now… Regards, Oleksandr
Re: [PATCH v3 0/3] Report all request failures again to user space
On Mon, Apr 09, 2018 at 09:35:21PM -0400, Martin K. Petersen wrote: > > Johannes, > > > I did start a series [1] for this but than got distracted by more urgent > > things. I can pick it up again I think. > > > > [1] > > https://git.kernel.org/pub/scm/linux/kernel/git/jth/linux.git/log/?h=iscsi_DID_REQUEUE > > Definitely a step in the right direction. Does anyone object getting rid of the Scsi_Cmnd typedef while I'm at it? This would ease my refactoring a lot, asI don't have to teach coccinelle yet another case that multiplies the rules. Thanks, Johannes -- Johannes Thumshirn Storage jthumsh...@suse.de+49 911 74053 689 SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg GF: Felix Imendörffer, Jane Smithard, Graham Norton HRB 21284 (AG Nürnberg) Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850
Re: [RFC PATCH] mpt3sas: mpt3sas_scsih_enclosure_find_by_handle can be static
On Tue, 2018-04-10 at 09:15 +0200, Jaco Kroon wrote: > I've not seen additional feedback on this (I may simply not be CCed). > > I've applied the patch to one of our hosts where we've had endless IO > lockups (with MQ enabled the host died within a day or two, sometimes > sub one hour, without it typically ran for about two weeks). With this > patch (on top of 4.16) we're now at four days and 17 hours, with IO > still going strong (including a mdadm reshape to add a disk, as well as > a rebuild on a drive that failed - concurrently on two different arrays, > same controller). Very subjective, but the host also feels more > responsive under heavy IO load. > > What can I do from my side (I've got some development experience) to > help push this patch forward? Hello Jaco, If you want to follow mpt3sas development please consider to subscribe to the linux-scsi mailing list. Recently Broadcom posted a new mpt3sas patch series. See also https://www.mail-archive.com/linux-scsi@vger.kernel.org/msg72823.html. Bart.
[PATCH 2/2] scsi: st: Replace GFP_ATOMIC with GFP_KERNEL in new_tape_buffer
new_tape_buffer() is never called in atomic context. new_tape_buffer() is only called by st_probe(), which is only set as ".probe" in struct scsi_driver. Despite never getting called from atomic context, new_tape_buffer() calls kzalloc() with GFP_ATOMIC, which does not sleep for allocation. GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL, which can sleep and improve the possibility of sucessful allocation. This is found by a static analysis tool named DCNS written by myself. And I also manually check it. Signed-off-by: Jia-Ju Bai--- drivers/scsi/st.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 94e402e..b987f6d 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -3878,7 +3878,7 @@ static struct st_buffer *new_tape_buffer(int need_dma, int max_sg) { struct st_buffer *tb; - tb = kzalloc(sizeof(struct st_buffer), GFP_ATOMIC); + tb = kzalloc(sizeof(struct st_buffer), GFP_KERNEL); if (!tb) { printk(KERN_NOTICE "st: Can't allocate new tape buffer.\n"); return NULL; @@ -3889,7 +3889,7 @@ static struct st_buffer *new_tape_buffer(int need_dma, int max_sg) tb->buffer_size = 0; tb->reserved_pages = kzalloc(max_sg * sizeof(struct page *), -GFP_ATOMIC); +GFP_KERNEL); if (!tb->reserved_pages) { kfree(tb); return NULL; -- 1.9.1
[PATCH 1/2] scsi: st: Replace GFP_ATOMIC with GFP_KERNEL in st_probe
st_probe() is never called in atomic context. st_probe() is only set as ".probe" in struct scsi_driver. Despite never getting called from atomic context, st_probe() calls kzalloc() with GFP_ATOMIC, which does not sleep for allocation. GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL, which can sleep and improve the possibility of sucessful allocation. This is found by a static analysis tool named DCNS written by myself. And I also manually check it. Signed-off-by: Jia-Ju Bai--- drivers/scsi/st.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 94e402e..1da42f3 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -4290,7 +4290,7 @@ static int st_probe(struct device *dev) goto out_buffer_free; } - tpnt = kzalloc(sizeof(struct scsi_tape), GFP_ATOMIC); + tpnt = kzalloc(sizeof(struct scsi_tape), GFP_KERNEL); if (tpnt == NULL) { sdev_printk(KERN_ERR, SDp, "st: Can't allocate device descriptor.\n"); -- 1.9.1
RE: Regarding SCSI SANITIZE command support
Hi Doug Gilbert, Thanks for your response. During the drive sanitize, INQUIRY and REPORT LUNS commands were working as expected. Drive supports all the 9 commands (sbc4r15.pdf, clause 4.11.2) during sanitize. When the OS sends TUR commands, drive reports SenseKey: NOT READY, ASC: 04 and ASCQ: 1B "LOGICAL UNIT NOT READY, SANITIZE IN PROGRESS". The OS SCSI upper layer driver is trying to read block 0. From the OS dmesg, I could see that SCSI READ 10 byte CDB (opcode: 0x28) have been issued by OS. Snippet of OS dmesg: [ 7679.363737] not responding... [ 7726.521694] sd 10:0:10:0: [sdb] FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE [ 7726.521706] sd 10:0:10:0: [sdb] Sense Key : Not Ready [current] [ 7726.521711] sd 10:0:10:0: [sdb] Add. Sense: Logical unit not ready, sanitize in progress [ 7726.521717] sd 10:0:10:0: [sdb] CDB: Read(10) 28 00 00 00 00 00 00 00 04 00 [ 7726.521722] blk_update_request: 15 callbacks suppressed [ 7726.521726] blk_update_request: I/O error, dev sdb, sector 0 [ 7726.521731] buffer_io_error: 14 callbacks suppressed [ 7726.521735] Buffer I/O error on dev sdb, logical block 0, async page read [ 7726.522791] sd 10:0:10:0: [sdb] FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE [ 7726.522796] sd 10:0:10:0: [sdb] Sense Key : Not Ready [current] [ 7726.522799] sd 10:0:10:0: [sdb] Add. Sense: Logical unit not ready, sanitize in progress [ 7726.522803] sd 10:0:10:0: [sdb] CDB: Read(10) 28 00 00 00 00 04 00 00 04 00 [ 7726.522806] blk_update_request: I/O error, dev sdb, sector 4 [ 7726.522809] Buffer I/O error on dev sdb, logical block 1, async page read [ 7726.523837] sd 10:0:10:0: [sdb] FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE [ 7726.523851] sd 10:0:10:0: [sdb] Sense Key : Not Ready [current] [ 7726.523856] sd 10:0:10:0: [sdb] Add. Sense: Logical unit not ready, sanitize in progress [ 7726.523861] sd 10:0:10:0: [sdb] CDB: Read(10) 28 00 00 00 00 00 00 00 04 00 [ 7726.523866] blk_update_request: I/O error, dev sdb, sector 0 [ 7726.523871] Buffer I/O error on dev sdb, logical block 0, async page read [ 7726.524846] sd 10:0:10:0: [sdb] FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE [ 7726.524850] sd 10:0:10:0: [sdb] Sense Key : Not Ready [current] [ 7726.524853] sd 10:0:10:0: [sdb] Add. Sense: Logical unit not ready, sanitize in progress [ 7726.524856] sd 10:0:10:0: [sdb] CDB: Read(10) 28 00 00 00 00 04 00 00 04 00 [ 7726.524858] blk_update_request: I/O error, dev sdb, sector 4 [ 7726.524860] Buffer I/O error on dev sdb, logical block 1, async page read [ 7726.525899] sd 10:0:10:0: [sdb] FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE [ 7726.525904] sd 10:0:10:0: [sdb] Sense Key : Not Ready [current] [ 7726.525907] sd 10:0:10:0: [sdb] Add. Sense: Logical unit not ready, sanitize in progress [ 7726.525910] sd 10:0:10:0: [sdb] CDB: Read(10) 28 00 00 00 00 00 00 00 04 00 [ 7726.525913] blk_update_request: I/O error, dev sdb, sector 0 [ 7726.525916] Buffer I/O error on dev sdb, logical block 0, async page read [ 7726.526925] sd 10:0:10:0: [sdb] FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE [ 7726.526930] sd 10:0:10:0: [sdb] Sense Key : Not Ready [current] [ 7726.526933] sd 10:0:10:0: [sdb] Add. Sense: Logical unit not ready, sanitize in progress [ 7726.526936] sd 10:0:10:0: [sdb] CDB: Read(10) 28 00 00 00 00 04 00 00 04 00 [ 7726.526939] blk_update_request: I/O error, dev sdb, sector 4 [ 7726.526942] Buffer I/O error on dev sdb, logical block 1, async page read [ 7726.527989] sd 10:0:10:0: [sdb] FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE [ 7726.527995] sd 10:0:10:0: [sdb] Sense Key : Not Ready [current] [ 7726.527999] sd 10:0:10:0: [sdb] Add. Sense: Logical unit not ready, sanitize in progress [ 7726.528003] sd 10:0:10:0: [sdb] CDB: Read(10) 28 00 00 00 00 00 00 00 04 00 [ 7726.528018] blk_update_request: I/O error, dev sdb, sector 0 [ 7726.528022] Buffer I/O error on dev sdb, logical block 0, async page read [ 7726.528994] sd 10:0:10:0: [sdb] FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE [ 7726.528997] sd 10:0:10:0: [sdb] Sense Key : Not Ready [current] [ 7726.529000] sd 10:0:10:0: [sdb] Add. Sense: Logical unit not ready, sanitize in progress [ 7726.529003] sd 10:0:10:0: [sdb] CDB: Read(10) 28 00 00 00 00 04 00 00 04 00 [ 7726.529012] blk_update_request: I/O error, dev sdb, sector 4 [ 7726.529015] Buffer I/O error on dev sdb, logical block 1, async page read [ 7726.530057] sd 10:0:10:0: [sdb] FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE [ 7726.530061] sd 10:0:10:0: [sdb] Sense Key : Not Ready [current] [ 7726.530065] sd 10:0:10:0: [sdb] Add. Sense: Logical unit not ready, sanitize in progress [ 7726.530068] sd 10:0:10:0: [sdb] CDB: Read(10) 28 00 00 00 00 00 00 00 04 00 [ 7726.530071] blk_update_request: I/O error, dev sdb, sector 0 [ 7726.530074] Buffer I/O error on dev sdb, logical block 0, async page read [ 7726.531071] sd 10:0:10:0:
Re: [PATCH] tcmu: fix error resetting qfull_time_out to default
On 2018/4/9 19:44, Prasanna Kumar Kalever wrote: Problem: --- $ cat /sys/kernel/config/target/core/user_0/block/attrib/qfull_time_out -1 $ echo "-1" > /sys/kernel/config/target/core/user_0/block/attrib/qfull_time_out -bash: echo: write error: Invalid argument Fix: --- This patch will help reset qfull_time_out to its default i.e. qfull_time_out=-1 Signed-off-by: Prasanna Kumar Kalever--- drivers/target/target_core_user.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 4ad89ea71a70..4f26bdc3d1dc 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -2121,6 +2121,8 @@ static ssize_t tcmu_qfull_time_out_store(struct config_item *item, if (val >= 0) { udev->qfull_time_out = val * MSEC_PER_SEC; + } else if (val == -1) { + udev->qfull_time_out = val; } else { printk(KERN_ERR "Invalid qfull timeout value %d\n", val); return -EINVAL; This looks fine to me. @Mike, IMO the -1 could also be settable just like this case. And does it have any other special meaning expects to compatible to cmd_time_out ? Thanks,
[PATCH] scsi: qla2xxx: reduce the time granularity of qla2x00_eh_wait_on_command
If the cmd has not be returned after aborted by qla2x00_eh_abort, we have to wait for it. However, the time is 1000ms at least currently. If there are a lot cmds need to be aborted, the delay could be long enough to lead to panic due to such as hung task, ocfs2 heartbeat, etc, just before scsi recovery works. Change the granularity to 1ms, even though more context switches would be introduced, but it should be ok as it is not hot path. Signed-off-by: Jianchao Wang--- drivers/scsi/qla2xxx/qla_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 5c5dcca4..9f52ad9 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1072,7 +1072,7 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd, static int qla2x00_eh_wait_on_command(struct scsi_cmnd *cmd) { -#define ABORT_POLLING_PERIOD 1000 +#define ABORT_POLLING_PERIOD 1 #define ABORT_WAIT_ITER((2 * 1000) / (ABORT_POLLING_PERIOD)) unsigned long wait_iter = ABORT_WAIT_ITER; scsi_qla_host_t *vha = shost_priv(cmd->device->host); -- 2.7.4
Re: [PATCH v7] scsi: new zorro_esp.c for Amiga Zorro NCR53C9x boards
Hi Michael, On Tue, Apr 10, 2018 at 4:16 AM, Michael Schmitzwrote: > On Mon, Apr 9, 2018 at 7:50 PM, Christoph Hellwig wrote: >> On Sun, Apr 08, 2018 at 02:45:32PM +1200, Michael Schmitz wrote: >>> New combined SCSI driver for all ESP based Zorro SCSI boards for >>> m68k Amiga. >>> + { >>> + .id = >>> ZORRO_PROD_PHASE5_BLIZZARD_1230_II_FASTLANE_Z3_CYBERSCSI_CYBERSTORM060, >> >> In PCI Land we've usually stopped using PCI IDs unless they are used >> in multiple (missing "places"?) > Short of a complete rewrite of the Zorro driver support code to be > closer to what PCI does, I don' see what can be done about the use of > Zorro IDs. I don't think such a rewrite is planned in the near future, > Geert? I think what Christoph means is the use of the define ZORRO_PROD_PHASE5_BLIZZARD_1230_II_FASTLANE_Z3_CYBERSCSI_CYBERSTORM060 versus hardcoded numbers, or ZORRO_ID(PHASE5, 0x0B, 0). We have a long list of ZORRO_PROD_* definitions in include/uapi/linux/zorro_ids.h because of historical reasons. The list isn't really changing (no new IDs in git history) due to almost no new Zorro boards being made, unlike for PCI, where keeping an in-kernel list is a lot of work, and not desirable. Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
Re: *** SPAM *** Re: [RFC PATCH] mpt3sas: mpt3sas_scsih_enclosure_find_by_handle can be static
Hi Martin, Bart, I've not seen additional feedback on this (I may simply not be CCed). I've applied the patch to one of our hosts where we've had endless IO lockups (with MQ enabled the host died within a day or two, sometimes sub one hour, without it typically ran for about two weeks). With this patch (on top of 4.16) we're now at four days and 17 hours, with IO still going strong (including a mdadm reshape to add a disk, as well as a rebuild on a drive that failed - concurrently on two different arrays, same controller). Very subjective, but the host also feels more responsive under heavy IO load. What can I do from my side (I've got some development experience) to help push this patch forward? Kind Regards, Jaco On 28/03/2018 23:54, Martin K. Petersen wrote: > Bart, > >> Are you aware that if the 0-day test infrastructure suggests an improvement >> for a patch that the patch that that improvement applies to gets ignored >> unless either the patch is reposted with the improvement applied or that it >> is explained why the suggested improvement is inappropriate? > Correct. I don't apply anything that causes a 0-day warning. The patch > will be closed with "Changes Required" status in patchwork. > > Always build patch submissions to linux-scsi with: > > make C=1 CF="-D__CHECK_ENDIAN__" >
Re: usercopy whitelist woe in scsi_sense_cache
On Mon, Apr 9, 2018 at 11:35 PM, Oleksandr Natalenkowrote: > Did your system hang on smartctl hammering too? Have you got some stack > traces to compare with mine ones? Unfortunately I only had a single hang with no dumps. I haven't been able to reproduce it since. :( -Kees -- Kees Cook Pixel Security
Re: usercopy whitelist woe in scsi_sense_cache
Hi. 09.04.2018 22:30, Kees Cook wrote: echo 1 | tee /sys/block/sd*/queue/nr_requests I can't get this below "4". Oops, yeah. It cannot be less than BLKDEV_MIN_RQ (which is 4), so it is enforced explicitly in queue_requests_store(). It is the same for me. echo 1 | tee /sys/block/sd*/device/queue_depth I've got this now too. Ah! dm-crypt too. I'll see if I can get that added easily to my tests. And XFS! You love your corner cases. ;) Yeah, so far this wonderful configuration has allowed me to uncover a bunch of bugs, and see, we are not done yet ;). Two other questions, since you can reproduce this easily: - does it reproduce _without_ hardened usercopy? (I would assume yes, but you'd just not get any warning until the hangs started.) If it does reproduce without hardened usercopy, then a new bisect run could narrow the search even more. Looks like it cannot be disabled via kernel cmdline, so I have to re-compile the kernel, right? I can certainly do that anyway. - does it reproduce with Linus's current tree? Will try this too. What would imply missing locking, yes? Yikes. But I'd expect use-after-free or something, or bad data, not having the pointer slip forward? I still think this has something to do with blk-mq re-queuing capability and how BFQ implements it, because there are no sings of issue popping up with Kyber so far. Quick update: I added dm-crypt (with XFS on top) and it hung my system almost immediately. I got no warnings at all, though. Did your system hang on smartctl hammering too? Have you got some stack traces to compare with mine ones? Regards, Oleksandr
Re: [PATCH 1/1] scsi/ufs: qcom: Don't enable PHY_QCOM_UFS by default
On 4/10/2018 1:39 AM, Bjorn Andersson wrote: On Mon 09 Apr 10:38 PDT 2018, Vivek Gautam wrote: On 4/9/2018 10:21 PM, Bjorn Andersson wrote: On Mon 09 Apr 06:24 PDT 2018, Vivek Gautam wrote: [..] diff --git a/include/linux/phy/phy-qcom-ufs.h b/include/linux/phy/phy-qcom-ufs.h index 0a2c18a9771d..1388c2a2965e 100644 --- a/include/linux/phy/phy-qcom-ufs.h +++ b/include/linux/phy/phy-qcom-ufs.h @@ -31,8 +31,21 @@ void ufs_qcom_phy_enable_dev_ref_clk(struct phy *phy); */ void ufs_qcom_phy_disable_dev_ref_clk(struct phy *phy); +#if IS_ENABLED(CONFIG_PHY_QCOM_UFS) int ufs_qcom_phy_set_tx_lane_enable(struct phy *phy, u32 tx_lanes); void ufs_qcom_phy_save_controller_version(struct phy *phy, - u8 major, u16 minor, u16 step); + u8 major, u16 minor, u16 step); +#else +static inline int ufs_qcom_phy_set_tx_lane_enable(struct phy *phy, u32 tx_lanes) +{ + return -ENOSYS; +} + +static inline void ufs_qcom_phy_save_controller_version(struct phy *phy, + u8 major, u16 minor, + u16 step) +{ +} +#endif /* PHY_QCOM_UFS */ What's the timeline for getting rid of the references to these functions? I presume that code depending on these being here will compile but won't actually work? Yes, these inline definitions are just to keep ufs-qcom happy with the direct calls that it makes to these functions. As you would know these couple of functions are just used by the 20nm phy. However, we don't have any platform yet in the upstream that enables this phy. I am hoping that we will eventually get rid of these functions when we further clean up ufs-qcom driver. I see, but that means that we're calling this function with a struct phy that might not be a struct ufs_qcom_phy and as such a defconfig with both enabled will have undefined outcome for the migrated phys. No, we will have to add support for separate phys as sdm845 has phy per each lane, and the older struct phy will exist alongside. We will call this function only with the older phy pointer. In particular we do expect that the same kernel will boot on db820c and sdm845-mtp, so we will have to enable support for the 14nm & 20nm phy driver (and we don't want random crashes because someone happened to enable it). Right, so we create new struct phy while keeping older one intact to keep the ufs-qcom work with both - ufs_qcom_phy and qmp_phy. Some of the controller drivers, such as usb/dwc3/ keep support for old and new phys, although there the difference is between generic phy and the usb-phy. So, I am assuming that if we want to keep ufs-qcom on platforms using 20nm, 14nm and 10nm phys happy, we will have to keep the phys separately for sometime. What do you say about it? On db820c, we can still work with the ufs_qcom_phy. Regards Vivek So either we add the 10nm support to the existing driver or I think we should migrate, at least, the 14nm support to the QMP driver (and mark the remaining 20nm BROKEN for now). Regardless of the path chosen this should be cleaned up to the point where all three phys are supported. Regards, Bjorn -- To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Waiting for scsi_host_template release
Description: SCSI mid-layer may hold references to Scsi_Host structs when the owning module has already unloaded. Scsi_Host release path touches scsi_host_template struct that is usually allocated in the unloaded module's memory. That results in a crash. To work around the problem, this change implements scsi_host_template_release API to be called at driver unload path to make sure all Scsi_Host structs are gone before releasing scsi_host_template memory. --- drivers/scsi/hosts.c | 2 ++ drivers/scsi/qla2xxx/qla_os.c | 2 ++ drivers/scsi/scsi_priv.h | 1 + drivers/scsi/scsi_proc.c | 64 +++ include/scsi/scsi_host.h | 17 5 files changed, 80 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 82ac1cd..51c9e2b 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -353,6 +353,8 @@ static void scsi_host_dev_release(struct device *dev) blk_free_tags(shost->bqt); } + scsi_host_template_release(shost->hostt); + kfree(shost->shost_data); if (parent) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index fb8beee..e913b8b 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -270,6 +270,7 @@ struct scsi_host_template qla2xxx_driver_template = { .supported_mode = MODE_INITIATOR, .track_queue_depth = 1, + .wait_on_unload = 1, }; static struct scsi_transport_template *qla2xxx_transport_template = NULL; @@ -6116,6 +6117,7 @@ qla2x00_module_exit(void) kmem_cache_destroy(ctx_cachep); fc_release_transport(qla2xxx_transport_template); fc_release_transport(qla2xxx_transport_vport_template); + scsi_host_template_wait(_driver_template); } module_init(qla2x00_module_init); diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 4d01cdb..c76cb6e2 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -99,6 +99,7 @@ extern struct kmem_cache *scsi_sdb_cache; #ifdef CONFIG_SCSI_PROC_FS extern void scsi_proc_hostdir_add(struct scsi_host_template *); extern void scsi_proc_hostdir_rm(struct scsi_host_template *); +extern void scsi_host_template_release(struct scsi_host_template *); extern void scsi_proc_host_add(struct Scsi_Host *); extern void scsi_proc_host_rm(struct Scsi_Host *); extern int scsi_init_procfs(void); diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c index 251598e..daf2c99 100644 --- a/drivers/scsi/scsi_proc.c +++ b/drivers/scsi/scsi_proc.c @@ -99,15 +99,21 @@ static const struct file_operations proc_scsi_fops = { void scsi_proc_hostdir_add(struct scsi_host_template *sht) { - if (!sht->show_info) + if (!sht->show_info && !sht->wait_on_unload) return; mutex_lock(_host_template_mutex); if (!sht->present++) { - sht->proc_dir = proc_mkdir(sht->proc_name, proc_scsi); -if (!sht->proc_dir) - printk(KERN_ERR "%s: proc_mkdir failed for %s\n", -__func__, sht->proc_name); + if (sht->show_info) { + sht->proc_dir = proc_mkdir(sht->proc_name, proc_scsi); + if (!sht->proc_dir) + printk(KERN_ERR "%s: proc_mkdir failed for %s\n", +__func__, sht->proc_name); + } + if (sht->wait_on_unload) { + sema_init(>release_sem, 0); + sht->release_sem_waiters = 0; + } } mutex_unlock(_host_template_mutex); } @@ -118,7 +124,7 @@ void scsi_proc_hostdir_add(struct scsi_host_template *sht) */ void scsi_proc_hostdir_rm(struct scsi_host_template *sht) { - if (!sht->show_info) + if (!sht->show_info && !sht->wait_on_unload) return; mutex_lock(_host_template_mutex); @@ -126,9 +132,55 @@ void scsi_proc_hostdir_rm(struct scsi_host_template *sht) remove_proc_entry(sht->proc_name, proc_scsi); sht->proc_dir = NULL; } + if (sht->wait_on_unload) + ++sht->scsi_host_cleanups; mutex_unlock(_host_template_mutex); } +/** + * scsi_host_template_release - runs when a user of scsi_host_template + * (like Scsi_Host) is gone. + * @sht: pointer to scsi_host_template +*/ +void scsi_host_template_release(struct scsi_host_template *sht) +{ + int release_sem_waiters = 0; + struct semaphore *release_sem; + if (!sht->wait_on_unload) + return; + + mutex_lock(_host_template_mutex); + if (!--sht->scsi_host_cleanups && !sht->present) { + release_sem = >release_sem; + release_sem_waiters = sht->release_sem_waiters; + sht->release_sem_waiters = 0; + } + mutex_unlock(_host_template_mutex); + for (; release_sem_waiters; --release_sem_waiters) + up(release_sem); +} + +/** + * scsi_host_template_wait - Waits till all references to + * scsi_host_template are gone + * @sht: pointer to scsi_host_template +*/ +void scsi_host_template_wait(struct scsi_host_template *sht) +{ + unsigned char present = 0; + struct semaphore *release_sem; + if (!sht->wait_on_unload) + return; + mutex_lock(_host_template_mutex); + present = sht->present + sht->scsi_host_cleanups; + if (present) { + ++sht->release_sem_waiters; + release_sem = >release_sem; + } +