Re: usercopy whitelist woe in scsi_sense_cache

2018-04-10 Thread Kees Cook
On Tue, Apr 10, 2018 at 10:16 AM, Oleksandr Natalenko
 wrote:
> Hi, Kees, Paolo et al.
>
> 10.04.2018 08:53, Kees Cook wrote:
>>
>> Unfortunately I only had a single hang with no dumps. I haven't been
>> able to reproduce it since. :(
>
>
> For your convenience I've prepared a VM that contains a reproducer.

Awesome. :)

> Under the /root folder there is a reproducer script (reproducer.sh). It does
> trivial things like enabling sysrq, opening LUKS device, mounting a volume,
> running a background I/O (this is an important part, actually, since I
> wasn't able to trigger the issue without the background I/O) and, finally,
> running the smartctl in a loop. If you are lucky, within a minute or two
> you'll get the first warning followed shortly by subsequent bugs and I/O
> stall (htop is pre-installed for your convenience too).

Yup!

[   27.729498] Bad or missing usercopy whitelist? Kernel memory
exposure attempt detected from SLUB object 'scsi_sense_cache' (offset
76, size 22)!

I'll see about booting with my own kernels, etc, and try to narrow this down. :)

-Kees

-- 
Kees Cook
Pixel Security


Re: [PATCH] scsi_debug: IMMED related delay adjustments

2018-04-10 Thread Ming Lei
On Tue, Apr 10, 2018 at 01:00:36PM -0400, Douglas Gilbert wrote:
> A patch titled: "[PATCH v2] scsi_debug: implement IMMED bit"
> introduced long delays to the Start stop unit (SSU) and
> Synchronize cache (SC) commands when the IMMED bit is clear.
> This patch makes those delays more realistic. It causes SSU
> to only delay when the start stop state is changed; SC only
> delays when there's been a write since the previous SC. It
> also reduced the SC delay from 1 second to 50 milliseconds.
> 
> Signed-off-by: Douglas Gilbert 
> ---
> This patch is in response to a report on the Linux scsi list
> indicating a significant slowdown in benchmarks using the
> original patch. The reporter seemed satisfield with an earlier
> version of this patch (in which the only difference was that
> the SC delay was 100 milliseconds).
> 
>  drivers/scsi/scsi_debug.c | 33 -
>  1 file changed, 24 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
> index 26ce022dd6f4..9ab19285a3d3 100644
> --- a/drivers/scsi/scsi_debug.c
> +++ b/drivers/scsi/scsi_debug.c
> @@ -234,11 +234,13 @@ static const char *sdebug_version_date = "20180128";
>  #define F_INV_OP 0x200
>  #define F_FAKE_RW0x400
>  #define F_M_ACCESS   0x800   /* media access */
> -#define F_LONG_DELAY 0x1000
> +#define F_SSU_DELAY  0x1000
> +#define F_SYNC_DELAY 0x2000
>  
>  #define FF_RESPOND (F_RL_WLUN_OK | F_SKIP_UA | F_DELAY_OVERR)
>  #define FF_MEDIA_IO (F_M_ACCESS | F_FAKE_RW)
>  #define FF_SA (F_SA_HIGH | F_SA_LOW)
> +#define F_LONG_DELAY (F_SSU_DELAY | F_SYNC_DELAY)
>  
>  #define SDEBUG_MAX_PARTS 4
>  
> @@ -510,7 +512,7 @@ static const struct opcode_info_t release_iarr[] = {
>  };
>  
>  static const struct opcode_info_t sync_cache_iarr[] = {
> - {0, 0x91, 0, F_LONG_DELAY | F_M_ACCESS, resp_sync_cache, NULL,
> + {0, 0x91, 0, F_SYNC_DELAY | F_M_ACCESS, resp_sync_cache, NULL,
>   {16,  0x6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
>0xff, 0xff, 0xff, 0xff, 0x3f, 0xc7} }, /* SYNC_CACHE (16) */
>  };
> @@ -553,7 +555,7 @@ static const struct opcode_info_t 
> opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = {
>   resp_write_dt0, write_iarr, /* WRITE(16) */
>   {16,  0xfa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
>0xff, 0xff, 0xff, 0xff, 0xff, 0xc7} },
> - {0, 0x1b, 0, F_LONG_DELAY, resp_start_stop, NULL,/* START STOP UNIT */
> + {0, 0x1b, 0, F_SSU_DELAY, resp_start_stop, NULL,/* START STOP UNIT */
>   {6,  0x1, 0, 0xf, 0xf7, 0xc7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
>   {ARRAY_SIZE(sa_in_16_iarr), 0x9e, 0x10, F_SA_LOW | F_D_IN,
>   resp_readcap16, sa_in_16_iarr, /* SA_IN(16), READ CAPACITY(16) */
> @@ -606,7 +608,7 @@ static const struct opcode_info_t 
> opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = {
>   resp_write_same_10, write_same_iarr,/* WRITE SAME(10) */
>   {10,  0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xc7, 0,
>0, 0, 0, 0, 0} },
> - {ARRAY_SIZE(sync_cache_iarr), 0x35, 0, F_LONG_DELAY | F_M_ACCESS,
> + {ARRAY_SIZE(sync_cache_iarr), 0x35, 0, F_SYNC_DELAY | F_M_ACCESS,
>   resp_sync_cache, sync_cache_iarr,
>   {10,  0x7, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xc7, 0, 0,
>0, 0, 0, 0} }, /* SYNC_CACHE (10) */
> @@ -667,6 +669,7 @@ static bool sdebug_strict = DEF_STRICT;
>  static bool sdebug_any_injecting_opt;
>  static bool sdebug_verbose;
>  static bool have_dif_prot;
> +static bool write_since_sync;
>  static bool sdebug_statistics = DEF_STATISTICS;
>  
>  static unsigned int sdebug_store_sectors;
> @@ -1607,6 +1610,7 @@ static int resp_start_stop(struct scsi_cmnd *scp,
>  {
>   unsigned char *cmd = scp->cmnd;
>   int power_cond, stop;
> + bool changing;
>  
>   power_cond = (cmd[4] & 0xf0) >> 4;
>   if (power_cond) {
> @@ -1614,8 +1618,12 @@ static int resp_start_stop(struct scsi_cmnd *scp,
>   return check_condition_result;
>   }
>   stop = !(cmd[4] & 1);
> + changing = atomic_read(>stopped) == !stop;
>   atomic_xchg(>stopped, stop);
> - return (cmd[1] & 0x1) ? SDEG_RES_IMMED_MASK : 0; /* check IMMED bit */
> + if (!changing || cmd[1] & 0x1)  /* state unchanged or IMMED set */
> + return SDEG_RES_IMMED_MASK;
> + else
> + return 0;
>  }
>  
>  static sector_t get_sdebug_capacity(void)
> @@ -2473,6 +2481,7 @@ static int do_device_access(struct scsi_cmnd *scmd, u32 
> sg_skip, u64 lba,
>   if (do_write) {
>   sdb = scsi_out(scmd);
>   dir = DMA_TO_DEVICE;
> + write_since_sync = true;
>   } else {
>   sdb = scsi_in(scmd);
>   dir = DMA_FROM_DEVICE;
> @@ -3583,6 +3592,7 @@ static int resp_get_lba_status(struct scsi_cmnd *scp,

Re: 4.15.14 crash with iscsi target and dvd

2018-04-10 Thread Wakko Warner
Ming Lei wrote:
> Sure, thanks for your sharing.
> 
> Wakko, could you test the following patch and see if there is any
> difference?
> 
> --
> diff --git a/drivers/target/target_core_pscsi.c 
> b/drivers/target/target_core_pscsi.c
> index 0d99b242e82e..6147178f1f37 100644
> --- a/drivers/target/target_core_pscsi.c
> +++ b/drivers/target/target_core_pscsi.c
> @@ -888,7 +888,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, 
> u32 sgl_nents,
>   if (len > 0 && data_len > 0) {
>   bytes = min_t(unsigned int, len, PAGE_SIZE - off);
>   bytes = min(bytes, data_len);
> -
> + new_bio:
>   if (!bio) {
>   nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages);
>   nr_pages -= nr_vecs;
> @@ -931,6 +931,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, 
> u32 sgl_nents,
>* be allocated with pscsi_get_bio() above.
>*/
>   bio = NULL;
> + goto new_bio;
>   }
>  
>   data_len -= bytes;

Sorry for the delay.  I reverted my change, added this one.  I didn't
reboot, I just unloaded and loaded this one.
Note: /dev/sr1 as seen from the initiator is /dev/sr0 (physical disc) on the
target.

Doesn't crash, however on the initiator I see this:
[9273849.70] ISO 9660 Extensions: RRIP_1991A
[9273863.359718] scsi_io_completion: 13 callbacks suppressed
[9273863.359788] sr 26:0:0:0: [sr1] tag#1 UNKNOWN(0x2003) Result: hostbyte=0x00 
driverbyte=0x08
[9273863.359909] sr 26:0:0:0: [sr1] tag#1 Sense Key : 0x2 [current] 
[9273863.359974] sr 26:0:0:0: [sr1] tag#1 ASC=0x8 ASCQ=0x0 
[9273863.360036] sr 26:0:0:0: [sr1] tag#1 CDB: opcode=0x28 28 00 00 22 f6 96 00 
00 80 00
[9273863.360116] blk_update_request: 13 callbacks suppressed
[9273863.360177] blk_update_request: I/O error, dev sr1, sector 9165400
[9273875.864648] sr 26:0:0:0: [sr1] tag#1 UNKNOWN(0x2003) Result: hostbyte=0x00 
driverbyte=0x08
[9273875.864738] sr 26:0:0:0: [sr1] tag#1 Sense Key : 0x2 [current] 
[9273875.864801] sr 26:0:0:0: [sr1] tag#1 ASC=0x8 ASCQ=0x0 
[9273875.864890] sr 26:0:0:0: [sr1] tag#1 CDB: opcode=0x28 28 00 00 22 f7 16 00 
00 80 00
[9273875.864971] blk_update_request: I/O error, dev sr1, sector 9165912

To cause this, I mounted the dvd as seen in the first line and ran this
command: find /cdrom2 -type f | xargs -tn1 cat > /dev/null
I did some various tests.  Each test was done after umount and mount to
clear the cache.
cat  > /dev/null causes the message.
dd if= of=/dev/null bs=2048 doesn't
using bs=4096 doesn't
using bs=64k doesn't
using bs=128k does
cat uses a blocksize of 128k.

The following was done without being mounted.
ddrescue -f -f /dev/sr1 /dev/null 
doesn't cause the message
dd if=/dev/sr1 of=/dev/null bs=128k
doesn't cause the message
using bs=256k causes the message once:
[9275916.857409] sr 27:0:0:0: [sr1] tag#0 UNKNOWN(0x2003) Result: hostbyte=0x00 
driverbyte=0x08
[9275916.857482] sr 27:0:0:0: [sr1] tag#0 Sense Key : 0x2 [current] 
[9275916.857520] sr 27:0:0:0: [sr1] tag#0 ASC=0x8 ASCQ=0x0 
[9275916.857556] sr 27:0:0:0: [sr1] tag#0 CDB: opcode=0x28 28 00 00 00 00 00 00 
00 80 00
[9275916.857614] blk_update_request: I/O error, dev sr1, sector 0

If I access the disc from the target natively either by mounting and
accessing files or working with the device directly (ie dd) no errors are
logged on the target.

-- 
 Microsoft has beaten Volkswagen's world record.  Volkswagen only created 22
 million bugs.


Re: [PATCH v7] scsi: new zorro_esp.c for Amiga Zorro NCR53C9x boards

2018-04-10 Thread Michael Schmitz
Hi Geert,

now I see, thanks.

I could change the device table to use ZORRO_ID(PHASE5, ...) style IDs
instead of the longish defines if you're OK with that.

The other changes have passed tests and I'd otherwise just send what I
have now, adding Christoph's Reviewed-by.

Cheers,

  Michael


On Tue, Apr 10, 2018 at 8:18 PM, Geert Uytterhoeven
 wrote:
> Hi Michael,
>
> On Tue, Apr 10, 2018 at 4:16 AM, Michael Schmitz  wrote:
>> On Mon, Apr 9, 2018 at 7:50 PM, Christoph Hellwig  wrote:
>>> On Sun, Apr 08, 2018 at 02:45:32PM +1200, Michael Schmitz wrote:
 New combined SCSI driver for all ESP based Zorro SCSI boards for
 m68k Amiga.
 + {
 + .id = 
 ZORRO_PROD_PHASE5_BLIZZARD_1230_II_FASTLANE_Z3_CYBERSCSI_CYBERSTORM060,
>>>
>>> In PCI Land we've usually stopped using PCI IDs unless they are used
>>> in multiple
>
> (missing "places"?)
>
>> Short of a complete rewrite of the Zorro driver support code to be
>> closer to what PCI does, I don' see what can be done about the use of
>> Zorro IDs. I don't think such a rewrite is planned in the near future,
>> Geert?
>
> I think what Christoph means is the use of the define
> ZORRO_PROD_PHASE5_BLIZZARD_1230_II_FASTLANE_Z3_CYBERSCSI_CYBERSTORM060
> versus hardcoded numbers, or ZORRO_ID(PHASE5, 0x0B, 0).
>
> We have a long list of ZORRO_PROD_* definitions in
> include/uapi/linux/zorro_ids.h because of historical reasons.  The list
> isn't really changing (no new IDs in git history) due to almost no new
> Zorro boards being made, unlike for PCI, where keeping an in-kernel list
> is a lot of work, and not desirable.
>
> Gr{oetje,eeting}s,
>
> Geert
>
> --
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- 
> ge...@linux-m68k.org
>
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like 
> that.
> -- Linus Torvalds


Hotplug

2018-04-10 Thread Jens Axboe
Hi,

Been running some tests and I keep running into issues with hotplug.
This looks similar to what Bart posted the other day, but it looks
more deeply rooted than just having to protect the queue in
generic_make_request_checks(). The test run is blktests,
block/001. Current -git doesn't survive it. I've seen at least two
different oopses, pasted below.

[  102.163442] NULL pointer dereference at 0010
[  102.163444] PGD 0 P4D 0 
[  102.163447] Oops:  [#1] PREEMPT SMP
[  102.163449] Modules linked in:
[  102.175540] sr 12:0:0:0: [sr2] scsi-1 drive
[  102.180112]  scsi_debug crc_t10dif crct10dif_generic crct10dif_common nvme 
nvme_core sb_edac xl
[  102.186934] sr 12:0:0:0: Attached scsi CD-ROM sr2
[  102.191896]  sr_mod cdrom btrfs xor zstd_decompress zstd_compress xxhash 
lzo_compress zlib_defc
[  102.197169] sr 12:0:0:0: Attached scsi generic sg7 type 5
[  102.203475]  igb ahci libahci i2c_algo_bit libata dca [last unloaded: 
crc_t10dif]
[  102.203484] CPU: 43 PID: 4629 Comm: systemd-udevd Not tainted 4.16.0+ #650
[  102.203487] Hardware name: Dell Inc. PowerEdge T630/0NT78X, BIOS 2.3.4 
11/09/2016
[  102.350882] RIP: 0010:sr_block_revalidate_disk+0x23/0x190 [sr_mod]
[  102.358299] RSP: 0018:883ff357bb58 EFLAGS: 00010292
[  102.364734] RAX: a00b07d0 RBX: 883ff3058000 RCX: 883ff357bb66
[  102.373220] RDX: 0003 RSI: 7530 RDI: 881fea631000
[  102.381705] RBP:  R08: 881fe4d38400 R09: 
[  102.390185] R10:  R11: 01b6 R12: 085d
[  102.398671] R13: 085d R14: 883ffd9b3790 R15: 
[  102.407156] FS:  7f7dc8e6d8c0() GS:883fff34() 
knlGS:
[  102.417138] CS:  0010 DS:  ES:  CR0: 80050033
[  102.424066] CR2: 0010 CR3: 003ffda98005 CR4: 003606e0
[  102.432545] DR0:  DR1:  DR2: 
[  102.441024] DR3:  DR6: fffe0ff0 DR7: 0400
[  102.449502] Call Trace:
[  102.452744]  ? __invalidate_device+0x48/0x60
[  102.458022]  check_disk_change+0x4c/0x60
[  102.462900]  sr_block_open+0x16/0xd0 [sr_mod]
[  102.468270]  __blkdev_get+0xb9/0x450
[  102.472774]  ? iget5_locked+0x1c0/0x1e0
[  102.477568]  blkdev_get+0x11e/0x320
[  102.481969]  ? bdget+0x11d/0x150
[  102.486083]  ? _raw_spin_unlock+0xa/0x20
[  102.490968]  ? bd_acquire+0xc0/0xc0
[  102.495368]  do_dentry_open+0x1b0/0x320
[  102.500159]  ? inode_permission+0x24/0xc0
[  102.505140]  path_openat+0x4e6/0x1420
[  102.509741]  ? cpumask_any_but+0x1f/0x40
[  102.514630]  ? flush_tlb_mm_range+0xa0/0x120
[  102.519903]  do_filp_open+0x8c/0xf0
[  102.524305]  ? __seccomp_filter+0x28/0x230
[  102.529389]  ? _raw_spin_unlock+0xa/0x20
[  102.534283]  ? __handle_mm_fault+0x7d6/0x9b0
[  102.539559]  ? list_lru_add+0xa8/0xc0
[  102.544157]  ? _raw_spin_unlock+0xa/0x20
[  102.549047]  ? __alloc_fd+0xaf/0x160
[  102.553549]  ? do_sys_open+0x1a6/0x230
[  102.558244]  do_sys_open+0x1a6/0x230
[  102.562742]  do_syscall_64+0x5a/0x100
[  102.567336]  entry_SYSCALL_64_after_hwframe+0x3d/0xa2

and this one, similar to Barts:

[ 4676.738069] NULL pointer dereference at 0154
[ 4676.738071] PGD 0 P4D 0 
[ 4676.738073] Oops:  [#1] PREEMPT SMP
[ 4676.738075] Modules linked in: scsi_debug crc_t10dif nvme nvme_core loop 
configfs crct10dif_ge]
[ 4676.859272] CPU: 10 PID: 16598 Comm: systemd-udevd Not tainted 4.16.0+ #651
[ 4676.867525] Hardware name: Dell Inc. PowerEdge T630/0NT78X, BIOS 2.3.4 
11/09/2016
[ 4676.876765] RIP: 0010:blk_throtl_bio+0x45/0x9b0
[ 4676.882296] RSP: 0018:881ff0c8bb38 EFLAGS: 00010246
[ 4676.888610] RAX:  RBX: 881ffa273a40 RCX: 
[ 4676.897059] RDX: 881ffa273a40 RSI:  RDI: 
[ 4676.905507] RBP:  R08: 881ffa273ac0 R09: 881ff123f458
[ 4676.913955] R10: 881ff0c8bc70 R11: 1000 R12: 
[ 4676.922402] R13: 82600980 R14: 88208113 R15: 
[ 4676.930856] FS:  7fe63e5228c0() GS:881fff74() 
knlGS:
[ 4676.940773] CS:  0010 DS:  ES:  CR0: 80050033
[ 4676.947667] CR2: 0154 CR3: 001fed2a0003 CR4: 003606e0
[ 4676.956116] DR0:  DR1:  DR2: 
[ 4676.964568] DR3:  DR6: fffe0ff0 DR7: 0400
[ 4676.973021] Call Trace:
[ 4676.976229]  generic_make_request_checks+0x640/0x660
[ 4676.982245]  ? kmem_cache_alloc+0x37/0x190
[ 4676.987295]  generic_make_request+0x29/0x2f0
[ 4676.992534]  ? submit_bio+0x5c/0x110
[ 4676.996998]  ? bio_alloc_bioset+0x99/0x1c0
[ 4677.002050]  submit_bio+0x5c/0x110
[ 4677.006317]  ? guard_bio_eod+0x42/0x120
[ 4677.011074]  submit_bh_wbc.isra.49+0x132/0x160
[ 4677.016517]  ? bh_uptodate_or_lock+0x90/0x90
[ 4677.021757]  

Re: usercopy whitelist woe in scsi_sense_cache

2018-04-10 Thread Oleksandr Natalenko

Hi, Kees, Paolo et al.

10.04.2018 08:53, Kees Cook wrote:

Unfortunately I only had a single hang with no dumps. I haven't been
able to reproduce it since. :(


For your convenience I've prepared a VM that contains a reproducer.

It consists of 3 disk images (sda.img is for the system, it is 
Arch-based, sdb/sdc.img are for RAID). They are available (in a 
compressed form) to download here [1].


RAID is built as RAID10 with far2 layout, on top of it there is a LUKS 
container (can be opened either with keyfile under the /root or using 
"qwerty" password). There's one LVM PV, one VG and one volume on top of 
LUKS containing XFS. RAID is automatically assembled during the boot, so 
you don't have to worry about it.


I run the VM like this:

$ qemu-system-x86_64 -display gtk,gl=on -machine q35,accel=kvm -cpu 
host,+vmx -enable-kvm -netdev user,id=user.0 -device 
virtio-net,netdev=user.0 -usb -device nec-usb-xhci,id=xhci -device 
usb-tablet,bus=xhci.0 -serial stdio -smp 4 -m 512 -hda sda.img -hdb 
sdb.img -hdc sdc.img


The system is accessible via both VGA and serial console. The user is 
"root", the password is "qwerty".


Under the /root folder there is a reproducer script (reproducer.sh). It 
does trivial things like enabling sysrq, opening LUKS device, mounting a 
volume, running a background I/O (this is an important part, actually, 
since I wasn't able to trigger the issue without the background I/O) 
and, finally, running the smartctl in a loop. If you are lucky, within a 
minute or two you'll get the first warning followed shortly by 
subsequent bugs and I/O stall (htop is pre-installed for your 
convenience too).


Notable changes in this VM comparing to generic defaults:

1) blk-mq is enabled via kernel cmdline (scsi_mod.use_blk_mq=1 is in 
/etc/default/grub)
2) BFQ is set via udev (check /etc/udev/rules.d/10-io-scheduler.rules 
file)


Again, I wasn't able to reproduce the usercopy warning/bug and I/O hang 
without all these components being involved.


Hope you enjoy it.

P.S. I haven't tested Linus' master branch yet. For now, this VM runs 
v4.16 kernel.


Regards,
  Oleksandr

[1] https://natalenko.name/myfiles/usercopy_bfq_woe/


[PATCH] scsi_debug: IMMED related delay adjustments

2018-04-10 Thread Douglas Gilbert
A patch titled: "[PATCH v2] scsi_debug: implement IMMED bit"
introduced long delays to the Start stop unit (SSU) and
Synchronize cache (SC) commands when the IMMED bit is clear.
This patch makes those delays more realistic. It causes SSU
to only delay when the start stop state is changed; SC only
delays when there's been a write since the previous SC. It
also reduced the SC delay from 1 second to 50 milliseconds.

Signed-off-by: Douglas Gilbert 
---
This patch is in response to a report on the Linux scsi list
indicating a significant slowdown in benchmarks using the
original patch. The reporter seemed satisfield with an earlier
version of this patch (in which the only difference was that
the SC delay was 100 milliseconds).

 drivers/scsi/scsi_debug.c | 33 -
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 26ce022dd6f4..9ab19285a3d3 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -234,11 +234,13 @@ static const char *sdebug_version_date = "20180128";
 #define F_INV_OP   0x200
 #define F_FAKE_RW  0x400
 #define F_M_ACCESS 0x800   /* media access */
-#define F_LONG_DELAY   0x1000
+#define F_SSU_DELAY0x1000
+#define F_SYNC_DELAY   0x2000
 
 #define FF_RESPOND (F_RL_WLUN_OK | F_SKIP_UA | F_DELAY_OVERR)
 #define FF_MEDIA_IO (F_M_ACCESS | F_FAKE_RW)
 #define FF_SA (F_SA_HIGH | F_SA_LOW)
+#define F_LONG_DELAY   (F_SSU_DELAY | F_SYNC_DELAY)
 
 #define SDEBUG_MAX_PARTS 4
 
@@ -510,7 +512,7 @@ static const struct opcode_info_t release_iarr[] = {
 };
 
 static const struct opcode_info_t sync_cache_iarr[] = {
-   {0, 0x91, 0, F_LONG_DELAY | F_M_ACCESS, resp_sync_cache, NULL,
+   {0, 0x91, 0, F_SYNC_DELAY | F_M_ACCESS, resp_sync_cache, NULL,
{16,  0x6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 0xff, 0xff, 0xff, 0xff, 0x3f, 0xc7} }, /* SYNC_CACHE (16) */
 };
@@ -553,7 +555,7 @@ static const struct opcode_info_t 
opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = {
resp_write_dt0, write_iarr, /* WRITE(16) */
{16,  0xfa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 0xff, 0xff, 0xff, 0xff, 0xff, 0xc7} },
-   {0, 0x1b, 0, F_LONG_DELAY, resp_start_stop, NULL,/* START STOP UNIT */
+   {0, 0x1b, 0, F_SSU_DELAY, resp_start_stop, NULL,/* START STOP UNIT */
{6,  0x1, 0, 0xf, 0xf7, 0xc7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
{ARRAY_SIZE(sa_in_16_iarr), 0x9e, 0x10, F_SA_LOW | F_D_IN,
resp_readcap16, sa_in_16_iarr, /* SA_IN(16), READ CAPACITY(16) */
@@ -606,7 +608,7 @@ static const struct opcode_info_t 
opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = {
resp_write_same_10, write_same_iarr,/* WRITE SAME(10) */
{10,  0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xc7, 0,
 0, 0, 0, 0, 0} },
-   {ARRAY_SIZE(sync_cache_iarr), 0x35, 0, F_LONG_DELAY | F_M_ACCESS,
+   {ARRAY_SIZE(sync_cache_iarr), 0x35, 0, F_SYNC_DELAY | F_M_ACCESS,
resp_sync_cache, sync_cache_iarr,
{10,  0x7, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xc7, 0, 0,
 0, 0, 0, 0} }, /* SYNC_CACHE (10) */
@@ -667,6 +669,7 @@ static bool sdebug_strict = DEF_STRICT;
 static bool sdebug_any_injecting_opt;
 static bool sdebug_verbose;
 static bool have_dif_prot;
+static bool write_since_sync;
 static bool sdebug_statistics = DEF_STATISTICS;
 
 static unsigned int sdebug_store_sectors;
@@ -1607,6 +1610,7 @@ static int resp_start_stop(struct scsi_cmnd *scp,
 {
unsigned char *cmd = scp->cmnd;
int power_cond, stop;
+   bool changing;
 
power_cond = (cmd[4] & 0xf0) >> 4;
if (power_cond) {
@@ -1614,8 +1618,12 @@ static int resp_start_stop(struct scsi_cmnd *scp,
return check_condition_result;
}
stop = !(cmd[4] & 1);
+   changing = atomic_read(>stopped) == !stop;
atomic_xchg(>stopped, stop);
-   return (cmd[1] & 0x1) ? SDEG_RES_IMMED_MASK : 0; /* check IMMED bit */
+   if (!changing || cmd[1] & 0x1)  /* state unchanged or IMMED set */
+   return SDEG_RES_IMMED_MASK;
+   else
+   return 0;
 }
 
 static sector_t get_sdebug_capacity(void)
@@ -2473,6 +2481,7 @@ static int do_device_access(struct scsi_cmnd *scmd, u32 
sg_skip, u64 lba,
if (do_write) {
sdb = scsi_out(scmd);
dir = DMA_TO_DEVICE;
+   write_since_sync = true;
} else {
sdb = scsi_in(scmd);
dir = DMA_FROM_DEVICE;
@@ -3583,6 +3592,7 @@ static int resp_get_lba_status(struct scsi_cmnd *scp,
 static int resp_sync_cache(struct scsi_cmnd *scp,
   struct sdebug_dev_info *devip)
 {
+   int res = 0;
u64 lba;
u32 num_blocks;

RE: [PATCH v1 14/15] mpt3sas: fix possible memory leak.

2018-04-10 Thread Sreekanth Reddy
Yes Sasha, We like to have this patch included in a stable tree.

Thanks,
Sreekanth

-Original Message-
From: Sasha Levin [mailto:alexander.le...@microsoft.com]
Sent: Tuesday, April 10, 2018 7:19 PM
To: Sasha Levin; Chaitra P B; linux-scsi@vger.kernel.org
Cc: sathya.prak...@broadcom.com; sreekanth.re...@broadcom.com;
sta...@vger.kernel.org
Subject: Re: [PATCH v1 14/15] mpt3sas: fix possible memory leak.

Hi,

[This is an automated email]

This commit has been processed by the -stable helper bot and determined to
be a high probability candidate for -stable trees. (score: 12.9808)

The bot has tested the following trees: v4.16.1, v4.15.16, v4.14.33,
v4.9.93, v4.4.127.

v4.16.1: Build OK!
v4.15.16: Build OK!
v4.14.33: Build OK!
v4.9.93: Build OK!
v4.4.127: Build OK!

Please let us know if you'd like to have this patch included in a stable
tree.

--
Thanks,
Sasha


RE: [PATCH v1 03/15] mpt3sas: Add sanity checks for scsi tracker before accessing it.

2018-04-10 Thread Sreekanth Reddy
Yes Sasha, We like to have this patch included in a stable tree.

Thanks,
Sreekanth

-Original Message-
From: Sasha Levin [mailto:alexander.le...@microsoft.com]
Sent: Tuesday, April 10, 2018 7:19 PM
To: Sasha Levin; Chaitra P B; linux-scsi@vger.kernel.org
Cc: sathya.prak...@broadcom.com; sreekanth.re...@broadcom.com;
sta...@vger.kernel.org
Subject: Re: [PATCH v1 03/15] mpt3sas: Add sanity checks for scsi tracker
before accessing it.

Hi,

[This is an automated email]

This commit has been processed by the -stable helper bot and determined to
be a high probability candidate for -stable trees. (score: 28.7865)

The bot has tested the following trees: v4.16.1, v4.15.16, v4.14.33,
v4.9.93, v4.4.127.

v4.16.1: Build OK!
v4.15.16: Failed to apply! Possible dependencies:
dbec4c9040ed ("scsi: mpt3sas: lockless command submission")

v4.14.33: Failed to apply! Possible dependencies:
dbec4c9040ed ("scsi: mpt3sas: lockless command submission")

v4.9.93: Failed to apply! Possible dependencies:
dbec4c9040ed ("scsi: mpt3sas: lockless command submission")

v4.4.127: Failed to apply! Possible dependencies:
dbec4c9040ed ("scsi: mpt3sas: lockless command submission")


Please let us know if you'd like to have this patch included in a stable
tree.

--
Thanks,
Sasha


Re: [PATCH v3 0/3] Report all request failures again to user space

2018-04-10 Thread Bart Van Assche
On Tue, 2018-04-10 at 15:48 +0200, Johannes Thumshirn wrote:
> On Mon, Apr 09, 2018 at 09:35:21PM -0400, Martin K. Petersen wrote:
> > 
> > Johannes,
> > 
> > > I did start a series [1] for this but than got distracted by more urgent
> > > things. I can pick it up again I think.
> > > 
> > > [1]
> > > https://git.kernel.org/pub/scm/linux/kernel/git/jth/linux.git/log/?h=iscsi_DID_REQUEUE
> > 
> > Definitely a step in the right direction.
> 
> Does anyone object getting rid of the Scsi_Cmnd typedef while I'm at
> it?
> 
> This would ease my refactoring a lot, asI don't have to teach
> coccinelle yet another case that multiplies the rules.

I'm in favor of the removal. But maybe there is something I'm overlooking
and that means that the typedef should not be removed?

Bart.




Re: [PATCH v3 1/3] Revert "scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"

2018-04-10 Thread Bart Van Assche

On 04/10/18 07:49, Sasha Levin wrote:

[This is an automated email]

This commit has been processed because it contains a "Fixes:" tag,
fixing commit: e39a97353e53 scsi: core: return BLK_STS_OK for DID_OK in 
__scsi_error_from_host_byte().

The bot has also determined it's probably a bug fixing patch. (score: 94.6757)

The bot has tested the following trees: v4.16.1.

v4.16.1: Build OK!


Hello Sasha,

Please fix your bot. Your bot replied to a patch that has a "Cc: stable" 
tag.


Bart.






Re: [PATCH v3 1/3] Revert "scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"

2018-04-10 Thread Sasha Levin
Hi,

[This is an automated email]

This commit has been processed because it contains a "Fixes:" tag,
fixing commit: e39a97353e53 scsi: core: return BLK_STS_OK for DID_OK in 
__scsi_error_from_host_byte().

The bot has also determined it's probably a bug fixing patch. (score: 94.6757)

The bot has tested the following trees: v4.16.1.

v4.16.1: Build OK!

--
Thanks,
Sasha

Re: [PATCH 2/3] megaraid_sas: Increase timeout by 1 sec for non-RAID fastpath IOs

2018-04-10 Thread Sasha Levin
Hi,

[This is an automated email]

This commit has been processed by the -stable helper bot and determined
to be a high probability candidate for -stable trees. (score: 8.0623)

The bot has tested the following trees: v4.16.1, v4.15.16, v4.14.33, v4.9.93, 
v4.4.127.

v4.16.1: Build OK!
v4.15.16: Build OK!
v4.14.33: Build OK!
v4.9.93: Build OK!
v4.4.127: Build OK!

Please let us know if you'd like to have this patch included in a stable tree.

--
Thanks,
Sasha

Re: [PATCH v1 03/15] mpt3sas: Add sanity checks for scsi tracker before accessing it.

2018-04-10 Thread Sasha Levin
Hi,

[This is an automated email]

This commit has been processed by the -stable helper bot and determined
to be a high probability candidate for -stable trees. (score: 28.7865)

The bot has tested the following trees: v4.16.1, v4.15.16, v4.14.33, v4.9.93, 
v4.4.127.

v4.16.1: Build OK!
v4.15.16: Failed to apply! Possible dependencies:
dbec4c9040ed ("scsi: mpt3sas: lockless command submission")

v4.14.33: Failed to apply! Possible dependencies:
dbec4c9040ed ("scsi: mpt3sas: lockless command submission")

v4.9.93: Failed to apply! Possible dependencies:
dbec4c9040ed ("scsi: mpt3sas: lockless command submission")

v4.4.127: Failed to apply! Possible dependencies:
dbec4c9040ed ("scsi: mpt3sas: lockless command submission")


Please let us know if you'd like to have this patch included in a stable tree.

--
Thanks,
Sasha

Re: [PATCH v1 14/15] mpt3sas: fix possible memory leak.

2018-04-10 Thread Sasha Levin
Hi,

[This is an automated email]

This commit has been processed by the -stable helper bot and determined
to be a high probability candidate for -stable trees. (score: 12.9808)

The bot has tested the following trees: v4.16.1, v4.15.16, v4.14.33, v4.9.93, 
v4.4.127.

v4.16.1: Build OK!
v4.15.16: Build OK!
v4.14.33: Build OK!
v4.9.93: Build OK!
v4.4.127: Build OK!

Please let us know if you'd like to have this patch included in a stable tree.

--
Thanks,
Sasha

Re: [PATCH v3 0/3] Report all request failures again to user space

2018-04-10 Thread Sasha Levin
Hi,

[This is an automated email]

This commit has been processed by the -stable helper bot and determined
to be a high probability candidate for -stable trees. (score: 19.8603)

The bot has tested the following trees: v4.16.1, v4.15.16, v4.14.33, v4.9.93, 
v4.4.127.

v4.16.1: Failed to apply! Possible dependencies:
Unable to calculate

v4.15.16: Build OK!
v4.14.33: Build OK!
v4.9.93: Failed to apply! Possible dependencies:
2a842acab109 ("block: introduce new block status code type")
b6800ec54c74 ("Make scsi_result_to_blk_status() recognize CONDITION MET")

v4.4.127: Failed to apply! Possible dependencies:
2a842acab109 ("block: introduce new block status code type")
b6800ec54c74 ("Make scsi_result_to_blk_status() recognize CONDITION MET")


Please let us know if you'd like to have this patch included in a stable tree.

--
Thanks,
Sasha

Re: usercopy whitelist woe in scsi_sense_cache

2018-04-10 Thread Oleksandr Natalenko

Hi.

10.04.2018 08:35, Oleksandr Natalenko wrote:

- does it reproduce _without_ hardened usercopy? (I would assume yes,
but you'd just not get any warning until the hangs started.) If it
does reproduce without hardened usercopy, then a new bisect run could
narrow the search even more.


Looks like it cannot be disabled via kernel cmdline, so I have to
re-compile the kernel, right? I can certainly do that anyway.


Okay, I've recompiled the kernel without hardened usercopy:

[root@archlinux ~]# zgrep USERCOPY /proc/config.gz
CONFIG_X86_INTEL_USERCOPY=y
CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y
# CONFIG_HARDENED_USERCOPY is not set

and I cannot reproduce the issue anymore. I/O doesn't hang regardless of 
how long I hammer it.


Eeeh? Maybe, this is a matter of some cleanup code path once the 
warn/bug condition is hit with hardening enabled? I'm just guessing here 
again.


Will work towards checking Linus' master branch now…

Regards,
  Oleksandr


Re: [PATCH v3 0/3] Report all request failures again to user space

2018-04-10 Thread Johannes Thumshirn
On Mon, Apr 09, 2018 at 09:35:21PM -0400, Martin K. Petersen wrote:
> 
> Johannes,
> 
> > I did start a series [1] for this but than got distracted by more urgent
> > things. I can pick it up again I think.
> >
> > [1]
> > https://git.kernel.org/pub/scm/linux/kernel/git/jth/linux.git/log/?h=iscsi_DID_REQUEUE
> 
> Definitely a step in the right direction.

Does anyone object getting rid of the Scsi_Cmnd typedef while I'm at
it?

This would ease my refactoring a lot, asI don't have to teach
coccinelle yet another case that multiplies the rules.

Thanks,
Johannes
-- 
Johannes Thumshirn  Storage
jthumsh...@suse.de+49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850


Re: [RFC PATCH] mpt3sas: mpt3sas_scsih_enclosure_find_by_handle can be static

2018-04-10 Thread Bart Van Assche
On Tue, 2018-04-10 at 09:15 +0200, Jaco Kroon wrote:
> I've not seen additional feedback on this (I may simply not be CCed).
> 
> I've applied the patch to one of our hosts where we've had endless IO
> lockups (with MQ enabled the host died within a day or two, sometimes
> sub one hour, without it typically ran for about two weeks).  With this
> patch (on top of 4.16) we're now at four days and 17 hours, with IO
> still going strong (including a mdadm reshape to add a disk, as well as
> a rebuild on a drive that failed - concurrently on two different arrays,
> same controller).  Very subjective, but the host also feels more
> responsive under heavy IO load.
> 
> What can I do from my side (I've got some development experience) to
> help push this patch forward?

Hello Jaco,

If you want to follow mpt3sas development please consider to subscribe to the
linux-scsi mailing list. Recently Broadcom posted a new mpt3sas patch series.
See also https://www.mail-archive.com/linux-scsi@vger.kernel.org/msg72823.html.

Bart.





[PATCH 2/2] scsi: st: Replace GFP_ATOMIC with GFP_KERNEL in new_tape_buffer

2018-04-10 Thread Jia-Ju Bai
new_tape_buffer() is never called in atomic context.

new_tape_buffer() is only called by st_probe(),
which is only set as ".probe" in struct scsi_driver.

Despite never getting called from atomic context,
new_tape_buffer() calls kzalloc() with GFP_ATOMIC,
which does not sleep for allocation.
GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL,
which can sleep and improve the possibility of sucessful allocation.

This is found by a static analysis tool named DCNS written by myself.
And I also manually check it.

Signed-off-by: Jia-Ju Bai 
---
 drivers/scsi/st.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 94e402e..b987f6d 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -3878,7 +3878,7 @@ static struct st_buffer *new_tape_buffer(int need_dma, 
int max_sg)
 {
struct st_buffer *tb;
 
-   tb = kzalloc(sizeof(struct st_buffer), GFP_ATOMIC);
+   tb = kzalloc(sizeof(struct st_buffer), GFP_KERNEL);
if (!tb) {
printk(KERN_NOTICE "st: Can't allocate new tape buffer.\n");
return NULL;
@@ -3889,7 +3889,7 @@ static struct st_buffer *new_tape_buffer(int need_dma, 
int max_sg)
tb->buffer_size = 0;
 
tb->reserved_pages = kzalloc(max_sg * sizeof(struct page *),
-GFP_ATOMIC);
+GFP_KERNEL);
if (!tb->reserved_pages) {
kfree(tb);
return NULL;
-- 
1.9.1



[PATCH 1/2] scsi: st: Replace GFP_ATOMIC with GFP_KERNEL in st_probe

2018-04-10 Thread Jia-Ju Bai
st_probe() is never called in atomic context.

st_probe() is only set as ".probe" in struct scsi_driver.

Despite never getting called from atomic context,
st_probe() calls kzalloc() with GFP_ATOMIC,
which does not sleep for allocation.
GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL,
which can sleep and improve the possibility of sucessful allocation.

This is found by a static analysis tool named DCNS written by myself.
And I also manually check it.

Signed-off-by: Jia-Ju Bai 
---
 drivers/scsi/st.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 94e402e..1da42f3 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -4290,7 +4290,7 @@ static int st_probe(struct device *dev)
goto out_buffer_free;
}
 
-   tpnt = kzalloc(sizeof(struct scsi_tape), GFP_ATOMIC);
+   tpnt = kzalloc(sizeof(struct scsi_tape), GFP_KERNEL);
if (tpnt == NULL) {
sdev_printk(KERN_ERR, SDp,
"st: Can't allocate device descriptor.\n");
-- 
1.9.1



RE: Regarding SCSI SANITIZE command support

2018-04-10 Thread Mahesh Rajashekhara
Hi Doug Gilbert,

Thanks for your response.

During the drive sanitize, INQUIRY and REPORT LUNS commands were working as 
expected. Drive supports all the 9 commands (sbc4r15.pdf, clause 4.11.2) during 
sanitize.

When the OS sends TUR commands, drive reports SenseKey: NOT READY, ASC: 04 and 
ASCQ: 1B "LOGICAL UNIT NOT READY, SANITIZE IN PROGRESS".

The OS SCSI upper layer driver is trying to read block 0. From the OS dmesg, I 
could see that SCSI READ 10 byte CDB (opcode: 0x28) have been issued by OS. 

Snippet of OS dmesg:

[ 7679.363737] not responding...
[ 7726.521694] sd 10:0:10:0: [sdb] FAILED Result: hostbyte=DID_OK 
driverbyte=DRIVER_SENSE
[ 7726.521706] sd 10:0:10:0: [sdb] Sense Key : Not Ready [current]
[ 7726.521711] sd 10:0:10:0: [sdb] Add. Sense: Logical unit not ready, sanitize 
in progress
[ 7726.521717] sd 10:0:10:0: [sdb] CDB: Read(10) 28 00 00 00 00 00 00 00 04 00
[ 7726.521722] blk_update_request: 15 callbacks suppressed
[ 7726.521726] blk_update_request: I/O error, dev sdb, sector 0
[ 7726.521731] buffer_io_error: 14 callbacks suppressed
[ 7726.521735] Buffer I/O error on dev sdb, logical block 0, async page read
[ 7726.522791] sd 10:0:10:0: [sdb] FAILED Result: hostbyte=DID_OK 
driverbyte=DRIVER_SENSE
[ 7726.522796] sd 10:0:10:0: [sdb] Sense Key : Not Ready [current]
[ 7726.522799] sd 10:0:10:0: [sdb] Add. Sense: Logical unit not ready, sanitize 
in progress
[ 7726.522803] sd 10:0:10:0: [sdb] CDB: Read(10) 28 00 00 00 00 04 00 00 04 00
[ 7726.522806] blk_update_request: I/O error, dev sdb, sector 4
[ 7726.522809] Buffer I/O error on dev sdb, logical block 1, async page read
[ 7726.523837] sd 10:0:10:0: [sdb] FAILED Result: hostbyte=DID_OK 
driverbyte=DRIVER_SENSE
[ 7726.523851] sd 10:0:10:0: [sdb] Sense Key : Not Ready [current]
[ 7726.523856] sd 10:0:10:0: [sdb] Add. Sense: Logical unit not ready, sanitize 
in progress
[ 7726.523861] sd 10:0:10:0: [sdb] CDB: Read(10) 28 00 00 00 00 00 00 00 04 00
[ 7726.523866] blk_update_request: I/O error, dev sdb, sector 0
[ 7726.523871] Buffer I/O error on dev sdb, logical block 0, async page read
[ 7726.524846] sd 10:0:10:0: [sdb] FAILED Result: hostbyte=DID_OK 
driverbyte=DRIVER_SENSE
[ 7726.524850] sd 10:0:10:0: [sdb] Sense Key : Not Ready [current]
[ 7726.524853] sd 10:0:10:0: [sdb] Add. Sense: Logical unit not ready, sanitize 
in progress
[ 7726.524856] sd 10:0:10:0: [sdb] CDB: Read(10) 28 00 00 00 00 04 00 00 04 00
[ 7726.524858] blk_update_request: I/O error, dev sdb, sector 4
[ 7726.524860] Buffer I/O error on dev sdb, logical block 1, async page read
[ 7726.525899] sd 10:0:10:0: [sdb] FAILED Result: hostbyte=DID_OK 
driverbyte=DRIVER_SENSE
[ 7726.525904] sd 10:0:10:0: [sdb] Sense Key : Not Ready [current]
[ 7726.525907] sd 10:0:10:0: [sdb] Add. Sense: Logical unit not ready, sanitize 
in progress
[ 7726.525910] sd 10:0:10:0: [sdb] CDB: Read(10) 28 00 00 00 00 00 00 00 04 00
[ 7726.525913] blk_update_request: I/O error, dev sdb, sector 0
[ 7726.525916] Buffer I/O error on dev sdb, logical block 0, async page read
[ 7726.526925] sd 10:0:10:0: [sdb] FAILED Result: hostbyte=DID_OK 
driverbyte=DRIVER_SENSE
[ 7726.526930] sd 10:0:10:0: [sdb] Sense Key : Not Ready [current]
[ 7726.526933] sd 10:0:10:0: [sdb] Add. Sense: Logical unit not ready, sanitize 
in progress
[ 7726.526936] sd 10:0:10:0: [sdb] CDB: Read(10) 28 00 00 00 00 04 00 00 04 00
[ 7726.526939] blk_update_request: I/O error, dev sdb, sector 4
[ 7726.526942] Buffer I/O error on dev sdb, logical block 1, async page read
[ 7726.527989] sd 10:0:10:0: [sdb] FAILED Result: hostbyte=DID_OK 
driverbyte=DRIVER_SENSE
[ 7726.527995] sd 10:0:10:0: [sdb] Sense Key : Not Ready [current]
[ 7726.527999] sd 10:0:10:0: [sdb] Add. Sense: Logical unit not ready, sanitize 
in progress
[ 7726.528003] sd 10:0:10:0: [sdb] CDB: Read(10) 28 00 00 00 00 00 00 00 04 00
[ 7726.528018] blk_update_request: I/O error, dev sdb, sector 0
[ 7726.528022] Buffer I/O error on dev sdb, logical block 0, async page read
[ 7726.528994] sd 10:0:10:0: [sdb] FAILED Result: hostbyte=DID_OK 
driverbyte=DRIVER_SENSE
[ 7726.528997] sd 10:0:10:0: [sdb] Sense Key : Not Ready [current]
[ 7726.529000] sd 10:0:10:0: [sdb] Add. Sense: Logical unit not ready, sanitize 
in progress
[ 7726.529003] sd 10:0:10:0: [sdb] CDB: Read(10) 28 00 00 00 00 04 00 00 04 00
[ 7726.529012] blk_update_request: I/O error, dev sdb, sector 4
[ 7726.529015] Buffer I/O error on dev sdb, logical block 1, async page read
[ 7726.530057] sd 10:0:10:0: [sdb] FAILED Result: hostbyte=DID_OK 
driverbyte=DRIVER_SENSE
[ 7726.530061] sd 10:0:10:0: [sdb] Sense Key : Not Ready [current]
[ 7726.530065] sd 10:0:10:0: [sdb] Add. Sense: Logical unit not ready, sanitize 
in progress
[ 7726.530068] sd 10:0:10:0: [sdb] CDB: Read(10) 28 00 00 00 00 00 00 00 04 00
[ 7726.530071] blk_update_request: I/O error, dev sdb, sector 0
[ 7726.530074] Buffer I/O error on dev sdb, logical block 0, async page read
[ 7726.531071] sd 10:0:10:0: 

Re: [PATCH] tcmu: fix error resetting qfull_time_out to default

2018-04-10 Thread Xiubo Li


On 2018/4/9 19:44, Prasanna Kumar Kalever wrote:

Problem:
---
$ cat /sys/kernel/config/target/core/user_0/block/attrib/qfull_time_out
-1

$ echo "-1" > /sys/kernel/config/target/core/user_0/block/attrib/qfull_time_out
-bash: echo: write error: Invalid argument

Fix:
---
This patch will help reset qfull_time_out to its default i.e. qfull_time_out=-1

Signed-off-by: Prasanna Kumar Kalever 
---
  drivers/target/target_core_user.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/drivers/target/target_core_user.c 
b/drivers/target/target_core_user.c
index 4ad89ea71a70..4f26bdc3d1dc 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -2121,6 +2121,8 @@ static ssize_t tcmu_qfull_time_out_store(struct 
config_item *item,
  
  	if (val >= 0) {

udev->qfull_time_out = val * MSEC_PER_SEC;
+   } else if (val == -1) {
+   udev->qfull_time_out = val;
} else {
printk(KERN_ERR "Invalid qfull timeout value %d\n", val);
return -EINVAL;


This looks fine to me.

@Mike, IMO the -1 could also be settable just like this case. And does 
it have any other special meaning expects to compatible to cmd_time_out ?


Thanks,



[PATCH] scsi: qla2xxx: reduce the time granularity of qla2x00_eh_wait_on_command

2018-04-10 Thread Jianchao Wang
If the cmd has not be returned after aborted by qla2x00_eh_abort,
we have to wait for it. However, the time is 1000ms at least currently.
If there are a lot cmds need to be aborted, the delay could be long
enough to lead to panic due to such as hung task, ocfs2 heartbeat,
etc, just before scsi recovery works.
Change the granularity to 1ms, even though more context switches
would be introduced, but it should be ok as it is not hot path.

Signed-off-by: Jianchao Wang 
---
 drivers/scsi/qla2xxx/qla_os.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 5c5dcca4..9f52ad9 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1072,7 +1072,7 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct 
scsi_cmnd *cmd,
 static int
 qla2x00_eh_wait_on_command(struct scsi_cmnd *cmd)
 {
-#define ABORT_POLLING_PERIOD   1000
+#define ABORT_POLLING_PERIOD   1
 #define ABORT_WAIT_ITER((2 * 1000) / (ABORT_POLLING_PERIOD))
unsigned long wait_iter = ABORT_WAIT_ITER;
scsi_qla_host_t *vha = shost_priv(cmd->device->host);
-- 
2.7.4



Re: [PATCH v7] scsi: new zorro_esp.c for Amiga Zorro NCR53C9x boards

2018-04-10 Thread Geert Uytterhoeven
Hi Michael,

On Tue, Apr 10, 2018 at 4:16 AM, Michael Schmitz  wrote:
> On Mon, Apr 9, 2018 at 7:50 PM, Christoph Hellwig  wrote:
>> On Sun, Apr 08, 2018 at 02:45:32PM +1200, Michael Schmitz wrote:
>>> New combined SCSI driver for all ESP based Zorro SCSI boards for
>>> m68k Amiga.
>>> + {
>>> + .id = 
>>> ZORRO_PROD_PHASE5_BLIZZARD_1230_II_FASTLANE_Z3_CYBERSCSI_CYBERSTORM060,
>>
>> In PCI Land we've usually stopped using PCI IDs unless they are used
>> in multiple

(missing "places"?)

> Short of a complete rewrite of the Zorro driver support code to be
> closer to what PCI does, I don' see what can be done about the use of
> Zorro IDs. I don't think such a rewrite is planned in the near future,
> Geert?

I think what Christoph means is the use of the define
ZORRO_PROD_PHASE5_BLIZZARD_1230_II_FASTLANE_Z3_CYBERSCSI_CYBERSTORM060
versus hardcoded numbers, or ZORRO_ID(PHASE5, 0x0B, 0).

We have a long list of ZORRO_PROD_* definitions in
include/uapi/linux/zorro_ids.h because of historical reasons.  The list
isn't really changing (no new IDs in git history) due to almost no new
Zorro boards being made, unlike for PCI, where keeping an in-kernel list
is a lot of work, and not desirable.

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


Re: *** SPAM *** Re: [RFC PATCH] mpt3sas: mpt3sas_scsih_enclosure_find_by_handle can be static

2018-04-10 Thread Jaco Kroon
Hi Martin, Bart,

I've not seen additional feedback on this (I may simply not be CCed).

I've applied the patch to one of our hosts where we've had endless IO
lockups (with MQ enabled the host died within a day or two, sometimes
sub one hour, without it typically ran for about two weeks).  With this
patch (on top of 4.16) we're now at four days and 17 hours, with IO
still going strong (including a mdadm reshape to add a disk, as well as
a rebuild on a drive that failed - concurrently on two different arrays,
same controller).  Very subjective, but the host also feels more
responsive under heavy IO load.

What can I do from my side (I've got some development experience) to
help push this patch forward?

Kind Regards,
Jaco


On 28/03/2018 23:54, Martin K. Petersen wrote:
> Bart,
>
>> Are you aware that if the 0-day test infrastructure suggests an improvement
>> for a patch that the patch that that improvement applies to gets ignored
>> unless either the patch is reposted with the improvement applied or that it
>> is explained why the suggested improvement is inappropriate?
> Correct. I don't apply anything that causes a 0-day warning. The patch
> will be closed with "Changes Required" status in patchwork.
>
> Always build patch submissions to linux-scsi with:
>
>   make C=1 CF="-D__CHECK_ENDIAN__"
>



Re: usercopy whitelist woe in scsi_sense_cache

2018-04-10 Thread Kees Cook
On Mon, Apr 9, 2018 at 11:35 PM, Oleksandr Natalenko
 wrote:
> Did your system hang on smartctl hammering too? Have you got some stack
> traces to compare with mine ones?

Unfortunately I only had a single hang with no dumps. I haven't been
able to reproduce it since. :(

-Kees

-- 
Kees Cook
Pixel Security


Re: usercopy whitelist woe in scsi_sense_cache

2018-04-10 Thread Oleksandr Natalenko

Hi.

09.04.2018 22:30, Kees Cook wrote:

echo 1 | tee /sys/block/sd*/queue/nr_requests


I can't get this below "4".


Oops, yeah. It cannot be less than BLKDEV_MIN_RQ (which is 4), so it is 
enforced explicitly in queue_requests_store(). It is the same for me.



echo 1 | tee /sys/block/sd*/device/queue_depth


I've got this now too.
Ah! dm-crypt too. I'll see if I can get that added easily to my tests.
And XFS! You love your corner cases. ;)


Yeah, so far this wonderful configuration has allowed me to uncover a 
bunch of bugs, and see, we are not done yet ;).



Two other questions, since you can reproduce this easily:
- does it reproduce _without_ hardened usercopy? (I would assume yes,
but you'd just not get any warning until the hangs started.) If it
does reproduce without hardened usercopy, then a new bisect run could
narrow the search even more.


Looks like it cannot be disabled via kernel cmdline, so I have to 
re-compile the kernel, right? I can certainly do that anyway.



- does it reproduce with Linus's current tree?


Will try this too.


What would imply missing locking, yes? Yikes. But I'd expect
use-after-free or something, or bad data, not having the pointer slip
forward?


I still think this has something to do with blk-mq re-queuing capability 
and how BFQ implements it, because there are no sings of issue popping 
up with Kyber so far.



Quick update: I added dm-crypt (with XFS on top) and it hung my system
almost immediately. I got no warnings at all, though.


Did your system hang on smartctl hammering too? Have you got some stack 
traces to compare with mine ones?


Regards,
  Oleksandr


Re: [PATCH 1/1] scsi/ufs: qcom: Don't enable PHY_QCOM_UFS by default

2018-04-10 Thread Vivek Gautam



On 4/10/2018 1:39 AM, Bjorn Andersson wrote:

On Mon 09 Apr 10:38 PDT 2018, Vivek Gautam wrote:

On 4/9/2018 10:21 PM, Bjorn Andersson wrote:

On Mon 09 Apr 06:24 PDT 2018, Vivek Gautam wrote:

[..]

diff --git a/include/linux/phy/phy-qcom-ufs.h b/include/linux/phy/phy-qcom-ufs.h
index 0a2c18a9771d..1388c2a2965e 100644
--- a/include/linux/phy/phy-qcom-ufs.h
+++ b/include/linux/phy/phy-qcom-ufs.h
@@ -31,8 +31,21 @@ void ufs_qcom_phy_enable_dev_ref_clk(struct phy *phy);
*/
   void ufs_qcom_phy_disable_dev_ref_clk(struct phy *phy);
+#if IS_ENABLED(CONFIG_PHY_QCOM_UFS)
   int ufs_qcom_phy_set_tx_lane_enable(struct phy *phy, u32 tx_lanes);
   void ufs_qcom_phy_save_controller_version(struct phy *phy,
-   u8 major, u16 minor, u16 step);
+ u8 major, u16 minor, u16 step);
+#else
+static inline int ufs_qcom_phy_set_tx_lane_enable(struct phy *phy, u32 
tx_lanes)
+{
+   return -ENOSYS;
+}
+
+static inline void ufs_qcom_phy_save_controller_version(struct phy *phy,
+   u8 major, u16 minor,
+   u16 step)
+{
+}
+#endif /* PHY_QCOM_UFS */

What's the timeline for getting rid of the references to these
functions? I presume that code depending on these being here will
compile but won't actually work?

Yes, these inline definitions are just to keep ufs-qcom happy with the
direct
calls that it makes to these functions.
As you would know these couple of functions are just used by the 20nm phy.
However, we don't have any platform yet in the upstream that enables this
phy.
I am hoping that we will eventually get rid of these functions when we
further
clean up ufs-qcom driver.


I see, but that means that we're calling this function with a struct phy
that might not be a struct ufs_qcom_phy and as such a defconfig with
both enabled will have undefined outcome for the migrated phys.


No, we will have to add support for separate phys as sdm845 has phy per 
each lane,

and the older struct phy will exist alongside.
We will call this function only with the older phy pointer.


In particular we do expect that the same kernel will boot on db820c and
sdm845-mtp, so we will have to enable support for the 14nm & 20nm phy
driver (and we don't want random crashes because someone happened to
enable it).


Right, so we create new struct phy while keeping older one intact to 
keep the

ufs-qcom work with both - ufs_qcom_phy and qmp_phy.
Some of the controller drivers, such as usb/dwc3/ keep support for old 
and new phys,

although there the difference is between generic phy and the usb-phy.
So, I am assuming that if we want to keep ufs-qcom on platforms using 20nm,
14nm and 10nm phys happy, we will have to keep the phys separately for 
sometime.

What do you say about it?

On db820c, we can still work with the ufs_qcom_phy.

Regards
Vivek



So either we add the 10nm support to the existing driver or I think we
should migrate, at least, the 14nm support to the QMP driver (and mark
the remaining 20nm BROKEN for now).

Regardless of the path chosen this should be cleaned up to the point
where all three phys are supported.

Regards,
Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[PATCH] Waiting for scsi_host_template release

2018-04-10 Thread Anatoliy Glagolev
Description:
SCSI mid-layer may hold references to Scsi_Host structs when
the owning module has already unloaded. Scsi_Host release path
touches scsi_host_template struct that is usually allocated
in the unloaded module's memory. That results in a crash.
To work around the problem, this change implements
scsi_host_template_release API to be called at driver unload
path to make sure all Scsi_Host structs are gone before
releasing scsi_host_template memory.

---
 drivers/scsi/hosts.c  |  2 ++
 drivers/scsi/qla2xxx/qla_os.c |  2 ++
 drivers/scsi/scsi_priv.h  |  1 +
 drivers/scsi/scsi_proc.c  | 64 +++
 include/scsi/scsi_host.h  | 17 
 5 files changed, 80 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 82ac1cd..51c9e2b 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -353,6 +353,8 @@ static void scsi_host_dev_release(struct device *dev)
  blk_free_tags(shost->bqt);
  }

+ scsi_host_template_release(shost->hostt);
+
  kfree(shost->shost_data);

  if (parent)
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index fb8beee..e913b8b 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -270,6 +270,7 @@ struct scsi_host_template qla2xxx_driver_template = {

  .supported_mode = MODE_INITIATOR,
  .track_queue_depth = 1,
+ .wait_on_unload = 1,
 };

 static struct scsi_transport_template *qla2xxx_transport_template = NULL;
@@ -6116,6 +6117,7 @@ qla2x00_module_exit(void)
  kmem_cache_destroy(ctx_cachep);
  fc_release_transport(qla2xxx_transport_template);
  fc_release_transport(qla2xxx_transport_vport_template);
+ scsi_host_template_wait(_driver_template);
 }

 module_init(qla2x00_module_init);
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 4d01cdb..c76cb6e2 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -99,6 +99,7 @@ extern struct kmem_cache *scsi_sdb_cache;
 #ifdef CONFIG_SCSI_PROC_FS
 extern void scsi_proc_hostdir_add(struct scsi_host_template *);
 extern void scsi_proc_hostdir_rm(struct scsi_host_template *);
+extern void scsi_host_template_release(struct scsi_host_template *);
 extern void scsi_proc_host_add(struct Scsi_Host *);
 extern void scsi_proc_host_rm(struct Scsi_Host *);
 extern int scsi_init_procfs(void);
diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c
index 251598e..daf2c99 100644
--- a/drivers/scsi/scsi_proc.c
+++ b/drivers/scsi/scsi_proc.c
@@ -99,15 +99,21 @@ static const struct file_operations proc_scsi_fops = {

 void scsi_proc_hostdir_add(struct scsi_host_template *sht)
 {
- if (!sht->show_info)
+ if (!sht->show_info && !sht->wait_on_unload)
  return;

  mutex_lock(_host_template_mutex);
  if (!sht->present++) {
- sht->proc_dir = proc_mkdir(sht->proc_name, proc_scsi);
-if (!sht->proc_dir)
- printk(KERN_ERR "%s: proc_mkdir failed for %s\n",
-__func__, sht->proc_name);
+ if (sht->show_info) {
+ sht->proc_dir = proc_mkdir(sht->proc_name, proc_scsi);
+ if (!sht->proc_dir)
+ printk(KERN_ERR "%s: proc_mkdir failed for %s\n",
+__func__, sht->proc_name);
+ }
+ if (sht->wait_on_unload) {
+ sema_init(>release_sem, 0);
+ sht->release_sem_waiters = 0;
+ }
  }
  mutex_unlock(_host_template_mutex);
 }
@@ -118,7 +124,7 @@ void scsi_proc_hostdir_add(struct scsi_host_template *sht)
  */
 void scsi_proc_hostdir_rm(struct scsi_host_template *sht)
 {
- if (!sht->show_info)
+ if (!sht->show_info && !sht->wait_on_unload)
  return;

  mutex_lock(_host_template_mutex);
@@ -126,9 +132,55 @@ void scsi_proc_hostdir_rm(struct scsi_host_template *sht)
  remove_proc_entry(sht->proc_name, proc_scsi);
  sht->proc_dir = NULL;
  }
+ if (sht->wait_on_unload)
+ ++sht->scsi_host_cleanups;
  mutex_unlock(_host_template_mutex);
 }

+/**
+ * scsi_host_template_release - runs when a user of scsi_host_template
+ * (like Scsi_Host) is gone.
+ * @sht: pointer to scsi_host_template
+*/
+void scsi_host_template_release(struct scsi_host_template *sht)
+{
+ int release_sem_waiters = 0;
+ struct semaphore *release_sem;
+ if (!sht->wait_on_unload)
+ return;
+
+ mutex_lock(_host_template_mutex);
+ if (!--sht->scsi_host_cleanups && !sht->present) {
+ release_sem = >release_sem;
+ release_sem_waiters = sht->release_sem_waiters;
+ sht->release_sem_waiters = 0;
+ }
+ mutex_unlock(_host_template_mutex);
+ for (; release_sem_waiters; --release_sem_waiters)
+ up(release_sem);
+}
+
+/**
+ * scsi_host_template_wait - Waits till all references to
+ * scsi_host_template are gone
+ * @sht: pointer to scsi_host_template
+*/
+void scsi_host_template_wait(struct scsi_host_template *sht)
+{
+ unsigned char present = 0;
+ struct semaphore *release_sem;
+ if (!sht->wait_on_unload)
+ return;
+ mutex_lock(_host_template_mutex);
+ present = sht->present + sht->scsi_host_cleanups;
+ if (present) {
+ ++sht->release_sem_waiters;
+ release_sem = >release_sem;
+ }
+