The branch main has been updated by imp:

URL: https://cgit.FreeBSD.org/src/commit/?id=d5507f9e436698ac17dc5ace7ef58493988a9b04

commit d5507f9e436698ac17dc5ace7ef58493988a9b04
Author:     Warner Losh <[email protected]>
AuthorDate: 2024-08-14 22:55:49 +0000
Commit:     Warner Losh <[email protected]>
CommitDate: 2024-08-16 02:22:18 +0000

    nvme: Separate total failures from I/O failures
    
    When it's a I/O failure, we can still send admin commands. Separate out
    the admin failures and flag them as such so that we can still send admin
    commands on half-failed drives.
    
    Fixes: 9229b3105d88 (nvme: Fail passthrough commands right away in failed state)
    Sponsored by: Netflix
---
 sys/amd64/conf/IOSCHED                             |    2 +
 sys/amd64/conf/MPI3MR                              |   10 +
 sys/arm64/conf/GENERIC16K                          |    4 +
 .../linuxkpi/common/include/linux/#compiler.h#     |  117 +
 sys/contrib/dev/iwlwifi/fw/api/soc.h               |   35 +
 sys/contrib/zlib/contrib/asm686/README.686         |   51 +
 sys/contrib/zlib/contrib/asm686/match.S            |  357 +
 sys/dev/ice/ice_sriov.c                            |  595 ++
 sys/dev/ice/ice_sriov.h                            |   64 +
 sys/dev/mps/mpi/mpi2_pci.h                         |  141 +
 sys/dev/nvme/nvme_ctrlr.c                          |   46 +-
 sys/dev/nvme/nvme_private.h                        |    1 +
 sys/dev/nvme/nvme_qpair.c                          |   23 +-
 sys/dev/nvme/nvme_sim.c                            |   13 +-
 sys/dev/sound/pci/aureal.c                         |  686 ++
 sys/dev/sound/pci/aureal.h                         |   99 +
 sys/dev/sound/pci/ds1-fw.h                         | 1602 ++++
 sys/dev/sound/pci/ds1.c                            | 1103 +++
 sys/dev/sound/pci/ds1.h                            |  146 +
 sys/dev/sound/pci/maestro.c                        | 2043 +++++
 sys/dev/sound/pci/maestro_reg.h                    |  381 +
 sys/kern/bsduser-syscalls.c                        | 8712 ++++++++++++++++++++
 sys/modules/sound/driver/ds1/Makefile              |    8 +
 sys/modules/sound/driver/maestro/Makefile          |    8 +
 24 files changed, 16219 insertions(+), 28 deletions(-)

diff --git a/sys/amd64/conf/IOSCHED b/sys/amd64/conf/IOSCHED
new file mode 100644
index 000000000000..e15106bc4c1f
--- /dev/null
+++ b/sys/amd64/conf/IOSCHED
@@ -0,0 +1,2 @@
+include "GENERIC"
+options CAM_IOSCHED_DYNAMIC
diff --git a/sys/amd64/conf/MPI3MR b/sys/amd64/conf/MPI3MR
new file mode 100644
index 000000000000..99e5244cb49d
--- /dev/null
+++ b/sys/amd64/conf/MPI3MR
@@ -0,0 +1,10 @@
+include GENERIC
+
+device mpi3mr
+# All the debugging options
+options DEADLKRES # Enable the deadlock resolver
+options INVARIANTS # Enable calls of extra sanity checking
+options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS
+options QUEUE_MACRO_DEBUG_TRASH # Trash queue(2) internal pointers on invalidation
+options WITNESS # Enable checks to detect deadlocks and cycles
+options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed
diff --git a/sys/arm64/conf/GENERIC16K b/sys/arm64/conf/GENERIC16K
new file mode 100644
index 000000000000..9bf9e2dadb08
--- /dev/null
+++ b/sys/arm64/conf/GENERIC16K
@@ -0,0 +1,4 @@
+include                "GENERIC"
+
+ident          GENERIC_16K
+
diff --git a/sys/compat/linuxkpi/common/include/linux/#compiler.h# b/sys/compat/linuxkpi/common/include/linux/#compiler.h#
new file mode 100644
index 000000000000..1177674aa68f
--- /dev/null
+++ b/sys/compat/linuxkpi/common/include/linux/#compiler.h#
@@ -0,0 +1,117 @@
+/*-
+ * Copyright (c) 2010 Isilon Systems, Inc.
+ * Copyright (c) 2010 iX Systems, Inc.
+ * Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013-2016 Mellanox Technologies, Ltd.
+ * Copyright (c) 2015 François Tigeot
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#ifndef        _LINUX_COMPILER_H_
+#define        _LINUX_COMPILER_H_
+
+#include <sys/cdefs.h>
+
+#define __user
+#define __kernel
+#define __safe
+#define __force
+#define __nocast
+#define __iomem
+#define __chk_user_ptr(x)              ((void)0)
+#define __chk_io_ptr(x)                        ((void)0)
+#define __builtin_warning(x, y...)     (1)
+#define __acquires(x)
+#define __releases(x)
+#define __acquire(x)                   do { } while (0)
+#define __release(x)                   do { } while (0)
+#define __cond_lock(x,c)               (c)
+#define        __bitwise
+#define __devinitdata
+#define        __deprecated
+#define __init
+#define        __initconst
+#define        __devinit
+#define        __devexit
+#define __exit
+#define        __rcu
+#define        __percpu
+#define        __weak __weak_symbol
+#define        __malloc
+#define        ___stringify(...)               #__VA_ARGS__
+#define        __stringify(...)                ___stringify(__VA_ARGS__)
+#define        __attribute_const__             __attribute__((__const__))
+#undef __always_inline
+#define        __always_inline                 inline
+#define        noinline                        __noinline
+#define        ____cacheline_aligned           __aligned(CACHE_LINE_SIZE)
+
+#define        likely(x)                       __builtin_expect(!!(x), 1)
+#define        unlikely(x)                     __builtin_expect(!!(x), 0)
+#define typeof(x)                      __typeof(x)
+
+#define        uninitialized_var(x)            x = x
+#define        __maybe_unused                  __unused
+#define        __always_unused                 __unused
+#define        __must_check                    __result_use_check
+
+#define        __printf(a,b)                   __printflike(a,b)
+
+#define        barrier()                       __asm__ __volatile__("": : :"memory")
+
+#if defined(LINUXKPI_VERSION) && LINUXKPI_VERSION >= 50000
+/* Moved from drm_os_freebsd.h */
+#define        lower_32_bits(n)                ((u32)(n))
+#define        upper_32_bits(n)                ((u32)(((n) >> 16) >> 16))
+#endif
+
+#define        ___PASTE(a,b) a##b
+#define        __PASTE(a,b) ___PASTE(a,b)
+
+#define        ACCESS_ONCE(x)                  (*(volatile __typeof(x) *)&(x))
+
+#define        WRITE_ONCE(x,v) do {            \
+       barrier();                      \
+       ACCESS_ONCE(x) = (v);           \
+       barrier();                      \
+} while (0)
+
+#define        READ_ONCE(x) ({                 \
+       __typeof(x) __var = ({          \
+               barrier();              \
+               ACCESS_ONCE(x);         \
+       });                             \
+       barrier();                      \
+       __var;                          \
+})
+
+#define        lockless_dereference(p) READ_ONCE(p)
+
+#define        _AT(T,X)        ((T)(X))
+
+#define        __same_type(a, b)       __builtin_types_compatible_p(typeof(a), typeof(b))
+#define        __must_be_array(a)      __same_type(a, &(a)[0])
+
+#endif /* _LINUX_COMPILER_H_ */
diff --git a/sys/contrib/dev/iwlwifi/fw/api/soc.h b/sys/contrib/dev/iwlwifi/fw/api/soc.h
new file mode 100644
index 000000000000..c5df1171462b
--- /dev/null
+++ b/sys/contrib/dev/iwlwifi/fw/api/soc.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/*
+ * Copyright (C) 2012-2014, 2019-2020 Intel Corporation
+ * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
+ * Copyright (C) 2016-2017 Intel Deutschland GmbH
+ */
+#ifndef __iwl_fw_api_soc_h__
+#define __iwl_fw_api_soc_h__
+
+#define SOC_CONFIG_CMD_FLAGS_DISCRETE          BIT(0)
+#define SOC_CONFIG_CMD_FLAGS_LOW_LATENCY       BIT(1)
+
+#define SOC_FLAGS_LTR_APPLY_DELAY_MASK         0xc
+#define SOC_FLAGS_LTR_APPLY_DELAY_NONE         0
+#define SOC_FLAGS_LTR_APPLY_DELAY_200          1
+#define SOC_FLAGS_LTR_APPLY_DELAY_2500         2
+#define SOC_FLAGS_LTR_APPLY_DELAY_1820         3
+
+/**
+ * struct iwl_soc_configuration_cmd - Set device stabilization latency
+ *
+ * @flags: soc settings flags.  In VER_1, we can only set the DISCRETE
+ *     flag, because the FW treats the whole value as an integer. In
+ *     VER_2, we can set the bits independently.
+ * @latency: time for SOC to ensure stable power & XTAL
+ */
+struct iwl_soc_configuration_cmd {
+       __le32 flags;
+       __le32 latency;
+} __packed; /*
+            * SOC_CONFIGURATION_CMD_S_VER_1 (see description above)
+            * SOC_CONFIGURATION_CMD_S_VER_2
+            */
+
+#endif /* __iwl_fw_api_soc_h__ */
diff --git a/sys/contrib/zlib/contrib/asm686/README.686 b/sys/contrib/zlib/contrib/asm686/README.686
new file mode 100644
index 000000000000..a0bf3bea4aff
--- /dev/null
+++ b/sys/contrib/zlib/contrib/asm686/README.686
@@ -0,0 +1,51 @@
+This is a patched version of zlib, modified to use
+Pentium-Pro-optimized assembly code in the deflation algorithm. The
+files changed/added by this patch are:
+
+README.686
+match.S
+
+The speedup that this patch provides varies, depending on whether the
+compiler used to build the original version of zlib falls afoul of the
+PPro's speed traps. My own tests show a speedup of around 10-20% at
+the default compression level, and 20-30% using -9, against a version
+compiled using gcc 2.7.2.3. Your mileage may vary.
+
+Note that this code has been tailored for the PPro/PII in particular,
+and will not perform particuarly well on a Pentium.
+
+If you are using an assembler other than GNU as, you will have to
+translate match.S to use your assembler's syntax. (Have fun.)
+
+Brian Raiter
[email protected]
+April, 1998
+
+
+Added for zlib 1.1.3:
+
+The patches come from
+http://www.muppetlabs.com/~breadbox/software/assembly.html
+
+To compile zlib with this asm file, copy match.S to the zlib directory
+then do:
+
+CFLAGS="-O3 -DASMV" ./configure
+make OBJA=match.o
+
+
+Update:
+
+I've been ignoring these assembly routines for years, believing that
+gcc's generated code had caught up with it sometime around gcc 2.95
+and the major rearchitecting of the Pentium 4. However, I recently
+learned that, despite what I believed, this code still has some life
+in it. On the Pentium 4 and AMD64 chips, it continues to run about 8%
+faster than the code produced by gcc 4.1.
+
+In acknowledgement of its continuing usefulness, I've altered the
+license to match that of the rest of zlib. Share and Enjoy!
+
+Brian Raiter
[email protected]
+April, 2007
diff --git a/sys/contrib/zlib/contrib/asm686/match.S b/sys/contrib/zlib/contrib/asm686/match.S
new file mode 100644
index 000000000000..fa421092785d
--- /dev/null
+++ b/sys/contrib/zlib/contrib/asm686/match.S
@@ -0,0 +1,357 @@
+/* match.S -- x86 assembly version of the zlib longest_match() function.
+ * Optimized for the Intel 686 chips (PPro and later).
+ *
+ * Copyright (C) 1998, 2007 Brian Raiter <[email protected]>
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the author be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+#ifndef NO_UNDERLINE
+#define        match_init      _match_init
+#define        longest_match   _longest_match
+#endif
+
+#define        MAX_MATCH       (258)
+#define        MIN_MATCH       (3)
+#define        MIN_LOOKAHEAD   (MAX_MATCH + MIN_MATCH + 1)
+#define        MAX_MATCH_8     ((MAX_MATCH + 7) & ~7)
+
+/* stack frame offsets */
+
+#define        chainlenwmask           0       /* high word: current chain len */
+                                       /* low word: s->wmask           */
+#define        window                  4       /* local copy of s->window      */
+#define        windowbestlen           8       /* s->window + bestlen          */
+#define        scanstart               16      /* first two bytes of string    */
+#define        scanend                 12      /* last two bytes of string     */
+#define        scanalign               20      /* dword-misalignment of string */
+#define        nicematch               24      /* a good enough match size     */
+#define        bestlen                 28      /* size of best match so far    */
+#define        scan                    32      /* ptr to string wanting match  */
+
+#define        LocalVarsSize           (36)
+/*     saved ebx               36 */
+/*     saved edi               40 */
+/*     saved esi               44 */
+/*     saved ebp               48 */
+/*     return address          52 */
+#define        deflatestate            56      /* the function arguments       */
+#define        curmatch                60
+
+/* All the +zlib1222add offsets are due to the addition of fields
+ *  in zlib in the deflate_state structure since the asm code was first written
+ * (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
+ * (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
+ * if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8").
+ */
+
+#define zlib1222add            (8)
+
+#define        dsWSize                 (36+zlib1222add)
+#define        dsWMask                 (44+zlib1222add)
+#define        dsWindow                (48+zlib1222add)
+#define        dsPrev                  (56+zlib1222add)
+#define        dsMatchLen              (88+zlib1222add)
+#define        dsPrevMatch             (92+zlib1222add)
+#define        dsStrStart              (100+zlib1222add)
+#define        dsMatchStart            (104+zlib1222add)
+#define        dsLookahead             (108+zlib1222add)
+#define        dsPrevLen               (112+zlib1222add)
+#define        dsMaxChainLen           (116+zlib1222add)
+#define        dsGoodMatch             (132+zlib1222add)
+#define        dsNiceMatch             (136+zlib1222add)
+
+
+.file "match.S"
+
+.globl match_init, longest_match
+
+.text
+
+/* uInt longest_match(deflate_state *deflatestate, IPos curmatch) */
+.cfi_sections  .debug_frame
+
+longest_match:
+
+.cfi_startproc
+/* Save registers that the compiler may be using, and adjust %esp to   */
+/* make room for our stack frame.                                      */
+
+               pushl   %ebp
+               .cfi_def_cfa_offset 8
+               .cfi_offset ebp, -8
+               pushl   %edi
+               .cfi_def_cfa_offset 12
+               pushl   %esi
+               .cfi_def_cfa_offset 16
+               pushl   %ebx
+               .cfi_def_cfa_offset 20
+               subl    $LocalVarsSize, %esp
+               .cfi_def_cfa_offset LocalVarsSize+20
+
+/* Retrieve the function arguments. %ecx will hold cur_match           */
+/* throughout the entire function. %edx will hold the pointer to the   */
+/* deflate_state structure during the function's setup (before         */
+/* entering the main loop).                                            */
+
+               movl    deflatestate(%esp), %edx
+               movl    curmatch(%esp), %ecx
+
+/* uInt wmask = s->w_mask;                                             */
+/* unsigned chain_length = s->max_chain_length;                                */
+/* if (s->prev_length >= s->good_match) {                              */
+/*     chain_length >>= 2;                                             */
+/* }                                                                   */
+ 
+               movl    dsPrevLen(%edx), %eax
+               movl    dsGoodMatch(%edx), %ebx
+               cmpl    %ebx, %eax
+               movl    dsWMask(%edx), %eax
+               movl    dsMaxChainLen(%edx), %ebx
+               jl      LastMatchGood
+               shrl    $2, %ebx
+LastMatchGood:
+
+/* chainlen is decremented once beforehand so that the function can    */
+/* use the sign flag instead of the zero flag for the exit test.       */
+/* It is then shifted into the high word, to make room for the wmask   */
+/* value, which it will always accompany.                              */
+
+               decl    %ebx
+               shll    $16, %ebx
+               orl     %eax, %ebx
+               movl    %ebx, chainlenwmask(%esp)
+
+/* if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;     */
+
+               movl    dsNiceMatch(%edx), %eax
+               movl    dsLookahead(%edx), %ebx
+               cmpl    %eax, %ebx
+               jl      LookaheadLess
+               movl    %eax, %ebx
+LookaheadLess: movl    %ebx, nicematch(%esp)
+
+/* register Bytef *scan = s->window + s->strstart;                     */
+
+               movl    dsWindow(%edx), %esi
+               movl    %esi, window(%esp)
+               movl    dsStrStart(%edx), %ebp
+               lea     (%esi,%ebp), %edi
+               movl    %edi, scan(%esp)
+
+/* Determine how many bytes the scan ptr is off from being             */
+/* dword-aligned.                                                      */
+
+               movl    %edi, %eax
+               negl    %eax
+               andl    $3, %eax
+               movl    %eax, scanalign(%esp)
+
+/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ?                      */
+/*     s->strstart - (IPos)MAX_DIST(s) : NIL;                          */
+
+               movl    dsWSize(%edx), %eax
+               subl    $MIN_LOOKAHEAD, %eax
+               subl    %eax, %ebp
+               jg      LimitPositive
+               xorl    %ebp, %ebp
+LimitPositive:
+
+/* int best_len = s->prev_length;                                      */
+
+               movl    dsPrevLen(%edx), %eax
+               movl    %eax, bestlen(%esp)
+
+/* Store the sum of s->window + best_len in %esi locally, and in %esi. */
+
+               addl    %eax, %esi
+               movl    %esi, windowbestlen(%esp)
+
+/* register ush scan_start = *(ushf*)scan;                             */
+/* register ush scan_end   = *(ushf*)(scan+best_len-1);                        */
+/* Posf *prev = s->prev;                                               */
+
+               movzwl  (%edi), %ebx
+               movl    %ebx, scanstart(%esp)
+               movzwl  -1(%edi,%eax), %ebx
+               movl    %ebx, scanend(%esp)
+               movl    dsPrev(%edx), %edi
+
+/* Jump into the main loop.                                            */
+
+               movl    chainlenwmask(%esp), %edx
+               jmp     LoopEntry
+
+.balign 16
+
+/* do {
+ *     match = s->window + cur_match;
+ *     if (*(ushf*)(match+best_len-1) != scan_end ||
+ *         *(ushf*)match != scan_start) continue;
+ *     [...]
+ * } while ((cur_match = prev[cur_match & wmask]) > limit
+ *          && --chain_length != 0);
+ *
+ * Here is the inner loop of the function. The function will spend the
+ * majority of its time in this loop, and majority of that time will
+ * be spent in the first ten instructions.
+ *
+ * Within this loop:
+ * %ebx = scanend
+ * %ecx = curmatch
+ * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
+ * %esi = windowbestlen - i.e., (window + bestlen)
+ * %edi = prev
+ * %ebp = limit
+ */
+LookupLoop:
+               andl    %edx, %ecx
+               movzwl  (%edi,%ecx,2), %ecx
+               cmpl    %ebp, %ecx
+               jbe     LeaveNow
+               subl    $0x00010000, %edx
+               js      LeaveNow
+LoopEntry:     movzwl  -1(%esi,%ecx), %eax
+               cmpl    %ebx, %eax
+               jnz     LookupLoop
+               movl    window(%esp), %eax
+               movzwl  (%eax,%ecx), %eax
+               cmpl    scanstart(%esp), %eax
+               jnz     LookupLoop
+
+/* Store the current value of chainlen.                                        */
+
+               movl    %edx, chainlenwmask(%esp)
+
+/* Point %edi to the string under scrutiny, and %esi to the string we  */
+/* are hoping to match it up with. In actuality, %esi and %edi are     */
+/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is     */
+/* initialized to -(MAX_MATCH_8 - scanalign).                          */
+
+               movl    window(%esp), %esi
+               movl    scan(%esp), %edi
+               addl    %ecx, %esi
+               movl    scanalign(%esp), %eax
+               movl    $(-MAX_MATCH_8), %edx
+               lea     MAX_MATCH_8(%edi,%eax), %edi
+               lea     MAX_MATCH_8(%esi,%eax), %esi
+
+/* Test the strings for equality, 8 bytes at a time. At the end,
+ * adjust %edx so that it is offset to the exact byte that mismatched.
+ *
+ * We already know at this point that the first three bytes of the
+ * strings match each other, and they can be safely passed over before
+ * starting the compare loop. So what this code does is skip over 0-3
+ * bytes, as much as necessary in order to dword-align the %edi
+ * pointer. (%esi will still be misaligned three times out of four.)
+ *
+ * It should be confessed that this loop usually does not represent
+ * much of the total running time. Replacing it with a more
+ * straightforward "rep cmpsb" would not drastically degrade
+ * performance.
+ */
+LoopCmps:
+               movl    (%esi,%edx), %eax
+               xorl    (%edi,%edx), %eax
+               jnz     LeaveLoopCmps
+               movl    4(%esi,%edx), %eax
+               xorl    4(%edi,%edx), %eax
+               jnz     LeaveLoopCmps4
+               addl    $8, %edx
+               jnz     LoopCmps
+               jmp     LenMaximum
+LeaveLoopCmps4:        addl    $4, %edx
+LeaveLoopCmps: testl   $0x0000FFFF, %eax
+               jnz     LenLower
+               addl    $2, %edx
+               shrl    $16, %eax
+LenLower:      subb    $1, %al
+               adcl    $0, %edx
+
+/* Calculate the length of the match. If it is longer than MAX_MATCH,  */
+/* then automatically accept it as the best possible match and leave.  */
+
+               lea     (%edi,%edx), %eax
+               movl    scan(%esp), %edi
+               subl    %edi, %eax
+               cmpl    $MAX_MATCH, %eax
+               jge     LenMaximum
+
+/* If the length of the match is not longer than the best match we     */
+/* have so far, then forget it and return to the lookup loop.          */
+
+               movl    deflatestate(%esp), %edx
+               movl    bestlen(%esp), %ebx
+               cmpl    %ebx, %eax
+               jg      LongerMatch
+               movl    windowbestlen(%esp), %esi
+               movl    dsPrev(%edx), %edi
+               movl    scanend(%esp), %ebx
+               movl    chainlenwmask(%esp), %edx
+               jmp     LookupLoop
+
+/*         s->match_start = cur_match;                                 */
+/*         best_len = len;                                             */
+/*         if (len >= nice_match) break;                               */
+/*         scan_end = *(ushf*)(scan+best_len-1);                       */
+
+LongerMatch:   movl    nicematch(%esp), %ebx
+               movl    %eax, bestlen(%esp)
+               movl    %ecx, dsMatchStart(%edx)
+               cmpl    %ebx, %eax
+               jge     LeaveNow
+               movl    window(%esp), %esi
+               addl    %eax, %esi
+               movl    %esi, windowbestlen(%esp)
+               movzwl  -1(%edi,%eax), %ebx
+               movl    dsPrev(%edx), %edi
+               movl    %ebx, scanend(%esp)
+               movl    chainlenwmask(%esp), %edx
+               jmp     LookupLoop
+
+/* Accept the current string, with the maximum possible length.                */
+
+LenMaximum:    movl    deflatestate(%esp), %edx
+               movl    $MAX_MATCH, bestlen(%esp)
+               movl    %ecx, dsMatchStart(%edx)
+
+/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len;          */
+/* return s->lookahead;                                                        */
+
+LeaveNow:
+               movl    deflatestate(%esp), %edx
+               movl    bestlen(%esp), %ebx
+               movl    dsLookahead(%edx), %eax
+               cmpl    %eax, %ebx
+               jg      LookaheadRet
+               movl    %ebx, %eax
+LookaheadRet:
+
+/* Restore the stack and return from whence we came.                   */
+
+               addl    $LocalVarsSize, %esp
+               .cfi_def_cfa_offset 20
+               popl    %ebx
+               .cfi_def_cfa_offset 16
+               popl    %esi
+               .cfi_def_cfa_offset 12
+               popl    %edi
+               .cfi_def_cfa_offset 8
+               popl    %ebp
+               .cfi_def_cfa_offset 4
+.cfi_endproc
+match_init:    ret
diff --git a/sys/dev/ice/ice_sriov.c b/sys/dev/ice/ice_sriov.c
new file mode 100644
index 000000000000..c0521e667fa2
--- /dev/null
+++ b/sys/dev/ice/ice_sriov.c
@@ -0,0 +1,595 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/*  Copyright (c) 2021, Intel Corporation
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice,
+ *      this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ *   3. Neither the name of the Intel Corporation nor the names of its
+ *      contributors may be used to endorse or promote products derived from
+ *      this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ice_common.h"
+#include "ice_sriov.h"
+
+/**
+ * ice_aq_send_msg_to_vf
+ * @hw: pointer to the hardware structure
+ * @vfid: VF ID to send msg
+ * @v_opcode: opcodes for VF-PF communication
+ * @v_retval: return error code
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ * @cd: pointer to command details
+ *
+ * Send message to VF driver (0x0802) using mailbox
+ * queue and asynchronously sending message via
+ * ice_sq_send_cmd() function
+ */
+enum ice_status
+ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval,
+                     u8 *msg, u16 msglen, struct ice_sq_cd *cd)
+{
+       struct ice_aqc_pf_vf_msg *cmd;
+       struct ice_aq_desc desc;
+
+       /* Build a default direct-command descriptor for the
+        * PF-to-VF mailbox send opcode (0x0802).
+        */
+       ice_fill_dflt_direct_cmd_desc(&desc, ice_mbx_opc_send_msg_to_vf);
+
+       /* The target VF is addressed through the command-specific params */
+       cmd = &desc.params.virt;
+       cmd->id = CPU_TO_LE32(vfid);
+
+       /* The virtchnl opcode and return value travel in the descriptor's
+        * cookie fields rather than in the message buffer itself.
+        */
+       desc.cookie_high = CPU_TO_LE32(v_opcode);
+       desc.cookie_low = CPU_TO_LE32(v_retval);
+
+       /* Only flag an attached buffer when there is an actual payload;
+        * presumably ICE_AQ_FLAG_RD tells firmware to read the buffer --
+        * confirm against the adminq flag definitions.
+        */
+       if (msglen)
+               desc.flags |= CPU_TO_LE16(ICE_AQ_FLAG_RD);
+
+       /* Queue on the mailbox queue; completion is asynchronous */
+       return ice_sq_send_cmd(hw, &hw->mailboxq, &desc, msg, msglen, cd);
+}
+
+/**
+ * ice_aq_send_msg_to_pf
+ * @hw: pointer to the hardware structure
+ * @v_opcode: opcodes for VF-PF communication
+ * @v_retval: return error code
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ * @cd: pointer to command details
+ *
+ * Send message to PF driver using mailbox queue. By default, this
+ * message is sent asynchronously, i.e. ice_sq_send_cmd()
+ * does not wait for completion before returning.
+ */
+enum ice_status
+ice_aq_send_msg_to_pf(struct ice_hw *hw, enum virtchnl_ops v_opcode,
+                     enum ice_status v_retval, u8 *msg, u16 msglen,
+                     struct ice_sq_cd *cd)
+{
+       struct ice_aq_desc desc;
+
+       /* Mirror of ice_aq_send_msg_to_vf() for the VF side: the virtchnl
+        * opcode and status ride in the descriptor cookies, and no VF id is
+        * needed since the PF is the implicit destination.
+        */
+       ice_fill_dflt_direct_cmd_desc(&desc, ice_mbx_opc_send_msg_to_pf);
+       desc.cookie_high = CPU_TO_LE32(v_opcode);
+       desc.cookie_low = CPU_TO_LE32(v_retval);
+
+       /* Attach the buffer flag only when a payload accompanies the command */
+       if (msglen)
+               desc.flags |= CPU_TO_LE16(ICE_AQ_FLAG_RD);
+
+       return ice_sq_send_cmd(hw, &hw->mailboxq, &desc, msg, msglen, cd);
+}
+
+/**
+ * ice_conv_link_speed_to_virtchnl
+ * @adv_link_support: determines the format of the returned link speed
+ * @link_speed: variable containing the link_speed to be converted
+ *
+ * Convert link speed supported by HW to link speed supported by virtchnl.
+ * If adv_link_support is true, then return link speed in Mbps. Else return
+ * link speed as a VIRTCHNL_LINK_SPEED_* casted to a u32. Note that the caller
+ * needs to cast back to an enum virtchnl_link_speed in the case where
+ * adv_link_support is false, but when adv_link_support is true the caller can
+ * expect the speed in Mbps.
+ */
+u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed)
+{
+       u32 speed;
+
+       /* Advanced link support: report the exact speed in Mbps using the
+        * one-to-one ICE_LINK_SPEED_* mapping.
+        */
+       if (adv_link_support)
+               switch (link_speed) {
+               case ICE_AQ_LINK_SPEED_10MB:
+                       speed = ICE_LINK_SPEED_10MBPS;
+                       break;
+               case ICE_AQ_LINK_SPEED_100MB:
+                       speed = ICE_LINK_SPEED_100MBPS;
+                       break;
+               case ICE_AQ_LINK_SPEED_1000MB:
+                       speed = ICE_LINK_SPEED_1000MBPS;
+                       break;
+               case ICE_AQ_LINK_SPEED_2500MB:
+                       speed = ICE_LINK_SPEED_2500MBPS;
+                       break;
+               case ICE_AQ_LINK_SPEED_5GB:
+                       speed = ICE_LINK_SPEED_5000MBPS;
+                       break;
+               case ICE_AQ_LINK_SPEED_10GB:
+                       speed = ICE_LINK_SPEED_10000MBPS;
+                       break;
+               case ICE_AQ_LINK_SPEED_20GB:
+                       speed = ICE_LINK_SPEED_20000MBPS;
+                       break;
+               case ICE_AQ_LINK_SPEED_25GB:
+                       speed = ICE_LINK_SPEED_25000MBPS;
+                       break;
+               case ICE_AQ_LINK_SPEED_40GB:
+                       speed = ICE_LINK_SPEED_40000MBPS;
+                       break;
+               case ICE_AQ_LINK_SPEED_50GB:
+                       speed = ICE_LINK_SPEED_50000MBPS;
+                       break;
+               case ICE_AQ_LINK_SPEED_100GB:
+                       speed = ICE_LINK_SPEED_100000MBPS;
+                       break;
+               default:
+                       /* Unrecognized AQ speed code */
+                       speed = ICE_LINK_SPEED_UNKNOWN;
+                       break;
+               }
+       else
+               /* Virtchnl speeds are not defined for every speed supported in
+                * the hardware. To maintain compatibility with older AVF
+                * drivers, while reporting the speed the new speed values are
+                * resolved to the closest known virtchnl speeds; note this
+                * rounds DOWN for intermediate rates (e.g. 2.5G/5G -> 1G,
+                * 50G/100G -> 40G).
+                */
+               switch (link_speed) {
+               case ICE_AQ_LINK_SPEED_10MB:
+               case ICE_AQ_LINK_SPEED_100MB:
+                       speed = (u32)VIRTCHNL_LINK_SPEED_100MB;
+                       break;
+               case ICE_AQ_LINK_SPEED_1000MB:
+               case ICE_AQ_LINK_SPEED_2500MB:
+               case ICE_AQ_LINK_SPEED_5GB:
+                       speed = (u32)VIRTCHNL_LINK_SPEED_1GB;
+                       break;
+               case ICE_AQ_LINK_SPEED_10GB:
+                       speed = (u32)VIRTCHNL_LINK_SPEED_10GB;
+                       break;
+               case ICE_AQ_LINK_SPEED_20GB:
+                       speed = (u32)VIRTCHNL_LINK_SPEED_20GB;
+                       break;
+               case ICE_AQ_LINK_SPEED_25GB:
+                       speed = (u32)VIRTCHNL_LINK_SPEED_25GB;
+                       break;
+               case ICE_AQ_LINK_SPEED_40GB:
+               case ICE_AQ_LINK_SPEED_50GB:
+               case ICE_AQ_LINK_SPEED_100GB:
+                       speed = (u32)VIRTCHNL_LINK_SPEED_40GB;
+                       break;
+               default:
+                       speed = (u32)VIRTCHNL_LINK_SPEED_UNKNOWN;
+                       break;
+               }
+
+       return speed;
+}
+
+/* The mailbox overflow detection algorithm helps to check if there
+ * is a possibility of a malicious VF transmitting too many MBX messages to the
+ * PF.
+ * 1. The mailbox snapshot structure, ice_mbx_snapshot, is initialized during
+ * driver initialization in ice_init_hw() using ice_mbx_init_snapshot().
+ * The struct ice_mbx_snapshot helps to track and traverse a static window of
+ * messages within the mailbox queue while looking for a malicious VF.
+ *
+ * 2. When the caller starts processing its mailbox queue in response to an
+ * interrupt, the structure ice_mbx_snapshot is expected to be cleared before
+ * the algorithm can be run for the first time for that interrupt. This can be
+ * done via ice_mbx_reset_snapshot().
+ *
+ * 3. For every message read by the caller from the MBX Queue, the caller must
+ * call the detection algorithm's entry function ice_mbx_vf_state_handler().
+ * Before every call to ice_mbx_vf_state_handler() the struct ice_mbx_data is
+ * filled as it is required to be passed to the algorithm.
+ *
+ * 4. Every time a message is read from the MBX queue, a VFId is received which
+ * is passed to the state handler. The boolean output is_malvf of the state
+ * handler ice_mbx_vf_state_handler() serves as an indicator to the caller
+ * whether this VF is malicious or not.
+ *
+ * 5. When a VF is identified to be malicious, the caller can send a message
+ * to the system administrator. The caller can invoke ice_mbx_report_malvf()
+ * to help determine if a malicious VF is to be reported or not. This function
+ * requires the caller to maintain a global bitmap to track all malicious VFs
+ * and pass that to ice_mbx_report_malvf() along with the VFID which was
+ * identified to be malicious by ice_mbx_vf_state_handler().
+ *
+ * 6. The global bitmap maintained by PF can be cleared completely if PF is in
+ * reset or the bit corresponding to a VF can be cleared if that VF is in
+ * reset.
+ * When a VF is shut down and brought back up, we assume that the new VF
+ * brought up is not malicious and hence report it if found malicious.
+ *
+ * 7. The function ice_mbx_reset_snapshot() is called to reset the information
+ * in ice_mbx_snapshot for every new mailbox interrupt handled.
+ *
+ * 8. The memory allocated for variables in ice_mbx_snapshot is de-allocated
+ * when driver is unloaded.
+ */
+#define ICE_RQ_DATA_MASK(rq_data) ((rq_data) & PF_MBX_ARQH_ARQH_M)
+/* Using the highest value for an unsigned 16-bit value 0xFFFF to indicate that
+ * the max messages check must be ignored in the algorithm
+ */
+#define ICE_IGNORE_MAX_MSG_CNT 0xFFFF
+
+/**
+ * ice_mbx_traverse - Pass through mailbox snapshot
+ * @hw: pointer to the HW struct
+ * @new_state: new algorithm state
+ *
+ * Traversing the mailbox static snapshot without checking
+ * for malicious VFs.
+ */
+static void
+ice_mbx_traverse(struct ice_hw *hw,
+                enum ice_mbx_snapshot_state *new_state)
+{
+       struct ice_mbx_snap_buffer_data *snap_buf;
+       u32 num_iterations;
+
+       snap_buf = &hw->mbx_snapshot.mbx_buf;
+
+       /* As mailbox buffer is circular, applying a mask
+        * on the incremented iteration count.
+        */
+       num_iterations = ICE_RQ_DATA_MASK(++snap_buf->num_iterations);
+
+       /* Checking either of the below conditions to exit snapshot traversal:
+        * Condition-1: If the number of iterations in the mailbox is equal to
+        * the mailbox head which would indicate that we have reached the end
+        * of the static snapshot.
+        * Condition-2: If the maximum messages serviced in the mailbox for a
+        * given interrupt is the highest possible value then there is no need
+        * to check if the number of messages processed is equal to it. If not
+        * check if the number of messages processed is greater than or equal
+        * to the maximum number of mailbox entries serviced in current work
+        * item.
+        * Note: num_msg_proc is pre-incremented only when Condition-2 is
+        * actually evaluated (short-circuit &&), i.e. when the max-message
+        * check is not ignored.
+        */
+       if (num_iterations == snap_buf->head ||
+           (snap_buf->max_num_msgs_mbx < ICE_IGNORE_MAX_MSG_CNT &&
+            ++snap_buf->num_msg_proc >= snap_buf->max_num_msgs_mbx))
+               *new_state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
+}
+
+/**
+ * ice_mbx_detect_malvf - Detect malicious VF in snapshot
+ * @hw: pointer to the HW struct
+ * @vf_id: relative virtual function ID
+ * @new_state: new algorithm state
+ * @is_malvf: boolean output to indicate if VF is malicious
+ *
+ * This function tracks the number of asynchronous messages
+ * sent per VF and marks the VF as malicious if it exceeds
+ * the permissible number of messages to send.
+ */
+static enum ice_status
+ice_mbx_detect_malvf(struct ice_hw *hw, u16 vf_id,
+                    enum ice_mbx_snapshot_state *new_state,
+                    bool *is_malvf)
+{
+       struct ice_mbx_snapshot *snap = &hw->mbx_snapshot;
+
+       /* Reject VF ids outside the tracked counter array; *is_malvf is
+        * left untouched in that case.
+        */
+       if (vf_id >= snap->mbx_vf.vfcntr_len)
+               return ICE_ERR_OUT_OF_RANGE;
+
+       /* increment the message count in the VF array */
+       snap->mbx_vf.vf_cntr[vf_id]++;
+
+       /* Crossing the per-VF threshold flags the VF as malicious; the
+        * counter keeps incrementing, so the flag stays set on later calls
+        * until the snapshot is reset.
+        */
+       if (snap->mbx_vf.vf_cntr[vf_id] >= ICE_ASYNC_VF_MSG_THRESHOLD)
+               *is_malvf = true;
+
+       /* continue to iterate through the mailbox snapshot */
+       ice_mbx_traverse(hw, new_state);
+
+       return ICE_SUCCESS;
+}
+
+/**
+ * ice_mbx_reset_snapshot - Reset mailbox snapshot structure
+ * @snap: pointer to mailbox snapshot structure in the ice_hw struct
+ *
+ * Reset the mailbox snapshot structure and clear VF counter array.
+ */
+static void ice_mbx_reset_snapshot(struct ice_mbx_snapshot *snap)
*** 15611 LINES SKIPPED ***

Reply via email to