commit:     95a4b9c4036d7f10bd7b559ddb7326f70b142cc5
Author:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Thu Aug 11 12:33:42 2022 +0000
Commit:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Thu Aug 11 12:33:42 2022 +0000
URL:        https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=95a4b9c4
Linux patch 5.15.60 Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org> 0000_README | 4 + 1059_linux-5.15.60.patch | 1475 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1479 insertions(+) diff --git a/0000_README b/0000_README index 71e13cee..a6bf4af4 100644 --- a/0000_README +++ b/0000_README @@ -279,6 +279,10 @@ Patch: 1058_linux-5.15.59.patch From: http://www.kernel.org Desc: Linux 5.15.59 +Patch: 1059_linux-5.15.60.patch +From: http://www.kernel.org +Desc: Linux 5.15.60 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. diff --git a/1059_linux-5.15.60.patch b/1059_linux-5.15.60.patch new file mode 100644 index 00000000..9afdb82b --- /dev/null +++ b/1059_linux-5.15.60.patch @@ -0,0 +1,1475 @@ +diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst +index 6bd97cd50d625..7e061ed449aaa 100644 +--- a/Documentation/admin-guide/hw-vuln/spectre.rst ++++ b/Documentation/admin-guide/hw-vuln/spectre.rst +@@ -422,6 +422,14 @@ The possible values in this file are: + 'RSB filling' Protection of RSB on context switch enabled + ============= =========================================== + ++ - EIBRS Post-barrier Return Stack Buffer (PBRSB) protection status: ++ ++ =========================== ======================================================= ++ 'PBRSB-eIBRS: SW sequence' CPU is affected and protection of RSB on VMEXIT enabled ++ 'PBRSB-eIBRS: Vulnerable' CPU is vulnerable ++ 'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB ++ =========================== ======================================================= ++ + Full mitigation might require a microcode update from the CPU + vendor. When the necessary microcode is not available, the kernel will + report vulnerability. +diff --git a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml +index fbdc2083bec4f..20ee96584aba2 100644 +--- a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml ++++ b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml +@@ -23,6 +23,7 @@ properties: + - brcm,bcm4345c5 + - brcm,bcm43540-bt + - brcm,bcm4335a0 ++ - brcm,bcm4349-bt + + shutdown-gpios: + maxItems: 1 +diff --git a/Makefile b/Makefile +index 22bca3948306b..4ea646f496c9c 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 5 + PATCHLEVEL = 15 +-SUBLEVEL = 59 ++SUBLEVEL = 60 + EXTRAVERSION = + NAME = Trick or Treat + +diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c +index 9c3d86e397bf3..1fae18ba11ed1 100644 +--- a/arch/arm64/crypto/poly1305-glue.c ++++ b/arch/arm64/crypto/poly1305-glue.c +@@ -52,7 +52,7 @@ static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, + { + if (unlikely(!dctx->sset)) { + if (!dctx->rset) { +- poly1305_init_arch(dctx, src); ++ poly1305_init_arm64(&dctx->h, src); + src += POLY1305_BLOCK_SIZE; + len -= POLY1305_BLOCK_SIZE; + dctx->rset = 1; +diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h +index 96dc0f7da258d..a971d462f531c 100644 +--- a/arch/arm64/include/asm/kernel-pgtable.h ++++ b/arch/arm64/include/asm/kernel-pgtable.h +@@ -103,8 +103,8 @@ + /* + * Initial memory map attributes. 
+ */ +-#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +-#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) ++#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN) ++#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PMD_SECT_UXN) + + #if ARM64_KERNEL_USES_PMD_MAPS + #define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) +diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S +index 17962452e31de..ab6566bf1c332 100644 +--- a/arch/arm64/kernel/head.S ++++ b/arch/arm64/kernel/head.S +@@ -285,7 +285,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables) + subs x1, x1, #64 + b.ne 1b + +- mov x7, SWAPPER_MM_MMUFLAGS ++ mov_q x7, SWAPPER_MM_MMUFLAGS + + /* + * Create the identity mapping. +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index a170cfdae2a7a..fe6981a387957 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -2427,7 +2427,7 @@ config RETPOLINE + config RETHUNK + bool "Enable return-thunks" + depends on RETPOLINE && CC_HAS_RETURN_THUNK +- default y ++ default y if X86_64 + help + Compile the kernel with the return-thunks compiler option to guard + against kernel-to-user data leaks by avoiding return speculation. +@@ -2436,21 +2436,21 @@ config RETHUNK + + config CPU_UNRET_ENTRY + bool "Enable UNRET on kernel entry" +- depends on CPU_SUP_AMD && RETHUNK ++ depends on CPU_SUP_AMD && RETHUNK && X86_64 + default y + help + Compile the kernel with support for the retbleed=unret mitigation. + + config CPU_IBPB_ENTRY + bool "Enable IBPB on kernel entry" +- depends on CPU_SUP_AMD ++ depends on CPU_SUP_AMD && X86_64 + default y + help + Compile the kernel with support for the retbleed=ibpb mitigation. + + config CPU_IBRS_ENTRY + bool "Enable IBRS on kernel entry" +- depends on CPU_SUP_INTEL ++ depends on CPU_SUP_INTEL && X86_64 + default y + help + Compile the kernel with support for the spectre_v2=ibrs mitigation. 
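The spectre.rst and x86 Kconfig hunks above wire the new PBRSB-eIBRS reporting into the existing /sys/devices/system/cpu/vulnerabilities/spectre_v2 file. As a quick illustration (not part of the patch), a minimal userspace check for the new status string, assuming a kernel booted with this patch applied, might look like:

	/*
	 * Not part of the patch: a minimal sketch that reads the sysfs file
	 * documented in the spectre.rst hunk above and looks for the new
	 * PBRSB-eIBRS status string. Anything else it prints is illustrative.
	 */
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char line[256];
		FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/spectre_v2", "r");

		if (!f) {
			perror("spectre_v2");
			return 1;
		}
		if (fgets(line, sizeof(line), f) && strstr(line, "PBRSB-eIBRS"))
			printf("PBRSB status reported: %s", line);
		else
			printf("kernel does not report PBRSB status\n");
		fclose(f);
		return 0;
	}

On an affected eIBRS CPU with this patch, the line should contain one of the three "PBRSB-eIBRS: ..." strings added to the documentation table above.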
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index d370718e222ba..be744fa100048 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -301,6 +301,7 @@ + #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ + #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ + #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ ++#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ + + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ + #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ +@@ -446,5 +447,6 @@ + #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ + #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ + #define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ ++#define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ + + #endif /* _ASM_X86_CPUFEATURES_H */ +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index 49d814b2a341a..a35f5e23fc2ac 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -642,6 +642,7 @@ struct kvm_vcpu_arch { + u64 ia32_misc_enable_msr; + u64 smbase; + u64 smi_count; ++ bool at_instruction_boundary; + bool tpr_access_reporting; + bool xsaves_enabled; + u64 ia32_xss; +@@ -1271,6 +1272,8 @@ struct kvm_vcpu_stat { + u64 nested_run; + u64 directed_yield_attempted; + u64 directed_yield_successful; ++ u64 preemption_reported; ++ u64 preemption_other; + u64 guest_mode; + }; + +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index ec2967e7249f5..8f38265bc81dc 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -148,6 +148,10 @@ + * are restricted to targets in + * kernel. + */ ++#define ARCH_CAP_PBRSB_NO BIT(24) /* ++ * Not susceptible to Post-Barrier ++ * Return Stack Buffer Predictions. ++ */ + + #define MSR_IA32_FLUSH_CMD 0x0000010b + #define L1D_FLUSH BIT(0) /* +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 9a79b96e55214..6a59b2d58a3a9 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -60,7 +60,9 @@ + 774: \ + add $(BITS_PER_LONG/8) * 2, sp; \ + dec reg; \ +- jnz 771b; ++ jnz 771b; \ ++ /* barrier for jnz misprediction */ \ ++ lfence; + + #ifdef __ASSEMBLY__ + +@@ -118,13 +120,28 @@ + #endif + .endm + ++.macro ISSUE_UNBALANCED_RET_GUARD ++ ANNOTATE_INTRA_FUNCTION_CALL ++ call .Lunbalanced_ret_guard_\@ ++ int3 ++.Lunbalanced_ret_guard_\@: ++ add $(BITS_PER_LONG/8), %_ASM_SP ++ lfence ++.endm ++ + /* + * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP + * monstrosity above, manually. 
+ */ +-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ++.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2 ++.ifb \ftr2 + ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr ++.else ++ ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2 ++.endif + __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) ++.Lunbalanced_\@: ++ ISSUE_UNBALANCED_RET_GUARD + .Lskip_rsb_\@: + .endm + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index a37814c8547e4..837e617f3b76d 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1328,6 +1328,53 @@ static void __init spec_ctrl_disable_kernel_rrsba(void) + } + } + ++static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) ++{ ++ /* ++ * Similar to context switches, there are two types of RSB attacks ++ * after VM exit: ++ * ++ * 1) RSB underflow ++ * ++ * 2) Poisoned RSB entry ++ * ++ * When retpoline is enabled, both are mitigated by filling/clearing ++ * the RSB. ++ * ++ * When IBRS is enabled, while #1 would be mitigated by the IBRS branch ++ * prediction isolation protections, RSB still needs to be cleared ++ * because of #2. Note that SMEP provides no protection here, unlike ++ * user-space-poisoned RSB entries. ++ * ++ * eIBRS should protect against RSB poisoning, but if the EIBRS_PBRSB ++ * bug is present then a LITE version of RSB protection is required, ++ * just a single call needs to retire before a RET is executed. ++ */ ++ switch (mode) { ++ case SPECTRE_V2_NONE: ++ return; ++ ++ case SPECTRE_V2_EIBRS_LFENCE: ++ case SPECTRE_V2_EIBRS: ++ if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { ++ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE); ++ pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n"); ++ } ++ return; ++ ++ case SPECTRE_V2_EIBRS_RETPOLINE: ++ case SPECTRE_V2_RETPOLINE: ++ case SPECTRE_V2_LFENCE: ++ case SPECTRE_V2_IBRS: ++ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); ++ pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n"); ++ return; ++ } ++ ++ pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit"); ++ dump_stack(); ++} ++ + static void __init spectre_v2_select_mitigation(void) + { + enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); +@@ -1478,28 +1525,7 @@ static void __init spectre_v2_select_mitigation(void) + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); + pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); + +- /* +- * Similar to context switches, there are two types of RSB attacks +- * after vmexit: +- * +- * 1) RSB underflow +- * +- * 2) Poisoned RSB entry +- * +- * When retpoline is enabled, both are mitigated by filling/clearing +- * the RSB. +- * +- * When IBRS is enabled, while #1 would be mitigated by the IBRS branch +- * prediction isolation protections, RSB still needs to be cleared +- * because of #2. Note that SMEP provides no protection here, unlike +- * user-space-poisoned RSB entries. +- * +- * eIBRS, on the other hand, has RSB-poisoning protections, so it +- * doesn't need RSB clearing after vmexit. +- */ +- if (boot_cpu_has(X86_FEATURE_RETPOLINE) || +- boot_cpu_has(X86_FEATURE_KERNEL_IBRS)) +- setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); ++ spectre_v2_determine_rsb_fill_type_at_vmexit(mode); + + /* + * Retpoline protects the kernel, but doesn't protect firmware. 
IBRS +@@ -2285,6 +2311,19 @@ static char *ibpb_state(void) + return ""; + } + ++static char *pbrsb_eibrs_state(void) ++{ ++ if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { ++ if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) || ++ boot_cpu_has(X86_FEATURE_RSB_VMEXIT)) ++ return ", PBRSB-eIBRS: SW sequence"; ++ else ++ return ", PBRSB-eIBRS: Vulnerable"; ++ } else { ++ return ", PBRSB-eIBRS: Not affected"; ++ } ++} ++ + static ssize_t spectre_v2_show_state(char *buf) + { + if (spectre_v2_enabled == SPECTRE_V2_LFENCE) +@@ -2297,12 +2336,13 @@ static ssize_t spectre_v2_show_state(char *buf) + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) + return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); + +- return sprintf(buf, "%s%s%s%s%s%s\n", ++ return sprintf(buf, "%s%s%s%s%s%s%s\n", + spectre_v2_strings[spectre_v2_enabled], + ibpb_state(), + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + stibp_state(), + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", ++ pbrsb_eibrs_state(), + spectre_v2_module_string()); + } + +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 80cc41f797830..4a538ec413b8b 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1027,6 +1027,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) + #define NO_SWAPGS BIT(6) + #define NO_ITLB_MULTIHIT BIT(7) + #define NO_SPECTRE_V2 BIT(8) ++#define NO_EIBRS_PBRSB BIT(9) + + #define VULNWL(vendor, family, model, whitelist) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) +@@ -1067,7 +1068,7 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { + + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), +- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), + + /* + * Technically, swapgs isn't serializing on AMD (despite it previously +@@ -1077,7 +1078,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { + * good enough for our purposes. 
+ */ + +- VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_TREMONT, NO_EIBRS_PBRSB), ++ VULNWL_INTEL(ATOM_TREMONT_L, NO_EIBRS_PBRSB), ++ VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), + + /* AMD Family 0xf - 0x12 */ + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), +@@ -1255,6 +1258,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) + setup_force_cpu_bug(X86_BUG_RETBLEED); + } + ++ if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) && ++ !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && ++ !(ia32_cap & ARCH_CAP_PBRSB_NO)) ++ setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); ++ + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) + return; + +diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c +index 4a4dc105552e3..86f3096f042f7 100644 +--- a/arch/x86/kvm/svm/sev.c ++++ b/arch/x86/kvm/svm/sev.c +@@ -832,7 +832,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, + + /* If source buffer is not aligned then use an intermediate buffer */ + if (!IS_ALIGNED((unsigned long)vaddr, 16)) { +- src_tpage = alloc_page(GFP_KERNEL); ++ src_tpage = alloc_page(GFP_KERNEL_ACCOUNT); + if (!src_tpage) + return -ENOMEM; + +@@ -853,7 +853,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, + if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) { + int dst_offset; + +- dst_tpage = alloc_page(GFP_KERNEL); ++ dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT); + if (!dst_tpage) { + ret = -ENOMEM; + goto e_free; +diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c +index 26f2da1590eda..5b51156712f74 100644 +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -4263,6 +4263,8 @@ out: + + static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu) + { ++ if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_INTR) ++ vcpu->arch.at_instruction_boundary = true; + } + + static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) +diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S +index 857fa0fc49faf..982138bebb70f 100644 +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -197,11 +197,13 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) + * entries and (in some cases) RSB underflow. + * + * eIBRS has its own protection against poisoned RSB, so it doesn't +- * need the RSB filling sequence. But it does need to be enabled +- * before the first unbalanced RET. ++ * need the RSB filling sequence. But it does need to be enabled, and a ++ * single call to retire, before the first unbalanced RET. 
+ */ + +- FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT ++ FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\ ++ X86_FEATURE_RSB_VMEXIT_LITE ++ + + pop %_ASM_ARG2 /* @flags */ + pop %_ASM_ARG1 /* @vmx */ +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c +index a236104fc7439..359292767e177 100644 +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -6471,6 +6471,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) + return; + + handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc)); ++ vcpu->arch.at_instruction_boundary = true; + } + + static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index bd410926fda59..8a6ee5d8adc74 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -277,6 +277,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { + STATS_DESC_COUNTER(VCPU, nested_run), + STATS_DESC_COUNTER(VCPU, directed_yield_attempted), + STATS_DESC_COUNTER(VCPU, directed_yield_successful), ++ STATS_DESC_COUNTER(VCPU, preemption_reported), ++ STATS_DESC_COUNTER(VCPU, preemption_other), + STATS_DESC_ICOUNTER(VCPU, guest_mode) + }; + +@@ -4371,6 +4373,19 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) + struct kvm_memslots *slots; + static const u8 preempted = KVM_VCPU_PREEMPTED; + ++ /* ++ * The vCPU can be marked preempted if and only if the VM-Exit was on ++ * an instruction boundary and will not trigger guest emulation of any ++ * kind (see vcpu_run). Vendor specific code controls (conservatively) ++ * when this is true, for example allowing the vCPU to be marked ++ * preempted if and only if the VM-Exit was due to a host interrupt. ++ */ ++ if (!vcpu->arch.at_instruction_boundary) { ++ vcpu->stat.preemption_other++; ++ return; ++ } ++ ++ vcpu->stat.preemption_reported++; + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) + return; + +@@ -4400,19 +4415,21 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) + { + int idx; + +- if (vcpu->preempted && !vcpu->arch.guest_state_protected) +- vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu); ++ if (vcpu->preempted) { ++ if (!vcpu->arch.guest_state_protected) ++ vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu); + +- /* +- * Take the srcu lock as memslots will be accessed to check the gfn +- * cache generation against the memslots generation. +- */ +- idx = srcu_read_lock(&vcpu->kvm->srcu); +- if (kvm_xen_msr_enabled(vcpu->kvm)) +- kvm_xen_runstate_set_preempted(vcpu); +- else +- kvm_steal_time_set_preempted(vcpu); +- srcu_read_unlock(&vcpu->kvm->srcu, idx); ++ /* ++ * Take the srcu lock as memslots will be accessed to check the gfn ++ * cache generation against the memslots generation. ++ */ ++ idx = srcu_read_lock(&vcpu->kvm->srcu); ++ if (kvm_xen_msr_enabled(vcpu->kvm)) ++ kvm_xen_runstate_set_preempted(vcpu); ++ else ++ kvm_steal_time_set_preempted(vcpu); ++ srcu_read_unlock(&vcpu->kvm->srcu, idx); ++ } + + static_call(kvm_x86_vcpu_put)(vcpu); + vcpu->arch.last_host_tsc = rdtsc(); +@@ -9934,6 +9951,13 @@ static int vcpu_run(struct kvm_vcpu *vcpu) + vcpu->arch.l1tf_flush_l1d = true; + + for (;;) { ++ /* ++ * If another guest vCPU requests a PV TLB flush in the middle ++ * of instruction emulation, the rest of the emulation could ++ * use a stale page translation. Assume that any code after ++ * this point can start executing an instruction. 
++ */ ++ vcpu->arch.at_instruction_boundary = false; + if (kvm_vcpu_running(vcpu)) { + r = vcpu_enter_guest(vcpu); + } else { +diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h +index cc0cf5f37450b..a7693a286e401 100644 +--- a/arch/x86/kvm/xen.h ++++ b/arch/x86/kvm/xen.h +@@ -97,8 +97,10 @@ static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu) + * behalf of the vCPU. Only if the VMM does actually block + * does it need to enter RUNSTATE_blocked. + */ +- if (vcpu->preempted) +- kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable); ++ if (WARN_ON_ONCE(!vcpu->preempted)) ++ return; ++ ++ kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable); + } + + /* 32-bit compatibility definitions, also used natively in 32-bit build */ +diff --git a/block/blk-ioc.c b/block/blk-ioc.c +index 57299f860d41e..90c05971f71e0 100644 +--- a/block/blk-ioc.c ++++ b/block/blk-ioc.c +@@ -265,6 +265,7 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node) + INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC); + INIT_HLIST_HEAD(&ioc->icq_list); + INIT_WORK(&ioc->release_work, ioc_release_fn); ++ ioc->ioprio = IOPRIO_DEFAULT; + + /* + * Try to install. ioc shouldn't be installed if someone else +diff --git a/block/ioprio.c b/block/ioprio.c +index 6f01d35a5145a..6c830154856fc 100644 +--- a/block/ioprio.c ++++ b/block/ioprio.c +@@ -189,9 +189,9 @@ out: + int ioprio_best(unsigned short aprio, unsigned short bprio) + { + if (!ioprio_valid(aprio)) +- aprio = IOPRIO_DEFAULT; ++ aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM); + if (!ioprio_valid(bprio)) +- bprio = IOPRIO_DEFAULT; ++ bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM); + + return min(aprio, bprio); + } +diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c +index 598fd19b65fa4..45973aa6e06d4 100644 +--- a/drivers/acpi/apei/bert.c ++++ b/drivers/acpi/apei/bert.c +@@ -29,16 +29,26 @@ + + #undef pr_fmt + #define pr_fmt(fmt) "BERT: " fmt ++ ++#define ACPI_BERT_PRINT_MAX_RECORDS 5 + #define ACPI_BERT_PRINT_MAX_LEN 1024 + + static int bert_disable; + ++/* ++ * Print "all" the error records in the BERT table, but avoid huge spam to ++ * the console if the BIOS included oversize records, or too many records. ++ * Skipping some records here does not lose anything because the full ++ * data is available to user tools in: ++ * /sys/firmware/acpi/tables/data/BERT ++ */ + static void __init bert_print_all(struct acpi_bert_region *region, + unsigned int region_len) + { + struct acpi_hest_generic_status *estatus = + (struct acpi_hest_generic_status *)region; + int remain = region_len; ++ int printed = 0, skipped = 0; + u32 estatus_len; + + while (remain >= sizeof(struct acpi_bert_region)) { +@@ -46,24 +56,26 @@ static void __init bert_print_all(struct acpi_bert_region *region, + if (remain < estatus_len) { + pr_err(FW_BUG "Truncated status block (length: %u).\n", + estatus_len); +- return; ++ break; + } + + /* No more error records. 
*/ + if (!estatus->block_status) +- return; ++ break; + + if (cper_estatus_check(estatus)) { + pr_err(FW_BUG "Invalid error record.\n"); +- return; ++ break; + } + +- pr_info_once("Error records from previous boot:\n"); +- if (region_len < ACPI_BERT_PRINT_MAX_LEN) ++ if (estatus_len < ACPI_BERT_PRINT_MAX_LEN && ++ printed < ACPI_BERT_PRINT_MAX_RECORDS) { ++ pr_info_once("Error records from previous boot:\n"); + cper_estatus_print(KERN_INFO HW_ERR, estatus); +- else +- pr_info_once("Max print length exceeded, table data is available at:\n" +- "/sys/firmware/acpi/tables/data/BERT"); ++ printed++; ++ } else { ++ skipped++; ++ } + + /* + * Because the boot error source is "one-time polled" type, +@@ -75,6 +87,9 @@ static void __init bert_print_all(struct acpi_bert_region *region, + estatus = (void *)estatus + estatus_len; + remain -= estatus_len; + } ++ ++ if (skipped) ++ pr_info(HW_ERR "Skipped %d error records\n", skipped); + } + + static int __init setup_bert_disable(char *str) +diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c +index 7b9793cb55c50..e39d59ad64964 100644 +--- a/drivers/acpi/video_detect.c ++++ b/drivers/acpi/video_detect.c +@@ -424,7 +424,6 @@ static const struct dmi_system_id video_detect_dmi_table[] = { + .callback = video_detect_force_native, + .ident = "Clevo NL5xRU", + .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), + }, + }, +@@ -432,59 +431,75 @@ static const struct dmi_system_id video_detect_dmi_table[] = { + .callback = video_detect_force_native, + .ident = "Clevo NL5xRU", + .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"), +- DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), ++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), ++ DMI_MATCH(DMI_BOARD_NAME, "AURA1501"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xRU", + .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, "Notebook"), +- DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), ++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), ++ DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"), + }, + }, + { + .callback = video_detect_force_native, +- .ident = "Clevo NL5xRU", ++ .ident = "Clevo NL5xNU", + .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), +- DMI_MATCH(DMI_BOARD_NAME, "AURA1501"), ++ DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), + }, + }, ++ /* ++ * The TongFang PF5PU1G, PF4NU1F, PF5NU1G, and PF5LUXG/TUXEDO BA15 Gen10, ++ * Pulse 14/15 Gen1, and Pulse 15 Gen2 have the same problem as the Clevo ++ * NL5xRU and NL5xNU/TUXEDO Aura 15 Gen1 and Gen2. See the description ++ * above. 
++ */ + { + .callback = video_detect_force_native, +- .ident = "Clevo NL5xRU", ++ .ident = "TongFang PF5PU1G", + .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), +- DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"), ++ DMI_MATCH(DMI_BOARD_NAME, "PF5PU1G"), + }, + }, + { + .callback = video_detect_force_native, +- .ident = "Clevo NL5xNU", ++ .ident = "TongFang PF4NU1F", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_NAME, "PF4NU1F"), ++ }, ++ }, ++ { ++ .callback = video_detect_force_native, ++ .ident = "TongFang PF4NU1F", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), +- DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), ++ DMI_MATCH(DMI_BOARD_NAME, "PULSE1401"), + }, + }, + { + .callback = video_detect_force_native, +- .ident = "Clevo NL5xNU", ++ .ident = "TongFang PF5NU1G", + .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"), +- DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), ++ DMI_MATCH(DMI_BOARD_NAME, "PF5NU1G"), + }, + }, + { + .callback = video_detect_force_native, +- .ident = "Clevo NL5xNU", ++ .ident = "TongFang PF5NU1G", + .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, "Notebook"), +- DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), ++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), ++ DMI_MATCH(DMI_BOARD_NAME, "PULSE1501"), ++ }, ++ }, ++ { ++ .callback = video_detect_force_native, ++ .ident = "TongFang PF5LUXG", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_NAME, "PF5LUXG"), + }, + }, +- + /* + * Desktops which falsely report a backlight and which our heuristics + * for this do not catch. +diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c +index d9ceca7a7935c..a18f289d73466 100644 +--- a/drivers/bluetooth/btbcm.c ++++ b/drivers/bluetooth/btbcm.c +@@ -453,6 +453,8 @@ static const struct bcm_subver_table bcm_uart_subver_table[] = { + { 0x6606, "BCM4345C5" }, /* 003.006.006 */ + { 0x230f, "BCM4356A2" }, /* 001.003.015 */ + { 0x220e, "BCM20702A1" }, /* 001.002.014 */ ++ { 0x420d, "BCM4349B1" }, /* 002.002.013 */ ++ { 0x420e, "BCM4349B1" }, /* 002.002.014 */ + { 0x4217, "BCM4329B1" }, /* 002.002.023 */ + { 0x6106, "BCM4359C0" }, /* 003.001.006 */ + { 0x4106, "BCM4335A0" }, /* 002.001.006 */ +diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c +index a68edbc7be0ff..627436329b502 100644 +--- a/drivers/bluetooth/btusb.c ++++ b/drivers/bluetooth/btusb.c +@@ -420,6 +420,18 @@ static const struct usb_device_id blacklist_table[] = { + { USB_DEVICE(0x04ca, 0x4006), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, + ++ /* Realtek 8852CE Bluetooth devices */ ++ { USB_DEVICE(0x04ca, 0x4007), .driver_info = BTUSB_REALTEK | ++ BTUSB_WIDEBAND_SPEECH }, ++ { USB_DEVICE(0x04c5, 0x1675), .driver_info = BTUSB_REALTEK | ++ BTUSB_WIDEBAND_SPEECH }, ++ { USB_DEVICE(0x0cb8, 0xc558), .driver_info = BTUSB_REALTEK | ++ BTUSB_WIDEBAND_SPEECH }, ++ { USB_DEVICE(0x13d3, 0x3587), .driver_info = BTUSB_REALTEK | ++ BTUSB_WIDEBAND_SPEECH }, ++ { USB_DEVICE(0x13d3, 0x3586), .driver_info = BTUSB_REALTEK | ++ BTUSB_WIDEBAND_SPEECH }, ++ + /* Realtek Bluetooth devices */ + { USB_VENDOR_AND_INTERFACE_INFO(0x0bda, 0xe0, 0x01, 0x01), + .driver_info = BTUSB_REALTEK }, +@@ -459,6 +471,9 @@ static const struct usb_device_id blacklist_table[] = { + { USB_DEVICE(0x0489, 0xe0d9), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH | + BTUSB_VALID_LE_STATES }, ++ { USB_DEVICE(0x13d3, 0x3568), .driver_info = BTUSB_MEDIATEK | ++ BTUSB_WIDEBAND_SPEECH | ++ BTUSB_VALID_LE_STATES }, + + /* Additional Realtek 8723AE Bluetooth devices */ + { USB_DEVICE(0x0930, 0x021d), .driver_info = BTUSB_REALTEK }, +diff --git 
a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c +index 7abf99f0ee399..cf622e4596055 100644 +--- a/drivers/bluetooth/hci_bcm.c ++++ b/drivers/bluetooth/hci_bcm.c +@@ -1515,8 +1515,10 @@ static const struct of_device_id bcm_bluetooth_of_match[] = { + { .compatible = "brcm,bcm4345c5" }, + { .compatible = "brcm,bcm4330-bt" }, + { .compatible = "brcm,bcm43438-bt", .data = &bcm43438_device_data }, ++ { .compatible = "brcm,bcm4349-bt", .data = &bcm43438_device_data }, + { .compatible = "brcm,bcm43540-bt", .data = &bcm4354_device_data }, + { .compatible = "brcm,bcm4335a0" }, ++ { .compatible = "infineon,cyw55572-bt" }, + { }, + }; + MODULE_DEVICE_TABLE(of, bcm_bluetooth_of_match); +diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c +index 73b3961890397..afb0942ccc293 100644 +--- a/drivers/macintosh/adb.c ++++ b/drivers/macintosh/adb.c +@@ -647,7 +647,7 @@ do_adb_query(struct adb_request *req) + + switch(req->data[1]) { + case ADB_QUERY_GETDEVINFO: +- if (req->nbytes < 3) ++ if (req->nbytes < 3 || req->data[2] >= 16) + break; + mutex_lock(&adb_handler_mutex); + req->reply[0] = adb_handler[req->data[2]].original_address; +diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h +index 37e55ebde735b..d73db0dfacb26 100644 +--- a/fs/btrfs/block-group.h ++++ b/fs/btrfs/block-group.h +@@ -98,6 +98,7 @@ struct btrfs_block_group { + unsigned int to_copy:1; + unsigned int relocating_repair:1; + unsigned int chunk_item_inserted:1; ++ unsigned int zoned_data_reloc_ongoing:1; + + int disk_cache_state; + +diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c +index e3514f9a4e8dd..248ea15c97346 100644 +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -3804,7 +3804,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, + block_group->start == fs_info->data_reloc_bg || + fs_info->data_reloc_bg == 0); + +- if (block_group->ro) { ++ if (block_group->ro || block_group->zoned_data_reloc_ongoing) { + ret = 1; + goto out; + } +@@ -3865,8 +3865,24 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, + out: + if (ret && ffe_ctl->for_treelog) + fs_info->treelog_bg = 0; +- if (ret && ffe_ctl->for_data_reloc) ++ if (ret && ffe_ctl->for_data_reloc && ++ fs_info->data_reloc_bg == block_group->start) { ++ /* ++ * Do not allow further allocations from this block group. ++ * Compared to increasing the ->ro, setting the ++ * ->zoned_data_reloc_ongoing flag still allows nocow ++ * writers to come in. See btrfs_inc_nocow_writers(). ++ * ++ * We need to disable an allocation to avoid an allocation of ++ * regular (non-relocation data) extent. With mix of relocation ++ * extents and regular extents, we can dispatch WRITE commands ++ * (for relocation extents) and ZONE APPEND commands (for ++ * regular extents) at the same time to the same zone, which ++ * easily break the write pointer. 
++ */ ++ block_group->zoned_data_reloc_ongoing = 1; + fs_info->data_reloc_bg = 0; ++ } + spin_unlock(&fs_info->relocation_bg_lock); + spin_unlock(&fs_info->treelog_bg_lock); + spin_unlock(&block_group->lock); +diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c +index b791e280af0ca..a90546b3107c5 100644 +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -5152,13 +5152,14 @@ int extent_writepages(struct address_space *mapping, + */ + btrfs_zoned_data_reloc_lock(BTRFS_I(inode)); + ret = extent_write_cache_pages(mapping, wbc, &epd); +- btrfs_zoned_data_reloc_unlock(BTRFS_I(inode)); + ASSERT(ret <= 0); + if (ret < 0) { ++ btrfs_zoned_data_reloc_unlock(BTRFS_I(inode)); + end_write_bio(&epd, ret); + return ret; + } + ret = flush_write_bio(&epd); ++ btrfs_zoned_data_reloc_unlock(BTRFS_I(inode)); + return ret; + } + +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c +index ea72620507901..1b4fee8a2f28b 100644 +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -3069,6 +3069,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) + ordered_extent->file_offset, + ordered_extent->file_offset + + logical_len); ++ btrfs_zoned_release_data_reloc_bg(fs_info, ordered_extent->disk_bytenr, ++ ordered_extent->disk_num_bytes); + } else { + BUG_ON(root == fs_info->tree_root); + ret = insert_ordered_extent_file_extent(trans, ordered_extent); +diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c +index 574769f921a22..fc791f7c71428 100644 +--- a/fs/btrfs/zoned.c ++++ b/fs/btrfs/zoned.c +@@ -1623,3 +1623,30 @@ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) + } + mutex_unlock(&fs_devices->device_list_mutex); + } ++ ++void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical, ++ u64 length) ++{ ++ struct btrfs_block_group *block_group; ++ ++ if (!btrfs_is_zoned(fs_info)) ++ return; ++ ++ block_group = btrfs_lookup_block_group(fs_info, logical); ++ /* It should be called on a previous data relocation block group. */ ++ ASSERT(block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)); ++ ++ spin_lock(&block_group->lock); ++ if (!block_group->zoned_data_reloc_ongoing) ++ goto out; ++ ++ /* All relocation extents are written. */ ++ if (block_group->start + block_group->alloc_offset == logical + length) { ++ /* Now, release this block group for further allocations. 
*/ ++ block_group->zoned_data_reloc_ongoing = 0; ++ } ++ ++out: ++ spin_unlock(&block_group->lock); ++ btrfs_put_block_group(block_group); ++} +diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h +index 3a826f7c20403..574490ea2cc87 100644 +--- a/fs/btrfs/zoned.h ++++ b/fs/btrfs/zoned.h +@@ -70,6 +70,8 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info, + u64 logical, u64 length); + void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg); + void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info); ++void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical, ++ u64 length); + #else /* CONFIG_BLK_DEV_ZONED */ + static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, + struct blk_zone *zone) +@@ -207,6 +209,9 @@ static inline struct btrfs_device *btrfs_zoned_get_device( + static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { } + + static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { } ++ ++static inline void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, ++ u64 logical, u64 length) { } + #endif + + static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos) +diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h +index 3f53bc27a19bf..3d088a88f8320 100644 +--- a/include/linux/ioprio.h ++++ b/include/linux/ioprio.h +@@ -11,7 +11,7 @@ + /* + * Default IO priority. + */ +-#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM) ++#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0) + + /* + * Check that a priority value has a valid class. +diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h +index 3781a7f489ef3..bcaedfe60572f 100644 +--- a/tools/arch/x86/include/asm/cpufeatures.h ++++ b/tools/arch/x86/include/asm/cpufeatures.h +@@ -300,6 +300,7 @@ + #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ + #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ + #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ ++#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM-Exit when EIBRS is enabled */ + + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ + #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ +diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h +index ec2967e7249f5..8f38265bc81dc 100644 +--- a/tools/arch/x86/include/asm/msr-index.h ++++ b/tools/arch/x86/include/asm/msr-index.h +@@ -148,6 +148,10 @@ + * are restricted to targets in + * kernel. + */ ++#define ARCH_CAP_PBRSB_NO BIT(24) /* ++ * Not susceptible to Post-Barrier ++ * Return Stack Buffer Predictions. 
++ */ + + #define MSR_IA32_FLUSH_CMD 0x0000010b + #define L1D_FLUSH BIT(0) /* +diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h +index e2c8f946c5416..8330e3ca8fbfb 100644 +--- a/tools/include/uapi/linux/bpf.h ++++ b/tools/include/uapi/linux/bpf.h +@@ -5347,7 +5347,8 @@ struct bpf_sock { + __u32 src_ip4; + __u32 src_ip6[4]; + __u32 src_port; /* host byte order */ +- __u32 dst_port; /* network byte order */ ++ __be16 dst_port; /* network byte order */ ++ __u16 :16; /* zero padding */ + __u32 dst_ip4; + __u32 dst_ip6[4]; + __u32 state; +diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat +index 5a5bd74f55bd5..9c366b3a676db 100755 +--- a/tools/kvm/kvm_stat/kvm_stat ++++ b/tools/kvm/kvm_stat/kvm_stat +@@ -1646,7 +1646,8 @@ Press any other key to refresh statistics immediately. + .format(values)) + if len(pids) > 1: + sys.exit('Error: Multiple processes found (pids: {}). Use "-p"' +- ' to specify the desired pid'.format(" ".join(pids))) ++ ' to specify the desired pid' ++ .format(" ".join(map(str, pids)))) + namespace.pid = pids[0] + + argparser = argparse.ArgumentParser(description=description_text, +diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c +index 577d619fb07ed..197ec1d1b7026 100644 +--- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c ++++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c +@@ -1,9 +1,11 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright (c) 2019 Facebook */ + ++#define _GNU_SOURCE + #include <netinet/in.h> + #include <arpa/inet.h> + #include <unistd.h> ++#include <sched.h> + #include <stdlib.h> + #include <string.h> + #include <errno.h> +@@ -21,6 +23,7 @@ + enum bpf_linum_array_idx { + EGRESS_LINUM_IDX, + INGRESS_LINUM_IDX, ++ READ_SK_DST_PORT_LINUM_IDX, + __NR_BPF_LINUM_ARRAY_IDX, + }; + +@@ -43,8 +46,16 @@ static __u64 child_cg_id; + static int linum_map_fd; + static __u32 duration; + +-static __u32 egress_linum_idx = EGRESS_LINUM_IDX; +-static __u32 ingress_linum_idx = INGRESS_LINUM_IDX; ++static bool create_netns(void) ++{ ++ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) ++ return false; ++ ++ if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo")) ++ return false; ++ ++ return true; ++} + + static void print_sk(const struct bpf_sock *sk, const char *prefix) + { +@@ -92,19 +103,24 @@ static void check_result(void) + { + struct bpf_tcp_sock srv_tp, cli_tp, listen_tp; + struct bpf_sock srv_sk, cli_sk, listen_sk; +- __u32 ingress_linum, egress_linum; ++ __u32 idx, ingress_linum, egress_linum, linum; + int err; + +- err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx, +- &egress_linum); ++ idx = EGRESS_LINUM_IDX; ++ err = bpf_map_lookup_elem(linum_map_fd, &idx, &egress_linum); + CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)", + "err:%d errno:%d\n", err, errno); + +- err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx, +- &ingress_linum); ++ idx = INGRESS_LINUM_IDX; ++ err = bpf_map_lookup_elem(linum_map_fd, &idx, &ingress_linum); + CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)", + "err:%d errno:%d\n", err, errno); + ++ idx = READ_SK_DST_PORT_LINUM_IDX; ++ err = bpf_map_lookup_elem(linum_map_fd, &idx, &linum); ++ ASSERT_OK(err, "bpf_map_lookup_elem(linum_map_fd, READ_SK_DST_PORT_IDX)"); ++ ASSERT_EQ(linum, 0, "failure in read_sk_dst_port on line"); ++ + memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk)); + memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp)); + memcpy(&cli_sk, &skel->bss->cli_sk, 
sizeof(cli_sk)); +@@ -263,7 +279,7 @@ static void test(void) + char buf[DATA_LEN]; + + /* Prepare listen_fd */ +- listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); ++ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0xcafe, 0); + /* start_server() has logged the error details */ + if (CHECK_FAIL(listen_fd == -1)) + goto done; +@@ -331,8 +347,12 @@ done: + + void test_sock_fields(void) + { +- struct bpf_link *egress_link = NULL, *ingress_link = NULL; + int parent_cg_fd = -1, child_cg_fd = -1; ++ struct bpf_link *link; ++ ++ /* Use a dedicated netns to have a fixed listen port */ ++ if (!create_netns()) ++ return; + + /* Create a cgroup, get fd, and join it */ + parent_cg_fd = test__join_cgroup(PARENT_CGROUP); +@@ -353,15 +373,20 @@ void test_sock_fields(void) + if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n")) + goto done; + +- egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, +- child_cg_fd); +- if (!ASSERT_OK_PTR(egress_link, "attach_cgroup(egress)")) ++ link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, child_cg_fd); ++ if (!ASSERT_OK_PTR(link, "attach_cgroup(egress_read_sock_fields)")) ++ goto done; ++ skel->links.egress_read_sock_fields = link; ++ ++ link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, child_cg_fd); ++ if (!ASSERT_OK_PTR(link, "attach_cgroup(ingress_read_sock_fields)")) + goto done; ++ skel->links.ingress_read_sock_fields = link; + +- ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, +- child_cg_fd); +- if (!ASSERT_OK_PTR(ingress_link, "attach_cgroup(ingress)")) ++ link = bpf_program__attach_cgroup(skel->progs.read_sk_dst_port, child_cg_fd); ++ if (!ASSERT_OK_PTR(link, "attach_cgroup(read_sk_dst_port")) + goto done; ++ skel->links.read_sk_dst_port = link; + + linum_map_fd = bpf_map__fd(skel->maps.linum_map); + sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt); +@@ -370,8 +395,7 @@ void test_sock_fields(void) + test(); + + done: +- bpf_link__destroy(egress_link); +- bpf_link__destroy(ingress_link); ++ test_sock_fields__detach(skel); + test_sock_fields__destroy(skel); + if (child_cg_fd >= 0) + close(child_cg_fd); +diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c +index 7967348b11af6..43b31aa1fcf72 100644 +--- a/tools/testing/selftests/bpf/progs/test_sock_fields.c ++++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c +@@ -12,6 +12,7 @@ + enum bpf_linum_array_idx { + EGRESS_LINUM_IDX, + INGRESS_LINUM_IDX, ++ READ_SK_DST_PORT_LINUM_IDX, + __NR_BPF_LINUM_ARRAY_IDX, + }; + +@@ -250,4 +251,48 @@ int ingress_read_sock_fields(struct __sk_buff *skb) + return CG_OK; + } + ++static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk) ++{ ++ __u32 *word = (__u32 *)&sk->dst_port; ++ return word[0] == bpf_htonl(0xcafe0000); ++} ++ ++static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk) ++{ ++ __u16 *half = (__u16 *)&sk->dst_port; ++ return half[0] == bpf_htons(0xcafe); ++} ++ ++static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk) ++{ ++ __u8 *byte = (__u8 *)&sk->dst_port; ++ return byte[0] == 0xca && byte[1] == 0xfe; ++} ++ ++SEC("cgroup_skb/egress") ++int read_sk_dst_port(struct __sk_buff *skb) ++{ ++ __u32 linum, linum_idx; ++ struct bpf_sock *sk; ++ ++ linum_idx = READ_SK_DST_PORT_LINUM_IDX; ++ ++ sk = skb->sk; ++ if (!sk) ++ RET_LOG(); ++ ++ /* Ignore everything but the SYN from the client socket */ ++ if (sk->state != 
BPF_TCP_SYN_SENT) ++ return CG_OK; ++ ++ if (!sk_dst_port__load_word(sk)) ++ RET_LOG(); ++ if (!sk_dst_port__load_half(sk)) ++ RET_LOG(); ++ if (!sk_dst_port__load_byte(sk)) ++ RET_LOG(); ++ ++ return CG_OK; ++} ++ + char _license[] SEC("license") = "GPL"; +diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c +index ce13ece08d51c..8c224eac93df7 100644 +--- a/tools/testing/selftests/bpf/verifier/sock.c ++++ b/tools/testing/selftests/bpf/verifier/sock.c +@@ -121,7 +121,25 @@ + .result = ACCEPT, + }, + { +- "sk_fullsock(skb->sk): sk->dst_port [narrow load]", ++ "sk_fullsock(skb->sk): sk->dst_port [word load] (backward compatibility)", ++ .insns = { ++ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)), ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ }, ++ .prog_type = BPF_PROG_TYPE_CGROUP_SKB, ++ .result = ACCEPT, ++}, ++{ ++ "sk_fullsock(skb->sk): sk->dst_port [half load]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), +@@ -139,7 +157,64 @@ + .result = ACCEPT, + }, + { +- "sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]", ++ "sk_fullsock(skb->sk): sk->dst_port [half load] (invalid)", ++ .insns = { ++ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2), ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ }, ++ .prog_type = BPF_PROG_TYPE_CGROUP_SKB, ++ .result = REJECT, ++ .errstr = "invalid sock access", ++}, ++{ ++ "sk_fullsock(skb->sk): sk->dst_port [byte load]", ++ .insns = { ++ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port)), ++ BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1), ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ }, ++ .prog_type = BPF_PROG_TYPE_CGROUP_SKB, ++ .result = ACCEPT, ++}, ++{ ++ "sk_fullsock(skb->sk): sk->dst_port [byte load] (invalid)", ++ .insns = { ++ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2), ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ }, ++ .prog_type = BPF_PROG_TYPE_CGROUP_SKB, ++ .result = REJECT, ++ .errstr = "invalid sock access", ++}, ++{ ++ "sk_fullsock(skb->sk): past sk->dst_port [half load] (invalid)", + .insns = { + 
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), +@@ -149,7 +224,7 @@ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), +- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1), ++ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, dst_port)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, +diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c +index e0b0164e9af85..be1d9728c4cea 100644 +--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c ++++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c +@@ -73,20 +73,19 @@ void ucall_uninit(struct kvm_vm *vm) + + void ucall(uint64_t cmd, int nargs, ...) + { +- struct ucall uc = { +- .cmd = cmd, +- }; ++ struct ucall uc = {}; + va_list va; + int i; + ++ WRITE_ONCE(uc.cmd, cmd); + nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS; + + va_start(va, nargs); + for (i = 0; i < nargs; ++i) +- uc.args[i] = va_arg(va, uint64_t); ++ WRITE_ONCE(uc.args[i], va_arg(va, uint64_t)); + va_end(va); + +- *ucall_exit_mmio_addr = (vm_vaddr_t)&uc; ++ WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc); + } + + uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) +diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +index e0b2bb1339b16..3330fb183c680 100644 +--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c ++++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +@@ -44,7 +44,7 @@ static inline void nop_loop(void) + { + int i; + +- for (i = 0; i < 1000000; i++) ++ for (i = 0; i < 100000000; i++) + asm volatile("nop"); + } + +@@ -56,12 +56,14 @@ static inline void check_tsc_msr_rdtsc(void) + tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY); + GUEST_ASSERT(tsc_freq > 0); + +- /* First, check MSR-based clocksource */ ++ /* For increased accuracy, take mean rdtsc() before and afrer rdmsr() */ + r1 = rdtsc(); + t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); ++ r1 = (r1 + rdtsc()) / 2; + nop_loop(); + r2 = rdtsc(); + t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); ++ r2 = (r2 + rdtsc()) / 2; + + GUEST_ASSERT(r2 > r1 && t2 > t1); + +@@ -181,12 +183,14 @@ static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm) + tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY); + TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero"); + +- /* First, check MSR-based clocksource */ ++ /* For increased accuracy, take mean rdtsc() before and afrer ioctl */ + r1 = rdtsc(); + t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT); ++ r1 = (r1 + rdtsc()) / 2; + nop_loop(); + r2 = rdtsc(); + t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT); ++ r2 = (r2 + rdtsc()) / 2; + + TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2); + +diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c +index 9b68658b6bb85..5b98f3ee58a58 100644 +--- a/tools/vm/slabinfo.c ++++ b/tools/vm/slabinfo.c +@@ -233,6 +233,24 @@ static unsigned long read_slab_obj(struct slabinfo *s, const char *name) + return l; + } + ++static unsigned long read_debug_slab_obj(struct slabinfo *s, const char *name) ++{ ++ char x[128]; ++ FILE *f; ++ size_t l; ++ ++ snprintf(x, 128, "/sys/kernel/debug/slab/%s/%s", s->name, name); ++ f = fopen(x, "r"); ++ if (!f) { ++ buffer[0] = 0; ++ l = 0; ++ } else { ++ l = fread(buffer, 1, sizeof(buffer), f); ++ buffer[l] = 0; ++ fclose(f); ++ } ++ return l; ++} + + /* 
+ * Put a size string together +@@ -409,14 +427,18 @@ static void show_tracking(struct slabinfo *s) + { + printf("\n%s: Kernel object allocation\n", s->name); + printf("-----------------------------------------------------------------------\n"); +- if (read_slab_obj(s, "alloc_calls")) ++ if (read_debug_slab_obj(s, "alloc_traces")) ++ printf("%s", buffer); ++ else if (read_slab_obj(s, "alloc_calls")) + printf("%s", buffer); + else + printf("No Data\n"); + + printf("\n%s: Kernel object freeing\n", s->name); + printf("------------------------------------------------------------------------\n"); +- if (read_slab_obj(s, "free_calls")) ++ if (read_debug_slab_obj(s, "free_traces")) ++ printf("%s", buffer); ++ else if (read_slab_obj(s, "free_calls")) + printf("%s", buffer); + else + printf("No Data\n");
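The block/ioprio.c and include/linux/ioprio.h hunks earlier in this patch decouple IOPRIO_DEFAULT (now IOPRIO_CLASS_NONE, 0) from the best-effort value that ioprio_best() still falls back to. A small standalone sketch of the encoding involved (not part of the patch; constants mirror include/uapi/linux/ioprio.h):

	/*
	 * Not part of the patch: a standalone sketch of the ioprio encoding
	 * that the block/ioprio.c hunk relies on. IOPRIO_PRIO_VALUE() packs
	 * the class into the top bits and the level into the low bits, so
	 * after this change IOPRIO_DEFAULT encodes to 0 and no longer
	 * aliases the BE/NORM fallback used by ioprio_best().
	 */
	#include <stdio.h>

	#define IOPRIO_CLASS_SHIFT	13
	#define IOPRIO_PRIO_VALUE(class, data) \
		(((class) << IOPRIO_CLASS_SHIFT) | (data))

	enum {
		IOPRIO_CLASS_NONE,
		IOPRIO_CLASS_RT,
		IOPRIO_CLASS_BE,
		IOPRIO_CLASS_IDLE,
	};
	#define IOPRIO_BE_NORM	4

	int main(void)
	{
		printf("IOPRIO_DEFAULT (new): 0x%x\n",
		       IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0));
		printf("ioprio_best() fallback (BE/NORM): 0x%x\n",
		       IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM));
		return 0;
	}

This prints 0x0 for the new default and 0x4004 for the best-effort norm value, which is why create_task_io_context() in the block/blk-ioc.c hunk now has to set ioc->ioprio explicitly.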
