author     Mike Pagano <mpagano@gentoo.org>  2022-07-29 11:29:36 -0400
committer  Mike Pagano <mpagano@gentoo.org>  2022-07-29 11:29:36 -0400
commit     e7156b6245475d4c9bb6eeeb88d03653df4da664 (patch)
tree       4378a37a599b4ebbcfe6a1587a77ed73673e1189 /1207_linux-5.4.208.patch
parent     Linux patch 5.4.207 (diff)
Linux patch 5.4.208 (5.4-213)
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
Diffstat (limited to '1207_linux-5.4.208.patch')
-rw-r--r-- | 1207_linux-5.4.208.patch | 4094 |
1 file changed, 4094 insertions, 0 deletions
diff --git a/1207_linux-5.4.208.patch b/1207_linux-5.4.208.patch new file mode 100644 index 00000000..7b70b2ac --- /dev/null +++ b/1207_linux-5.4.208.patch @@ -0,0 +1,4094 @@ +diff --git a/Makefile b/Makefile +index 89d19f04faabf..884a3f314baf8 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 5 + PATCHLEVEL = 4 +-SUBLEVEL = 207 ++SUBLEVEL = 208 + EXTRAVERSION = + NAME = Kleptomaniac Octopus + +diff --git a/arch/Kconfig b/arch/Kconfig +index a8df66e645442..2219a07dca1ef 100644 +--- a/arch/Kconfig ++++ b/arch/Kconfig +@@ -915,27 +915,6 @@ config STRICT_MODULE_RWX + config ARCH_HAS_PHYS_TO_DMA + bool + +-config ARCH_HAS_REFCOUNT +- bool +- help +- An architecture selects this when it has implemented refcount_t +- using open coded assembly primitives that provide an optimized +- refcount_t implementation, possibly at the expense of some full +- refcount state checks of CONFIG_REFCOUNT_FULL=y. +- +- The refcount overflow check behavior, however, must be retained. +- Catching overflows is the primary security concern for protecting +- against bugs in reference counts. +- +-config REFCOUNT_FULL +- bool "Perform full reference count validation at the expense of speed" +- help +- Enabling this switches the refcounting infrastructure from a fast +- unchecked atomic_t implementation to a fully state checked +- implementation, which can be (slightly) slower but provides protections +- against various use-after-free conditions that can be used in +- security flaw exploits. +- + config HAVE_ARCH_COMPILER_H + bool + help +diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c +index 438b10c44d732..2b7a314b84522 100644 +--- a/arch/alpha/kernel/srmcons.c ++++ b/arch/alpha/kernel/srmcons.c +@@ -59,7 +59,7 @@ srmcons_do_receive_chars(struct tty_port *port) + } while((result.bits.status & 1) && (++loops < 10)); + + if (count) +- tty_schedule_flip(port); ++ tty_flip_buffer_push(port); + + return count; + } +diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig +index a1622b9290fd5..a4364cce85f8d 100644 +--- a/arch/arm/Kconfig ++++ b/arch/arm/Kconfig +@@ -119,7 +119,6 @@ config ARM + select OLD_SIGSUSPEND3 + select PCI_SYSCALL if PCI + select PERF_USE_VMALLOC +- select REFCOUNT_FULL + select RTC_LIB + select SYS_SUPPORTS_APM_EMULATION + # Above selects are sorted alphabetically; please add new ones +diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig +index a1a828ca188cf..6b73143f0cf8c 100644 +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -181,7 +181,6 @@ config ARM64 + select PCI_SYSCALL if PCI + select POWER_RESET + select POWER_SUPPLY +- select REFCOUNT_FULL + select SPARSE_IRQ + select SWIOTLB + select SYSCTL_EXCEPTION_TRACE +diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile +index 7b579a003e30a..d1a9615391010 100644 +--- a/arch/riscv/Makefile ++++ b/arch/riscv/Makefile +@@ -74,6 +74,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y) + endif + + KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) ++KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax) + + # GCC versions that support the "-mstrict-align" option default to allowing + # unaligned accesses. 
While unaligned accesses are explicitly allowed in the +diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig +index 38d64030aacf6..2e60c80395ab0 100644 +--- a/arch/s390/configs/debug_defconfig ++++ b/arch/s390/configs/debug_defconfig +@@ -62,7 +62,6 @@ CONFIG_OPROFILE=m + CONFIG_KPROBES=y + CONFIG_JUMP_LABEL=y + CONFIG_STATIC_KEYS_SELFTEST=y +-CONFIG_REFCOUNT_FULL=y + CONFIG_LOCK_EVENT_COUNTS=y + CONFIG_MODULES=y + CONFIG_MODULE_FORCE_LOAD=y +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index c6c71592f6e46..6002252692af4 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -73,7 +73,6 @@ config X86 + select ARCH_HAS_PMEM_API if X86_64 + select ARCH_HAS_PTE_DEVMAP if X86_64 + select ARCH_HAS_PTE_SPECIAL +- select ARCH_HAS_REFCOUNT + select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 + select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE + select ARCH_HAS_SET_MEMORY +diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h +index 1b563f9167eae..cd339b88d5d46 100644 +--- a/arch/x86/include/asm/asm.h ++++ b/arch/x86/include/asm/asm.h +@@ -141,9 +141,6 @@ + # define _ASM_EXTABLE_EX(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) + +-# define _ASM_EXTABLE_REFCOUNT(from, to) \ +- _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) +- + # define _ASM_NOKPROBE(entry) \ + .pushsection "_kprobe_blacklist","aw" ; \ + _ASM_ALIGN ; \ +@@ -172,9 +169,6 @@ + # define _ASM_EXTABLE_EX(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) + +-# define _ASM_EXTABLE_REFCOUNT(from, to) \ +- _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) +- + /* For C file, we already have NOKPROBE_SYMBOL macro */ + #endif + +diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h +deleted file mode 100644 +index 232f856e0db06..0000000000000 +--- a/arch/x86/include/asm/refcount.h ++++ /dev/null +@@ -1,126 +0,0 @@ +-#ifndef __ASM_X86_REFCOUNT_H +-#define __ASM_X86_REFCOUNT_H +-/* +- * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from +- * PaX/grsecurity. +- */ +-#include <linux/refcount.h> +-#include <asm/bug.h> +- +-/* +- * This is the first portion of the refcount error handling, which lives in +- * .text.unlikely, and is jumped to from the CPU flag check (in the +- * following macros). This saves the refcount value location into CX for +- * the exception handler to use (in mm/extable.c), and then triggers the +- * central refcount exception. The fixup address for the exception points +- * back to the regular execution flow in .text. +- */ +-#define _REFCOUNT_EXCEPTION \ +- ".pushsection .text..refcount\n" \ +- "111:\tlea %[var], %%" _ASM_CX "\n" \ +- "112:\t" ASM_UD2 "\n" \ +- ASM_UNREACHABLE \ +- ".popsection\n" \ +- "113:\n" \ +- _ASM_EXTABLE_REFCOUNT(112b, 113b) +- +-/* Trigger refcount exception if refcount result is negative. */ +-#define REFCOUNT_CHECK_LT_ZERO \ +- "js 111f\n\t" \ +- _REFCOUNT_EXCEPTION +- +-/* Trigger refcount exception if refcount result is zero or negative. */ +-#define REFCOUNT_CHECK_LE_ZERO \ +- "jz 111f\n\t" \ +- REFCOUNT_CHECK_LT_ZERO +- +-/* Trigger refcount exception unconditionally. 
*/ +-#define REFCOUNT_ERROR \ +- "jmp 111f\n\t" \ +- _REFCOUNT_EXCEPTION +- +-static __always_inline void refcount_add(unsigned int i, refcount_t *r) +-{ +- asm volatile(LOCK_PREFIX "addl %1,%0\n\t" +- REFCOUNT_CHECK_LT_ZERO +- : [var] "+m" (r->refs.counter) +- : "ir" (i) +- : "cc", "cx"); +-} +- +-static __always_inline void refcount_inc(refcount_t *r) +-{ +- asm volatile(LOCK_PREFIX "incl %0\n\t" +- REFCOUNT_CHECK_LT_ZERO +- : [var] "+m" (r->refs.counter) +- : : "cc", "cx"); +-} +- +-static __always_inline void refcount_dec(refcount_t *r) +-{ +- asm volatile(LOCK_PREFIX "decl %0\n\t" +- REFCOUNT_CHECK_LE_ZERO +- : [var] "+m" (r->refs.counter) +- : : "cc", "cx"); +-} +- +-static __always_inline __must_check +-bool refcount_sub_and_test(unsigned int i, refcount_t *r) +-{ +- bool ret = GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", +- REFCOUNT_CHECK_LT_ZERO, +- r->refs.counter, e, "er", i, "cx"); +- +- if (ret) { +- smp_acquire__after_ctrl_dep(); +- return true; +- } +- +- return false; +-} +- +-static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r) +-{ +- bool ret = GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", +- REFCOUNT_CHECK_LT_ZERO, +- r->refs.counter, e, "cx"); +- +- if (ret) { +- smp_acquire__after_ctrl_dep(); +- return true; +- } +- +- return false; +-} +- +-static __always_inline __must_check +-bool refcount_add_not_zero(unsigned int i, refcount_t *r) +-{ +- int c, result; +- +- c = atomic_read(&(r->refs)); +- do { +- if (unlikely(c == 0)) +- return false; +- +- result = c + i; +- +- /* Did we try to increment from/to an undesirable state? */ +- if (unlikely(c < 0 || c == INT_MAX || result < c)) { +- asm volatile(REFCOUNT_ERROR +- : : [var] "m" (r->refs.counter) +- : "cc", "cx"); +- break; +- } +- +- } while (!atomic_try_cmpxchg(&(r->refs), &c, result)); +- +- return c != 0; +-} +- +-static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r) +-{ +- return refcount_add_not_zero(1, r); +-} +- +-#endif +diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h +index 61d93f062a36e..d6a0e57ecc073 100644 +--- a/arch/x86/include/asm/uaccess.h ++++ b/arch/x86/include/asm/uaccess.h +@@ -378,18 +378,6 @@ do { \ + : "=r" (err), ltype(x) \ + : "m" (__m(addr)), "i" (errret), "0" (err)) + +-#define __get_user_asm_nozero(x, addr, err, itype, rtype, ltype, errret) \ +- asm volatile("\n" \ +- "1: mov"itype" %2,%"rtype"1\n" \ +- "2:\n" \ +- ".section .fixup,\"ax\"\n" \ +- "3: mov %3,%0\n" \ +- " jmp 2b\n" \ +- ".previous\n" \ +- _ASM_EXTABLE_UA(1b, 3b) \ +- : "=r" (err), ltype(x) \ +- : "m" (__m(addr)), "i" (errret), "0" (err)) +- + /* + * This doesn't do __uaccess_begin/end - the exception handling + * around it must do that. 
+@@ -453,6 +441,103 @@ __pu_label: \ + __builtin_expect(__gu_err, 0); \ + }) + ++#ifdef CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT ++#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ ++ bool success; \ ++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ ++ __typeof__(*(_ptr)) __old = *_old; \ ++ __typeof__(*(_ptr)) __new = (_new); \ ++ asm_volatile_goto("\n" \ ++ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ ++ _ASM_EXTABLE_UA(1b, %l[label]) \ ++ : CC_OUT(z) (success), \ ++ [ptr] "+m" (*_ptr), \ ++ [old] "+a" (__old) \ ++ : [new] ltype (__new) \ ++ : "memory" \ ++ : label); \ ++ if (unlikely(!success)) \ ++ *_old = __old; \ ++ likely(success); }) ++ ++#ifdef CONFIG_X86_32 ++#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ ++ bool success; \ ++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ ++ __typeof__(*(_ptr)) __old = *_old; \ ++ __typeof__(*(_ptr)) __new = (_new); \ ++ asm_volatile_goto("\n" \ ++ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ ++ _ASM_EXTABLE_UA(1b, %l[label]) \ ++ : CC_OUT(z) (success), \ ++ "+A" (__old), \ ++ [ptr] "+m" (*_ptr) \ ++ : "b" ((u32)__new), \ ++ "c" ((u32)((u64)__new >> 32)) \ ++ : "memory" \ ++ : label); \ ++ if (unlikely(!success)) \ ++ *_old = __old; \ ++ likely(success); }) ++#endif // CONFIG_X86_32 ++#else // !CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT ++#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ ++ int __err = 0; \ ++ bool success; \ ++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ ++ __typeof__(*(_ptr)) __old = *_old; \ ++ __typeof__(*(_ptr)) __new = (_new); \ ++ asm volatile("\n" \ ++ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ ++ CC_SET(z) \ ++ "2:\n" \ ++ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \ ++ %[errout]) \ ++ : CC_OUT(z) (success), \ ++ [errout] "+r" (__err), \ ++ [ptr] "+m" (*_ptr), \ ++ [old] "+a" (__old) \ ++ : [new] ltype (__new) \ ++ : "memory"); \ ++ if (unlikely(__err)) \ ++ goto label; \ ++ if (unlikely(!success)) \ ++ *_old = __old; \ ++ likely(success); }) ++ ++#ifdef CONFIG_X86_32 ++/* ++ * Unlike the normal CMPXCHG, hardcode ECX for both success/fail and error. ++ * There are only six GPRs available and four (EAX, EBX, ECX, and EDX) are ++ * hardcoded by CMPXCHG8B, leaving only ESI and EDI. If the compiler uses ++ * both ESI and EDI for the memory operand, compilation will fail if the error ++ * is an input+output as there will be no register available for input. 
++ */ ++#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ ++ int __result; \ ++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ ++ __typeof__(*(_ptr)) __old = *_old; \ ++ __typeof__(*(_ptr)) __new = (_new); \ ++ asm volatile("\n" \ ++ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ ++ "mov $0, %%ecx\n\t" \ ++ "setz %%cl\n" \ ++ "2:\n" \ ++ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %%ecx) \ ++ : [result]"=c" (__result), \ ++ "+A" (__old), \ ++ [ptr] "+m" (*_ptr) \ ++ : "b" ((u32)__new), \ ++ "c" ((u32)((u64)__new >> 32)) \ ++ : "memory", "cc"); \ ++ if (unlikely(__result < 0)) \ ++ goto label; \ ++ if (unlikely(!__result)) \ ++ *_old = __old; \ ++ likely(__result); }) ++#endif // CONFIG_X86_32 ++#endif // CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT ++ + /* FIXME: this hack is definitely wrong -AK */ + struct __large_struct { unsigned long buf[100]; }; + #define __m(x) (*(struct __large_struct __user *)(x)) +@@ -734,6 +819,51 @@ do { \ + if (unlikely(__gu_err)) goto err_label; \ + } while (0) + ++extern void __try_cmpxchg_user_wrong_size(void); ++ ++#ifndef CONFIG_X86_32 ++#define __try_cmpxchg64_user_asm(_ptr, _oldp, _nval, _label) \ ++ __try_cmpxchg_user_asm("q", "r", (_ptr), (_oldp), (_nval), _label) ++#endif ++ ++/* ++ * Force the pointer to u<size> to match the size expected by the asm helper. ++ * clang/LLVM compiles all cases and only discards the unused paths after ++ * processing errors, which breaks i386 if the pointer is an 8-byte value. ++ */ ++#define unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ ++ bool __ret; \ ++ __chk_user_ptr(_ptr); \ ++ switch (sizeof(*(_ptr))) { \ ++ case 1: __ret = __try_cmpxchg_user_asm("b", "q", \ ++ (__force u8 *)(_ptr), (_oldp), \ ++ (_nval), _label); \ ++ break; \ ++ case 2: __ret = __try_cmpxchg_user_asm("w", "r", \ ++ (__force u16 *)(_ptr), (_oldp), \ ++ (_nval), _label); \ ++ break; \ ++ case 4: __ret = __try_cmpxchg_user_asm("l", "r", \ ++ (__force u32 *)(_ptr), (_oldp), \ ++ (_nval), _label); \ ++ break; \ ++ case 8: __ret = __try_cmpxchg64_user_asm((__force u64 *)(_ptr), (_oldp),\ ++ (_nval), _label); \ ++ break; \ ++ default: __try_cmpxchg_user_wrong_size(); \ ++ } \ ++ __ret; }) ++ ++/* "Returns" 0 on success, 1 on failure, -EFAULT if the access faults. */ ++#define __try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ ++ int __ret = -EFAULT; \ ++ __uaccess_begin_nospec(); \ ++ __ret = !unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label); \ ++_label: \ ++ __uaccess_end(); \ ++ __ret; \ ++ }) ++ + /* + * We want the unsafe accessors to always be inlined and use + * the error labels - thus the macro games. 
+diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h +index ba2dc19306303..388a40660c7b5 100644 +--- a/arch/x86/include/asm/uaccess_32.h ++++ b/arch/x86/include/asm/uaccess_32.h +@@ -23,33 +23,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) + static __always_inline unsigned long + raw_copy_from_user(void *to, const void __user *from, unsigned long n) + { +- if (__builtin_constant_p(n)) { +- unsigned long ret; +- +- switch (n) { +- case 1: +- ret = 0; +- __uaccess_begin_nospec(); +- __get_user_asm_nozero(*(u8 *)to, from, ret, +- "b", "b", "=q", 1); +- __uaccess_end(); +- return ret; +- case 2: +- ret = 0; +- __uaccess_begin_nospec(); +- __get_user_asm_nozero(*(u16 *)to, from, ret, +- "w", "w", "=r", 2); +- __uaccess_end(); +- return ret; +- case 4: +- ret = 0; +- __uaccess_begin_nospec(); +- __get_user_asm_nozero(*(u32 *)to, from, ret, +- "l", "k", "=r", 4); +- __uaccess_end(); +- return ret; +- } +- } + return __copy_user_ll(to, (__force const void *)from, n); + } + +diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h +index 5cd1caa8bc653..bc10e3dc64fed 100644 +--- a/arch/x86/include/asm/uaccess_64.h ++++ b/arch/x86/include/asm/uaccess_64.h +@@ -65,117 +65,13 @@ copy_to_user_mcsafe(void *to, const void *from, unsigned len) + static __always_inline __must_check unsigned long + raw_copy_from_user(void *dst, const void __user *src, unsigned long size) + { +- int ret = 0; +- +- if (!__builtin_constant_p(size)) +- return copy_user_generic(dst, (__force void *)src, size); +- switch (size) { +- case 1: +- __uaccess_begin_nospec(); +- __get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src, +- ret, "b", "b", "=q", 1); +- __uaccess_end(); +- return ret; +- case 2: +- __uaccess_begin_nospec(); +- __get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src, +- ret, "w", "w", "=r", 2); +- __uaccess_end(); +- return ret; +- case 4: +- __uaccess_begin_nospec(); +- __get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src, +- ret, "l", "k", "=r", 4); +- __uaccess_end(); +- return ret; +- case 8: +- __uaccess_begin_nospec(); +- __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, +- ret, "q", "", "=r", 8); +- __uaccess_end(); +- return ret; +- case 10: +- __uaccess_begin_nospec(); +- __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, +- ret, "q", "", "=r", 10); +- if (likely(!ret)) +- __get_user_asm_nozero(*(u16 *)(8 + (char *)dst), +- (u16 __user *)(8 + (char __user *)src), +- ret, "w", "w", "=r", 2); +- __uaccess_end(); +- return ret; +- case 16: +- __uaccess_begin_nospec(); +- __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, +- ret, "q", "", "=r", 16); +- if (likely(!ret)) +- __get_user_asm_nozero(*(u64 *)(8 + (char *)dst), +- (u64 __user *)(8 + (char __user *)src), +- ret, "q", "", "=r", 8); +- __uaccess_end(); +- return ret; +- default: +- return copy_user_generic(dst, (__force void *)src, size); +- } ++ return copy_user_generic(dst, (__force void *)src, size); + } + + static __always_inline __must_check unsigned long + raw_copy_to_user(void __user *dst, const void *src, unsigned long size) + { +- int ret = 0; +- +- if (!__builtin_constant_p(size)) +- return copy_user_generic((__force void *)dst, src, size); +- switch (size) { +- case 1: +- __uaccess_begin(); +- __put_user_asm(*(u8 *)src, (u8 __user *)dst, +- ret, "b", "b", "iq", 1); +- __uaccess_end(); +- return ret; +- case 2: +- __uaccess_begin(); +- __put_user_asm(*(u16 *)src, (u16 __user *)dst, +- ret, "w", "w", "ir", 2); +- __uaccess_end(); +- return ret; +- case 4: 
+- __uaccess_begin(); +- __put_user_asm(*(u32 *)src, (u32 __user *)dst, +- ret, "l", "k", "ir", 4); +- __uaccess_end(); +- return ret; +- case 8: +- __uaccess_begin(); +- __put_user_asm(*(u64 *)src, (u64 __user *)dst, +- ret, "q", "", "er", 8); +- __uaccess_end(); +- return ret; +- case 10: +- __uaccess_begin(); +- __put_user_asm(*(u64 *)src, (u64 __user *)dst, +- ret, "q", "", "er", 10); +- if (likely(!ret)) { +- asm("":::"memory"); +- __put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst, +- ret, "w", "w", "ir", 2); +- } +- __uaccess_end(); +- return ret; +- case 16: +- __uaccess_begin(); +- __put_user_asm(*(u64 *)src, (u64 __user *)dst, +- ret, "q", "", "er", 16); +- if (likely(!ret)) { +- asm("":::"memory"); +- __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst, +- ret, "q", "", "er", 8); +- } +- __uaccess_end(); +- return ret; +- default: +- return copy_user_generic((__force void *)dst, src, size); +- } ++ return copy_user_generic((__force void *)dst, src, size); + } + + static __always_inline __must_check +diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c +index 8a2b8e7913149..9b98a7d8ac604 100644 +--- a/arch/x86/kernel/cpu/mce/core.c ++++ b/arch/x86/kernel/cpu/mce/core.c +@@ -397,13 +397,16 @@ static int msr_to_offset(u32 msr) + return -1; + } + +-__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, +- struct pt_regs *regs, int trapnr, +- unsigned long error_code, +- unsigned long fault_addr) ++static void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) + { +- pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", +- (unsigned int)regs->cx, regs->ip, (void *)regs->ip); ++ if (wrmsr) { ++ pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", ++ (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax, ++ regs->ip, (void *)regs->ip); ++ } else { ++ pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", ++ (unsigned int)regs->cx, regs->ip, (void *)regs->ip); ++ } + + show_stack_regs(regs); + +@@ -411,7 +414,14 @@ __visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, + + while (true) + cpu_relax(); ++} + ++__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, ++ struct pt_regs *regs, int trapnr, ++ unsigned long error_code, ++ unsigned long fault_addr) ++{ ++ ex_handler_msr_mce(regs, false); + return true; + } + +@@ -447,17 +457,7 @@ __visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup, + unsigned long error_code, + unsigned long fault_addr) + { +- pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", +- (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax, +- regs->ip, (void *)regs->ip); +- +- show_stack_regs(regs); +- +- panic("MCA architectural violation!\n"); +- +- while (true) +- cpu_relax(); +- ++ ex_handler_msr_mce(regs, true); + return true; + } + +diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c +index 4d75bc656f971..30bb0bd3b1b88 100644 +--- a/arch/x86/mm/extable.c ++++ b/arch/x86/mm/extable.c +@@ -44,55 +44,6 @@ __visible bool ex_handler_fault(const struct exception_table_entry *fixup, + } + EXPORT_SYMBOL_GPL(ex_handler_fault); + +-/* +- * Handler for UD0 exception following a failed test against the +- * result of a refcount inc/dec/add/sub. 
+- */ +-__visible bool ex_handler_refcount(const struct exception_table_entry *fixup, +- struct pt_regs *regs, int trapnr, +- unsigned long error_code, +- unsigned long fault_addr) +-{ +- /* First unconditionally saturate the refcount. */ +- *(int *)regs->cx = INT_MIN / 2; +- +- /* +- * Strictly speaking, this reports the fixup destination, not +- * the fault location, and not the actually overflowing +- * instruction, which is the instruction before the "js", but +- * since that instruction could be a variety of lengths, just +- * report the location after the overflow, which should be close +- * enough for finding the overflow, as it's at least back in +- * the function, having returned from .text.unlikely. +- */ +- regs->ip = ex_fixup_addr(fixup); +- +- /* +- * This function has been called because either a negative refcount +- * value was seen by any of the refcount functions, or a zero +- * refcount value was seen by refcount_dec(). +- * +- * If we crossed from INT_MAX to INT_MIN, OF (Overflow Flag: result +- * wrapped around) will be set. Additionally, seeing the refcount +- * reach 0 will set ZF (Zero Flag: result was zero). In each of +- * these cases we want a report, since it's a boundary condition. +- * The SF case is not reported since it indicates post-boundary +- * manipulations below zero or above INT_MAX. And if none of the +- * flags are set, something has gone very wrong, so report it. +- */ +- if (regs->flags & (X86_EFLAGS_OF | X86_EFLAGS_ZF)) { +- bool zero = regs->flags & X86_EFLAGS_ZF; +- +- refcount_error_report(regs, zero ? "hit zero" : "overflow"); +- } else if ((regs->flags & X86_EFLAGS_SF) == 0) { +- /* Report if none of OF, ZF, nor SF are set. */ +- refcount_error_report(regs, "unexpected saturation"); +- } +- +- return true; +-} +-EXPORT_SYMBOL(ex_handler_refcount); +- + /* + * Handler for when we fail to restore a task's FPU state. 
We should never get + * here because the FPU state of a task using the FPU (task->thread.fpu.state) +diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c +index 82b76df43ae57..3b79bcd03e7bc 100644 +--- a/drivers/crypto/chelsio/chtls/chtls_cm.c ++++ b/drivers/crypto/chelsio/chtls/chtls_cm.c +@@ -1103,8 +1103,8 @@ static struct sock *chtls_recv_sock(struct sock *lsk, + csk->sndbuf = newsk->sk_sndbuf; + csk->smac_idx = ((struct port_info *)netdev_priv(ndev))->smt_idx; + RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk), +- sock_net(newsk)-> +- ipv4.sysctl_tcp_window_scaling, ++ READ_ONCE(sock_net(newsk)-> ++ ipv4.sysctl_tcp_window_scaling), + tp->window_clamp); + neigh_release(n); + inet_inherit_port(&tcp_hashinfo, lsk, newsk); +@@ -1235,7 +1235,7 @@ static void chtls_pass_accept_request(struct sock *sk, + chtls_set_req_addr(oreq, iph->daddr, iph->saddr); + ip_dsfield = ipv4_get_dsfield(iph); + if (req->tcpopt.wsf <= 14 && +- sock_net(sk)->ipv4.sysctl_tcp_window_scaling) { ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) { + inet_rsk(oreq)->wscale_ok = 1; + inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf; + } +diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c +index 54da66d02b0e5..317f54f19477e 100644 +--- a/drivers/gpio/gpio-pca953x.c ++++ b/drivers/gpio/gpio-pca953x.c +@@ -379,6 +379,9 @@ static const struct regmap_config pca953x_i2c_regmap = { + .reg_bits = 8, + .val_bits = 8, + ++ .use_single_read = true, ++ .use_single_write = true, ++ + .readable_reg = pca953x_readable_register, + .writeable_reg = pca953x_writeable_register, + .volatile_reg = pca953x_volatile_register, +diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug +index 41c8e39a73ba8..e4f03fcb125e4 100644 +--- a/drivers/gpu/drm/i915/Kconfig.debug ++++ b/drivers/gpu/drm/i915/Kconfig.debug +@@ -21,7 +21,6 @@ config DRM_I915_DEBUG + depends on DRM_I915 + select DEBUG_FS + select PREEMPT_COUNT +- select REFCOUNT_FULL + select I2C_CHARDEV + select STACKDEPOT + select DRM_DP_AUX_CHARDEV +diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c +index 3a1bdc75275f4..8750e444f4492 100644 +--- a/drivers/i2c/busses/i2c-cadence.c ++++ b/drivers/i2c/busses/i2c-cadence.c +@@ -198,9 +198,9 @@ static inline bool cdns_is_holdquirk(struct cdns_i2c *id, bool hold_wrkaround) + */ + static irqreturn_t cdns_i2c_isr(int irq, void *ptr) + { +- unsigned int isr_status, avail_bytes, updatetx; ++ unsigned int isr_status, avail_bytes; + unsigned int bytes_to_send; +- bool hold_quirk; ++ bool updatetx; + struct cdns_i2c *id = ptr; + /* Signal completion only after everything is updated */ + int done_flag = 0; +@@ -219,11 +219,7 @@ static irqreturn_t cdns_i2c_isr(int irq, void *ptr) + * Check if transfer size register needs to be updated again for a + * large data receive operation. 
+ */ +- updatetx = 0; +- if (id->recv_count > id->curr_recv_count) +- updatetx = 1; +- +- hold_quirk = (id->quirks & CDNS_I2C_BROKEN_HOLD_BIT) && updatetx; ++ updatetx = id->recv_count > id->curr_recv_count; + + /* When receiving, handle data interrupt and completion interrupt */ + if (id->p_recv_buf && +@@ -246,7 +242,7 @@ static irqreturn_t cdns_i2c_isr(int irq, void *ptr) + id->recv_count--; + id->curr_recv_count--; + +- if (cdns_is_holdquirk(id, hold_quirk)) ++ if (cdns_is_holdquirk(id, updatetx)) + break; + } + +@@ -257,7 +253,7 @@ static irqreturn_t cdns_i2c_isr(int irq, void *ptr) + * maintain transfer size non-zero while performing a large + * receive operation. + */ +- if (cdns_is_holdquirk(id, hold_quirk)) { ++ if (cdns_is_holdquirk(id, updatetx)) { + /* wait while fifo is full */ + while (cdns_i2c_readreg(CDNS_I2C_XFER_SIZE_OFFSET) != + (id->curr_recv_count - CDNS_I2C_FIFO_DEPTH)) +@@ -279,22 +275,6 @@ static irqreturn_t cdns_i2c_isr(int irq, void *ptr) + CDNS_I2C_XFER_SIZE_OFFSET); + id->curr_recv_count = id->recv_count; + } +- } else if (id->recv_count && !hold_quirk && +- !id->curr_recv_count) { +- +- /* Set the slave address in address register*/ +- cdns_i2c_writereg(id->p_msg->addr & CDNS_I2C_ADDR_MASK, +- CDNS_I2C_ADDR_OFFSET); +- +- if (id->recv_count > CDNS_I2C_TRANSFER_SIZE) { +- cdns_i2c_writereg(CDNS_I2C_TRANSFER_SIZE, +- CDNS_I2C_XFER_SIZE_OFFSET); +- id->curr_recv_count = CDNS_I2C_TRANSFER_SIZE; +- } else { +- cdns_i2c_writereg(id->recv_count, +- CDNS_I2C_XFER_SIZE_OFFSET); +- id->curr_recv_count = id->recv_count; +- } + } + + /* Clear hold (if not repeated start) and signal completion */ +diff --git a/drivers/misc/lkdtm/refcount.c b/drivers/misc/lkdtm/refcount.c +index 0a146b32da132..abf3b7c1f686c 100644 +--- a/drivers/misc/lkdtm/refcount.c ++++ b/drivers/misc/lkdtm/refcount.c +@@ -6,14 +6,6 @@ + #include "lkdtm.h" + #include <linux/refcount.h> + +-#ifdef CONFIG_REFCOUNT_FULL +-#define REFCOUNT_MAX (UINT_MAX - 1) +-#define REFCOUNT_SATURATED UINT_MAX +-#else +-#define REFCOUNT_MAX INT_MAX +-#define REFCOUNT_SATURATED (INT_MIN / 2) +-#endif +- + static void overflow_check(refcount_t *ref) + { + switch (refcount_read(ref)) { +diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c +index 649c5c429bd7c..1288b5e3d2201 100644 +--- a/drivers/net/ethernet/emulex/benet/be_cmds.c ++++ b/drivers/net/ethernet/emulex/benet/be_cmds.c +@@ -2287,7 +2287,7 @@ err: + + /* Uses sync mcc */ + int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, +- u8 page_num, u8 *data) ++ u8 page_num, u32 off, u32 len, u8 *data) + { + struct be_dma_mem cmd; + struct be_mcc_wrb *wrb; +@@ -2321,10 +2321,10 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, + req->port = cpu_to_le32(adapter->hba_port_num); + req->page_num = cpu_to_le32(page_num); + status = be_mcc_notify_wait(adapter); +- if (!status) { ++ if (!status && len > 0) { + struct be_cmd_resp_port_type *resp = cmd.va; + +- memcpy(data, resp->page_data, PAGE_DATA_LEN); ++ memcpy(data, resp->page_data + off, len); + } + err: + mutex_unlock(&adapter->mcc_lock); +@@ -2415,7 +2415,7 @@ int be_cmd_query_cable_type(struct be_adapter *adapter) + int status; + + status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, +- page_data); ++ 0, PAGE_DATA_LEN, page_data); + if (!status) { + switch (adapter->phy.interface_type) { + case PHY_TYPE_QSFP: +@@ -2440,7 +2440,7 @@ int be_cmd_query_sfp_info(struct be_adapter *adapter) + int status; + + status = 
be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, +- page_data); ++ 0, PAGE_DATA_LEN, page_data); + if (!status) { + strlcpy(adapter->phy.vendor_name, page_data + + SFP_VENDOR_NAME_OFFSET, SFP_VENDOR_NAME_LEN - 1); +diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h +index c30d6d6f0f3a0..9e17d6a7ab8cd 100644 +--- a/drivers/net/ethernet/emulex/benet/be_cmds.h ++++ b/drivers/net/ethernet/emulex/benet/be_cmds.h +@@ -2427,7 +2427,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, u8 beacon, + int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, + u32 *state); + int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, +- u8 page_num, u8 *data); ++ u8 page_num, u32 off, u32 len, u8 *data); + int be_cmd_query_cable_type(struct be_adapter *adapter); + int be_cmd_query_sfp_info(struct be_adapter *adapter); + int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd, +diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c +index 5bb5abf995887..7cc1f41971c57 100644 +--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c ++++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c +@@ -1339,7 +1339,7 @@ static int be_get_module_info(struct net_device *netdev, + return -EOPNOTSUPP; + + status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, +- page_data); ++ 0, PAGE_DATA_LEN, page_data); + if (!status) { + if (!page_data[SFP_PLUS_SFF_8472_COMP]) { + modinfo->type = ETH_MODULE_SFF_8079; +@@ -1357,25 +1357,32 @@ static int be_get_module_eeprom(struct net_device *netdev, + { + struct be_adapter *adapter = netdev_priv(netdev); + int status; ++ u32 begin, end; + + if (!check_privilege(adapter, MAX_PRIVILEGES)) + return -EOPNOTSUPP; + +- status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, +- data); +- if (status) +- goto err; ++ begin = eeprom->offset; ++ end = eeprom->offset + eeprom->len; ++ ++ if (begin < PAGE_DATA_LEN) { ++ status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, begin, ++ min_t(u32, end, PAGE_DATA_LEN) - begin, ++ data); ++ if (status) ++ goto err; ++ ++ data += PAGE_DATA_LEN - begin; ++ begin = PAGE_DATA_LEN; ++ } + +- if (eeprom->offset + eeprom->len > PAGE_DATA_LEN) { +- status = be_cmd_read_port_transceiver_data(adapter, +- TR_PAGE_A2, +- data + +- PAGE_DATA_LEN); ++ if (end > PAGE_DATA_LEN) { ++ status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A2, ++ begin - PAGE_DATA_LEN, ++ end - begin, data); + if (status) + goto err; + } +- if (eeprom->offset) +- memcpy(data, data + eeprom->offset, eeprom->len); + err: + return be_cmd_status(status); + } +diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c +index 05442bbc218cd..0610d344fdbf0 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_main.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c +@@ -10068,7 +10068,7 @@ static int i40e_reset(struct i40e_pf *pf) + **/ + static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) + { +- int old_recovery_mode_bit = test_bit(__I40E_RECOVERY_MODE, pf->state); ++ const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf); + struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_hw *hw = &pf->hw; + i40e_status ret; +@@ -10076,13 +10076,11 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) + int v; + + if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) && +- 
i40e_check_recovery_mode(pf)) { ++ is_recovery_mode_reported) + i40e_set_ethtool_ops(pf->vsi[pf->lan_vsi]->netdev); +- } + + if (test_bit(__I40E_DOWN, pf->state) && +- !test_bit(__I40E_RECOVERY_MODE, pf->state) && +- !old_recovery_mode_bit) ++ !test_bit(__I40E_RECOVERY_MODE, pf->state)) + goto clear_recovery; + dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n"); + +@@ -10109,13 +10107,12 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) + * accordingly with regard to resources initialization + * and deinitialization + */ +- if (test_bit(__I40E_RECOVERY_MODE, pf->state) || +- old_recovery_mode_bit) { ++ if (test_bit(__I40E_RECOVERY_MODE, pf->state)) { + if (i40e_get_capabilities(pf, + i40e_aqc_opc_list_func_capabilities)) + goto end_unlock; + +- if (test_bit(__I40E_RECOVERY_MODE, pf->state)) { ++ if (is_recovery_mode_reported) { + /* we're staying in recovery mode so we'll reinitialize + * misc vector here + */ +diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c +index 7a30d5d5ef53a..c6905d1b6182c 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c +@@ -1263,11 +1263,10 @@ static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring, + { + struct iavf_rx_buffer *rx_buffer; + +- if (!size) +- return NULL; +- + rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; + prefetchw(rx_buffer->page); ++ if (!size) ++ return rx_buffer; + + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, +diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h +index 50d7c04dccf59..7bc7d7618fe1e 100644 +--- a/drivers/net/ethernet/intel/igc/igc_regs.h ++++ b/drivers/net/ethernet/intel/igc/igc_regs.h +@@ -236,4 +236,6 @@ do { \ + + #define array_rd32(reg, offset) (igc_rd32(hw, (reg) + ((offset) << 2))) + ++#define IGC_REMOVED(h) unlikely(!(h)) ++ + #endif +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h +index 39e73ad60352f..fa49ef2afde5f 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h +@@ -773,6 +773,7 @@ struct ixgbe_adapter { + #ifdef CONFIG_IXGBE_IPSEC + struct ixgbe_ipsec *ipsec; + #endif /* CONFIG_IXGBE_IPSEC */ ++ spinlock_t vfs_lock; + }; + + static inline u8 ixgbe_max_rss_indices(struct ixgbe_adapter *adapter) +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +index 8a894e5d923f0..f8aa1a0b89c5d 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +@@ -6396,6 +6396,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, + /* n-tuple support exists, always init our spinlock */ + spin_lock_init(&adapter->fdir_perfect_lock); + ++ /* init spinlock to avoid concurrency of VF resources */ ++ spin_lock_init(&adapter->vfs_lock); ++ + #ifdef CONFIG_IXGBE_DCB + ixgbe_init_dcb(adapter); + #endif +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +index cf5c2b9465eba..0e73e3b1af19a 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +@@ -204,10 +204,13 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs) + int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) + { + unsigned int num_vfs = adapter->num_vfs, 
vf; ++ unsigned long flags; + int rss; + ++ spin_lock_irqsave(&adapter->vfs_lock, flags); + /* set num VFs to 0 to prevent access to vfinfo */ + adapter->num_vfs = 0; ++ spin_unlock_irqrestore(&adapter->vfs_lock, flags); + + /* put the reference to all of the vf devices */ + for (vf = 0; vf < num_vfs; ++vf) { +@@ -1305,8 +1308,10 @@ static void ixgbe_rcv_ack_from_vf(struct ixgbe_adapter *adapter, u32 vf) + void ixgbe_msg_task(struct ixgbe_adapter *adapter) + { + struct ixgbe_hw *hw = &adapter->hw; ++ unsigned long flags; + u32 vf; + ++ spin_lock_irqsave(&adapter->vfs_lock, flags); + for (vf = 0; vf < adapter->num_vfs; vf++) { + /* process any reset requests */ + if (!ixgbe_check_for_rst(hw, vf)) +@@ -1320,6 +1325,7 @@ void ixgbe_msg_task(struct ixgbe_adapter *adapter) + if (!ixgbe_check_for_ack(hw, vf)) + ixgbe_rcv_ack_from_vf(adapter, vf); + } ++ spin_unlock_irqrestore(&adapter->vfs_lock, flags); + } + + void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter) +diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +index 2f013fc716985..91214cce874b1 100644 +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +@@ -3871,7 +3871,7 @@ static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp, + { + const struct fib_nh *nh = fib_info_nh(fi, 0); + +- return nh->fib_nh_scope == RT_SCOPE_LINK || ++ return nh->fib_nh_gw_family || + mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL); + } + +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +index 66e60c7e98504..c440b192ec715 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +@@ -215,6 +215,9 @@ static void dwmac4_map_mtl_dma(struct mac_device_info *hw, u32 queue, u32 chan) + if (queue == 0 || queue == 4) { + value &= ~MTL_RXQ_DMA_Q04MDMACH_MASK; + value |= MTL_RXQ_DMA_Q04MDMACH(chan); ++ } else if (queue > 4) { ++ value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue - 4); ++ value |= MTL_RXQ_DMA_QXMDMACH(chan, queue - 4); + } else { + value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue); + value |= MTL_RXQ_DMA_QXMDMACH(chan, queue); +diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c +index ea9c8361bf464..5ee3e457a79c7 100644 +--- a/drivers/net/usb/ax88179_178a.c ++++ b/drivers/net/usb/ax88179_178a.c +@@ -1690,7 +1690,7 @@ static const struct driver_info ax88179_info = { + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, +- .flags = FLAG_ETHER | FLAG_FRAMING_AX, ++ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, + }; +@@ -1703,7 +1703,7 @@ static const struct driver_info ax88178a_info = { + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, +- .flags = FLAG_ETHER | FLAG_FRAMING_AX, ++ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, + }; +@@ -1716,7 +1716,7 @@ static const struct driver_info cypress_GX3_info = { + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, +- .flags = FLAG_ETHER | FLAG_FRAMING_AX, ++ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, + }; +@@ -1729,7 +1729,7 @@ static const struct driver_info dlink_dub1312_info = { + .link_reset = 
ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, +- .flags = FLAG_ETHER | FLAG_FRAMING_AX, ++ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, + }; +@@ -1742,7 +1742,7 @@ static const struct driver_info sitecom_info = { + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, +- .flags = FLAG_ETHER | FLAG_FRAMING_AX, ++ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, + }; +@@ -1755,7 +1755,7 @@ static const struct driver_info samsung_info = { + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, +- .flags = FLAG_ETHER | FLAG_FRAMING_AX, ++ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, + }; +@@ -1768,7 +1768,7 @@ static const struct driver_info lenovo_info = { + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, +- .flags = FLAG_ETHER | FLAG_FRAMING_AX, ++ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, + }; +@@ -1781,7 +1781,7 @@ static const struct driver_info belkin_info = { + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, +- .flags = FLAG_ETHER | FLAG_FRAMING_AX, ++ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, + }; +diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c +index 8c45d6c32c30e..3d48fa685aaae 100644 +--- a/drivers/pci/controller/pci-hyperv.c ++++ b/drivers/pci/controller/pci-hyperv.c +@@ -1110,6 +1110,10 @@ static void hv_int_desc_free(struct hv_pci_dev *hpdev, + u8 buffer[sizeof(struct pci_delete_interrupt)]; + } ctxt; + ++ if (!int_desc->vector_count) { ++ kfree(int_desc); ++ return; ++ } + memset(&ctxt, 0, sizeof(ctxt)); + int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message; + int_pkt->message_type.type = +@@ -1172,6 +1176,28 @@ static void hv_irq_mask(struct irq_data *data) + pci_msi_mask_irq(data); + } + ++static unsigned int hv_msi_get_int_vector(struct irq_data *data) ++{ ++ struct irq_cfg *cfg = irqd_cfg(data); ++ ++ return cfg->vector; ++} ++ ++static int hv_msi_prepare(struct irq_domain *domain, struct device *dev, ++ int nvec, msi_alloc_info_t *info) ++{ ++ int ret = pci_msi_prepare(domain, dev, nvec, info); ++ ++ /* ++ * By using the interrupt remapper in the hypervisor IOMMU, contiguous ++ * CPU vectors is not needed for multi-MSI ++ */ ++ if (info->type == X86_IRQ_ALLOC_TYPE_MSI) ++ info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; ++ ++ return ret; ++} ++ + /** + * hv_irq_unmask() - "Unmask" the IRQ by setting its current + * affinity. 
+@@ -1187,6 +1213,7 @@ static void hv_irq_unmask(struct irq_data *data) + struct msi_desc *msi_desc = irq_data_get_msi_desc(data); + struct irq_cfg *cfg = irqd_cfg(data); + struct retarget_msi_interrupt *params; ++ struct tran_int_desc *int_desc; + struct hv_pcibus_device *hbus; + struct cpumask *dest; + cpumask_var_t tmp; +@@ -1201,6 +1228,7 @@ static void hv_irq_unmask(struct irq_data *data) + pdev = msi_desc_to_pci_dev(msi_desc); + pbus = pdev->bus; + hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata); ++ int_desc = data->chip_data; + + spin_lock_irqsave(&hbus->retarget_msi_interrupt_lock, flags); + +@@ -1208,8 +1236,8 @@ static void hv_irq_unmask(struct irq_data *data) + memset(params, 0, sizeof(*params)); + params->partition_id = HV_PARTITION_ID_SELF; + params->int_entry.source = 1; /* MSI(-X) */ +- params->int_entry.address = msi_desc->msg.address_lo; +- params->int_entry.data = msi_desc->msg.data; ++ params->int_entry.address = int_desc->address & 0xffffffff; ++ params->int_entry.data = int_desc->data; + params->device_id = (hbus->hdev->dev_instance.b[5] << 24) | + (hbus->hdev->dev_instance.b[4] << 16) | + (hbus->hdev->dev_instance.b[7] << 8) | +@@ -1296,12 +1324,12 @@ static void hv_pci_compose_compl(void *context, struct pci_response *resp, + + static u32 hv_compose_msi_req_v1( + struct pci_create_interrupt *int_pkt, struct cpumask *affinity, +- u32 slot, u8 vector) ++ u32 slot, u8 vector, u8 vector_count) + { + int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE; + int_pkt->wslot.slot = slot; + int_pkt->int_desc.vector = vector; +- int_pkt->int_desc.vector_count = 1; ++ int_pkt->int_desc.vector_count = vector_count; + int_pkt->int_desc.delivery_mode = dest_Fixed; + + /* +@@ -1315,14 +1343,14 @@ static u32 hv_compose_msi_req_v1( + + static u32 hv_compose_msi_req_v2( + struct pci_create_interrupt2 *int_pkt, struct cpumask *affinity, +- u32 slot, u8 vector) ++ u32 slot, u8 vector, u8 vector_count) + { + int cpu; + + int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2; + int_pkt->wslot.slot = slot; + int_pkt->int_desc.vector = vector; +- int_pkt->int_desc.vector_count = 1; ++ int_pkt->int_desc.vector_count = vector_count; + int_pkt->int_desc.delivery_mode = dest_Fixed; + + /* +@@ -1350,7 +1378,6 @@ static u32 hv_compose_msi_req_v2( + */ + static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) + { +- struct irq_cfg *cfg = irqd_cfg(data); + struct hv_pcibus_device *hbus; + struct hv_pci_dev *hpdev; + struct pci_bus *pbus; +@@ -1359,6 +1386,8 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) + unsigned long flags; + struct compose_comp_ctxt comp; + struct tran_int_desc *int_desc; ++ struct msi_desc *msi_desc; ++ u8 vector, vector_count; + struct { + struct pci_packet pci_pkt; + union { +@@ -1370,7 +1399,17 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) + u32 size; + int ret; + +- pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data)); ++ /* Reuse the previous allocation */ ++ if (data->chip_data) { ++ int_desc = data->chip_data; ++ msg->address_hi = int_desc->address >> 32; ++ msg->address_lo = int_desc->address & 0xffffffff; ++ msg->data = int_desc->data; ++ return; ++ } ++ ++ msi_desc = irq_data_get_msi_desc(data); ++ pdev = msi_desc_to_pci_dev(msi_desc); + dest = irq_data_get_effective_affinity_mask(data); + pbus = pdev->bus; + hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata); +@@ -1378,17 +1417,40 @@ static void hv_compose_msi_msg(struct irq_data *data, 
struct msi_msg *msg) + if (!hpdev) + goto return_null_message; + +- /* Free any previous message that might have already been composed. */ +- if (data->chip_data) { +- int_desc = data->chip_data; +- data->chip_data = NULL; +- hv_int_desc_free(hpdev, int_desc); +- } +- + int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC); + if (!int_desc) + goto drop_reference; + ++ if (!msi_desc->msi_attrib.is_msix && msi_desc->nvec_used > 1) { ++ /* ++ * If this is not the first MSI of Multi MSI, we already have ++ * a mapping. Can exit early. ++ */ ++ if (msi_desc->irq != data->irq) { ++ data->chip_data = int_desc; ++ int_desc->address = msi_desc->msg.address_lo | ++ (u64)msi_desc->msg.address_hi << 32; ++ int_desc->data = msi_desc->msg.data + ++ (data->irq - msi_desc->irq); ++ msg->address_hi = msi_desc->msg.address_hi; ++ msg->address_lo = msi_desc->msg.address_lo; ++ msg->data = int_desc->data; ++ put_pcichild(hpdev); ++ return; ++ } ++ /* ++ * The vector we select here is a dummy value. The correct ++ * value gets sent to the hypervisor in unmask(). This needs ++ * to be aligned with the count, and also not zero. Multi-msi ++ * is powers of 2 up to 32, so 32 will always work here. ++ */ ++ vector = 32; ++ vector_count = msi_desc->nvec_used; ++ } else { ++ vector = hv_msi_get_int_vector(data); ++ vector_count = 1; ++ } ++ + memset(&ctxt, 0, sizeof(ctxt)); + init_completion(&comp.comp_pkt.host_event); + ctxt.pci_pkt.completion_func = hv_pci_compose_compl; +@@ -1399,14 +1461,16 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) + size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1, + dest, + hpdev->desc.win_slot.slot, +- cfg->vector); ++ vector, ++ vector_count); + break; + + case PCI_PROTOCOL_VERSION_1_2: + size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2, + dest, + hpdev->desc.win_slot.slot, +- cfg->vector); ++ vector, ++ vector_count); + break; + + default: +@@ -1518,7 +1582,7 @@ static irq_hw_number_t hv_msi_domain_ops_get_hwirq(struct msi_domain_info *info, + + static struct msi_domain_ops hv_msi_ops = { + .get_hwirq = hv_msi_domain_ops_get_hwirq, +- .msi_prepare = pci_msi_prepare, ++ .msi_prepare = hv_msi_prepare, + .set_desc = pci_msi_set_desc, + .msi_free = hv_msi_free, + }; +diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c +index f9abd4364fbaa..e8149ff1d401c 100644 +--- a/drivers/pinctrl/stm32/pinctrl-stm32.c ++++ b/drivers/pinctrl/stm32/pinctrl-stm32.c +@@ -1215,15 +1215,17 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, + bank->bank_ioport_nr = bank_ioport_nr; + spin_lock_init(&bank->lock); + +- /* create irq hierarchical domain */ +- bank->fwnode = of_node_to_fwnode(np); ++ if (pctl->domain) { ++ /* create irq hierarchical domain */ ++ bank->fwnode = of_node_to_fwnode(np); + +- bank->domain = irq_domain_create_hierarchy(pctl->domain, 0, +- STM32_GPIO_IRQ_LINE, bank->fwnode, +- &stm32_gpio_domain_ops, bank); ++ bank->domain = irq_domain_create_hierarchy(pctl->domain, 0, STM32_GPIO_IRQ_LINE, ++ bank->fwnode, &stm32_gpio_domain_ops, ++ bank); + +- if (!bank->domain) +- return -ENODEV; ++ if (!bank->domain) ++ return -ENODEV; ++ } + + err = gpiochip_add_data(&bank->gpio_chip, bank); + if (err) { +@@ -1393,6 +1395,8 @@ int stm32_pctl_probe(struct platform_device *pdev) + pctl->domain = stm32_pctrl_get_irq_domain(np); + if (IS_ERR(pctl->domain)) + return PTR_ERR(pctl->domain); ++ if (!pctl->domain) ++ dev_warn(dev, "pinctrl without interrupt support\n"); + + /* hwspinlock is optional */ + hwlock_id = 
of_hwspin_lock_get_id(pdev->dev.of_node, 0); +diff --git a/drivers/power/reset/arm-versatile-reboot.c b/drivers/power/reset/arm-versatile-reboot.c +index 08d0a07b58ef2..c7624d7611a7e 100644 +--- a/drivers/power/reset/arm-versatile-reboot.c ++++ b/drivers/power/reset/arm-versatile-reboot.c +@@ -146,6 +146,7 @@ static int __init versatile_reboot_probe(void) + versatile_reboot_type = (enum versatile_reboot)reboot_id->data; + + syscon_regmap = syscon_node_to_regmap(np); ++ of_node_put(np); + if (IS_ERR(syscon_regmap)) + return PTR_ERR(syscon_regmap); + +diff --git a/drivers/s390/char/keyboard.h b/drivers/s390/char/keyboard.h +index c467589c7f452..c06d399b9b1f1 100644 +--- a/drivers/s390/char/keyboard.h ++++ b/drivers/s390/char/keyboard.h +@@ -56,7 +56,7 @@ static inline void + kbd_put_queue(struct tty_port *port, int ch) + { + tty_insert_flip_char(port, ch, 0); +- tty_schedule_flip(port); ++ tty_flip_buffer_push(port); + } + + static inline void +@@ -64,5 +64,5 @@ kbd_puts_queue(struct tty_port *port, char *cp) + { + while (*cp) + tty_insert_flip_char(port, *cp++, 0); +- tty_schedule_flip(port); ++ tty_flip_buffer_push(port); + } +diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c +index c86c3ac6097dd..b1003876cb350 100644 +--- a/drivers/spi/spi-bcm2835.c ++++ b/drivers/spi/spi-bcm2835.c +@@ -1159,10 +1159,14 @@ static void bcm2835_spi_handle_err(struct spi_controller *ctlr, + struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr); + + /* if an error occurred and we have an active dma, then terminate */ +- dmaengine_terminate_sync(ctlr->dma_tx); +- bs->tx_dma_active = false; +- dmaengine_terminate_sync(ctlr->dma_rx); +- bs->rx_dma_active = false; ++ if (ctlr->dma_tx) { ++ dmaengine_terminate_sync(ctlr->dma_tx); ++ bs->tx_dma_active = false; ++ } ++ if (ctlr->dma_rx) { ++ dmaengine_terminate_sync(ctlr->dma_rx); ++ bs->rx_dma_active = false; ++ } + bcm2835_spi_undo_prologue(bs); + + /* and reset */ +diff --git a/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c b/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c +index 0ba4e4e070a9f..7cfbdfb10e23e 100644 +--- a/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c ++++ b/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c +@@ -267,6 +267,8 @@ static int rt2880_pinmux_pins(struct rt2880_priv *p) + p->func[i]->pin_count, + sizeof(int), + GFP_KERNEL); ++ if (!p->func[i]->pins) ++ return -ENOMEM; + for (j = 0; j < p->func[i]->pin_count; j++) + p->func[i]->pins[j] = p->func[i]->pin_first + j; + +diff --git a/drivers/staging/speakup/spk_ttyio.c b/drivers/staging/speakup/spk_ttyio.c +index 472804c3f44dc..aec8222361743 100644 +--- a/drivers/staging/speakup/spk_ttyio.c ++++ b/drivers/staging/speakup/spk_ttyio.c +@@ -88,7 +88,7 @@ static int spk_ttyio_receive_buf2(struct tty_struct *tty, + } + + if (!ldisc_data->buf_free) +- /* ttyio_in will tty_schedule_flip */ ++ /* ttyio_in will tty_flip_buffer_push */ + return 0; + + /* Make sure the consumer has read buf before we have seen +@@ -325,7 +325,7 @@ static unsigned char ttyio_in(int timeout) + mb(); + ldisc_data->buf_free = true; + /* Let TTY push more characters */ +- tty_schedule_flip(speakup_tty->port); ++ tty_flip_buffer_push(speakup_tty->port); + + return rv; + } +diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c +index 4562c8060d09e..26581d2456c8f 100644 +--- a/drivers/tty/cyclades.c ++++ b/drivers/tty/cyclades.c +@@ -556,7 +556,7 @@ static void cyy_chip_rx(struct cyclades_card *cinfo, int chip, + } + info->idle_stats.recv_idle = jiffies; + } +- tty_schedule_flip(port); ++ 
tty_flip_buffer_push(port); + + /* end of service */ + cyy_writeb(info, CyRIR, save_xir & 0x3f); +@@ -996,7 +996,7 @@ static void cyz_handle_rx(struct cyclades_port *info) + mod_timer(&info->rx_full_timer, jiffies + 1); + #endif + info->idle_stats.recv_idle = jiffies; +- tty_schedule_flip(&info->port); ++ tty_flip_buffer_push(&info->port); + + /* Update rx_get */ + cy_writel(&buf_ctrl->rx_get, new_rx_get); +@@ -1172,7 +1172,7 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo) + if (delta_count) + wake_up_interruptible(&info->port.delta_msr_wait); + if (special_count) +- tty_schedule_flip(&info->port); ++ tty_flip_buffer_push(&info->port); + } + } + +diff --git a/drivers/tty/goldfish.c b/drivers/tty/goldfish.c +index 9180ca5e4dcd4..d6e82eb61fc2d 100644 +--- a/drivers/tty/goldfish.c ++++ b/drivers/tty/goldfish.c +@@ -151,7 +151,7 @@ static irqreturn_t goldfish_tty_interrupt(int irq, void *dev_id) + address = (unsigned long)(void *)buf; + goldfish_tty_rw(qtty, address, count, 0); + +- tty_schedule_flip(&qtty->port); ++ tty_flip_buffer_push(&qtty->port); + return IRQ_HANDLED; + } + +diff --git a/drivers/tty/moxa.c b/drivers/tty/moxa.c +index 1254b39074edb..e67a1aef1fd0d 100644 +--- a/drivers/tty/moxa.c ++++ b/drivers/tty/moxa.c +@@ -1385,7 +1385,7 @@ static int moxa_poll_port(struct moxa_port *p, unsigned int handle, + if (inited && !tty_throttled(tty) && + MoxaPortRxQueue(p) > 0) { /* RX */ + MoxaPortReadData(p); +- tty_schedule_flip(&p->port); ++ tty_flip_buffer_push(&p->port); + } + } else { + clear_bit(EMPTYWAIT, &p->statusflags); +@@ -1410,7 +1410,7 @@ static int moxa_poll_port(struct moxa_port *p, unsigned int handle, + + if (tty && (intr & IntrBreak) && !I_IGNBRK(tty)) { /* BREAK */ + tty_insert_flip_char(&p->port, 0, TTY_BREAK); +- tty_schedule_flip(&p->port); ++ tty_flip_buffer_push(&p->port); + } + + if (intr & IntrLine) +diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c +index c6a1d8c4e6894..73226e482e919 100644 +--- a/drivers/tty/pty.c ++++ b/drivers/tty/pty.c +@@ -111,21 +111,11 @@ static void pty_unthrottle(struct tty_struct *tty) + static int pty_write(struct tty_struct *tty, const unsigned char *buf, int c) + { + struct tty_struct *to = tty->link; +- unsigned long flags; + +- if (tty->stopped) ++ if (tty->stopped || !c) + return 0; + +- if (c > 0) { +- spin_lock_irqsave(&to->port->lock, flags); +- /* Stuff the data into the input queue of the other end */ +- c = tty_insert_flip_string(to->port, buf, c); +- spin_unlock_irqrestore(&to->port->lock, flags); +- /* And shovel */ +- if (c) +- tty_flip_buffer_push(to->port); +- } +- return c; ++ return tty_insert_flip_string_and_push_buffer(to->port, buf, c); + } + + /** +diff --git a/drivers/tty/serial/lpc32xx_hs.c b/drivers/tty/serial/lpc32xx_hs.c +index 9a836dcac157c..a03618f89c0dd 100644 +--- a/drivers/tty/serial/lpc32xx_hs.c ++++ b/drivers/tty/serial/lpc32xx_hs.c +@@ -345,7 +345,7 @@ static irqreturn_t serial_lpc32xx_interrupt(int irq, void *dev_id) + LPC32XX_HSUART_IIR(port->membase)); + port->icount.overrun++; + tty_insert_flip_char(tport, 0, TTY_OVERRUN); +- tty_schedule_flip(tport); ++ tty_flip_buffer_push(tport); + } + + /* Data received? 
*/ +diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c +index 13db15118cb94..2ce0d05be3681 100644 +--- a/drivers/tty/serial/mvebu-uart.c ++++ b/drivers/tty/serial/mvebu-uart.c +@@ -443,13 +443,13 @@ static void mvebu_uart_shutdown(struct uart_port *port) + } + } + +-static int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud) ++static unsigned int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud) + { + unsigned int d_divisor, m_divisor; + u32 brdv, osamp; + + if (!port->uartclk) +- return -EOPNOTSUPP; ++ return 0; + + /* + * The baudrate is derived from the UART clock thanks to two divisors: +@@ -473,7 +473,7 @@ static int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud) + osamp &= ~OSAMP_DIVISORS_MASK; + writel(osamp, port->membase + UART_OSAMP); + +- return 0; ++ return DIV_ROUND_CLOSEST(port->uartclk, d_divisor * m_divisor); + } + + static void mvebu_uart_set_termios(struct uart_port *port, +@@ -510,15 +510,11 @@ static void mvebu_uart_set_termios(struct uart_port *port, + max_baud = 230400; + + baud = uart_get_baud_rate(port, termios, old, min_baud, max_baud); +- if (mvebu_uart_baud_rate_set(port, baud)) { +- /* No clock available, baudrate cannot be changed */ +- if (old) +- baud = uart_get_baud_rate(port, old, NULL, +- min_baud, max_baud); +- } else { +- tty_termios_encode_baud_rate(termios, baud, baud); +- uart_update_timeout(port, termios->c_cflag, baud); +- } ++ baud = mvebu_uart_baud_rate_set(port, baud); ++ ++ /* In case baudrate cannot be changed, report previous old value */ ++ if (baud == 0 && old) ++ baud = tty_termios_baud_rate(old); + + /* Only the following flag changes are supported */ + if (old) { +@@ -529,6 +525,11 @@ static void mvebu_uart_set_termios(struct uart_port *port, + termios->c_cflag |= CS8; + } + ++ if (baud != 0) { ++ tty_termios_encode_baud_rate(termios, baud, baud); ++ uart_update_timeout(port, termios->c_cflag, baud); ++ } ++ + spin_unlock_irqrestore(&port->lock, flags); + } + +diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c +index 47f2370ad85cb..49f39c041c351 100644 +--- a/drivers/tty/tty_buffer.c ++++ b/drivers/tty/tty_buffer.c +@@ -394,27 +394,6 @@ int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag) + } + EXPORT_SYMBOL(__tty_insert_flip_char); + +-/** +- * tty_schedule_flip - push characters to ldisc +- * @port: tty port to push from +- * +- * Takes any pending buffers and transfers their ownership to the +- * ldisc side of the queue. It then schedules those characters for +- * processing by the line discipline. +- */ +- +-void tty_schedule_flip(struct tty_port *port) +-{ +- struct tty_bufhead *buf = &port->buf; +- +- /* paired w/ acquire in flush_to_ldisc(); ensures +- * flush_to_ldisc() sees buffer data. +- */ +- smp_store_release(&buf->tail->commit, buf->tail->used); +- queue_work(system_unbound_wq, &buf->work); +-} +-EXPORT_SYMBOL(tty_schedule_flip); +- + /** + * tty_prepare_flip_string - make room for characters + * @port: tty port +@@ -544,6 +523,15 @@ static void flush_to_ldisc(struct work_struct *work) + + } + ++static inline void tty_flip_buffer_commit(struct tty_buffer *tail) ++{ ++ /* ++ * Paired w/ acquire in flush_to_ldisc(); ensures flush_to_ldisc() sees ++ * buffer data. 
++ */ ++ smp_store_release(&tail->commit, tail->used); ++} ++ + /** + * tty_flip_buffer_push - terminal + * @port: tty port to push +@@ -557,10 +545,44 @@ static void flush_to_ldisc(struct work_struct *work) + + void tty_flip_buffer_push(struct tty_port *port) + { +- tty_schedule_flip(port); ++ struct tty_bufhead *buf = &port->buf; ++ ++ tty_flip_buffer_commit(buf->tail); ++ queue_work(system_unbound_wq, &buf->work); + } + EXPORT_SYMBOL(tty_flip_buffer_push); + ++/** ++ * tty_insert_flip_string_and_push_buffer - add characters to the tty buffer and ++ * push ++ * @port: tty port ++ * @chars: characters ++ * @size: size ++ * ++ * The function combines tty_insert_flip_string() and tty_flip_buffer_push() ++ * with the exception of properly holding the @port->lock. ++ * ++ * To be used only internally (by pty currently). ++ * ++ * Returns: the number added. ++ */ ++int tty_insert_flip_string_and_push_buffer(struct tty_port *port, ++ const unsigned char *chars, size_t size) ++{ ++ struct tty_bufhead *buf = &port->buf; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&port->lock, flags); ++ size = tty_insert_flip_string(port, chars, size); ++ if (size) ++ tty_flip_buffer_commit(buf->tail); ++ spin_unlock_irqrestore(&port->lock, flags); ++ ++ queue_work(system_unbound_wq, &buf->work); ++ ++ return size; ++} ++ + /** + * tty_buffer_init - prepare a tty buffer structure + * @tty: tty to initialise +diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c +index b6e78fdbfdff9..68643f61f6f90 100644 +--- a/drivers/tty/vt/keyboard.c ++++ b/drivers/tty/vt/keyboard.c +@@ -310,7 +310,7 @@ int kbd_rate(struct kbd_repeat *rpt) + static void put_queue(struct vc_data *vc, int ch) + { + tty_insert_flip_char(&vc->port, ch, 0); +- tty_schedule_flip(&vc->port); ++ tty_flip_buffer_push(&vc->port); + } + + static void puts_queue(struct vc_data *vc, char *cp) +@@ -319,7 +319,7 @@ static void puts_queue(struct vc_data *vc, char *cp) + tty_insert_flip_char(&vc->port, *cp, 0); + cp++; + } +- tty_schedule_flip(&vc->port); ++ tty_flip_buffer_push(&vc->port); + } + + static void applkey(struct vc_data *vc, int key, char mode) +@@ -564,7 +564,7 @@ static void fn_inc_console(struct vc_data *vc) + static void fn_send_intr(struct vc_data *vc) + { + tty_insert_flip_char(&vc->port, 0, TTY_BREAK); +- tty_schedule_flip(&vc->port); ++ tty_flip_buffer_push(&vc->port); + } + + static void fn_scroll_forw(struct vc_data *vc) +diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c +index c3df1660cb5c5..9d9e056f94ac8 100644 +--- a/drivers/tty/vt/vt.c ++++ b/drivers/tty/vt/vt.c +@@ -1837,7 +1837,7 @@ static void respond_string(const char *p, struct tty_port *port) + tty_insert_flip_char(port, *p, 0); + p++; + } +- tty_schedule_flip(port); ++ tty_flip_buffer_push(port); + } + + static void cursor_report(struct vc_data *vc, struct tty_struct *tty) +diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c +index f464793477650..bba849e5d8a7b 100644 +--- a/drivers/xen/gntdev.c ++++ b/drivers/xen/gntdev.c +@@ -413,7 +413,8 @@ static void __unmap_grant_pages_done(int result, + unsigned int offset = data->unmap_ops - map->unmap_ops; + + for (i = 0; i < data->count; i++) { +- WARN_ON(map->unmap_ops[offset+i].status); ++ WARN_ON(map->unmap_ops[offset+i].status && ++ map->unmap_ops[offset+i].handle != -1); + pr_debug("unmap handle=%d st=%d\n", + map->unmap_ops[offset+i].handle, + map->unmap_ops[offset+i].status); +diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c +index 4ae8becdb51db..9165bf56c6e8e 100644 +--- a/fs/dlm/lock.c ++++ b/fs/dlm/lock.c 
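The hunks above retire tty_schedule_flip() and route every caller through tty_flip_buffer_push(), with pty switched to the new combined tty_insert_flip_string_and_push_buffer() helper. For illustration only (not part of this patch; the function name is invented), a minimal sketch of how a driver receive path typically feeds the flip buffer with these exported helpers:

#include <linux/tty_flip.h>

/* Hypothetical driver RX handler: stage the received bytes in the port's
 * flip buffer, then hand them to the line discipline in one push.
 */
static void example_uart_rx(struct tty_port *port,
			    const unsigned char *data, size_t len)
{
	tty_insert_flip_string(port, data, len);
	tty_flip_buffer_push(port);
}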
+@@ -4067,13 +4067,14 @@ static void send_repeat_remove(struct dlm_ls *ls, char *ms_name, int len) + rv = _create_message(ls, sizeof(struct dlm_message) + len, + dir_nodeid, DLM_MSG_REMOVE, &ms, &mh); + if (rv) +- return; ++ goto out; + + memcpy(ms->m_extra, name, len); + ms->m_hash = hash; + + send_message(mh, ms); + ++out: + spin_lock(&ls->ls_remove_spin); + ls->ls_remove_len = 0; + memset(ls->ls_remove_name, 0, DLM_RESNAME_MAXLEN); +diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h +index 4c0224ff0a14b..4f1c0f8e1bb0b 100644 +--- a/include/linux/bitfield.h ++++ b/include/linux/bitfield.h +@@ -41,6 +41,22 @@ + + #define __bf_shf(x) (__builtin_ffsll(x) - 1) + ++#define __scalar_type_to_unsigned_cases(type) \ ++ unsigned type: (unsigned type)0, \ ++ signed type: (unsigned type)0 ++ ++#define __unsigned_scalar_typeof(x) typeof( \ ++ _Generic((x), \ ++ char: (unsigned char)0, \ ++ __scalar_type_to_unsigned_cases(char), \ ++ __scalar_type_to_unsigned_cases(short), \ ++ __scalar_type_to_unsigned_cases(int), \ ++ __scalar_type_to_unsigned_cases(long), \ ++ __scalar_type_to_unsigned_cases(long long), \ ++ default: (x))) ++ ++#define __bf_cast_unsigned(type, x) ((__unsigned_scalar_typeof(type))(x)) ++ + #define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \ + ({ \ + BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ +@@ -49,7 +65,8 @@ + BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ + ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ + _pfx "value too large for the field"); \ +- BUILD_BUG_ON_MSG((_mask) > (typeof(_reg))~0ull, \ ++ BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \ ++ __bf_cast_unsigned(_reg, ~0ull), \ + _pfx "type of reg too small for mask"); \ + __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \ + (1ULL << __bf_shf(_mask))); \ +diff --git a/include/linux/mm.h b/include/linux/mm.h +index c125fea49752f..d35c29d322d83 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -15,6 +15,7 @@ + #include <linux/atomic.h> + #include <linux/debug_locks.h> + #include <linux/mm_types.h> ++#include <linux/mmap_lock.h> + #include <linux/range.h> + #include <linux/pfn.h> + #include <linux/percpu-refcount.h> +diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h +new file mode 100644 +index 0000000000000..97ac53b66052e +--- /dev/null ++++ b/include/linux/mmap_lock.h +@@ -0,0 +1,54 @@ ++#ifndef _LINUX_MMAP_LOCK_H ++#define _LINUX_MMAP_LOCK_H ++ ++static inline void mmap_init_lock(struct mm_struct *mm) ++{ ++ init_rwsem(&mm->mmap_sem); ++} ++ ++static inline void mmap_write_lock(struct mm_struct *mm) ++{ ++ down_write(&mm->mmap_sem); ++} ++ ++static inline int mmap_write_lock_killable(struct mm_struct *mm) ++{ ++ return down_write_killable(&mm->mmap_sem); ++} ++ ++static inline bool mmap_write_trylock(struct mm_struct *mm) ++{ ++ return down_write_trylock(&mm->mmap_sem) != 0; ++} ++ ++static inline void mmap_write_unlock(struct mm_struct *mm) ++{ ++ up_write(&mm->mmap_sem); ++} ++ ++static inline void mmap_write_downgrade(struct mm_struct *mm) ++{ ++ downgrade_write(&mm->mmap_sem); ++} ++ ++static inline void mmap_read_lock(struct mm_struct *mm) ++{ ++ down_read(&mm->mmap_sem); ++} ++ ++static inline int mmap_read_lock_killable(struct mm_struct *mm) ++{ ++ return down_read_killable(&mm->mmap_sem); ++} ++ ++static inline bool mmap_read_trylock(struct mm_struct *mm) ++{ ++ return down_read_trylock(&mm->mmap_sem) != 0; ++} ++ ++static inline void mmap_read_unlock(struct mm_struct *mm) ++{ ++ up_read(&mm->mmap_sem); ++} ++ ++#endif /* _LINUX_MMAP_LOCK_H */ +diff --git 
a/include/linux/refcount.h b/include/linux/refcount.h +index e28cce21bad6c..0ac50cf62d062 100644 +--- a/include/linux/refcount.h ++++ b/include/linux/refcount.h +@@ -1,9 +1,88 @@ + /* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Variant of atomic_t specialized for reference counts. ++ * ++ * The interface matches the atomic_t interface (to aid in porting) but only ++ * provides the few functions one should use for reference counting. ++ * ++ * Saturation semantics ++ * ==================== ++ * ++ * refcount_t differs from atomic_t in that the counter saturates at ++ * REFCOUNT_SATURATED and will not move once there. This avoids wrapping the ++ * counter and causing 'spurious' use-after-free issues. In order to avoid the ++ * cost associated with introducing cmpxchg() loops into all of the saturating ++ * operations, we temporarily allow the counter to take on an unchecked value ++ * and then explicitly set it to REFCOUNT_SATURATED on detecting that underflow ++ * or overflow has occurred. Although this is racy when multiple threads ++ * access the refcount concurrently, by placing REFCOUNT_SATURATED roughly ++ * equidistant from 0 and INT_MAX we minimise the scope for error: ++ * ++ * INT_MAX REFCOUNT_SATURATED UINT_MAX ++ * 0 (0x7fff_ffff) (0xc000_0000) (0xffff_ffff) ++ * +--------------------------------+----------------+----------------+ ++ * <---------- bad value! ----------> ++ * ++ * (in a signed view of the world, the "bad value" range corresponds to ++ * a negative counter value). ++ * ++ * As an example, consider a refcount_inc() operation that causes the counter ++ * to overflow: ++ * ++ * int old = atomic_fetch_add_relaxed(r); ++ * // old is INT_MAX, refcount now INT_MIN (0x8000_0000) ++ * if (old < 0) ++ * atomic_set(r, REFCOUNT_SATURATED); ++ * ++ * If another thread also performs a refcount_inc() operation between the two ++ * atomic operations, then the count will continue to edge closer to 0. If it ++ * reaches a value of 1 before /any/ of the threads reset it to the saturated ++ * value, then a concurrent refcount_dec_and_test() may erroneously free the ++ * underlying object. Given the precise timing details involved with the ++ * round-robin scheduling of each thread manipulating the refcount and the need ++ * to hit the race multiple times in succession, there doesn't appear to be a ++ * practical avenue of attack even if using refcount_add() operations with ++ * larger increments. ++ * ++ * Memory ordering ++ * =============== ++ * ++ * Memory ordering rules are slightly relaxed wrt regular atomic_t functions ++ * and provide only what is strictly required for refcounts. ++ * ++ * The increments are fully relaxed; these will not provide ordering. The ++ * rationale is that whatever is used to obtain the object we're increasing the ++ * reference count on will provide the ordering. For locked data structures, ++ * its the lock acquire, for RCU/lockless data structures its the dependent ++ * load. ++ * ++ * Do note that inc_not_zero() provides a control dependency which will order ++ * future stores against the inc, this ensures we'll never modify the object ++ * if we did not in fact acquire a reference. ++ * ++ * The decrements will provide release order, such that all the prior loads and ++ * stores will be issued before, it also provides a control dependency, which ++ * will order us against the subsequent free(). ++ * ++ * The control dependency is against the load of the cmpxchg (ll/sc) that ++ * succeeded. 
This means the stores aren't fully ordered, but this is fine ++ * because the 1->0 transition indicates no concurrency. ++ * ++ * Note that the allocator is responsible for ordering things between free() ++ * and alloc(). ++ * ++ * The decrements dec_and_test() and sub_and_test() also provide acquire ++ * ordering on success. ++ * ++ */ ++ + #ifndef _LINUX_REFCOUNT_H + #define _LINUX_REFCOUNT_H + + #include <linux/atomic.h> ++#include <linux/bug.h> + #include <linux/compiler.h> ++#include <linux/limits.h> + #include <linux/spinlock_types.h> + + struct mutex; +@@ -12,7 +91,7 @@ struct mutex; + * struct refcount_t - variant of atomic_t specialized for reference counts + * @refs: atomic_t counter field + * +- * The counter saturates at UINT_MAX and will not move once ++ * The counter saturates at REFCOUNT_SATURATED and will not move once + * there. This avoids wrapping the counter and causing 'spurious' + * use-after-free bugs. + */ +@@ -21,13 +100,25 @@ typedef struct refcount_struct { + } refcount_t; + + #define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } ++#define REFCOUNT_MAX INT_MAX ++#define REFCOUNT_SATURATED (INT_MIN / 2) ++ ++enum refcount_saturation_type { ++ REFCOUNT_ADD_NOT_ZERO_OVF, ++ REFCOUNT_ADD_OVF, ++ REFCOUNT_ADD_UAF, ++ REFCOUNT_SUB_UAF, ++ REFCOUNT_DEC_LEAK, ++}; ++ ++void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t); + + /** + * refcount_set - set a refcount's value + * @r: the refcount + * @n: value to which the refcount will be set + */ +-static inline void refcount_set(refcount_t *r, unsigned int n) ++static inline void refcount_set(refcount_t *r, int n) + { + atomic_set(&r->refs, n); + } +@@ -43,70 +134,168 @@ static inline unsigned int refcount_read(const refcount_t *r) + return atomic_read(&r->refs); + } + +-extern __must_check bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r); +-extern void refcount_add_checked(unsigned int i, refcount_t *r); +- +-extern __must_check bool refcount_inc_not_zero_checked(refcount_t *r); +-extern void refcount_inc_checked(refcount_t *r); +- +-extern __must_check bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r); +- +-extern __must_check bool refcount_dec_and_test_checked(refcount_t *r); +-extern void refcount_dec_checked(refcount_t *r); +- +-#ifdef CONFIG_REFCOUNT_FULL +- +-#define refcount_add_not_zero refcount_add_not_zero_checked +-#define refcount_add refcount_add_checked +- +-#define refcount_inc_not_zero refcount_inc_not_zero_checked +-#define refcount_inc refcount_inc_checked ++/** ++ * refcount_add_not_zero - add a value to a refcount unless it is 0 ++ * @i: the value to add to the refcount ++ * @r: the refcount ++ * ++ * Will saturate at REFCOUNT_SATURATED and WARN. ++ * ++ * Provides no memory ordering, it is assumed the caller has guaranteed the ++ * object memory to be stable (RCU, etc.). It does provide a control dependency ++ * and thereby orders future stores. See the comment on top. ++ * ++ * Use of this function is not recommended for the normal reference counting ++ * use case in which references are taken and released one at a time. In these ++ * cases, refcount_inc(), or one of its variants, should instead be used to ++ * increment a reference count. 
++ * ++ * Return: false if the passed refcount is 0, true otherwise ++ */ ++static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) ++{ ++ int old = refcount_read(r); + +-#define refcount_sub_and_test refcount_sub_and_test_checked ++ do { ++ if (!old) ++ break; ++ } while (!atomic_try_cmpxchg_relaxed(&r->refs, &old, old + i)); + +-#define refcount_dec_and_test refcount_dec_and_test_checked +-#define refcount_dec refcount_dec_checked ++ if (unlikely(old < 0 || old + i < 0)) ++ refcount_warn_saturate(r, REFCOUNT_ADD_NOT_ZERO_OVF); + +-#else +-# ifdef CONFIG_ARCH_HAS_REFCOUNT +-# include <asm/refcount.h> +-# else +-static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r) +-{ +- return atomic_add_unless(&r->refs, i, 0); ++ return old; + } + +-static inline void refcount_add(unsigned int i, refcount_t *r) ++/** ++ * refcount_add - add a value to a refcount ++ * @i: the value to add to the refcount ++ * @r: the refcount ++ * ++ * Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN. ++ * ++ * Provides no memory ordering, it is assumed the caller has guaranteed the ++ * object memory to be stable (RCU, etc.). It does provide a control dependency ++ * and thereby orders future stores. See the comment on top. ++ * ++ * Use of this function is not recommended for the normal reference counting ++ * use case in which references are taken and released one at a time. In these ++ * cases, refcount_inc(), or one of its variants, should instead be used to ++ * increment a reference count. ++ */ ++static inline void refcount_add(int i, refcount_t *r) + { +- atomic_add(i, &r->refs); ++ int old = atomic_fetch_add_relaxed(i, &r->refs); ++ ++ if (unlikely(!old)) ++ refcount_warn_saturate(r, REFCOUNT_ADD_UAF); ++ else if (unlikely(old < 0 || old + i < 0)) ++ refcount_warn_saturate(r, REFCOUNT_ADD_OVF); + } + ++/** ++ * refcount_inc_not_zero - increment a refcount unless it is 0 ++ * @r: the refcount to increment ++ * ++ * Similar to atomic_inc_not_zero(), but will saturate at REFCOUNT_SATURATED ++ * and WARN. ++ * ++ * Provides no memory ordering, it is assumed the caller has guaranteed the ++ * object memory to be stable (RCU, etc.). It does provide a control dependency ++ * and thereby orders future stores. See the comment on top. ++ * ++ * Return: true if the increment was successful, false otherwise ++ */ + static inline __must_check bool refcount_inc_not_zero(refcount_t *r) + { +- return atomic_add_unless(&r->refs, 1, 0); ++ return refcount_add_not_zero(1, r); + } + ++/** ++ * refcount_inc - increment a refcount ++ * @r: the refcount to increment ++ * ++ * Similar to atomic_inc(), but will saturate at REFCOUNT_SATURATED and WARN. ++ * ++ * Provides no memory ordering, it is assumed the caller already has a ++ * reference on the object. ++ * ++ * Will WARN if the refcount is 0, as this represents a possible use-after-free ++ * condition. ++ */ + static inline void refcount_inc(refcount_t *r) + { +- atomic_inc(&r->refs); ++ refcount_add(1, r); + } + +-static inline __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r) ++/** ++ * refcount_sub_and_test - subtract from a refcount and test if it is 0 ++ * @i: amount to subtract from the refcount ++ * @r: the refcount ++ * ++ * Similar to atomic_dec_and_test(), but it will WARN, return false and ++ * ultimately leak on underflow and will fail to decrement when saturated ++ * at REFCOUNT_SATURATED. 
++ * ++ * Provides release memory ordering, such that prior loads and stores are done ++ * before, and provides an acquire ordering on success such that free() ++ * must come after. ++ * ++ * Use of this function is not recommended for the normal reference counting ++ * use case in which references are taken and released one at a time. In these ++ * cases, refcount_dec(), or one of its variants, should instead be used to ++ * decrement a reference count. ++ * ++ * Return: true if the resulting refcount is 0, false otherwise ++ */ ++static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) + { +- return atomic_sub_and_test(i, &r->refs); ++ int old = atomic_fetch_sub_release(i, &r->refs); ++ ++ if (old == i) { ++ smp_acquire__after_ctrl_dep(); ++ return true; ++ } ++ ++ if (unlikely(old < 0 || old - i < 0)) ++ refcount_warn_saturate(r, REFCOUNT_SUB_UAF); ++ ++ return false; + } + ++/** ++ * refcount_dec_and_test - decrement a refcount and test if it is 0 ++ * @r: the refcount ++ * ++ * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to ++ * decrement when saturated at REFCOUNT_SATURATED. ++ * ++ * Provides release memory ordering, such that prior loads and stores are done ++ * before, and provides an acquire ordering on success such that free() ++ * must come after. ++ * ++ * Return: true if the resulting refcount is 0, false otherwise ++ */ + static inline __must_check bool refcount_dec_and_test(refcount_t *r) + { +- return atomic_dec_and_test(&r->refs); ++ return refcount_sub_and_test(1, r); + } + ++/** ++ * refcount_dec - decrement a refcount ++ * @r: the refcount ++ * ++ * Similar to atomic_dec(), it will WARN on underflow and fail to decrement ++ * when saturated at REFCOUNT_SATURATED. ++ * ++ * Provides release memory ordering, such that prior loads and stores are done ++ * before. 
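With CONFIG_REFCOUNT_FULL gone, these inlines are the only refcount_t implementation, so every user now gets the saturating, WARN-ing behaviour documented above. A hedged usage sketch (the struct and helpers are invented for illustration) of the usual get/put pattern built on this API:

#include <linux/refcount.h>
#include <linux/slab.h>

struct foo {				/* hypothetical refcounted object */
	refcount_t refs;
};

static struct foo *foo_alloc(void)
{
	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (f)
		refcount_set(&f->refs, 1);	/* caller owns the initial reference */
	return f;
}

static void foo_get(struct foo *f)
{
	refcount_inc(&f->refs);			/* saturates and WARNs instead of wrapping */
}

static void foo_put(struct foo *f)
{
	if (refcount_dec_and_test(&f->refs))	/* release, plus acquire on the 1->0 transition */
		kfree(f);
}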
++ */ + static inline void refcount_dec(refcount_t *r) + { +- atomic_dec(&r->refs); ++ if (unlikely(atomic_fetch_sub_release(1, &r->refs) <= 1)) ++ refcount_warn_saturate(r, REFCOUNT_DEC_LEAK); + } +-# endif /* !CONFIG_ARCH_HAS_REFCOUNT */ +-#endif /* CONFIG_REFCOUNT_FULL */ + + extern __must_check bool refcount_dec_if_one(refcount_t *r); + extern __must_check bool refcount_dec_not_one(refcount_t *r); +diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h +index 767f62086bd9b..c326bfdb5ec2c 100644 +--- a/include/linux/tty_flip.h ++++ b/include/linux/tty_flip.h +@@ -12,7 +12,6 @@ extern int tty_insert_flip_string_fixed_flag(struct tty_port *port, + extern int tty_prepare_flip_string(struct tty_port *port, + unsigned char **chars, size_t size); + extern void tty_flip_buffer_push(struct tty_port *port); +-void tty_schedule_flip(struct tty_port *port); + int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag); + + static inline int tty_insert_flip_char(struct tty_port *port, +@@ -40,4 +39,7 @@ static inline int tty_insert_flip_string(struct tty_port *port, + extern void tty_buffer_lock_exclusive(struct tty_port *port); + extern void tty_buffer_unlock_exclusive(struct tty_port *port); + ++int tty_insert_flip_string_and_push_buffer(struct tty_port *port, ++ const unsigned char *chars, size_t cnt); ++ + #endif /* _LINUX_TTY_FLIP_H */ +diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h +index fabee6db0abb7..421d41ef4e9ca 100644 +--- a/include/net/bluetooth/bluetooth.h ++++ b/include/net/bluetooth/bluetooth.h +@@ -370,6 +370,71 @@ out: + return NULL; + } + ++/* Shall not be called with lock_sock held */ ++static inline struct sk_buff *bt_skb_sendmsg(struct sock *sk, ++ struct msghdr *msg, ++ size_t len, size_t mtu, ++ size_t headroom, size_t tailroom) ++{ ++ struct sk_buff *skb; ++ size_t size = min_t(size_t, len, mtu); ++ int err; ++ ++ skb = bt_skb_send_alloc(sk, size + headroom + tailroom, ++ msg->msg_flags & MSG_DONTWAIT, &err); ++ if (!skb) ++ return ERR_PTR(err); ++ ++ skb_reserve(skb, headroom); ++ skb_tailroom_reserve(skb, mtu, tailroom); ++ ++ if (!copy_from_iter_full(skb_put(skb, size), size, &msg->msg_iter)) { ++ kfree_skb(skb); ++ return ERR_PTR(-EFAULT); ++ } ++ ++ skb->priority = sk->sk_priority; ++ ++ return skb; ++} ++ ++/* Similar to bt_skb_sendmsg but can split the msg into multiple fragments ++ * accourding to the MTU. 
++ */ ++static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, ++ struct msghdr *msg, ++ size_t len, size_t mtu, ++ size_t headroom, size_t tailroom) ++{ ++ struct sk_buff *skb, **frag; ++ ++ skb = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); ++ if (IS_ERR_OR_NULL(skb)) ++ return skb; ++ ++ len -= skb->len; ++ if (!len) ++ return skb; ++ ++ /* Add remaining data over MTU as continuation fragments */ ++ frag = &skb_shinfo(skb)->frag_list; ++ while (len) { ++ struct sk_buff *tmp; ++ ++ tmp = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); ++ if (IS_ERR(tmp)) { ++ return skb; ++ } ++ ++ len -= tmp->len; ++ ++ *frag = tmp; ++ frag = &(*frag)->next; ++ } ++ ++ return skb; ++} ++ + int bt_to_errno(u16 code); + + void hci_sock_set_flag(struct sock *sk, int nr); +diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h +index 34c4436fd18ff..58db7c69c146d 100644 +--- a/include/net/inet_sock.h ++++ b/include/net/inet_sock.h +@@ -107,7 +107,8 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) + + static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) + { +- if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) ++ if (!sk->sk_mark && ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)) + return skb->mark; + + return sk->sk_mark; +@@ -375,7 +376,7 @@ static inline bool inet_get_convert_csum(struct sock *sk) + static inline bool inet_can_nonlocal_bind(struct net *net, + struct inet_sock *inet) + { +- return net->ipv4.sysctl_ip_nonlocal_bind || ++ return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) || + inet->freebind || inet->transparent; + } + +diff --git a/include/net/ip.h b/include/net/ip.h +index 3f3ea86b2173c..db841ab388c0e 100644 +--- a/include/net/ip.h ++++ b/include/net/ip.h +@@ -381,7 +381,7 @@ void ipfrag_init(void); + void ip_static_sysctl_init(void); + + #define IP4_REPLY_MARK(net, mark) \ +- ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) ++ (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0) + + static inline bool ip_is_fragment(const struct iphdr *iph) + { +@@ -442,7 +442,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, + struct net *net = dev_net(dst->dev); + unsigned int mtu; + +- if (net->ipv4.sysctl_ip_fwd_use_pmtu || ++ if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || + ip_mtu_locked(dst) || + !forwarding) + return dst_mtu(dst); +diff --git a/include/net/tcp.h b/include/net/tcp.h +index 65be8bd1f0f4a..aaf1d5d5a13b0 100644 +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -1373,8 +1373,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) + struct tcp_sock *tp = tcp_sk(sk); + s32 delta; + +- if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out || +- ca_ops->cong_control) ++ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) || ++ tp->packets_out || ca_ops->cong_control) + return; + delta = tcp_jiffies32 - tp->lsndtime; + if (delta > inet_csk(sk)->icsk_rto) +@@ -1465,7 +1465,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) + + static inline int tcp_fin_time(const struct sock *sk) + { +- int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout; ++ int fin_timeout = tcp_sk(sk)->linger2 ? 
: ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout); + const int rto = inet_csk(sk)->icsk_rto; + + if (fin_timeout < (rto << 2) - (rto >> 1)) +@@ -1946,7 +1947,7 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); + static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) + { + struct net *net = sock_net((struct sock *)tp); +- return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat; ++ return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); + } + + /* @wake is one when sk_stream_write_space() calls us. +diff --git a/include/net/udp.h b/include/net/udp.h +index 9787a42f7ed3e..e66854e767dcc 100644 +--- a/include/net/udp.h ++++ b/include/net/udp.h +@@ -252,7 +252,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if, + int dif, int sdif) + { + #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) +- return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept, ++ return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept), + bound_dev_if, dif, sdif); + #else + return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); +diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c +index 1238ef9c569df..6b33a8a148b85 100644 +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -64,11 +64,13 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns + { + u8 *ptr = NULL; + +- if (k >= SKF_NET_OFF) ++ if (k >= SKF_NET_OFF) { + ptr = skb_network_header(skb) + k - SKF_NET_OFF; +- else if (k >= SKF_LL_OFF) ++ } else if (k >= SKF_LL_OFF) { ++ if (unlikely(!skb_mac_header_was_set(skb))) ++ return NULL; + ptr = skb_mac_header(skb) + k - SKF_LL_OFF; +- ++ } + if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) + return ptr; + +diff --git a/kernel/events/core.c b/kernel/events/core.c +index 8336dcb2bd432..0a54780e0942d 100644 +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -5819,10 +5819,10 @@ again: + + if (!atomic_inc_not_zero(&event->rb->mmap_count)) { + /* +- * Raced against perf_mmap_close() through +- * perf_event_set_output(). Try again, hope for better +- * luck. ++ * Raced against perf_mmap_close(); remove the ++ * event and try again. + */ ++ ring_buffer_attach(event, NULL); + mutex_unlock(&event->mmap_mutex); + goto again; + } +@@ -10763,14 +10763,25 @@ err_size: + goto out; + } + ++static void mutex_lock_double(struct mutex *a, struct mutex *b) ++{ ++ if (b < a) ++ swap(a, b); ++ ++ mutex_lock(a); ++ mutex_lock_nested(b, SINGLE_DEPTH_NESTING); ++} ++ + static int + perf_event_set_output(struct perf_event *event, struct perf_event *output_event) + { + struct ring_buffer *rb = NULL; + int ret = -EINVAL; + +- if (!output_event) ++ if (!output_event) { ++ mutex_lock(&event->mmap_mutex); + goto set; ++ } + + /* don't allow circular references */ + if (event == output_event) +@@ -10808,8 +10819,15 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) + event->pmu != output_event->pmu) + goto out; + ++ /* ++ * Hold both mmap_mutex to serialize against perf_mmap_close(). Since ++ * output_event is already on rb->event_list, and the list iteration ++ * restarts after every removal, it is guaranteed this new event is ++ * observed *OR* if output_event is already removed, it's guaranteed we ++ * observe !rb->mmap_count. 
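perf_event_set_output() now takes both mmap_mutex instances up front via mutex_lock_double(), which the patch simply moves above its first user. For illustration, the idiom that helper relies on, with an invented name: take the two mutexes in a stable (address) order so two tasks pairing the same mutexes the other way round cannot ABBA-deadlock; unlock order does not matter.

#include <linux/kernel.h>
#include <linux/mutex.h>

static void example_lock_pair(struct mutex *a, struct mutex *b)
{
	if (b < a)
		swap(a, b);		/* always take the lower address first */
	mutex_lock(a);
	mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
}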
++ */ ++ mutex_lock_double(&event->mmap_mutex, &output_event->mmap_mutex); + set: +- mutex_lock(&event->mmap_mutex); + /* Can't redirect output if we've got an active mmap() */ + if (atomic_read(&event->mmap_count)) + goto unlock; +@@ -10819,6 +10837,12 @@ set: + rb = ring_buffer_get(output_event); + if (!rb) + goto unlock; ++ ++ /* did we race against perf_mmap_close() */ ++ if (!atomic_read(&rb->mmap_count)) { ++ ring_buffer_put(rb); ++ goto unlock; ++ } + } + + ring_buffer_attach(event, rb); +@@ -10826,20 +10850,13 @@ set: + ret = 0; + unlock: + mutex_unlock(&event->mmap_mutex); ++ if (output_event) ++ mutex_unlock(&output_event->mmap_mutex); + + out: + return ret; + } + +-static void mutex_lock_double(struct mutex *a, struct mutex *b) +-{ +- if (b < a) +- swap(a, b); +- +- mutex_lock(a); +- mutex_lock_nested(b, SINGLE_DEPTH_NESTING); +-} +- + static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) + { + bool nmi_safe = false; +diff --git a/lib/refcount.c b/lib/refcount.c +index 6e904af0fb3e1..ebac8b7d15a7c 100644 +--- a/lib/refcount.c ++++ b/lib/refcount.c +@@ -1,41 +1,6 @@ + // SPDX-License-Identifier: GPL-2.0 + /* +- * Variant of atomic_t specialized for reference counts. +- * +- * The interface matches the atomic_t interface (to aid in porting) but only +- * provides the few functions one should use for reference counting. +- * +- * It differs in that the counter saturates at UINT_MAX and will not move once +- * there. This avoids wrapping the counter and causing 'spurious' +- * use-after-free issues. +- * +- * Memory ordering rules are slightly relaxed wrt regular atomic_t functions +- * and provide only what is strictly required for refcounts. +- * +- * The increments are fully relaxed; these will not provide ordering. The +- * rationale is that whatever is used to obtain the object we're increasing the +- * reference count on will provide the ordering. For locked data structures, +- * its the lock acquire, for RCU/lockless data structures its the dependent +- * load. +- * +- * Do note that inc_not_zero() provides a control dependency which will order +- * future stores against the inc, this ensures we'll never modify the object +- * if we did not in fact acquire a reference. +- * +- * The decrements will provide release order, such that all the prior loads and +- * stores will be issued before, it also provides a control dependency, which +- * will order us against the subsequent free(). +- * +- * The control dependency is against the load of the cmpxchg (ll/sc) that +- * succeeded. This means the stores aren't fully ordered, but this is fine +- * because the 1->0 transition indicates no concurrency. +- * +- * Note that the allocator is responsible for ordering things between free() +- * and alloc(). +- * +- * The decrements dec_and_test() and sub_and_test() also provide acquire +- * ordering on success. +- * ++ * Out-of-line refcount functions. + */ + + #include <linux/mutex.h> +@@ -43,199 +8,33 @@ + #include <linux/spinlock.h> + #include <linux/bug.h> + +-/** +- * refcount_add_not_zero_checked - add a value to a refcount unless it is 0 +- * @i: the value to add to the refcount +- * @r: the refcount +- * +- * Will saturate at UINT_MAX and WARN. +- * +- * Provides no memory ordering, it is assumed the caller has guaranteed the +- * object memory to be stable (RCU, etc.). It does provide a control dependency +- * and thereby orders future stores. See the comment on top. 
+- * +- * Use of this function is not recommended for the normal reference counting +- * use case in which references are taken and released one at a time. In these +- * cases, refcount_inc(), or one of its variants, should instead be used to +- * increment a reference count. +- * +- * Return: false if the passed refcount is 0, true otherwise +- */ +-bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r) +-{ +- unsigned int new, val = atomic_read(&r->refs); +- +- do { +- if (!val) +- return false; +- +- if (unlikely(val == UINT_MAX)) +- return true; +- +- new = val + i; +- if (new < val) +- new = UINT_MAX; +- +- } while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new)); +- +- WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); +- +- return true; +-} +-EXPORT_SYMBOL(refcount_add_not_zero_checked); +- +-/** +- * refcount_add_checked - add a value to a refcount +- * @i: the value to add to the refcount +- * @r: the refcount +- * +- * Similar to atomic_add(), but will saturate at UINT_MAX and WARN. +- * +- * Provides no memory ordering, it is assumed the caller has guaranteed the +- * object memory to be stable (RCU, etc.). It does provide a control dependency +- * and thereby orders future stores. See the comment on top. +- * +- * Use of this function is not recommended for the normal reference counting +- * use case in which references are taken and released one at a time. In these +- * cases, refcount_inc(), or one of its variants, should instead be used to +- * increment a reference count. +- */ +-void refcount_add_checked(unsigned int i, refcount_t *r) +-{ +- WARN_ONCE(!refcount_add_not_zero_checked(i, r), "refcount_t: addition on 0; use-after-free.\n"); +-} +-EXPORT_SYMBOL(refcount_add_checked); +- +-/** +- * refcount_inc_not_zero_checked - increment a refcount unless it is 0 +- * @r: the refcount to increment +- * +- * Similar to atomic_inc_not_zero(), but will saturate at UINT_MAX and WARN. +- * +- * Provides no memory ordering, it is assumed the caller has guaranteed the +- * object memory to be stable (RCU, etc.). It does provide a control dependency +- * and thereby orders future stores. See the comment on top. +- * +- * Return: true if the increment was successful, false otherwise +- */ +-bool refcount_inc_not_zero_checked(refcount_t *r) +-{ +- unsigned int new, val = atomic_read(&r->refs); +- +- do { +- new = val + 1; +- +- if (!val) +- return false; +- +- if (unlikely(!new)) +- return true; +- +- } while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new)); ++#define REFCOUNT_WARN(str) WARN_ONCE(1, "refcount_t: " str ".\n") + +- WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); +- +- return true; +-} +-EXPORT_SYMBOL(refcount_inc_not_zero_checked); +- +-/** +- * refcount_inc_checked - increment a refcount +- * @r: the refcount to increment +- * +- * Similar to atomic_inc(), but will saturate at UINT_MAX and WARN. +- * +- * Provides no memory ordering, it is assumed the caller already has a +- * reference on the object. +- * +- * Will WARN if the refcount is 0, as this represents a possible use-after-free +- * condition. 
+- */ +-void refcount_inc_checked(refcount_t *r) ++void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t) + { +- WARN_ONCE(!refcount_inc_not_zero_checked(r), "refcount_t: increment on 0; use-after-free.\n"); +-} +-EXPORT_SYMBOL(refcount_inc_checked); +- +-/** +- * refcount_sub_and_test_checked - subtract from a refcount and test if it is 0 +- * @i: amount to subtract from the refcount +- * @r: the refcount +- * +- * Similar to atomic_dec_and_test(), but it will WARN, return false and +- * ultimately leak on underflow and will fail to decrement when saturated +- * at UINT_MAX. +- * +- * Provides release memory ordering, such that prior loads and stores are done +- * before, and provides an acquire ordering on success such that free() +- * must come after. +- * +- * Use of this function is not recommended for the normal reference counting +- * use case in which references are taken and released one at a time. In these +- * cases, refcount_dec(), or one of its variants, should instead be used to +- * decrement a reference count. +- * +- * Return: true if the resulting refcount is 0, false otherwise +- */ +-bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r) +-{ +- unsigned int new, val = atomic_read(&r->refs); +- +- do { +- if (unlikely(val == UINT_MAX)) +- return false; +- +- new = val - i; +- if (new > val) { +- WARN_ONCE(new > val, "refcount_t: underflow; use-after-free.\n"); +- return false; +- } +- +- } while (!atomic_try_cmpxchg_release(&r->refs, &val, new)); +- +- if (!new) { +- smp_acquire__after_ctrl_dep(); +- return true; ++ refcount_set(r, REFCOUNT_SATURATED); ++ ++ switch (t) { ++ case REFCOUNT_ADD_NOT_ZERO_OVF: ++ REFCOUNT_WARN("saturated; leaking memory"); ++ break; ++ case REFCOUNT_ADD_OVF: ++ REFCOUNT_WARN("saturated; leaking memory"); ++ break; ++ case REFCOUNT_ADD_UAF: ++ REFCOUNT_WARN("addition on 0; use-after-free"); ++ break; ++ case REFCOUNT_SUB_UAF: ++ REFCOUNT_WARN("underflow; use-after-free"); ++ break; ++ case REFCOUNT_DEC_LEAK: ++ REFCOUNT_WARN("decrement hit 0; leaking memory"); ++ break; ++ default: ++ REFCOUNT_WARN("unknown saturation event!?"); + } +- return false; +- +-} +-EXPORT_SYMBOL(refcount_sub_and_test_checked); +- +-/** +- * refcount_dec_and_test_checked - decrement a refcount and test if it is 0 +- * @r: the refcount +- * +- * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to +- * decrement when saturated at UINT_MAX. +- * +- * Provides release memory ordering, such that prior loads and stores are done +- * before, and provides an acquire ordering on success such that free() +- * must come after. +- * +- * Return: true if the resulting refcount is 0, false otherwise +- */ +-bool refcount_dec_and_test_checked(refcount_t *r) +-{ +- return refcount_sub_and_test_checked(1, r); +-} +-EXPORT_SYMBOL(refcount_dec_and_test_checked); +- +-/** +- * refcount_dec_checked - decrement a refcount +- * @r: the refcount +- * +- * Similar to atomic_dec(), it will WARN on underflow and fail to decrement +- * when saturated at UINT_MAX. +- * +- * Provides release memory ordering, such that prior loads and stores are done +- * before. 
+- */ +-void refcount_dec_checked(refcount_t *r) +-{ +- WARN_ONCE(refcount_dec_and_test_checked(r), "refcount_t: decrement hit 0; leaking memory.\n"); + } +-EXPORT_SYMBOL(refcount_dec_checked); ++EXPORT_SYMBOL(refcount_warn_saturate); + + /** + * refcount_dec_if_one - decrement a refcount if it is 1 +@@ -277,7 +76,7 @@ bool refcount_dec_not_one(refcount_t *r) + unsigned int new, val = atomic_read(&r->refs); + + do { +- if (unlikely(val == UINT_MAX)) ++ if (unlikely(val == REFCOUNT_SATURATED)) + return true; + + if (val == 1) +@@ -302,7 +101,7 @@ EXPORT_SYMBOL(refcount_dec_not_one); + * @lock: the mutex to be locked + * + * Similar to atomic_dec_and_mutex_lock(), it will WARN on underflow and fail +- * to decrement when saturated at UINT_MAX. ++ * to decrement when saturated at REFCOUNT_SATURATED. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides a control dependency such that free() must come after. +@@ -333,7 +132,7 @@ EXPORT_SYMBOL(refcount_dec_and_mutex_lock); + * @lock: the spinlock to be locked + * + * Similar to atomic_dec_and_lock(), it will WARN on underflow and fail to +- * decrement when saturated at UINT_MAX. ++ * decrement when saturated at REFCOUNT_SATURATED. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides a control dependency such that free() must come after. +diff --git a/mm/mempolicy.c b/mm/mempolicy.c +index d79ab5116a7be..f7b231f67156b 100644 +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -348,7 +348,7 @@ static void mpol_rebind_preferred(struct mempolicy *pol, + */ + static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask) + { +- if (!pol) ++ if (!pol || pol->mode == MPOL_LOCAL) + return; + if (!mpol_store_user_nodemask(pol) && !(pol->flags & MPOL_F_LOCAL) && + nodes_equal(pol->w.cpuset_mems_allowed, *newmask)) +diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c +index 3a9e9d9670be4..83a8c48dfaa8b 100644 +--- a/net/bluetooth/rfcomm/core.c ++++ b/net/bluetooth/rfcomm/core.c +@@ -553,22 +553,58 @@ struct rfcomm_dlc *rfcomm_dlc_exists(bdaddr_t *src, bdaddr_t *dst, u8 channel) + return dlc; + } + ++static int rfcomm_dlc_send_frag(struct rfcomm_dlc *d, struct sk_buff *frag) ++{ ++ int len = frag->len; ++ ++ BT_DBG("dlc %p mtu %d len %d", d, d->mtu, len); ++ ++ if (len > d->mtu) ++ return -EINVAL; ++ ++ rfcomm_make_uih(frag, d->addr); ++ __skb_queue_tail(&d->tx_queue, frag); ++ ++ return len; ++} ++ + int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb) + { +- int len = skb->len; ++ unsigned long flags; ++ struct sk_buff *frag, *next; ++ int len; + + if (d->state != BT_CONNECTED) + return -ENOTCONN; + +- BT_DBG("dlc %p mtu %d len %d", d, d->mtu, len); ++ frag = skb_shinfo(skb)->frag_list; ++ skb_shinfo(skb)->frag_list = NULL; + +- if (len > d->mtu) +- return -EINVAL; ++ /* Queue all fragments atomically. 
*/ ++ spin_lock_irqsave(&d->tx_queue.lock, flags); + +- rfcomm_make_uih(skb, d->addr); +- skb_queue_tail(&d->tx_queue, skb); ++ len = rfcomm_dlc_send_frag(d, skb); ++ if (len < 0 || !frag) ++ goto unlock; ++ ++ for (; frag; frag = next) { ++ int ret; ++ ++ next = frag->next; ++ ++ ret = rfcomm_dlc_send_frag(d, frag); ++ if (ret < 0) { ++ kfree_skb(frag); ++ goto unlock; ++ } ++ ++ len += ret; ++ } ++ ++unlock: ++ spin_unlock_irqrestore(&d->tx_queue.lock, flags); + +- if (!test_bit(RFCOMM_TX_THROTTLED, &d->flags)) ++ if (len > 0 && !test_bit(RFCOMM_TX_THROTTLED, &d->flags)) + rfcomm_schedule(); + return len; + } +diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c +index 90bb53aa4beed..e67310a749d27 100644 +--- a/net/bluetooth/rfcomm/sock.c ++++ b/net/bluetooth/rfcomm/sock.c +@@ -578,46 +578,20 @@ static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg, + lock_sock(sk); + + sent = bt_sock_wait_ready(sk, msg->msg_flags); +- if (sent) +- goto done; +- +- while (len) { +- size_t size = min_t(size_t, len, d->mtu); +- int err; +- +- skb = sock_alloc_send_skb(sk, size + RFCOMM_SKB_RESERVE, +- msg->msg_flags & MSG_DONTWAIT, &err); +- if (!skb) { +- if (sent == 0) +- sent = err; +- break; +- } +- skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE); +- +- err = memcpy_from_msg(skb_put(skb, size), msg, size); +- if (err) { +- kfree_skb(skb); +- if (sent == 0) +- sent = err; +- break; +- } + +- skb->priority = sk->sk_priority; ++ release_sock(sk); + +- err = rfcomm_dlc_send(d, skb); +- if (err < 0) { +- kfree_skb(skb); +- if (sent == 0) +- sent = err; +- break; +- } ++ if (sent) ++ return sent; + +- sent += size; +- len -= size; +- } ++ skb = bt_skb_sendmmsg(sk, msg, len, d->mtu, RFCOMM_SKB_HEAD_RESERVE, ++ RFCOMM_SKB_TAIL_RESERVE); ++ if (IS_ERR(skb)) ++ return PTR_ERR(skb); + +-done: +- release_sock(sk); ++ sent = rfcomm_dlc_send(d, skb); ++ if (sent < 0) ++ kfree_skb(skb); + + return sent; + } +diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c +index fbfb12e430101..78a549e506b12 100644 +--- a/net/bluetooth/sco.c ++++ b/net/bluetooth/sco.c +@@ -279,12 +279,10 @@ static int sco_connect(struct hci_dev *hdev, struct sock *sk) + return err; + } + +-static int sco_send_frame(struct sock *sk, void *buf, int len, +- unsigned int msg_flags) ++static int sco_send_frame(struct sock *sk, struct sk_buff *skb) + { + struct sco_conn *conn = sco_pi(sk)->conn; +- struct sk_buff *skb; +- int err; ++ int len = skb->len; + + /* Check outgoing MTU */ + if (len > conn->mtu) +@@ -292,11 +290,6 @@ static int sco_send_frame(struct sock *sk, void *buf, int len, + + BT_DBG("sk %p len %d", sk, len); + +- skb = bt_skb_send_alloc(sk, len, msg_flags & MSG_DONTWAIT, &err); +- if (!skb) +- return err; +- +- memcpy(skb_put(skb, len), buf, len); + hci_send_sco(conn->hcon, skb); + + return len; +@@ -715,7 +708,7 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) + { + struct sock *sk = sock->sk; +- void *buf; ++ struct sk_buff *skb; + int err; + + BT_DBG("sock %p, sk %p", sock, sk); +@@ -727,24 +720,21 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + +- buf = kmalloc(len, GFP_KERNEL); +- if (!buf) +- return -ENOMEM; +- +- if (memcpy_from_msg(buf, msg, len)) { +- kfree(buf); +- return -EFAULT; +- } ++ skb = bt_skb_sendmsg(sk, msg, len, len, 0, 0); ++ if (IS_ERR(skb)) ++ return PTR_ERR(skb); + + lock_sock(sk); + + if (sk->sk_state == BT_CONNECTED) +- err = sco_send_frame(sk, buf, len, 
msg->msg_flags); ++ err = sco_send_frame(sk, skb); + else + err = -ENOTCONN; + + release_sock(sk); +- kfree(buf); ++ ++ if (err < 0) ++ kfree_skb(skb); + return err; + } + +diff --git a/net/core/filter.c b/net/core/filter.c +index 75f53b5e63893..72bf78032f458 100644 +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -5839,7 +5839,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len + if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) + return -EINVAL; + +- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) ++ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) + return -EINVAL; + + if (!th->ack || th->rst || th->syn) +@@ -5914,7 +5914,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, + if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) + return -EINVAL; + +- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) ++ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) + return -ENOENT; + + if (!th->syn || th->ack || th->fin || th->rst) +diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c +index a1867c65ac632..6d86506e315f3 100644 +--- a/net/core/secure_seq.c ++++ b/net/core/secure_seq.c +@@ -65,7 +65,7 @@ u32 secure_tcpv6_ts_off(const struct net *net, + .daddr = *(struct in6_addr *)daddr, + }; + +- if (net->ipv4.sysctl_tcp_timestamps != 1) ++ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) + return 0; + + ts_secret_init(); +@@ -121,7 +121,7 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); + #ifdef CONFIG_INET + u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr) + { +- if (net->ipv4.sysctl_tcp_timestamps != 1) ++ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) + return 0; + + ts_secret_init(); +diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c +index 9ab73fcc7411c..d61ca7be6eda6 100644 +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -219,7 +219,7 @@ int inet_listen(struct socket *sock, int backlog) + * because the socket was in TCP_LISTEN state previously but + * was shutdown() rather than close(). 
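Several hunks in this patch convert reads of net->ipv4.sysctl_tcp_fastopen (inet_listen(), tcp_sendmsg_fastopen(), the TCP_FASTOPEN_CONNECT setsockopt path, tcp_fastopen_no_cookie(), tcp_try_fastopen()) to READ_ONCE(). The value is a bit mask rather than a boolean, so callers snapshot it once and test flags on the local copy. A small sketch of that pattern (the function name is invented; TFO_SERVER_ENABLE is the existing flag from linux/tcp.h):

#include <linux/compiler.h>
#include <linux/tcp.h>
#include <net/net_namespace.h>

static bool example_server_tfo_enabled(const struct net *net)
{
	/* one lockless snapshot, then stable flag tests */
	int tcp_fastopen = READ_ONCE(net->ipv4.sysctl_tcp_fastopen);

	return (tcp_fastopen & TFO_SERVER_ENABLE) != 0;
}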
+ */ +- tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; ++ tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); + if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && + (tcp_fastopen & TFO_SERVER_ENABLE) && + !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { +@@ -337,7 +337,7 @@ lookup_protocol: + inet->hdrincl = 1; + } + +- if (net->ipv4.sysctl_ip_no_pmtu_disc) ++ if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) + inet->pmtudisc = IP_PMTUDISC_DONT; + else + inet->pmtudisc = IP_PMTUDISC_WANT; +diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c +index 16fe034615635..28da0443f3e9e 100644 +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -2209,7 +2209,7 @@ void fib_select_multipath(struct fib_result *res, int hash) + } + + change_nexthops(fi) { +- if (net->ipv4.sysctl_fib_multipath_use_neigh) { ++ if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) { + if (!fib_good_nh(nexthop_nh)) + continue; + if (!first) { +diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c +index 9bc01411be4cc..b44f51e404aee 100644 +--- a/net/ipv4/icmp.c ++++ b/net/ipv4/icmp.c +@@ -886,7 +886,7 @@ static bool icmp_unreach(struct sk_buff *skb) + * values please see + * Documentation/networking/ip-sysctl.txt + */ +- switch (net->ipv4.sysctl_ip_no_pmtu_disc) { ++ switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) { + default: + net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n", + &iph->daddr); +diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c +index cac2fdd08df05..660b41040c771 100644 +--- a/net/ipv4/igmp.c ++++ b/net/ipv4/igmp.c +@@ -469,7 +469,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, + + if (pmc->multiaddr == IGMP_ALL_HOSTS) + return skb; +- if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) ++ if (ipv4_is_local_multicast(pmc->multiaddr) && ++ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) + return skb; + + mtu = READ_ONCE(dev->mtu); +@@ -595,7 +596,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) + if (pmc->multiaddr == IGMP_ALL_HOSTS) + continue; + if (ipv4_is_local_multicast(pmc->multiaddr) && +- !net->ipv4.sysctl_igmp_llm_reports) ++ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) + continue; + spin_lock_bh(&pmc->lock); + if (pmc->sfcount[MCAST_EXCLUDE]) +@@ -738,7 +739,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, + if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) + return igmpv3_send_report(in_dev, pmc); + +- if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports) ++ if (ipv4_is_local_multicast(group) && ++ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) + return 0; + + if (type == IGMP_HOST_LEAVE_MESSAGE) +@@ -922,7 +924,8 @@ static bool igmp_heard_report(struct in_device *in_dev, __be32 group) + + if (group == IGMP_ALL_HOSTS) + return false; +- if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports) ++ if (ipv4_is_local_multicast(group) && ++ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) + return false; + + rcu_read_lock(); +@@ -1047,7 +1050,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, + if (im->multiaddr == IGMP_ALL_HOSTS) + continue; + if (ipv4_is_local_multicast(im->multiaddr) && +- !net->ipv4.sysctl_igmp_llm_reports) ++ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) + continue; + spin_lock_bh(&im->lock); + if (im->tm_running) +@@ -1298,7 +1301,8 @@ static void __igmp_group_dropped(struct ip_mc_list *im, gfp_t gfp) + #ifdef 
CONFIG_IP_MULTICAST + if (im->multiaddr == IGMP_ALL_HOSTS) + return; +- if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) ++ if (ipv4_is_local_multicast(im->multiaddr) && ++ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) + return; + + reporter = im->reporter; +@@ -1340,7 +1344,8 @@ static void igmp_group_added(struct ip_mc_list *im) + #ifdef CONFIG_IP_MULTICAST + if (im->multiaddr == IGMP_ALL_HOSTS) + return; +- if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) ++ if (ipv4_is_local_multicast(im->multiaddr) && ++ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) + return; + + if (in_dev->dead) +@@ -1644,7 +1649,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev) + if (im->multiaddr == IGMP_ALL_HOSTS) + continue; + if (ipv4_is_local_multicast(im->multiaddr) && +- !net->ipv4.sysctl_igmp_llm_reports) ++ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) + continue; + + /* a failover is happening and switches +@@ -2194,7 +2199,7 @@ static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr, + count++; + } + err = -ENOBUFS; +- if (count >= net->ipv4.sysctl_igmp_max_memberships) ++ if (count >= READ_ONCE(net->ipv4.sysctl_igmp_max_memberships)) + goto done; + iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); + if (!iml) +diff --git a/net/ipv4/route.c b/net/ipv4/route.c +index 9280e50871596..7004e379c325f 100644 +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1423,7 +1423,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) + struct fib_info *fi = res->fi; + u32 mtu = 0; + +- if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu || ++ if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) || + fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU)) + mtu = fi->fib_mtu; + +diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c +index 6811174ad5189..3f6c9514c7a93 100644 +--- a/net/ipv4/syncookies.c ++++ b/net/ipv4/syncookies.c +@@ -243,12 +243,12 @@ bool cookie_timestamp_decode(const struct net *net, + return true; + } + +- if (!net->ipv4.sysctl_tcp_timestamps) ++ if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps)) + return false; + + tcp_opt->sack_ok = (options & TS_OPT_SACK) ? 
TCP_SACK_SEEN : 0; + +- if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack) ++ if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack)) + return false; + + if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK) +@@ -257,7 +257,7 @@ bool cookie_timestamp_decode(const struct net *net, + tcp_opt->wscale_ok = 1; + tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK; + +- return net->ipv4.sysctl_tcp_window_scaling != 0; ++ return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0; + } + EXPORT_SYMBOL(cookie_timestamp_decode); + +@@ -297,7 +297,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) + struct flowi4 fl4; + u32 tsoff = 0; + +- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) ++ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || ++ !th->ack || th->rst) + goto out; + + if (tcp_synq_no_recent_overflow(sk)) +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 4815cf72569e0..4b31f6e9ec61f 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -437,7 +437,7 @@ void tcp_init_sock(struct sock *sk) + tp->snd_cwnd_clamp = ~0; + tp->mss_cache = TCP_MSS_DEFAULT; + +- tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; ++ tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering); + tcp_assign_congestion_control(sk); + + tp->tsoffset = 0; +@@ -1148,7 +1148,8 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, + struct sockaddr *uaddr = msg->msg_name; + int err, flags; + +- if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) || ++ if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & ++ TFO_CLIENT_ENABLE) || + (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) && + uaddr->sa_family == AF_UNSPEC)) + return -EOPNOTSUPP; +@@ -3127,7 +3128,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, + case TCP_FASTOPEN_CONNECT: + if (val > 1 || val < 0) { + err = -EINVAL; +- } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) { ++ } else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) & ++ TFO_CLIENT_ENABLE) { + if (sk->sk_state == TCP_CLOSE) + tp->fastopen_connect = val; + else +@@ -3466,7 +3468,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, + case TCP_LINGER2: + val = tp->linger2; + if (val >= 0) +- val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ; ++ val = (val ? 
: READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ; + break; + case TCP_DEFER_ACCEPT: + val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, +diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c +index a5ec77a5ad6f5..21705b2ddaffa 100644 +--- a/net/ipv4/tcp_fastopen.c ++++ b/net/ipv4/tcp_fastopen.c +@@ -349,7 +349,7 @@ static bool tcp_fastopen_no_cookie(const struct sock *sk, + const struct dst_entry *dst, + int flag) + { +- return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) || ++ return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) || + tcp_sk(sk)->fastopen_no_cookie || + (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE)); + } +@@ -364,7 +364,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, + const struct dst_entry *dst) + { + bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; +- int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; ++ int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); + struct tcp_fastopen_cookie valid_foc = { .len = -1 }; + struct sock *child; + int ret = 0; +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 0808110451a0f..c151c4dd4ae63 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -905,7 +905,7 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq, + tp->undo_marker ? tp->undo_retrans : 0); + #endif + tp->reordering = min_t(u32, (metric + mss - 1) / mss, +- sock_net(sk)->ipv4.sysctl_tcp_max_reordering); ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); + } + + /* This exciting event is worth to be remembered. 8) */ +@@ -1886,7 +1886,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend) + return; + + tp->reordering = min_t(u32, tp->packets_out + addend, +- sock_net(sk)->ipv4.sysctl_tcp_max_reordering); ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); + tp->reord_seen++; + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER); + } +@@ -1950,7 +1950,8 @@ static inline void tcp_init_undo(struct tcp_sock *tp) + + static bool tcp_is_rack(const struct sock *sk) + { +- return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION; ++ return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & ++ TCP_RACK_LOSS_DETECTION; + } + + /* If we detect SACK reneging, forget all SACK information +@@ -1994,6 +1995,7 @@ void tcp_enter_loss(struct sock *sk) + struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); + bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; ++ u8 reordering; + + tcp_timeout_mark_lost(sk); + +@@ -2014,10 +2016,12 @@ void tcp_enter_loss(struct sock *sk) + /* Timeout in disordered state after receiving substantial DUPACKs + * suggests that the degree of reordering is over-estimated. + */ ++ reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering); + if (icsk->icsk_ca_state <= TCP_CA_Disorder && +- tp->sacked_out >= net->ipv4.sysctl_tcp_reordering) ++ tp->sacked_out >= reordering) + tp->reordering = min_t(unsigned int, tp->reordering, +- net->ipv4.sysctl_tcp_reordering); ++ reordering); ++ + tcp_set_ca_state(sk, TCP_CA_Loss); + tp->high_seq = tp->snd_nxt; + tcp_ecn_queue_cwr(tp); +@@ -3319,7 +3323,8 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) + * new SACK or ECE mark may first advance cwnd here and later reduce + * cwnd in tcp_fastretrans_alert() based on more states. 
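
The hunks above, and most of those that follow, apply one recurring fix: TCP/IP sysctl knobs that are read on lockless fast paths are now loaded with READ_ONCE(), so the value cannot be torn or silently re-fetched while a writer changes it through /proc/sys. A minimal sketch of the pattern is below; the struct, field and function names are invented for illustration, and the matching WRITE_ONCE() on the sysctl write side is assumed to live elsewhere (it is not part of the hunks shown here).

/* Illustrative sketch only, not upstream code; all names are made up. */
#include <linux/compiler.h>	/* READ_ONCE() / WRITE_ONCE() on v5.4 */

struct example_netns {
	int sysctl_tcp_sack;	/* may change at any time via /proc/sys */
};

/* Fast path: load the knob exactly once, without load tearing. */
static bool example_sack_enabled(const struct example_netns *ns)
{
	return READ_ONCE(ns->sysctl_tcp_sack) != 0;
}

/* Sysctl write side: publish the new value with WRITE_ONCE(). */
static void example_set_sack(struct example_netns *ns, int val)
{
	WRITE_ONCE(ns->sysctl_tcp_sack, val);
}

The generated code is usually unchanged; the point is to document the lockless read and rule out compiler load tearing, which is what these data-race fixes target.
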
+ */ +- if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering) ++ if (tcp_sk(sk)->reordering > ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering)) + return flag & FLAG_FORWARD_PROGRESS; + + return flag & FLAG_DATA_ACKED; +@@ -3902,7 +3907,7 @@ void tcp_parse_options(const struct net *net, + break; + case TCPOPT_WINDOW: + if (opsize == TCPOLEN_WINDOW && th->syn && +- !estab && net->ipv4.sysctl_tcp_window_scaling) { ++ !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) { + __u8 snd_wscale = *(__u8 *)ptr; + opt_rx->wscale_ok = 1; + if (snd_wscale > TCP_MAX_WSCALE) { +@@ -3918,7 +3923,7 @@ void tcp_parse_options(const struct net *net, + case TCPOPT_TIMESTAMP: + if ((opsize == TCPOLEN_TIMESTAMP) && + ((estab && opt_rx->tstamp_ok) || +- (!estab && net->ipv4.sysctl_tcp_timestamps))) { ++ (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) { + opt_rx->saw_tstamp = 1; + opt_rx->rcv_tsval = get_unaligned_be32(ptr); + opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); +@@ -3926,7 +3931,7 @@ void tcp_parse_options(const struct net *net, + break; + case TCPOPT_SACK_PERM: + if (opsize == TCPOLEN_SACK_PERM && th->syn && +- !estab && net->ipv4.sysctl_tcp_sack) { ++ !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) { + opt_rx->sack_ok = TCP_SACK_SEEN; + tcp_sack_reset(opt_rx); + } +@@ -5351,7 +5356,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) + struct tcp_sock *tp = tcp_sk(sk); + u32 ptr = ntohs(th->urg_ptr); + +- if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg) ++ if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg)) + ptr--; + ptr += ntohl(th->seq); + +@@ -6530,11 +6535,14 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) + { + struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; + const char *msg = "Dropping request"; +- bool want_cookie = false; + struct net *net = sock_net(sk); ++ bool want_cookie = false; ++ u8 syncookies; ++ ++ syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); + + #ifdef CONFIG_SYN_COOKIES +- if (net->ipv4.sysctl_tcp_syncookies) { ++ if (syncookies) { + msg = "Sending cookies"; + want_cookie = true; + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); +@@ -6542,8 +6550,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) + #endif + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); + +- if (!queue->synflood_warned && +- net->ipv4.sysctl_tcp_syncookies != 2 && ++ if (!queue->synflood_warned && syncookies != 2 && + xchg(&queue->synflood_warned, 1) == 0) + net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", + proto, sk->sk_num, msg); +@@ -6578,7 +6585,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, + struct tcp_sock *tp = tcp_sk(sk); + u16 mss; + +- if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 && ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) != 2 && + !inet_csk_reqsk_queue_is_full(sk)) + return 0; + +@@ -6612,13 +6619,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, + bool want_cookie = false; + struct dst_entry *dst; + struct flowi fl; ++ u8 syncookies; ++ ++ syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); + + /* TW buckets are converted to open requests without + * limitations, they conserve resources and peer is + * evidently real one. 
+ */ +- if ((net->ipv4.sysctl_tcp_syncookies == 2 || +- inet_csk_reqsk_queue_is_full(sk)) && !isn) { ++ if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) { + want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name); + if (!want_cookie) + goto drop; +@@ -6668,10 +6677,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, + goto drop_and_free; + + if (!want_cookie && !isn) { ++ int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog); ++ + /* Kill the following clause, if you dislike this way. */ +- if (!net->ipv4.sysctl_tcp_syncookies && +- (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < +- (net->ipv4.sysctl_max_syn_backlog >> 2)) && ++ if (!syncookies && ++ (max_syn_backlog - inet_csk_reqsk_queue_len(sk) < ++ (max_syn_backlog >> 2)) && + !tcp_peer_is_proven(req, dst)) { + /* Without syncookies last quarter of + * backlog is filled with destinations, +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index 72fe93ace7d73..b95e1a3487c8b 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -105,10 +105,10 @@ static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb) + + int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) + { ++ int reuse = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse); + const struct inet_timewait_sock *tw = inet_twsk(sktw); + const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); + struct tcp_sock *tp = tcp_sk(sk); +- int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse; + + if (reuse == 2) { + /* Still does not detect *everything* that goes through +diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c +index c4848e7a0aad1..9a7d8a5998578 100644 +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -425,7 +425,8 @@ void tcp_update_metrics(struct sock *sk) + if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) { + val = tcp_metric_get(tm, TCP_METRIC_REORDERING); + if (val < tp->reordering && +- tp->reordering != net->ipv4.sysctl_tcp_reordering) ++ tp->reordering != ++ READ_ONCE(net->ipv4.sysctl_tcp_reordering)) + tcp_metric_set(tm, TCP_METRIC_REORDERING, + tp->reordering); + } +diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c +index 9b038cb0a43d2..324f43fadb37a 100644 +--- a/net/ipv4/tcp_minisocks.c ++++ b/net/ipv4/tcp_minisocks.c +@@ -180,7 +180,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, + * Oh well... nobody has a sufficient solution to this + * protocol bug yet. 
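
A second flavour of the same fix appears in tcp_enter_loss() and tcp_conn_request() above: when a knob feeds more than one comparison in a single decision, it is first copied into a local through READ_ONCE() so every use sees the same snapshot. Roughly, with illustrative names (the parameter stands in for a per-netns sysctl):

/* Illustrative sketch only. */
#include <linux/compiler.h>

static bool example_backlog_nearly_full(const int *sysctl_max_syn_backlog,
					int queue_len)
{
	/* One load, used twice, so both uses agree even if the sysctl
	 * is being rewritten concurrently. */
	int max_backlog = READ_ONCE(*sysctl_max_syn_backlog);

	return max_backlog - queue_len < (max_backlog >> 2);
}
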
+ */ +- if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) { ++ if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) { + kill: + inet_twsk_deschedule_put(tw); + return TCP_TW_SUCCESS; +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index 739fc69cdcc62..97f29ece38000 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -620,18 +620,18 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, + opts->mss = tcp_advertise_mss(sk); + remaining -= TCPOLEN_MSS_ALIGNED; + +- if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) { ++ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) { + opts->options |= OPTION_TS; + opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; + opts->tsecr = tp->rx_opt.ts_recent; + remaining -= TCPOLEN_TSTAMP_ALIGNED; + } +- if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) { ++ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling))) { + opts->ws = tp->rx_opt.rcv_wscale; + opts->options |= OPTION_WSCALE; + remaining -= TCPOLEN_WSCALE_ALIGNED; + } +- if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) { ++ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_sack))) { + opts->options |= OPTION_SACK_ADVERTISE; + if (unlikely(!(OPTION_TS & opts->options))) + remaining -= TCPOLEN_SACKPERM_ALIGNED; +@@ -1494,7 +1494,8 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) + mss_now -= icsk->icsk_ext_hdr_len; + + /* Then reserve room for full set of TCP options and 8 bytes of data */ +- mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss); ++ mss_now = max(mss_now, ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss)); + return mss_now; + } + +@@ -1537,10 +1538,10 @@ void tcp_mtup_init(struct sock *sk) + struct inet_connection_sock *icsk = inet_csk(sk); + struct net *net = sock_net(sk); + +- icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1; ++ icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1; + icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) + + icsk->icsk_af_ops->net_header_len; +- icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); ++ icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, READ_ONCE(net->ipv4.sysctl_tcp_base_mss)); + icsk->icsk_mtup.probe_size = 0; + if (icsk->icsk_mtup.enabled) + icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; +@@ -1672,7 +1673,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) + if (tp->packets_out > tp->snd_cwnd_used) + tp->snd_cwnd_used = tp->packets_out; + +- if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle && ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) && + (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && + !ca_ops->cong_control) + tcp_cwnd_application_limited(sk); +@@ -2051,7 +2052,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk) + u32 interval; + s32 delta; + +- interval = net->ipv4.sysctl_tcp_probe_interval; ++ interval = READ_ONCE(net->ipv4.sysctl_tcp_probe_interval); + delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp; + if (unlikely(delta >= interval * HZ)) { + int mss = tcp_current_mss(sk); +@@ -2133,7 +2134,7 @@ static int tcp_mtu_probe(struct sock *sk) + * probing process by not resetting search range to its orignal. 
+ */ + if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) || +- interval < net->ipv4.sysctl_tcp_probe_threshold) { ++ interval < READ_ONCE(net->ipv4.sysctl_tcp_probe_threshold)) { + /* Check whether enough time has elaplased for + * another round of probing. + */ +@@ -2508,7 +2509,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) + if (rcu_access_pointer(tp->fastopen_rsk)) + return false; + +- early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans; ++ early_retrans = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_early_retrans); + /* Schedule a loss probe in 2*RTT for SACK capable connections + * not in loss recovery, that are either limited by cwnd or application. + */ +@@ -2870,7 +2871,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, + struct sk_buff *skb = to, *tmp; + bool first = true; + +- if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse) ++ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)) + return; + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) + return; +@@ -3406,7 +3407,7 @@ static void tcp_connect_init(struct sock *sk) + * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. + */ + tp->tcp_header_len = sizeof(struct tcphdr); +- if (sock_net(sk)->ipv4.sysctl_tcp_timestamps) ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps)) + tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED; + + #ifdef CONFIG_TCP_MD5SIG +@@ -3442,7 +3443,7 @@ static void tcp_connect_init(struct sock *sk) + tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), + &tp->rcv_wnd, + &tp->window_clamp, +- sock_net(sk)->ipv4.sysctl_tcp_window_scaling, ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling), + &rcv_wscale, + rcv_wnd); + +@@ -3846,7 +3847,7 @@ void tcp_send_probe0(struct sock *sk) + + icsk->icsk_probes_out++; + if (err <= 0) { +- if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2) ++ if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2)) + icsk->icsk_backoff++; + timeout = tcp_probe0_when(sk, TCP_RTO_MAX); + } else { +diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c +index 8757bb6cb1d93..22ec8dcc1428a 100644 +--- a/net/ipv4/tcp_recovery.c ++++ b/net/ipv4/tcp_recovery.c +@@ -33,7 +33,8 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk) + return 0; + + if (tp->sacked_out >= tp->reordering && +- !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH)) ++ !(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & ++ TCP_RACK_NO_DUPTHRESH)) + return 0; + } + +@@ -204,7 +205,8 @@ void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs) + { + struct tcp_sock *tp = tcp_sk(sk); + +- if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND || ++ if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & ++ TCP_RACK_STATIC_REO_WND) || + !rs->prior_delivered) + return; + +diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c +index fa2ae96ecdc40..a0107eb02ae4c 100644 +--- a/net/ipv4/tcp_timer.c ++++ b/net/ipv4/tcp_timer.c +@@ -143,7 +143,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) + */ + static int tcp_orphan_retries(struct sock *sk, bool alive) + { +- int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */ ++ int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */ + + /* We know from an ICMP that something is wrong. 
*/ + if (sk->sk_err_soft && !alive) +@@ -163,7 +163,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) + int mss; + + /* Black hole detection */ +- if (!net->ipv4.sysctl_tcp_mtu_probing) ++ if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing)) + return; + + if (!icsk->icsk_mtup.enabled) { +@@ -171,9 +171,9 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) + icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; + } else { + mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; +- mss = min(net->ipv4.sysctl_tcp_base_mss, mss); +- mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor); +- mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss); ++ mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss); ++ mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor)); ++ mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss)); + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); + } + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); +@@ -245,7 +245,7 @@ static int tcp_write_timeout(struct sock *sk) + retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; + expired = icsk->icsk_retransmits >= retry_until; + } else { +- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) { ++ if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) { + /* Black hole detection */ + tcp_mtu_probing(icsk, sk); + +@@ -254,7 +254,7 @@ static int tcp_write_timeout(struct sock *sk) + sk_rethink_txhash(sk); + } + +- retry_until = net->ipv4.sysctl_tcp_retries2; ++ retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2); + if (sock_flag(sk, SOCK_DEAD)) { + const bool alive = icsk->icsk_rto < TCP_RTO_MAX; + +@@ -381,7 +381,7 @@ static void tcp_probe_timer(struct sock *sk) + msecs_to_jiffies(icsk->icsk_user_timeout)) + goto abort; + +- max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2; ++ max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2); + if (sock_flag(sk, SOCK_DEAD)) { + const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; + +@@ -569,7 +569,7 @@ out_reset_timer: + * linear-timeout retransmissions into a black hole + */ + if (sk->sk_state == TCP_ESTABLISHED && +- (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) && ++ (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) && + tcp_stream_is_thin(tp) && + icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { + icsk->icsk_backoff = 0; +@@ -580,7 +580,7 @@ out_reset_timer: + } + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX); +- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0)) ++ if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0)) + __sk_dst_reset(sk); + + out:; +diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c +index 942da168f18fb..56f396ecc26b7 100644 +--- a/net/ipv6/af_inet6.c ++++ b/net/ipv6/af_inet6.c +@@ -222,7 +222,7 @@ lookup_protocol: + inet->mc_list = NULL; + inet->rcv_tos = 0; + +- if (net->ipv4.sysctl_ip_no_pmtu_disc) ++ if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) + inet->pmtudisc = IP_PMTUDISC_DONT; + else + inet->pmtudisc = IP_PMTUDISC_WANT; +diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c +index 37ab254f7b92d..7e55505465949 100644 +--- a/net/ipv6/syncookies.c ++++ b/net/ipv6/syncookies.c +@@ -141,7 +141,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) + __u8 rcv_wscale; + u32 tsoff = 0; + +- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || 
th->rst) ++ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || ++ !th->ack || th->rst) + goto out; + + if (tcp_synq_no_recent_overflow(sk)) +diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c +index bb370a7948f42..363a64c124144 100644 +--- a/net/sctp/protocol.c ++++ b/net/sctp/protocol.c +@@ -358,7 +358,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) + if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) && + ret != RTN_LOCAL && + !sp->inet.freebind && +- !net->ipv4.sysctl_ip_nonlocal_bind) ++ !READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind)) + return 0; + + if (ipv6_only_sock(sctp_opt2sk(sp))) +diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c +index abb93f7343c53..2c3cf47d730bb 100644 +--- a/net/tls/tls_device.c ++++ b/net/tls/tls_device.c +@@ -94,13 +94,16 @@ static void tls_device_queue_ctx_destruction(struct tls_context *ctx) + unsigned long flags; + + spin_lock_irqsave(&tls_device_lock, flags); ++ if (unlikely(!refcount_dec_and_test(&ctx->refcount))) ++ goto unlock; ++ + list_move_tail(&ctx->list, &tls_device_gc_list); + + /* schedule_work inside the spinlock + * to make sure tls_device_down waits for that work. + */ + schedule_work(&tls_device_gc_work); +- ++unlock: + spin_unlock_irqrestore(&tls_device_lock, flags); + } + +@@ -191,8 +194,7 @@ static void tls_device_sk_destruct(struct sock *sk) + clean_acked_data_disable(inet_csk(sk)); + } + +- if (refcount_dec_and_test(&tls_ctx->refcount)) +- tls_device_queue_ctx_destruction(tls_ctx); ++ tls_device_queue_ctx_destruction(tls_ctx); + } + + void tls_device_free_resources_tx(struct sock *sk) +diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c +index 3ecb77c58c44e..28a8cdef8e51f 100644 +--- a/net/xfrm/xfrm_policy.c ++++ b/net/xfrm/xfrm_policy.c +@@ -2679,8 +2679,10 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, + *num_xfrms = 0; + return 0; + } +- if (IS_ERR(pols[0])) ++ if (IS_ERR(pols[0])) { ++ *num_pols = 0; + return PTR_ERR(pols[0]); ++ } + + *num_xfrms = pols[0]->xfrm_nr; + +@@ -2695,6 +2697,7 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, + if (pols[1]) { + if (IS_ERR(pols[1])) { + xfrm_pols_put(pols, *num_pols); ++ *num_pols = 0; + return PTR_ERR(pols[1]); + } + (*num_pols)++; +diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c +index 268bba29bb603..bee1a8143d75f 100644 +--- a/net/xfrm/xfrm_state.c ++++ b/net/xfrm/xfrm_state.c +@@ -2488,7 +2488,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) + int err; + + if (family == AF_INET && +- xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc) ++ READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)) + x->props.flags |= XFRM_STATE_NOPMTUDISC; + + err = -EPROTONOSUPPORT; +diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig +index 748f3ee27b23d..44b3315f32352 100644 +--- a/security/integrity/ima/Kconfig ++++ b/security/integrity/ima/Kconfig +@@ -69,10 +69,9 @@ choice + hash, defined as 20 bytes, and a null terminated pathname, + limited to 255 characters. The 'ima-ng' measurement list + template permits both larger hash digests and longer +- pathnames. ++ pathnames. The configured default template can be replaced ++ by specifying "ima_template=" on the boot command line. 
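
Going back to the net/tls hunks a little way above: the final refcount_dec_and_test() is moved out of tls_device_sk_destruct() and into tls_device_queue_ctx_destruction(), so the last-reference check happens under tls_device_lock and cannot race with tls_device_down() walking the same list. The general shape, with placeholder names rather than the real tls_device symbols, is something like:

/* Illustrative sketch only; names are placeholders. */
#include <linux/list.h>
#include <linux/refcount.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct example_ctx {
	refcount_t		refcount;
	struct list_head	list;
};

static DEFINE_SPINLOCK(example_lock);
static LIST_HEAD(example_gc_list);

static void example_queue_destruction(struct example_ctx *ctx,
				      struct work_struct *gc_work)
{
	unsigned long flags;

	spin_lock_irqsave(&example_lock, flags);
	if (!refcount_dec_and_test(&ctx->refcount))
		goto unlock;	/* another path still holds a reference */

	/* Last reference: queue the context for teardown while still holding
	 * the lock, so a concurrent shutdown that also takes example_lock
	 * sees it either still referenced or already queued, never in
	 * between. */
	list_move_tail(&ctx->list, &example_gc_list);
	schedule_work(gc_work);
unlock:
	spin_unlock_irqrestore(&example_lock, flags);
}

The caller-side change is the mirror image: the destructor no longer tests the refcount itself, it simply calls the queueing helper unconditionally.
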
+ +- config IMA_TEMPLATE +- bool "ima" + config IMA_NG_TEMPLATE + bool "ima-ng (default)" + config IMA_SIG_TEMPLATE +@@ -82,7 +81,6 @@ endchoice + config IMA_DEFAULT_TEMPLATE + string + depends on IMA +- default "ima" if IMA_TEMPLATE + default "ima-ng" if IMA_NG_TEMPLATE + default "ima-sig" if IMA_SIG_TEMPLATE + +@@ -102,15 +100,15 @@ choice + + config IMA_DEFAULT_HASH_SHA256 + bool "SHA256" +- depends on CRYPTO_SHA256=y && !IMA_TEMPLATE ++ depends on CRYPTO_SHA256=y + + config IMA_DEFAULT_HASH_SHA512 + bool "SHA512" +- depends on CRYPTO_SHA512=y && !IMA_TEMPLATE ++ depends on CRYPTO_SHA512=y + + config IMA_DEFAULT_HASH_WP512 + bool "WP512" +- depends on CRYPTO_WP512=y && !IMA_TEMPLATE ++ depends on CRYPTO_WP512=y + endchoice + + config IMA_DEFAULT_HASH +diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c +index a073e49d5cd7d..14aef74d3588a 100644 +--- a/security/integrity/ima/ima_policy.c ++++ b/security/integrity/ima/ima_policy.c +@@ -1542,6 +1542,10 @@ bool ima_appraise_signature(enum kernel_read_file_id id) + if (id >= READING_MAX_ID) + return false; + ++ if (id == READING_KEXEC_IMAGE && !(ima_appraise & IMA_APPRAISE_ENFORCE) ++ && security_locked_down(LOCKDOWN_KEXEC)) ++ return false; ++ + func = read_idmap[id] ?: FILE_CHECK; + + rcu_read_lock(); +diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c +index fe1ea03582cbb..9fc7c81ec6ae5 100644 +--- a/sound/core/memalloc.c ++++ b/sound/core/memalloc.c +@@ -124,6 +124,7 @@ int snd_dma_alloc_pages(int type, struct device *device, size_t size, + if (WARN_ON(!device)) + return -EINVAL; + ++ size = PAGE_ALIGN(size); + dmab->dev.type = type; + dmab->dev.dev = device; + dmab->bytes = 0; |
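
The last hunk shown rounds the requested ALSA buffer size up to a whole page before snd_dma_alloc_pages() attempts the allocation, so page-granular users of the buffer (an mmap of it, for example) see a whole number of pages. PAGE_ALIGN() is the stock helper for that; a tiny sketch of the arithmetic, not the driver code itself:

/* Illustrative sketch of PAGE_ALIGN() rounding. */
#include <linux/mm.h>	/* PAGE_ALIGN(), PAGE_SIZE */

static size_t example_dma_buffer_bytes(size_t requested)
{
	/* Round up to the next PAGE_SIZE multiple: with 4 KiB pages,
	 * 1 -> 4096, 4096 -> 4096, 4097 -> 8192. */
	return PAGE_ALIGN(requested);
}
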