summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Mike Pagano <mpagano@gentoo.org> 2022-06-22 09:25:14 -0400
committer: Mike Pagano <mpagano@gentoo.org> 2022-06-22 09:25:14 -0400
commit: d0ea94c7ad1cdd7b12f4f977756b062563ba9b17 (patch)
tree: d89fb44a045ea7462689d675952b3902a8cee560
parent: Linux patch 5.4.200 (diff)
download: linux-patches-d0ea94c7ad1cdd7b12f4f977756b062563ba9b17.tar.gz
linux-patches-d0ea94c7ad1cdd7b12f4f977756b062563ba9b17.tar.bz2
linux-patches-d0ea94c7ad1cdd7b12f4f977756b062563ba9b17.zip
Update wireguard patch for 5.4 (5.4-204)
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rwxr-xr-x 2400_wireguard-backport-v1.0.20210606.patch 1594
1 files changed, 908 insertions, 686 deletions
diff --git a/2400_wireguard-backport-v1.0.20210606.patch b/2400_wireguard-backport-v1.0.20210606.patch
index a5b7b802..0c615d4d 100755
--- a/2400_wireguard-backport-v1.0.20210606.patch
+++ b/2400_wireguard-backport-v1.0.20210606.patch
@@ -10504,9 +10504,9 @@ exit 0
+$output=pop and open STDOUT,">$output";
+print $code;
+close STDOUT;
---- /dev/null
+--- b/include/crypto/blake2s.h
+++ b/include/crypto/blake2s.h
-@@ -0,0 +1,106 @@
+@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
@@ -10609,9 +10609,6 @@ exit 0
+ blake2s_final(&state, out);
+}
+
-+void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen,
-+ const size_t keylen);
-+
+#endif /* BLAKE2S_H */
--- b/include/crypto/internal/blake2s.h
+++ b/include/crypto/internal/blake2s.h
@@ -10640,123 +10637,9 @@ exit 0
+}
+
+#endif /* BLAKE2S_INTERNAL_H */
---- /dev/null
-+++ b/lib/crypto/blake2s-generic.c
-@@ -0,0 +1,111 @@
-+// SPDX-License-Identifier: GPL-2.0 OR MIT
-+/*
-+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
-+ *
-+ * This is an implementation of the BLAKE2s hash and PRF functions.
-+ *
-+ * Information: https://blake2.net/
-+ *
-+ */
-+
-+#include <crypto/internal/blake2s.h>
-+#include <linux/types.h>
-+#include <linux/string.h>
-+#include <linux/kernel.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/bug.h>
-+#include <asm/unaligned.h>
-+
-+static const u8 blake2s_sigma[10][16] = {
-+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
-+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
-+ { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
-+ { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
-+ { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
-+ { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
-+ { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
-+ { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
-+ { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
-+ { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
-+};
-+
-+static inline void blake2s_increment_counter(struct blake2s_state *state,
-+ const u32 inc)
-+{
-+ state->t[0] += inc;
-+ state->t[1] += (state->t[0] < inc);
-+}
-+
-+void blake2s_compress_generic(struct blake2s_state *state,const u8 *block,
-+ size_t nblocks, const u32 inc)
-+{
-+ u32 m[16];
-+ u32 v[16];
-+ int i;
-+
-+ WARN_ON(IS_ENABLED(DEBUG) &&
-+ (nblocks > 1 && inc != BLAKE2S_BLOCK_SIZE));
-+
-+ while (nblocks > 0) {
-+ blake2s_increment_counter(state, inc);
-+ memcpy(m, block, BLAKE2S_BLOCK_SIZE);
-+ le32_to_cpu_array(m, ARRAY_SIZE(m));
-+ memcpy(v, state->h, 32);
-+ v[ 8] = BLAKE2S_IV0;
-+ v[ 9] = BLAKE2S_IV1;
-+ v[10] = BLAKE2S_IV2;
-+ v[11] = BLAKE2S_IV3;
-+ v[12] = BLAKE2S_IV4 ^ state->t[0];
-+ v[13] = BLAKE2S_IV5 ^ state->t[1];
-+ v[14] = BLAKE2S_IV6 ^ state->f[0];
-+ v[15] = BLAKE2S_IV7 ^ state->f[1];
-+
-+#define G(r, i, a, b, c, d) do { \
-+ a += b + m[blake2s_sigma[r][2 * i + 0]]; \
-+ d = ror32(d ^ a, 16); \
-+ c += d; \
-+ b = ror32(b ^ c, 12); \
-+ a += b + m[blake2s_sigma[r][2 * i + 1]]; \
-+ d = ror32(d ^ a, 8); \
-+ c += d; \
-+ b = ror32(b ^ c, 7); \
-+} while (0)
-+
-+#define ROUND(r) do { \
-+ G(r, 0, v[0], v[ 4], v[ 8], v[12]); \
-+ G(r, 1, v[1], v[ 5], v[ 9], v[13]); \
-+ G(r, 2, v[2], v[ 6], v[10], v[14]); \
-+ G(r, 3, v[3], v[ 7], v[11], v[15]); \
-+ G(r, 4, v[0], v[ 5], v[10], v[15]); \
-+ G(r, 5, v[1], v[ 6], v[11], v[12]); \
-+ G(r, 6, v[2], v[ 7], v[ 8], v[13]); \
-+ G(r, 7, v[3], v[ 4], v[ 9], v[14]); \
-+} while (0)
-+ ROUND(0);
-+ ROUND(1);
-+ ROUND(2);
-+ ROUND(3);
-+ ROUND(4);
-+ ROUND(5);
-+ ROUND(6);
-+ ROUND(7);
-+ ROUND(8);
-+ ROUND(9);
-+
-+#undef G
-+#undef ROUND
-+
-+ for (i = 0; i < 8; ++i)
-+ state->h[i] ^= v[i] ^ v[i + 8];
-+
-+ block += BLAKE2S_BLOCK_SIZE;
-+ --nblocks;
-+ }
-+}
-+
-+EXPORT_SYMBOL(blake2s_compress_generic);
-+
-+MODULE_LICENSE("GPL v2");
-+MODULE_DESCRIPTION("BLAKE2s hash function");
-+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
---- /dev/null
+--- b/lib/crypto/blake2s-selftest.c
+++ b/lib/crypto/blake2s-selftest.c
-@@ -0,0 +1,622 @@
+@@ -0,0 +1,591 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
@@ -10774,7 +10657,6 @@ exit 0
+ * #include <stdio.h>
+ *
+ * #include <openssl/evp.h>
-+ * #include <openssl/hmac.h>
+ *
+ * #define BLAKE2S_TESTVEC_COUNT 256
+ *
@@ -10817,16 +10699,6 @@ exit 0
+ * }
+ * printf("};\n\n");
+ *
-+ * printf("static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {\n");
-+ *
-+ * HMAC(EVP_blake2s256(), key, sizeof(key), buf, sizeof(buf), hash, NULL);
-+ * print_vec(hash, BLAKE2S_OUTBYTES);
-+ *
-+ * HMAC(EVP_blake2s256(), buf, sizeof(buf), key, sizeof(key), hash, NULL);
-+ * print_vec(hash, BLAKE2S_OUTBYTES);
-+ *
-+ * printf("};\n");
-+ *
+ * return 0;
+ *}
+ */
@@ -11313,15 +11185,6 @@ exit 0
+ 0xd6, 0x98, 0x6b, 0x07, 0x10, 0x65, 0x52, 0x65, },
+};
+
-+static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {
-+ { 0xce, 0xe1, 0x57, 0x69, 0x82, 0xdc, 0xbf, 0x43, 0xad, 0x56, 0x4c, 0x70,
-+ 0xed, 0x68, 0x16, 0x96, 0xcf, 0xa4, 0x73, 0xe8, 0xe8, 0xfc, 0x32, 0x79,
-+ 0x08, 0x0a, 0x75, 0x82, 0xda, 0x3f, 0x05, 0x11, },
-+ { 0x77, 0x2f, 0x0c, 0x71, 0x41, 0xf4, 0x4b, 0x2b, 0xb3, 0xc6, 0xb6, 0xf9,
-+ 0x60, 0xde, 0xe4, 0x52, 0x38, 0x66, 0xe8, 0xbf, 0x9b, 0x96, 0xc4, 0x9f,
-+ 0x60, 0xd9, 0x24, 0x37, 0x99, 0xd6, 0xec, 0x31, },
-+};
-+
+bool __init blake2s_selftest(void)
+{
+ u8 key[BLAKE2S_KEY_SIZE];
@@ -11366,22 +11229,11 @@ exit 0
+ }
+ }
+
-+ if (success) {
-+ blake2s256_hmac(hash, buf, key, sizeof(buf), sizeof(key));
-+ success &= !memcmp(hash, blake2s_hmac_testvecs[0], BLAKE2S_HASH_SIZE);
-+
-+ blake2s256_hmac(hash, key, buf, sizeof(key), sizeof(buf));
-+ success &= !memcmp(hash, blake2s_hmac_testvecs[1], BLAKE2S_HASH_SIZE);
-+
-+ if (!success)
-+ pr_err("blake2s256_hmac self-test: FAIL\n");
-+ }
-+
+ return success;
+}
---- /dev/null
+--- b/lib/crypto/blake2s.c
+++ b/lib/crypto/blake2s.c
-@@ -0,0 +1,126 @@
+@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
@@ -11454,43 +11306,6 @@ exit 0
+}
+EXPORT_SYMBOL(blake2s_final);
+
-+void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen,
-+ const size_t keylen)
-+{
-+ struct blake2s_state state;
-+ u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 };
-+ u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32));
-+ int i;
-+
-+ if (keylen > BLAKE2S_BLOCK_SIZE) {
-+ blake2s_init(&state, BLAKE2S_HASH_SIZE);
-+ blake2s_update(&state, key, keylen);
-+ blake2s_final(&state, x_key);
-+ } else
-+ memcpy(x_key, key, keylen);
-+
-+ for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
-+ x_key[i] ^= 0x36;
-+
-+ blake2s_init(&state, BLAKE2S_HASH_SIZE);
-+ blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
-+ blake2s_update(&state, in, inlen);
-+ blake2s_final(&state, i_hash);
-+
-+ for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
-+ x_key[i] ^= 0x5c ^ 0x36;
-+
-+ blake2s_init(&state, BLAKE2S_HASH_SIZE);
-+ blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
-+ blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE);
-+ blake2s_final(&state, i_hash);
-+
-+ memcpy(out, i_hash, BLAKE2S_HASH_SIZE);
-+ memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE);
-+ memzero_explicit(i_hash, BLAKE2S_HASH_SIZE);
-+}
-+EXPORT_SYMBOL(blake2s256_hmac);
-+
+static int __init mod_init(void)
+{
+ if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
@@ -15654,7 +15469,7 @@ exit 0
+MODULE_LICENSE("GPL");
--- b/arch/x86/crypto/curve25519-x86_64.c
+++ b/arch/x86/crypto/curve25519-x86_64.c
-@@ -0,0 +1,1512 @@
+@@ -0,0 +1,1724 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
@@ -15668,6 +15483,7 @@ exit 0
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
++#include <linux/scatterlist.h>
+
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
@@ -15720,10 +15536,9 @@ exit 0
+
+ /* Return the carry bit in a register */
+ " adcx %%r11, %1;"
-+ : "+&r" (f2), "=&r" (carry_r)
-+ : "r" (out), "r" (f1)
-+ : "%r8", "%r9", "%r10", "%r11", "memory", "cc"
-+ );
++ : "+&r"(f2), "=&r"(carry_r)
++ : "r"(out), "r"(f1)
++ : "%r8", "%r9", "%r10", "%r11", "memory", "cc");
+
+ return carry_r;
+}
@@ -15764,17 +15579,16 @@ exit 0
+ " cmovc %0, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%1);"
-+ : "+&r" (f2)
-+ : "r" (out), "r" (f1)
-+ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"
-+ );
++ : "+&r"(f2)
++ : "r"(out), "r"(f1)
++ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc");
+}
+
-+/* Computes the field substraction of two field elements */
++/* Computes the field subtraction of two field elements */
+static inline void fsub(u64 *out, const u64 *f1, const u64 *f2)
+{
+ asm volatile(
-+ /* Compute the raw substraction of f1-f2 */
++ /* Compute the raw subtraction of f1-f2 */
+ " movq 0(%1), %%r8;"
+ " subq 0(%2), %%r8;"
+ " movq 8(%1), %%r9;"
@@ -15791,7 +15605,7 @@ exit 0
+ " mov $38, %%rcx;"
+ " cmovc %%rcx, %%rax;"
+
-+ /* Step 2: Substract carry*38 from the original difference */
++ /* Step 2: Subtract carry*38 from the original difference */
+ " sub %%rax, %%r8;"
+ " sbb $0, %%r9;"
+ " sbb $0, %%r10;"
@@ -15807,10 +15621,9 @@ exit 0
+ " movq %%r9, 8(%0);"
+ " movq %%r10, 16(%0);"
+ " movq %%r11, 24(%0);"
-+ :
-+ : "r" (out), "r" (f1), "r" (f2)
-+ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"
-+ );
++ :
++ : "r"(out), "r"(f1), "r"(f2)
++ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc");
+}
+
+/* Computes a field multiplication: out <- f1 * f2
@@ -15818,239 +15631,400 @@ exit 0
+static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
+{
+ asm volatile(
++
+ /* Compute the raw multiplication: tmp <- src1 * src2 */
+
+ /* Compute src1[0] * src2 */
-+ " movq 0(%1), %%rdx;"
-+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " movq %%r8, 0(%0);"
-+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
-+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
-+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
-+ " adox %%rdx, %%rax;"
++ " movq 0(%0), %%rdx;"
++ " mulxq 0(%1), %%r8, %%r9;"
++ " xor %%r10d, %%r10d;"
++ " movq %%r8, 0(%2);"
++ " mulxq 8(%1), %%r10, %%r11;"
++ " adox %%r9, %%r10;"
++ " movq %%r10, 8(%2);"
++ " mulxq 16(%1), %%rbx, %%r13;"
++ " adox %%r11, %%rbx;"
++ " mulxq 24(%1), %%r14, %%rdx;"
++ " adox %%r13, %%r14;"
++ " mov $0, %%rax;"
++ " adox %%rdx, %%rax;"
++
+ /* Compute src1[1] * src2 */
-+ " movq 8(%1), %%rdx;"
-+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
-+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
-+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
-+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
-+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
++ " movq 8(%0), %%rdx;"
++ " mulxq 0(%1), %%r8, %%r9;"
++ " xor %%r10d, %%r10d;"
++ " adcxq 8(%2), %%r8;"
++ " movq %%r8, 8(%2);"
++ " mulxq 8(%1), %%r10, %%r11;"
++ " adox %%r9, %%r10;"
++ " adcx %%rbx, %%r10;"
++ " movq %%r10, 16(%2);"
++ " mulxq 16(%1), %%rbx, %%r13;"
++ " adox %%r11, %%rbx;"
++ " adcx %%r14, %%rbx;"
++ " mov $0, %%r8;"
++ " mulxq 24(%1), %%r14, %%rdx;"
++ " adox %%r13, %%r14;"
++ " adcx %%rax, %%r14;"
++ " mov $0, %%rax;"
++ " adox %%rdx, %%rax;"
++ " adcx %%r8, %%rax;"
++
+ /* Compute src1[2] * src2 */
-+ " movq 16(%1), %%rdx;"
-+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
-+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
-+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
-+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
-+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
++ " movq 16(%0), %%rdx;"
++ " mulxq 0(%1), %%r8, %%r9;"
++ " xor %%r10d, %%r10d;"
++ " adcxq 16(%2), %%r8;"
++ " movq %%r8, 16(%2);"
++ " mulxq 8(%1), %%r10, %%r11;"
++ " adox %%r9, %%r10;"
++ " adcx %%rbx, %%r10;"
++ " movq %%r10, 24(%2);"
++ " mulxq 16(%1), %%rbx, %%r13;"
++ " adox %%r11, %%rbx;"
++ " adcx %%r14, %%rbx;"
++ " mov $0, %%r8;"
++ " mulxq 24(%1), %%r14, %%rdx;"
++ " adox %%r13, %%r14;"
++ " adcx %%rax, %%r14;"
++ " mov $0, %%rax;"
++ " adox %%rdx, %%rax;"
++ " adcx %%r8, %%rax;"
++
+ /* Compute src1[3] * src2 */
-+ " movq 24(%1), %%rdx;"
-+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
-+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
-+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
-+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
-+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"
++ " movq 24(%0), %%rdx;"
++ " mulxq 0(%1), %%r8, %%r9;"
++ " xor %%r10d, %%r10d;"
++ " adcxq 24(%2), %%r8;"
++ " movq %%r8, 24(%2);"
++ " mulxq 8(%1), %%r10, %%r11;"
++ " adox %%r9, %%r10;"
++ " adcx %%rbx, %%r10;"
++ " movq %%r10, 32(%2);"
++ " mulxq 16(%1), %%rbx, %%r13;"
++ " adox %%r11, %%rbx;"
++ " adcx %%r14, %%rbx;"
++ " movq %%rbx, 40(%2);"
++ " mov $0, %%r8;"
++ " mulxq 24(%1), %%r14, %%rdx;"
++ " adox %%r13, %%r14;"
++ " adcx %%rax, %%r14;"
++ " movq %%r14, 48(%2);"
++ " mov $0, %%rax;"
++ " adox %%rdx, %%rax;"
++ " adcx %%r8, %%rax;"
++ " movq %%rax, 56(%2);"
++
+ /* Line up pointers */
-+ " mov %0, %1;"
+ " mov %2, %0;"
++ " mov %3, %2;"
+
+ /* Wrap the result back into the field */
+
+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
+ " mov $38, %%rdx;"
-+ " mulxq 32(%1), %%r8, %%r13;"
-+ " xor %k3, %k3;"
-+ " adoxq 0(%1), %%r8;"
-+ " mulxq 40(%1), %%r9, %%rbx;"
++ " mulxq 32(%0), %%r8, %%r13;"
++ " xor %k1, %k1;"
++ " adoxq 0(%0), %%r8;"
++ " mulxq 40(%0), %%r9, %%rbx;"
+ " adcx %%r13, %%r9;"
-+ " adoxq 8(%1), %%r9;"
-+ " mulxq 48(%1), %%r10, %%r13;"
++ " adoxq 8(%0), %%r9;"
++ " mulxq 48(%0), %%r10, %%r13;"
+ " adcx %%rbx, %%r10;"
-+ " adoxq 16(%1), %%r10;"
-+ " mulxq 56(%1), %%r11, %%rax;"
++ " adoxq 16(%0), %%r10;"
++ " mulxq 56(%0), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
-+ " adoxq 24(%1), %%r11;"
-+ " adcx %3, %%rax;"
-+ " adox %3, %%rax;"
++ " adoxq 24(%0), %%r11;"
++ " adcx %1, %%rax;"
++ " adox %1, %%rax;"
+ " imul %%rdx, %%rax;"
+
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
-+ " adcx %3, %%r9;"
-+ " movq %%r9, 8(%0);"
-+ " adcx %3, %%r10;"
-+ " movq %%r10, 16(%0);"
-+ " adcx %3, %%r11;"
-+ " movq %%r11, 24(%0);"
++ " adcx %1, %%r9;"
++ " movq %%r9, 8(%2);"
++ " adcx %1, %%r10;"
++ " movq %%r10, 16(%2);"
++ " adcx %1, %%r11;"
++ " movq %%r11, 24(%2);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
-+ " movq %%r8, 0(%0);"
-+ : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
-+ :
-+ : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc"
-+ );
++ " movq %%r8, 0(%2);"
++ : "+&r"(f1), "+&r"(f2), "+&r"(tmp)
++ : "r"(out)
++ : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13",
++ "%r14", "memory", "cc");
+}
+
+/* Computes two field multiplications:
-+ * out[0] <- f1[0] * f2[0]
-+ * out[1] <- f1[1] * f2[1]
-+ * Uses the 16-element buffer tmp for intermediate results. */
++ * out[0] <- f1[0] * f2[0]
++ * out[1] <- f1[1] * f2[1]
++ * Uses the 16-element buffer tmp for intermediate results: */
+static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
+{
+ asm volatile(
++
+ /* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */
+
+ /* Compute src1[0] * src2 */
-+ " movq 0(%1), %%rdx;"
-+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " movq %%r8, 0(%0);"
-+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
-+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
-+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
-+ " adox %%rdx, %%rax;"
++ " movq 0(%0), %%rdx;"
++ " mulxq 0(%1), %%r8, %%r9;"
++ " xor %%r10d, %%r10d;"
++ " movq %%r8, 0(%2);"
++ " mulxq 8(%1), %%r10, %%r11;"
++ " adox %%r9, %%r10;"
++ " movq %%r10, 8(%2);"
++ " mulxq 16(%1), %%rbx, %%r13;"
++ " adox %%r11, %%rbx;"
++ " mulxq 24(%1), %%r14, %%rdx;"
++ " adox %%r13, %%r14;"
++ " mov $0, %%rax;"
++ " adox %%rdx, %%rax;"
++
+ /* Compute src1[1] * src2 */
-+ " movq 8(%1), %%rdx;"
-+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
-+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
-+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
-+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
-+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
++ " movq 8(%0), %%rdx;"
++ " mulxq 0(%1), %%r8, %%r9;"
++ " xor %%r10d, %%r10d;"
++ " adcxq 8(%2), %%r8;"
++ " movq %%r8, 8(%2);"
++ " mulxq 8(%1), %%r10, %%r11;"
++ " adox %%r9, %%r10;"
++ " adcx %%rbx, %%r10;"
++ " movq %%r10, 16(%2);"
++ " mulxq 16(%1), %%rbx, %%r13;"
++ " adox %%r11, %%rbx;"
++ " adcx %%r14, %%rbx;"
++ " mov $0, %%r8;"
++ " mulxq 24(%1), %%r14, %%rdx;"
++ " adox %%r13, %%r14;"
++ " adcx %%rax, %%r14;"
++ " mov $0, %%rax;"
++ " adox %%rdx, %%rax;"
++ " adcx %%r8, %%rax;"
++
+ /* Compute src1[2] * src2 */
-+ " movq 16(%1), %%rdx;"
-+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
-+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
-+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
-+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
-+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
++ " movq 16(%0), %%rdx;"
++ " mulxq 0(%1), %%r8, %%r9;"
++ " xor %%r10d, %%r10d;"
++ " adcxq 16(%2), %%r8;"
++ " movq %%r8, 16(%2);"
++ " mulxq 8(%1), %%r10, %%r11;"
++ " adox %%r9, %%r10;"
++ " adcx %%rbx, %%r10;"
++ " movq %%r10, 24(%2);"
++ " mulxq 16(%1), %%rbx, %%r13;"
++ " adox %%r11, %%rbx;"
++ " adcx %%r14, %%rbx;"
++ " mov $0, %%r8;"
++ " mulxq 24(%1), %%r14, %%rdx;"
++ " adox %%r13, %%r14;"
++ " adcx %%rax, %%r14;"
++ " mov $0, %%rax;"
++ " adox %%rdx, %%rax;"
++ " adcx %%r8, %%rax;"
++
+ /* Compute src1[3] * src2 */
-+ " movq 24(%1), %%rdx;"
-+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
-+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
-+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
-+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
-+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"
++ " movq 24(%0), %%rdx;"
++ " mulxq 0(%1), %%r8, %%r9;"
++ " xor %%r10d, %%r10d;"
++ " adcxq 24(%2), %%r8;"
++ " movq %%r8, 24(%2);"
++ " mulxq 8(%1), %%r10, %%r11;"
++ " adox %%r9, %%r10;"
++ " adcx %%rbx, %%r10;"
++ " movq %%r10, 32(%2);"
++ " mulxq 16(%1), %%rbx, %%r13;"
++ " adox %%r11, %%rbx;"
++ " adcx %%r14, %%rbx;"
++ " movq %%rbx, 40(%2);"
++ " mov $0, %%r8;"
++ " mulxq 24(%1), %%r14, %%rdx;"
++ " adox %%r13, %%r14;"
++ " adcx %%rax, %%r14;"
++ " movq %%r14, 48(%2);"
++ " mov $0, %%rax;"
++ " adox %%rdx, %%rax;"
++ " adcx %%r8, %%rax;"
++ " movq %%rax, 56(%2);"
+
+ /* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */
+
+ /* Compute src1[0] * src2 */
-+ " movq 32(%1), %%rdx;"
-+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " movq %%r8, 64(%0);"
-+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);"
-+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
-+ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
-+ " adox %%rdx, %%rax;"
++ " movq 32(%0), %%rdx;"
++ " mulxq 32(%1), %%r8, %%r9;"
++ " xor %%r10d, %%r10d;"
++ " movq %%r8, 64(%2);"
++ " mulxq 40(%1), %%r10, %%r11;"
++ " adox %%r9, %%r10;"
++ " movq %%r10, 72(%2);"
++ " mulxq 48(%1), %%rbx, %%r13;"
++ " adox %%r11, %%rbx;"
++ " mulxq 56(%1), %%r14, %%rdx;"
++ " adox %%r13, %%r14;"
++ " mov $0, %%rax;"
++ " adox %%rdx, %%rax;"
++
+ /* Compute src1[1] * src2 */
-+ " movq 40(%1), %%rdx;"
-+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);"
-+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);"
-+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
-+ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
-+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
++ " movq 40(%0), %%rdx;"
++ " mulxq 32(%1), %%r8, %%r9;"
++ " xor %%r10d, %%r10d;"
++ " adcxq 72(%2), %%r8;"
++ " movq %%r8, 72(%2);"
++ " mulxq 40(%1), %%r10, %%r11;"
++ " adox %%r9, %%r10;"
++ " adcx %%rbx, %%r10;"
++ " movq %%r10, 80(%2);"
++ " mulxq 48(%1), %%rbx, %%r13;"
++ " adox %%r11, %%rbx;"
++ " adcx %%r14, %%rbx;"
++ " mov $0, %%r8;"
++ " mulxq 56(%1), %%r14, %%rdx;"
++ " adox %%r13, %%r14;"
++ " adcx %%rax, %%r14;"
++ " mov $0, %%rax;"
++ " adox %%rdx, %%rax;"
++ " adcx %%r8, %%rax;"
++
+ /* Compute src1[2] * src2 */
-+ " movq 48(%1), %%rdx;"
-+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);"
-+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);"
-+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
-+ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
-+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
++ " movq 48(%0), %%rdx;"
++ " mulxq 32(%1), %%r8, %%r9;"
++ " xor %%r10d, %%r10d;"
++ " adcxq 80(%2), %%r8;"
++ " movq %%r8, 80(%2);"
++ " mulxq 40(%1), %%r10, %%r11;"
++ " adox %%r9, %%r10;"
++ " adcx %%rbx, %%r10;"
++ " movq %%r10, 88(%2);"
++ " mulxq 48(%1), %%rbx, %%r13;"
++ " adox %%r11, %%rbx;"
++ " adcx %%r14, %%rbx;"
++ " mov $0, %%r8;"
++ " mulxq 56(%1), %%r14, %%rdx;"
++ " adox %%r13, %%r14;"
++ " adcx %%rax, %%r14;"
++ " mov $0, %%rax;"
++ " adox %%rdx, %%rax;"
++ " adcx %%r8, %%rax;"
++
+ /* Compute src1[3] * src2 */
-+ " movq 56(%1), %%rdx;"
-+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);"
-+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);"
-+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 104(%0);" " mov $0, %%r8;"
-+ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;"
-+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);"
++ " movq 56(%0), %%rdx;"
++ " mulxq 32(%1), %%r8, %%r9;"
++ " xor %%r10d, %%r10d;"
++ " adcxq 88(%2), %%r8;"
++ " movq %%r8, 88(%2);"
++ " mulxq 40(%1), %%r10, %%r11;"
++ " adox %%r9, %%r10;"
++ " adcx %%rbx, %%r10;"
++ " movq %%r10, 96(%2);"
++ " mulxq 48(%1), %%rbx, %%r13;"
++ " adox %%r11, %%rbx;"
++ " adcx %%r14, %%rbx;"
++ " movq %%rbx, 104(%2);"
++ " mov $0, %%r8;"
++ " mulxq 56(%1), %%r14, %%rdx;"
++ " adox %%r13, %%r14;"
++ " adcx %%rax, %%r14;"
++ " movq %%r14, 112(%2);"
++ " mov $0, %%rax;"
++ " adox %%rdx, %%rax;"
++ " adcx %%r8, %%rax;"
++ " movq %%rax, 120(%2);"
++
+ /* Line up pointers */
-+ " mov %0, %1;"
+ " mov %2, %0;"
++ " mov %3, %2;"
+
+ /* Wrap the results back into the field */
+
+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
+ " mov $38, %%rdx;"
-+ " mulxq 32(%1), %%r8, %%r13;"
-+ " xor %k3, %k3;"
-+ " adoxq 0(%1), %%r8;"
-+ " mulxq 40(%1), %%r9, %%rbx;"
++ " mulxq 32(%0), %%r8, %%r13;"
++ " xor %k1, %k1;"
++ " adoxq 0(%0), %%r8;"
++ " mulxq 40(%0), %%r9, %%rbx;"
+ " adcx %%r13, %%r9;"
-+ " adoxq 8(%1), %%r9;"
-+ " mulxq 48(%1), %%r10, %%r13;"
++ " adoxq 8(%0), %%r9;"
++ " mulxq 48(%0), %%r10, %%r13;"
+ " adcx %%rbx, %%r10;"
-+ " adoxq 16(%1), %%r10;"
-+ " mulxq 56(%1), %%r11, %%rax;"
++ " adoxq 16(%0), %%r10;"
++ " mulxq 56(%0), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
-+ " adoxq 24(%1), %%r11;"
-+ " adcx %3, %%rax;"
-+ " adox %3, %%rax;"
++ " adoxq 24(%0), %%r11;"
++ " adcx %1, %%rax;"
++ " adox %1, %%rax;"
+ " imul %%rdx, %%rax;"
+
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
-+ " adcx %3, %%r9;"
-+ " movq %%r9, 8(%0);"
-+ " adcx %3, %%r10;"
-+ " movq %%r10, 16(%0);"
-+ " adcx %3, %%r11;"
-+ " movq %%r11, 24(%0);"
++ " adcx %1, %%r9;"
++ " movq %%r9, 8(%2);"
++ " adcx %1, %%r10;"
++ " movq %%r10, 16(%2);"
++ " adcx %1, %%r11;"
++ " movq %%r11, 24(%2);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
-+ " movq %%r8, 0(%0);"
++ " movq %%r8, 0(%2);"
+
+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
+ " mov $38, %%rdx;"
-+ " mulxq 96(%1), %%r8, %%r13;"
-+ " xor %k3, %k3;"
-+ " adoxq 64(%1), %%r8;"
-+ " mulxq 104(%1), %%r9, %%rbx;"
++ " mulxq 96(%0), %%r8, %%r13;"
++ " xor %k1, %k1;"
++ " adoxq 64(%0), %%r8;"
++ " mulxq 104(%0), %%r9, %%rbx;"
+ " adcx %%r13, %%r9;"
-+ " adoxq 72(%1), %%r9;"
-+ " mulxq 112(%1), %%r10, %%r13;"
++ " adoxq 72(%0), %%r9;"
++ " mulxq 112(%0), %%r10, %%r13;"
+ " adcx %%rbx, %%r10;"
-+ " adoxq 80(%1), %%r10;"
-+ " mulxq 120(%1), %%r11, %%rax;"
++ " adoxq 80(%0), %%r10;"
++ " mulxq 120(%0), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
-+ " adoxq 88(%1), %%r11;"
-+ " adcx %3, %%rax;"
-+ " adox %3, %%rax;"
++ " adoxq 88(%0), %%r11;"
++ " adcx %1, %%rax;"
++ " adox %1, %%rax;"
+ " imul %%rdx, %%rax;"
+
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
-+ " adcx %3, %%r9;"
-+ " movq %%r9, 40(%0);"
-+ " adcx %3, %%r10;"
-+ " movq %%r10, 48(%0);"
-+ " adcx %3, %%r11;"
-+ " movq %%r11, 56(%0);"
++ " adcx %1, %%r9;"
++ " movq %%r9, 40(%2);"
++ " adcx %1, %%r10;"
++ " movq %%r10, 48(%2);"
++ " adcx %1, %%r11;"
++ " movq %%r11, 56(%2);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
-+ " movq %%r8, 32(%0);"
-+ : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
-+ :
-+ : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc"
-+ );
++ " movq %%r8, 32(%2);"
++ : "+&r"(f1), "+&r"(f2), "+&r"(tmp)
++ : "r"(out)
++ : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13",
++ "%r14", "memory", "cc");
+}
+
-+/* Computes the field multiplication of four-element f1 with value in f2 */
++/* Computes the field multiplication of four-element f1 with value in f2
++ * Requires f2 to be smaller than 2^17 */
+static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2)
+{
+ register u64 f2_r asm("rdx") = f2;
+
+ asm volatile(
+ /* Compute the raw multiplication of f1*f2 */
-+ " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */
-+ " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */
++ " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */
++ " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */
+ " add %%rcx, %%r9;"
+ " mov $0, %%rcx;"
-+ " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */
++ " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */
+ " adcx %%rbx, %%r10;"
-+ " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */
++ " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */
+ " adcx %%r13, %%r11;"
+ " adcx %%rcx, %%rax;"
+
@@ -16074,17 +16048,17 @@ exit 0
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%1);"
-+ : "+&r" (f2_r)
-+ : "r" (out), "r" (f1)
-+ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "memory", "cc"
-+ );
++ : "+&r"(f2_r)
++ : "r"(out), "r"(f1)
++ : "%rax", "%rbx", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r13",
++ "memory", "cc");
+}
+
+/* Computes p1 <- bit ? p2 : p1 in constant time */
+static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2)
+{
+ asm volatile(
-+ /* Invert the polarity of bit to match cmov expectations */
++ /* Transfer bit into CF flag */
+ " add $18446744073709551615, %0;"
+
+ /* cswap p1[0], p2[0] */
@@ -16158,10 +16132,9 @@ exit 0
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 56(%1);"
+ " movq %%r9, 56(%2);"
-+ : "+&r" (bit)
-+ : "r" (p1), "r" (p2)
-+ : "%r8", "%r9", "%r10", "memory", "cc"
-+ );
++ : "+&r"(bit)
++ : "r"(p1), "r"(p2)
++ : "%r8", "%r9", "%r10", "memory", "cc");
+}
+
+/* Computes the square of a field element: out <- f * f
@@ -16172,15 +16145,22 @@ exit 0
+ /* Compute the raw multiplication: tmp <- f * f */
+
+ /* Step 1: Compute all partial products */
-+ " movq 0(%1), %%rdx;" /* f[0] */
-+ " mulxq 8(%1), %%r8, %%r14;" " xor %%r15d, %%r15d;" /* f[1]*f[0] */
-+ " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
-+ " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
-+ " movq 24(%1), %%rdx;" /* f[3] */
-+ " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
-+ " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */
-+ " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
-+ " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
++ " movq 0(%0), %%rdx;" /* f[0] */
++ " mulxq 8(%0), %%r8, %%r14;"
++ " xor %%r15d, %%r15d;" /* f[1]*f[0] */
++ " mulxq 16(%0), %%r9, %%r10;"
++ " adcx %%r14, %%r9;" /* f[2]*f[0] */
++ " mulxq 24(%0), %%rax, %%rcx;"
++ " adcx %%rax, %%r10;" /* f[3]*f[0] */
++ " movq 24(%0), %%rdx;" /* f[3] */
++ " mulxq 8(%0), %%r11, %%rbx;"
++ " adcx %%rcx, %%r11;" /* f[1]*f[3] */
++ " mulxq 16(%0), %%rax, %%r13;"
++ " adcx %%rax, %%rbx;" /* f[2]*f[3] */
++ " movq 8(%0), %%rdx;"
++ " adcx %%r15, %%r13;" /* f1 */
++ " mulxq 16(%0), %%rax, %%rcx;"
++ " mov $0, %%r14;" /* f[2]*f[1] */
+
+ /* Step 2: Compute two parallel carry chains */
+ " xor %%r15d, %%r15d;"
@@ -16198,39 +16178,50 @@ exit 0
+ " adcx %%r14, %%r14;"
+
+ /* Step 3: Compute intermediate squares */
-+ " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
-+ " movq %%rax, 0(%0);"
-+ " add %%rcx, %%r8;" " movq %%r8, 8(%0);"
-+ " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
-+ " adcx %%rax, %%r9;" " movq %%r9, 16(%0);"
-+ " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);"
-+ " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
-+ " adcx %%rax, %%r11;" " movq %%r11, 32(%0);"
-+ " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);"
-+ " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
-+ " adcx %%rax, %%r13;" " movq %%r13, 48(%0);"
-+ " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);"
++ " movq 0(%0), %%rdx;"
++ " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
++ " movq %%rax, 0(%1);"
++ " add %%rcx, %%r8;"
++ " movq %%r8, 8(%1);"
++ " movq 8(%0), %%rdx;"
++ " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
++ " adcx %%rax, %%r9;"
++ " movq %%r9, 16(%1);"
++ " adcx %%rcx, %%r10;"
++ " movq %%r10, 24(%1);"
++ " movq 16(%0), %%rdx;"
++ " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
++ " adcx %%rax, %%r11;"
++ " movq %%r11, 32(%1);"
++ " adcx %%rcx, %%rbx;"
++ " movq %%rbx, 40(%1);"
++ " movq 24(%0), %%rdx;"
++ " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
++ " adcx %%rax, %%r13;"
++ " movq %%r13, 48(%1);"
++ " adcx %%rcx, %%r14;"
++ " movq %%r14, 56(%1);"
+
+ /* Line up pointers */
-+ " mov %0, %1;"
-+ " mov %2, %0;"
++ " mov %1, %0;"
++ " mov %2, %1;"
+
+ /* Wrap the result back into the field */
+
+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
+ " mov $38, %%rdx;"
-+ " mulxq 32(%1), %%r8, %%r13;"
++ " mulxq 32(%0), %%r8, %%r13;"
+ " xor %%ecx, %%ecx;"
-+ " adoxq 0(%1), %%r8;"
-+ " mulxq 40(%1), %%r9, %%rbx;"
++ " adoxq 0(%0), %%r8;"
++ " mulxq 40(%0), %%r9, %%rbx;"
+ " adcx %%r13, %%r9;"
-+ " adoxq 8(%1), %%r9;"
-+ " mulxq 48(%1), %%r10, %%r13;"
++ " adoxq 8(%0), %%r9;"
++ " mulxq 48(%0), %%r10, %%r13;"
+ " adcx %%rbx, %%r10;"
-+ " adoxq 16(%1), %%r10;"
-+ " mulxq 56(%1), %%r11, %%rax;"
++ " adoxq 16(%0), %%r10;"
++ " mulxq 56(%0), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
-+ " adoxq 24(%1), %%r11;"
++ " adoxq 24(%0), %%r11;"
+ " adcx %%rcx, %%rax;"
+ " adox %%rcx, %%rax;"
+ " imul %%rdx, %%rax;"
@@ -16238,40 +16229,47 @@ exit 0
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
+ " adcx %%rcx, %%r9;"
-+ " movq %%r9, 8(%0);"
++ " movq %%r9, 8(%1);"
+ " adcx %%rcx, %%r10;"
-+ " movq %%r10, 16(%0);"
++ " movq %%r10, 16(%1);"
+ " adcx %%rcx, %%r11;"
-+ " movq %%r11, 24(%0);"
++ " movq %%r11, 24(%1);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
-+ " movq %%r8, 0(%0);"
-+ : "+&r" (tmp), "+&r" (f), "+&r" (out)
-+ :
-+ : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc"
-+ );
++ " movq %%r8, 0(%1);"
++ : "+&r"(f), "+&r"(tmp)
++ : "r"(out)
++ : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11",
++ "%r13", "%r14", "%r15", "memory", "cc");
+}
+
+/* Computes two field squarings:
-+ * out[0] <- f[0] * f[0]
-+ * out[1] <- f[1] * f[1]
++ * out[0] <- f[0] * f[0]
++ * out[1] <- f[1] * f[1]
+ * Uses the 16-element buffer tmp for intermediate results */
+static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
+{
+ asm volatile(
+ /* Step 1: Compute all partial products */
-+ " movq 0(%1), %%rdx;" /* f[0] */
-+ " mulxq 8(%1), %%r8, %%r14;" " xor %%r15d, %%r15d;" /* f[1]*f[0] */
-+ " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
-+ " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
-+ " movq 24(%1), %%rdx;" /* f[3] */
-+ " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
-+ " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */
-+ " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
-+ " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
++ " movq 0(%0), %%rdx;" /* f[0] */
++ " mulxq 8(%0), %%r8, %%r14;"
++ " xor %%r15d, %%r15d;" /* f[1]*f[0] */
++ " mulxq 16(%0), %%r9, %%r10;"
++ " adcx %%r14, %%r9;" /* f[2]*f[0] */
++ " mulxq 24(%0), %%rax, %%rcx;"
++ " adcx %%rax, %%r10;" /* f[3]*f[0] */
++ " movq 24(%0), %%rdx;" /* f[3] */
++ " mulxq 8(%0), %%r11, %%rbx;"
++ " adcx %%rcx, %%r11;" /* f[1]*f[3] */
++ " mulxq 16(%0), %%rax, %%r13;"
++ " adcx %%rax, %%rbx;" /* f[2]*f[3] */
++ " movq 8(%0), %%rdx;"
++ " adcx %%r15, %%r13;" /* f1 */
++ " mulxq 16(%0), %%rax, %%rcx;"
++ " mov $0, %%r14;" /* f[2]*f[1] */
+
+ /* Step 2: Compute two parallel carry chains */
+ " xor %%r15d, %%r15d;"
@@ -16289,29 +16287,47 @@ exit 0
+ " adcx %%r14, %%r14;"
+
+ /* Step 3: Compute intermediate squares */
-+ " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
-+ " movq %%rax, 0(%0);"
-+ " add %%rcx, %%r8;" " movq %%r8, 8(%0);"
-+ " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
-+ " adcx %%rax, %%r9;" " movq %%r9, 16(%0);"
-+ " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);"
-+ " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
-+ " adcx %%rax, %%r11;" " movq %%r11, 32(%0);"
-+ " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);"
-+ " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
-+ " adcx %%rax, %%r13;" " movq %%r13, 48(%0);"
-+ " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);"
++ " movq 0(%0), %%rdx;"
++ " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
++ " movq %%rax, 0(%1);"
++ " add %%rcx, %%r8;"
++ " movq %%r8, 8(%1);"
++ " movq 8(%0), %%rdx;"
++ " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
++ " adcx %%rax, %%r9;"
++ " movq %%r9, 16(%1);"
++ " adcx %%rcx, %%r10;"
++ " movq %%r10, 24(%1);"
++ " movq 16(%0), %%rdx;"
++ " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
++ " adcx %%rax, %%r11;"
++ " movq %%r11, 32(%1);"
++ " adcx %%rcx, %%rbx;"
++ " movq %%rbx, 40(%1);"
++ " movq 24(%0), %%rdx;"
++ " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
++ " adcx %%rax, %%r13;"
++ " movq %%r13, 48(%1);"
++ " adcx %%rcx, %%r14;"
++ " movq %%r14, 56(%1);"
+
+ /* Step 1: Compute all partial products */
-+ " movq 32(%1), %%rdx;" /* f[0] */
-+ " mulxq 40(%1), %%r8, %%r14;" " xor %%r15d, %%r15d;" /* f[1]*f[0] */
-+ " mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
-+ " mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
-+ " movq 56(%1), %%rdx;" /* f[3] */
-+ " mulxq 40(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
-+ " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */
-+ " movq 40(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
-+ " mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
++ " movq 32(%0), %%rdx;" /* f[0] */
++ " mulxq 40(%0), %%r8, %%r14;"
++ " xor %%r15d, %%r15d;" /* f[1]*f[0] */
++ " mulxq 48(%0), %%r9, %%r10;"
++ " adcx %%r14, %%r9;" /* f[2]*f[0] */
++ " mulxq 56(%0), %%rax, %%rcx;"
++ " adcx %%rax, %%r10;" /* f[3]*f[0] */
++ " movq 56(%0), %%rdx;" /* f[3] */
++ " mulxq 40(%0), %%r11, %%rbx;"
++ " adcx %%rcx, %%r11;" /* f[1]*f[3] */
++ " mulxq 48(%0), %%rax, %%r13;"
++ " adcx %%rax, %%rbx;" /* f[2]*f[3] */
++ " movq 40(%0), %%rdx;"
++ " adcx %%r15, %%r13;" /* f1 */
++ " mulxq 48(%0), %%rax, %%rcx;"
++ " mov $0, %%r14;" /* f[2]*f[1] */
+
+ /* Step 2: Compute two parallel carry chains */
+ " xor %%r15d, %%r15d;"
@@ -16329,37 +16345,48 @@ exit 0
+ " adcx %%r14, %%r14;"
+
+ /* Step 3: Compute intermediate squares */
-+ " movq 32(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
-+ " movq %%rax, 64(%0);"
-+ " add %%rcx, %%r8;" " movq %%r8, 72(%0);"
-+ " movq 40(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
-+ " adcx %%rax, %%r9;" " movq %%r9, 80(%0);"
-+ " adcx %%rcx, %%r10;" " movq %%r10, 88(%0);"
-+ " movq 48(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
-+ " adcx %%rax, %%r11;" " movq %%r11, 96(%0);"
-+ " adcx %%rcx, %%rbx;" " movq %%rbx, 104(%0);"
-+ " movq 56(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
-+ " adcx %%rax, %%r13;" " movq %%r13, 112(%0);"
-+ " adcx %%rcx, %%r14;" " movq %%r14, 120(%0);"
++ " movq 32(%0), %%rdx;"
++ " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
++ " movq %%rax, 64(%1);"
++ " add %%rcx, %%r8;"
++ " movq %%r8, 72(%1);"
++ " movq 40(%0), %%rdx;"
++ " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
++ " adcx %%rax, %%r9;"
++ " movq %%r9, 80(%1);"
++ " adcx %%rcx, %%r10;"
++ " movq %%r10, 88(%1);"
++ " movq 48(%0), %%rdx;"
++ " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
++ " adcx %%rax, %%r11;"
++ " movq %%r11, 96(%1);"
++ " adcx %%rcx, %%rbx;"
++ " movq %%rbx, 104(%1);"
++ " movq 56(%0), %%rdx;"
++ " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
++ " adcx %%rax, %%r13;"
++ " movq %%r13, 112(%1);"
++ " adcx %%rcx, %%r14;"
++ " movq %%r14, 120(%1);"
+
+ /* Line up pointers */
-+ " mov %0, %1;"
-+ " mov %2, %0;"
++ " mov %1, %0;"
++ " mov %2, %1;"
+
+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
+ " mov $38, %%rdx;"
-+ " mulxq 32(%1), %%r8, %%r13;"
++ " mulxq 32(%0), %%r8, %%r13;"
+ " xor %%ecx, %%ecx;"
-+ " adoxq 0(%1), %%r8;"
-+ " mulxq 40(%1), %%r9, %%rbx;"
++ " adoxq 0(%0), %%r8;"
++ " mulxq 40(%0), %%r9, %%rbx;"
+ " adcx %%r13, %%r9;"
-+ " adoxq 8(%1), %%r9;"
-+ " mulxq 48(%1), %%r10, %%r13;"
++ " adoxq 8(%0), %%r9;"
++ " mulxq 48(%0), %%r10, %%r13;"
+ " adcx %%rbx, %%r10;"
-+ " adoxq 16(%1), %%r10;"
-+ " mulxq 56(%1), %%r11, %%rax;"
++ " adoxq 16(%0), %%r10;"
++ " mulxq 56(%0), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
-+ " adoxq 24(%1), %%r11;"
++ " adoxq 24(%0), %%r11;"
+ " adcx %%rcx, %%rax;"
+ " adox %%rcx, %%rax;"
+ " imul %%rdx, %%rax;"
@@ -16367,32 +16394,32 @@ exit 0
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
+ " adcx %%rcx, %%r9;"
-+ " movq %%r9, 8(%0);"
++ " movq %%r9, 8(%1);"
+ " adcx %%rcx, %%r10;"
-+ " movq %%r10, 16(%0);"
++ " movq %%r10, 16(%1);"
+ " adcx %%rcx, %%r11;"
-+ " movq %%r11, 24(%0);"
++ " movq %%r11, 24(%1);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
-+ " movq %%r8, 0(%0);"
++ " movq %%r8, 0(%1);"
+
+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
+ " mov $38, %%rdx;"
-+ " mulxq 96(%1), %%r8, %%r13;"
++ " mulxq 96(%0), %%r8, %%r13;"
+ " xor %%ecx, %%ecx;"
-+ " adoxq 64(%1), %%r8;"
-+ " mulxq 104(%1), %%r9, %%rbx;"
++ " adoxq 64(%0), %%r8;"
++ " mulxq 104(%0), %%r9, %%rbx;"
+ " adcx %%r13, %%r9;"
-+ " adoxq 72(%1), %%r9;"
-+ " mulxq 112(%1), %%r10, %%r13;"
++ " adoxq 72(%0), %%r9;"
++ " mulxq 112(%0), %%r10, %%r13;"
+ " adcx %%rbx, %%r10;"
-+ " adoxq 80(%1), %%r10;"
-+ " mulxq 120(%1), %%r11, %%rax;"
++ " adoxq 80(%0), %%r10;"
++ " mulxq 120(%0), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
-+ " adoxq 88(%1), %%r11;"
++ " adoxq 88(%0), %%r11;"
+ " adcx %%rcx, %%rax;"
+ " adox %%rcx, %%rax;"
+ " imul %%rdx, %%rax;"
@@ -16400,21 +16427,21 @@ exit 0
+ /* Step 2: Fold the carry back into dst */
+ " add %%rax, %%r8;"
+ " adcx %%rcx, %%r9;"
-+ " movq %%r9, 40(%0);"
++ " movq %%r9, 40(%1);"
+ " adcx %%rcx, %%r10;"
-+ " movq %%r10, 48(%0);"
++ " movq %%r10, 48(%1);"
+ " adcx %%rcx, %%r11;"
-+ " movq %%r11, 56(%0);"
++ " movq %%r11, 56(%1);"
+
+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
-+ " movq %%r8, 32(%0);"
-+ : "+&r" (tmp), "+&r" (f), "+&r" (out)
-+ :
-+ : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc"
-+ );
++ " movq %%r8, 32(%1);"
++ : "+&r"(f), "+&r"(tmp)
++ : "r"(out)
++ : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11",
++ "%r13", "%r14", "%r15", "memory", "cc");
+}
+
+static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2)
@@ -17156,7 +17183,7 @@ exit 0
+static void __exit curve25519_mod_exit(void)
+{
+ if (IS_REACHABLE(CONFIG_CRYPTO_KPP) &&
-+ (boot_cpu_has(X86_FEATURE_BMI2) || boot_cpu_has(X86_FEATURE_ADX)))
++ static_branch_likely(&curve25519_use_bmi2_adx))
+ crypto_unregister_kpp(&curve25519_alg);
+}
+
@@ -36707,7 +36734,7 @@ exit 0
+ return exact;
+}
+
-+static inline void connect_node(struct allowedips_node **parent, u8 bit, struct allowedips_node *node)
++static inline void connect_node(struct allowedips_node __rcu **parent, u8 bit, struct allowedips_node *node)
+{
+ node->parent_bit_packed = (unsigned long)parent | bit;
+ rcu_assign_pointer(*parent, node);
@@ -37293,7 +37320,7 @@ exit 0
+#endif /* _WG_COOKIE_H */
--- b/drivers/net/wireguard/device.c
+++ b/drivers/net/wireguard/device.c
-@@ -0,0 +1,457 @@
+@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
@@ -37315,6 +37342,7 @@ exit 0
+#include <linux/if_arp.h>
+#include <linux/icmp.h>
+#include <linux/suspend.h>
++#include <net/dst_metadata.h>
+#include <net/icmp.h>
+#include <net/rtnetlink.h>
+#include <net/ip_tunnels.h>
@@ -37394,6 +37422,7 @@ exit 0
+{
+ struct wg_device *wg = netdev_priv(dev);
+ struct wg_peer *peer;
++ struct sk_buff *skb;
+
+ mutex_lock(&wg->device_update_lock);
+ list_for_each_entry(peer, &wg->peer_list, peer_list) {
@@ -37404,7 +37433,9 @@ exit 0
+ wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
+ }
+ mutex_unlock(&wg->device_update_lock);
-+ skb_queue_purge(&wg->incoming_handshakes);
++ while ((skb = ptr_ring_consume(&wg->handshake_queue.ring)) != NULL)
++ kfree_skb(skb);
++ atomic_set(&wg->handshake_queue_len, 0);
+ wg_socket_reinit(wg, NULL, NULL);
+ return 0;
+}
@@ -37445,7 +37476,7 @@ exit 0
+ goto err_peer;
+ }
+
-+ mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
++ mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+
+ __skb_queue_head_init(&packets);
+ if (!skb_is_gso(skb)) {
@@ -37531,14 +37562,13 @@ exit 0
+ destroy_workqueue(wg->handshake_receive_wq);
+ destroy_workqueue(wg->handshake_send_wq);
+ destroy_workqueue(wg->packet_crypt_wq);
-+ wg_packet_queue_free(&wg->decrypt_queue);
-+ wg_packet_queue_free(&wg->encrypt_queue);
++ wg_packet_queue_free(&wg->handshake_queue, true);
++ wg_packet_queue_free(&wg->decrypt_queue, false);
++ wg_packet_queue_free(&wg->encrypt_queue, false);
+ rcu_barrier(); /* Wait for all the peers to be actually freed. */
+ wg_ratelimiter_uninit();
+ memzero_explicit(&wg->static_identity, sizeof(wg->static_identity));
-+ skb_queue_purge(&wg->incoming_handshakes);
+ free_percpu(dev->tstats);
-+ free_percpu(wg->incoming_handshakes_worker);
+ kvfree(wg->index_hashtable);
+ kvfree(wg->peer_hashtable);
+ mutex_unlock(&wg->device_update_lock);
@@ -37594,7 +37624,6 @@ exit 0
+ init_rwsem(&wg->static_identity.lock);
+ mutex_init(&wg->socket_update_lock);
+ mutex_init(&wg->device_update_lock);
-+ skb_queue_head_init(&wg->incoming_handshakes);
+ wg_allowedips_init(&wg->peer_allowedips);
+ wg_cookie_checker_init(&wg->cookie_checker, wg);
+ INIT_LIST_HEAD(&wg->peer_list);
@@ -37612,16 +37641,10 @@ exit 0
+ if (!dev->tstats)
+ goto err_free_index_hashtable;
+
-+ wg->incoming_handshakes_worker =
-+ wg_packet_percpu_multicore_worker_alloc(
-+ wg_packet_handshake_receive_worker, wg);
-+ if (!wg->incoming_handshakes_worker)
-+ goto err_free_tstats;
-+
+ wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s",
+ WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name);
+ if (!wg->handshake_receive_wq)
-+ goto err_free_incoming_handshakes;
++ goto err_free_tstats;
+
+ wg->handshake_send_wq = alloc_workqueue("wg-kex-%s",
+ WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name);
@@ -37643,10 +37666,15 @@ exit 0
+ if (ret < 0)
+ goto err_free_encrypt_queue;
+
-+ ret = wg_ratelimiter_init();
++ ret = wg_packet_queue_init(&wg->handshake_queue, wg_packet_handshake_receive_worker,
++ MAX_QUEUED_INCOMING_HANDSHAKES);
+ if (ret < 0)
+ goto err_free_decrypt_queue;
+
++ ret = wg_ratelimiter_init();
++ if (ret < 0)
++ goto err_free_handshake_queue;
++
+ ret = register_netdevice(dev);
+ if (ret < 0)
+ goto err_uninit_ratelimiter;
@@ -37663,18 +37691,18 @@ exit 0
+
+err_uninit_ratelimiter:
+ wg_ratelimiter_uninit();
++err_free_handshake_queue:
++ wg_packet_queue_free(&wg->handshake_queue, false);
+err_free_decrypt_queue:
-+ wg_packet_queue_free(&wg->decrypt_queue);
++ wg_packet_queue_free(&wg->decrypt_queue, false);
+err_free_encrypt_queue:
-+ wg_packet_queue_free(&wg->encrypt_queue);
++ wg_packet_queue_free(&wg->encrypt_queue, false);
+err_destroy_packet_crypt:
+ destroy_workqueue(wg->packet_crypt_wq);
+err_destroy_handshake_send:
+ destroy_workqueue(wg->handshake_send_wq);
+err_destroy_handshake_receive:
+ destroy_workqueue(wg->handshake_receive_wq);
-+err_free_incoming_handshakes:
-+ free_percpu(wg->incoming_handshakes_worker);
+err_free_tstats:
+ free_percpu(dev->tstats);
+err_free_index_hashtable:
@@ -37694,6 +37722,7 @@ exit 0
+static void wg_netns_pre_exit(struct net *net)
+{
+ struct wg_device *wg;
++ struct wg_peer *peer;
+
+ rtnl_lock();
+ list_for_each_entry(wg, &device_list, device_list) {
@@ -37703,6 +37732,8 @@ exit 0
+ mutex_lock(&wg->device_update_lock);
+ rcu_assign_pointer(wg->creating_net, NULL);
+ wg_socket_reinit(wg, NULL, NULL);
++ list_for_each_entry(peer, &wg->peer_list, peer_list)
++ wg_socket_clear_peer_endpoint_src(peer);
+ mutex_unlock(&wg->device_update_lock);
+ }
+ }
@@ -37753,7 +37784,7 @@ exit 0
+}
--- b/drivers/net/wireguard/device.h
+++ b/drivers/net/wireguard/device.h
-@@ -0,0 +1,65 @@
+@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
@@ -37795,21 +37826,18 @@ exit 0
+
+struct wg_device {
+ struct net_device *dev;
-+ struct crypt_queue encrypt_queue, decrypt_queue;
++ struct crypt_queue encrypt_queue, decrypt_queue, handshake_queue;
+ struct sock __rcu *sock4, *sock6;
+ struct net __rcu *creating_net;
+ struct noise_static_identity static_identity;
-+ struct workqueue_struct *handshake_receive_wq, *handshake_send_wq;
-+ struct workqueue_struct *packet_crypt_wq;
-+ struct sk_buff_head incoming_handshakes;
-+ int incoming_handshake_cpu;
-+ struct multicore_worker __percpu *incoming_handshakes_worker;
++ struct workqueue_struct *packet_crypt_wq,*handshake_receive_wq, *handshake_send_wq;
+ struct cookie_checker cookie_checker;
+ struct pubkey_hashtable *peer_hashtable;
+ struct index_hashtable *index_hashtable;
+ struct allowedips peer_allowedips;
+ struct mutex device_update_lock, socket_update_lock;
+ struct list_head device_list, peer_list;
++ atomic_t handshake_queue_len;
+ unsigned int num_peers, device_update_gen;
+ u32 fwmark;
+ u16 incoming_port;
@@ -37841,7 +37869,7 @@ exit 0
+#include <linux/genetlink.h>
+#include <net/rtnetlink.h>
+
-+static int __init mod_init(void)
++static int __init wg_mod_init(void)
+{
+ int ret;
+
@@ -37884,7 +37912,7 @@ exit 0
+ return ret;
+}
+
-+static void __exit mod_exit(void)
++static void __exit wg_mod_exit(void)
+{
+ wg_genetlink_uninit();
+ wg_device_uninit();
@@ -37892,8 +37920,8 @@ exit 0
+ wg_allowedips_slab_uninit();
+}
+
-+module_init(mod_init);
-+module_exit(mod_exit);
++module_init(wg_mod_init);
++module_exit(wg_mod_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("WireGuard secure network tunnel");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
@@ -38697,7 +38725,7 @@ exit 0
+#endif /* _WG_NETLINK_H */
--- b/drivers/net/wireguard/noise.c
+++ b/drivers/net/wireguard/noise.c
-@@ -0,0 +1,828 @@
+@@ -0,0 +1,861 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
@@ -39002,6 +39030,41 @@ exit 0
+ static_identity->static_public, private_key);
+}
+
++static void hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, const size_t keylen)
++{
++ struct blake2s_state state;
++ u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 };
++ u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32));
++ int i;
++
++ if (keylen > BLAKE2S_BLOCK_SIZE) {
++ blake2s_init(&state, BLAKE2S_HASH_SIZE);
++ blake2s_update(&state, key, keylen);
++ blake2s_final(&state, x_key);
++ } else
++ memcpy(x_key, key, keylen);
++
++ for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
++ x_key[i] ^= 0x36;
++
++ blake2s_init(&state, BLAKE2S_HASH_SIZE);
++ blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
++ blake2s_update(&state, in, inlen);
++ blake2s_final(&state, i_hash);
++
++ for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
++ x_key[i] ^= 0x5c ^ 0x36;
++
++ blake2s_init(&state, BLAKE2S_HASH_SIZE);
++ blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
++ blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE);
++ blake2s_final(&state, i_hash);
++
++ memcpy(out, i_hash, BLAKE2S_HASH_SIZE);
++ memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE);
++ memzero_explicit(i_hash, BLAKE2S_HASH_SIZE);
++}
++
+/* This is Hugo Krawczyk's HKDF:
+ * - https://eprint.iacr.org/2010/264.pdf
+ * - https://tools.ietf.org/html/rfc5869
@@ -39022,14 +39085,14 @@ exit 0
+ ((third_len || third_dst) && (!second_len || !second_dst))));
+
+ /* Extract entropy from data into secret */
-+ blake2s256_hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN);
++ hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN);
+
+ if (!first_dst || !first_len)
+ goto out;
+
+ /* Expand first key: key = secret, data = 0x1 */
+ output[0] = 1;
-+ blake2s256_hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE);
++ hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE);
+ memcpy(first_dst, output, first_len);
+
+ if (!second_dst || !second_len)
@@ -39037,8 +39100,7 @@ exit 0
+
+ /* Expand second key: key = secret, data = first-key || 0x2 */
+ output[BLAKE2S_HASH_SIZE] = 2;
-+ blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1,
-+ BLAKE2S_HASH_SIZE);
++ hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE);
+ memcpy(second_dst, output, second_len);
+
+ if (!third_dst || !third_len)
@@ -39046,8 +39108,7 @@ exit 0
+
+ /* Expand third key: key = secret, data = second-key || 0x3 */
+ output[BLAKE2S_HASH_SIZE] = 3;
-+ blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1,
-+ BLAKE2S_HASH_SIZE);
++ hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE);
+ memcpy(third_dst, output, third_len);
+
+out:
@@ -40294,13 +40355,14 @@ exit 0
+#endif /* _WG_PEERLOOKUP_H */
--- b/drivers/net/wireguard/queueing.c
+++ b/drivers/net/wireguard/queueing.c
-@@ -0,0 +1,107 @@
+@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "queueing.h"
++#include <linux/skb_array.h>
+
+struct multicore_worker __percpu *
+wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr)
@@ -40335,11 +40397,11 @@ exit 0
+ return 0;
+}
+
-+void wg_packet_queue_free(struct crypt_queue *queue)
++void wg_packet_queue_free(struct crypt_queue *queue, bool purge)
+{
+ free_percpu(queue->worker);
-+ WARN_ON(!__ptr_ring_empty(&queue->ring));
-+ ptr_ring_cleanup(&queue->ring, NULL);
++ WARN_ON(!purge && !__ptr_ring_empty(&queue->ring));
++ ptr_ring_cleanup(&queue->ring, purge ? __skb_array_destroy_skb : NULL);
+}
+
+#define NEXT(skb) ((skb)->prev)
@@ -40430,7 +40492,7 @@ exit 0
+/* queueing.c APIs: */
+int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
+ unsigned int len);
-+void wg_packet_queue_free(struct crypt_queue *queue);
++void wg_packet_queue_free(struct crypt_queue *queue, bool purge);
+struct multicore_worker __percpu *
+wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr);
+
@@ -40619,7 +40681,7 @@ exit 0
+#endif
+
+#endif /* _WG_QUEUEING_H */
---- /dev/null
+--- b/drivers/net/wireguard/ratelimiter.c
+++ b/drivers/net/wireguard/ratelimiter.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0
@@ -40800,12 +40862,12 @@ exit 0
+ (1U << 14) / sizeof(struct hlist_head)));
+ max_entries = table_size * 8;
+
-+ table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL);
++ table_v4 = kvcalloc(table_size, sizeof(*table_v4), GFP_KERNEL);
+ if (unlikely(!table_v4))
+ goto err_kmemcache;
+
+#if IS_ENABLED(CONFIG_IPV6)
-+ table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL);
++ table_v6 = kvcalloc(table_size, sizeof(*table_v6), GFP_KERNEL);
+ if (unlikely(!table_v6)) {
+ kvfree(table_v4);
+ goto err_kmemcache;
@@ -40869,7 +40931,7 @@ exit 0
+#endif /* _WG_RATELIMITER_H */
--- b/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
-@@ -0,0 +1,586 @@
+@@ -0,0 +1,593 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
@@ -40988,8 +41050,8 @@ exit 0
+ return;
+ }
+
-+ under_load = skb_queue_len(&wg->incoming_handshakes) >=
-+ MAX_QUEUED_INCOMING_HANDSHAKES / 8;
++ under_load = atomic_read(&wg->handshake_queue_len) >=
++ MAX_QUEUED_INCOMING_HANDSHAKES / 8;
+ if (under_load) {
+ last_under_load = ktime_get_coarse_boottime_ns();
+ } else if (last_under_load) {
@@ -41084,13 +41146,14 @@ exit 0
+
+void wg_packet_handshake_receive_worker(struct work_struct *work)
+{
-+ struct wg_device *wg = container_of(work, struct multicore_worker,
-+ work)->ptr;
++ struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr;
++ struct wg_device *wg = container_of(queue, struct wg_device, handshake_queue);
+ struct sk_buff *skb;
+
-+ while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) {
++ while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) {
+ wg_receive_handshake_packet(wg, skb);
+ dev_kfree_skb(skb);
++ atomic_dec(&wg->handshake_queue_len);
+ cond_resched();
+ }
+}
@@ -41425,22 +41488,28 @@ exit 0
+ case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION):
+ case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE):
+ case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): {
-+ int cpu;
-+
-+ if (skb_queue_len(&wg->incoming_handshakes) >
-+ MAX_QUEUED_INCOMING_HANDSHAKES ||
-+ unlikely(!rng_is_initialized())) {
++ int cpu, ret = -EBUSY;
++
++ if (unlikely(!rng_is_initialized()))
++ goto drop;
++ if (atomic_read(&wg->handshake_queue_len) > MAX_QUEUED_INCOMING_HANDSHAKES / 2) {
++ if (spin_trylock_bh(&wg->handshake_queue.ring.producer_lock)) {
++ ret = __ptr_ring_produce(&wg->handshake_queue.ring, skb);
++ spin_unlock_bh(&wg->handshake_queue.ring.producer_lock);
++ }
++ } else
++ ret = ptr_ring_produce_bh(&wg->handshake_queue.ring, skb);
++ if (ret) {
++ drop:
+ net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n",
+ wg->dev->name, skb);
+ goto err;
+ }
-+ skb_queue_tail(&wg->incoming_handshakes, skb);
-+ /* Queues up a call to packet_process_queued_handshake_
-+ * packets(skb):
-+ */
-+ cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu);
++ atomic_inc(&wg->handshake_queue_len);
++ cpu = wg_cpumask_next_online(&wg->handshake_queue.last_cpu);
++ /* Queues up a call to packet_process_queued_handshake_packets(skb): */
+ queue_work_on(cpu, wg->handshake_receive_wq,
-+ &per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work);
++ &per_cpu_ptr(wg->handshake_queue.worker, cpu)->work);
+ break;
+ }
+ case cpu_to_le32(MESSAGE_DATA):
@@ -42896,7 +42965,7 @@ exit 0
+}
--- b/drivers/net/wireguard/socket.c
+++ b/drivers/net/wireguard/socket.c
-@@ -0,0 +1,436 @@
+@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
@@ -43059,6 +43128,7 @@ exit 0
+ rcu_read_unlock_bh();
+ return ret;
+#else
++ kfree_skb(skb);
+ return -EAFNOSUPPORT;
+#endif
+}
@@ -43140,7 +43210,7 @@ exit 0
+ endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ endpoint->src4.s_addr = ip_hdr(skb)->daddr;
+ endpoint->src_if4 = skb->skb_iif;
-+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
++ } else if (IS_ENABLED(CONFIG_IPV6) && skb->protocol == htons(ETH_P_IPV6)) {
+ endpoint->addr6.sin6_family = AF_INET6;
+ endpoint->addr6.sin6_port = udp_hdr(skb)->source;
+ endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr;
@@ -43183,7 +43253,7 @@ exit 0
+ peer->endpoint.addr4 = endpoint->addr4;
+ peer->endpoint.src4 = endpoint->src4;
+ peer->endpoint.src_if4 = endpoint->src_if4;
-+ } else if (endpoint->addr.sa_family == AF_INET6) {
++ } else if (IS_ENABLED(CONFIG_IPV6) && endpoint->addr.sa_family == AF_INET6) {
+ peer->endpoint.addr6 = endpoint->addr6;
+ peer->endpoint.src6 = endpoint->src6;
+ } else {
@@ -43207,7 +43277,7 @@ exit 0
+{
+ write_lock_bh(&peer->endpoint_lock);
+ memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6));
-+ dst_cache_reset(&peer->endpoint_cache);
++ dst_cache_reset_now(&peer->endpoint_cache);
+ write_unlock_bh(&peer->endpoint_lock);
+}
+
@@ -43865,7 +43935,7 @@ exit 0
+#endif /* _WG_UAPI_WIREGUARD_H */
--- b/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
-@@ -0,0 +1,636 @@
+@@ -0,0 +1,674 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
@@ -43890,10 +43960,12 @@ exit 0
+# interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further
+# details on how this is accomplished.
+set -e
++shopt -s extglob
+
+exec 3>&1
+export LANG=C
+export WG_HIDE_KEYS=never
++NPROC=( /sys/devices/system/cpu/cpu+([0-9]) ); NPROC=${#NPROC[@]}
+netns0="wg-test-$$-0"
+netns1="wg-test-$$-1"
+netns2="wg-test-$$-2"
@@ -44011,17 +44083,15 @@ exit 0
+ n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2
+
+ # TCP over IPv4, in parallel
-+ for max in 4 5 50; do
-+ local pids=( )
-+ for ((i=0; i < max; ++i)) do
-+ n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 &
-+ pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i ))
-+ done
-+ for ((i=0; i < max; ++i)) do
-+ n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 &
-+ done
-+ wait "${pids[@]}"
++ local pids=( ) i
++ for ((i=0; i < NPROC; ++i)) do
++ n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 &
++ pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i ))
+ done
++ for ((i=0; i < NPROC; ++i)) do
++ n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 &
++ done
++ wait "${pids[@]}"
+}
+
+[[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}"
@@ -44144,7 +44214,23 @@ exit 0
+n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7
+ip2 link del wg0
+ip2 link del wg1
-+! n0 ping -W 1 -c 10 -f 192.168.241.2 || false # Should not crash kernel
++read _ _ tx_bytes_before < <(n0 wg show wg1 transfer)
++! n0 ping -W 1 -c 10 -f 192.168.241.2 || false
++sleep 1
++read _ _ tx_bytes_after < <(n0 wg show wg1 transfer)
++if ! (( tx_bytes_after - tx_bytes_before < 70000 )); then
++ errstart=$'\x1b[37m\x1b[41m\x1b[1m'
++ errend=$'\x1b[0m'
++ echo "${errstart} ${errend}"
++ echo "${errstart} E R R O R ${errend}"
++ echo "${errstart} ${errend}"
++ echo "${errstart} This architecture does not do the right thing ${errend}"
++ echo "${errstart} with cross-namespace routing loops. This test ${errend}"
++ echo "${errstart} has thus technically failed but, as this issue ${errend}"
++ echo "${errstart} is as yet unsolved, these tests will continue ${errend}"
++ echo "${errstart} onward. :( ${errend}"
++ echo "${errstart} ${errend}"
++fi
+
+ip0 link del wg1
+ip1 link del wg0
@@ -44477,6 +44563,28 @@ exit 0
+kill $ncat_pid
+ip0 link del wg0
+
++# Ensure that dst_cache references don't outlive netns lifetime
++ip1 link add dev wg0 type wireguard
++ip2 link add dev wg0 type wireguard
++configure_peers
++ip1 link add veth1 type veth peer name veth2
++ip1 link set veth2 netns $netns2
++ip1 addr add fd00:aa::1/64 dev veth1
++ip2 addr add fd00:aa::2/64 dev veth2
++ip1 link set veth1 up
++ip2 link set veth2 up
++waitiface $netns1 veth1
++waitiface $netns2 veth2
++ip1 -6 route add default dev veth1 via fd00:aa::2
++ip2 -6 route add default dev veth2 via fd00:aa::1
++n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2
++n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1
++n1 ping6 -c 1 fd00::2
++pp ip netns delete $netns1
++pp ip netns delete $netns2
++pp ip netns add $netns1
++pp ip netns add $netns2
++
+# Ensure there aren't circular reference loops
+ip1 link add wg1 type wireguard
+ip2 link add wg2 type wireguard
@@ -44495,47 +44603,47 @@ exit 0
+done < /dev/kmsg
+alldeleted=1
+for object in "${!objects[@]}"; do
-+ if [[ ${objects["$object"]} != *createddestroyed ]]; then
++ if [[ ${objects["$object"]} != *createddestroyed && ${objects["$object"]} != *createdcreateddestroyeddestroyed ]]; then
+ echo "Error: $object: merely ${objects["$object"]}" >&3
+ alldeleted=0
+ fi
+done
+[[ $alldeleted -eq 1 ]]
+pretty "" "Objects that were created were also destroyed."
---- /dev/null
+--- b/tools/testing/selftests/wireguard/qemu/.gitignore
+++ b/tools/testing/selftests/wireguard/qemu/.gitignore
-@@ -0,0 +1,2 @@
+@@ -0,0 +1,4 @@
++# SPDX-License-Identifier: GPL-2.0-only
+build/
+distfiles/
++ccache/
--- b/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
-@@ -0,0 +1,377 @@
+@@ -0,0 +1,422 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+
+PWD := $(shell pwd)
+
-+CHOST := $(shell gcc -dumpmachine)
-+HOST_ARCH := $(firstword $(subst -, ,$(CHOST)))
-+ifneq (,$(ARCH))
-+CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc))))))
-+ifeq (,$(CBUILD))
-+$(error The toolchain for $(ARCH) is not installed)
-+endif
-+else
-+CBUILD := $(CHOST)
-+ARCH := $(firstword $(subst -, ,$(CBUILD)))
-+endif
-+
+# Set these from the environment to override
+KERNEL_PATH ?= $(PWD)/../../../../..
+BUILD_PATH ?= $(PWD)/build/$(ARCH)
+DISTFILES_PATH ?= $(PWD)/distfiles
+NR_CPUS ?= 4
++ARCH ?=
++CBUILD := $(shell gcc -dumpmachine)
++HOST_ARCH := $(firstword $(subst -, ,$(CBUILD)))
++ifeq ($(ARCH),)
++ARCH := $(HOST_ARCH)
++endif
+
+MIRROR := https://download.wireguard.com/qemu-test/distfiles/
+
++KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
++rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
++WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*)
++
+default: qemu
+
+# variable name, tarball project name, version, tarball extension, default URI base
@@ -44548,42 +44656,33 @@ exit 0
+endef
+
+define file_download =
-+$(DISTFILES_PATH)/$(1):
++$(DISTFILES_PATH)/$(1): | $(4)
+ mkdir -p $(DISTFILES_PATH)
-+ flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
++ flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if ([ -n "$(4)" ] && sed -n "s#^\([a-f0-9]\{64\}\) \($(1)\)\$$$$#\1 $(DISTFILES_PATH)/\2.tmp#p" "$(4)" || echo "$(3) $$@.tmp") | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
+endef
+
-+$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3))
-+$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
-+$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
-+$(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,1b5b0e25ce6e23da7526ea1da044e814ad85ba761b10dd29c2b027c056b04692))
-+$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
-+$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
++$(eval $(call tar_download,IPERF,iperf,3.11,.tar.gz,https://downloads.es.net/pub/iperf/,de8cb409fad61a0574f4cb07eb19ce1159707403ac2dc01b5d175e91240b7e5f))
++$(eval $(call tar_download,BASH,bash,5.1.16,.tar.gz,https://ftp.gnu.org/gnu/bash/,5bac17218d3911834520dad13cd1f85ab944e1c09ae1aba55906be1f8192f558))
++$(eval $(call tar_download,IPROUTE2,iproute2,5.17.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/,bda331d5c4606138892f23a565d78fca18919b4d508a0b7ca8391c2da2db68b9))
++$(eval $(call tar_download,IPTABLES,iptables,1.8.7,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,c109c96bb04998cd44156622d36f8e04b140701ec60531a10668cfdff5e8d8f0))
++$(eval $(call tar_download,NMAP,nmap,7.92,.tgz,https://nmap.org/dist/,064183ea642dc4c12b1ab3b5358ce1cef7d2e7e11ffa2849f16d339f5b717117))
+$(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
-+$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64))
-+
-+KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
-+rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
-+WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*)
-+
-+export CFLAGS ?= -O3 -pipe
-+export LDFLAGS ?=
-+export CPPFLAGS := -I$(BUILD_PATH)/include
++$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20210914,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,97ff31489217bb265b7ae850d3d0f335ab07d2652ba1feec88b734bc96bd05ac))
+
++export CFLAGS := -O3 -pipe
+ifeq ($(HOST_ARCH),$(ARCH))
-+CROSS_COMPILE_FLAG := --host=$(CHOST)
+CFLAGS += -march=native
-+STRIP := strip
-+else
-+$(info Cross compilation: building for $(CBUILD) using $(CHOST))
-+CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
-+export CROSS_COMPILE=$(CBUILD)-
-+STRIP := $(CBUILD)-strip
+endif
++export LDFLAGS :=
++export CPPFLAGS :=
++
++QEMU_VPORT_RESULT :=
+ifeq ($(ARCH),aarch64)
++CHOST := aarch64-linux-musl
+QEMU_ARCH := aarch64
+KERNEL_ARCH := arm64
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
++QEMU_VPORT_RESULT := virtio-serial-device
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
+else
@@ -44591,9 +44690,11 @@ exit 0
+CFLAGS += -march=armv8-a -mtune=cortex-a53
+endif
+else ifeq ($(ARCH),aarch64_be)
++CHOST := aarch64_be-linux-musl
+QEMU_ARCH := aarch64
+KERNEL_ARCH := arm64
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
++QEMU_VPORT_RESULT := virtio-serial-device
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
+else
@@ -44601,9 +44702,11 @@ exit 0
+CFLAGS += -march=armv8-a -mtune=cortex-a53
+endif
+else ifeq ($(ARCH),arm)
++CHOST := arm-linux-musleabi
+QEMU_ARCH := arm
+KERNEL_ARCH := arm
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
++QEMU_VPORT_RESULT := virtio-serial-device
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
+else
@@ -44611,9 +44714,11 @@ exit 0
+CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux
+endif
+else ifeq ($(ARCH),armeb)
++CHOST := armeb-linux-musleabi
+QEMU_ARCH := arm
+KERNEL_ARCH := arm
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
++QEMU_VPORT_RESULT := virtio-serial-device
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
+else
@@ -44622,6 +44727,7 @@ exit 0
+LDFLAGS += -Wl,--be8
+endif
+else ifeq ($(ARCH),x86_64)
++CHOST := x86_64-linux-musl
+QEMU_ARCH := x86_64
+KERNEL_ARCH := x86_64
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
@@ -44632,6 +44738,7 @@ exit 0
+CFLAGS += -march=skylake-avx512
+endif
+else ifeq ($(ARCH),i686)
++CHOST := i686-linux-musl
+QEMU_ARCH := i386
+KERNEL_ARCH := x86
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
@@ -44642,6 +44749,7 @@ exit 0
+CFLAGS += -march=prescott
+endif
+else ifeq ($(ARCH),mips64)
++CHOST := mips64-linux-musl
+QEMU_ARCH := mips64
+KERNEL_ARCH := mips
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -44653,6 +44761,7 @@ exit 0
+CFLAGS += -march=mips64r2 -EB
+endif
+else ifeq ($(ARCH),mips64el)
++CHOST := mips64el-linux-musl
+QEMU_ARCH := mips64el
+KERNEL_ARCH := mips
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -44664,6 +44773,7 @@ exit 0
+CFLAGS += -march=mips64r2 -EL
+endif
+else ifeq ($(ARCH),mips)
++CHOST := mips-linux-musl
+QEMU_ARCH := mips
+KERNEL_ARCH := mips
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -44675,6 +44785,7 @@ exit 0
+CFLAGS += -march=mips32r2 -EB
+endif
+else ifeq ($(ARCH),mipsel)
++CHOST := mipsel-linux-musl
+QEMU_ARCH := mipsel
+KERNEL_ARCH := mips
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -44685,7 +44796,18 @@ exit 0
+QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1
+CFLAGS += -march=mips32r2 -EL
+endif
++else ifeq ($(ARCH),powerpc64)
++CHOST := powerpc64-linux-musl
++QEMU_ARCH := ppc64
++KERNEL_ARCH := powerpc
++KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
++ifeq ($(HOST_ARCH),$(ARCH))
++QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
++else
++QEMU_MACHINE := -machine pseries
++endif
+else ifeq ($(ARCH),powerpc64le)
++CHOST := powerpc64le-linux-musl
+QEMU_ARCH := ppc64
+KERNEL_ARCH := powerpc
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -44694,8 +44816,8 @@ exit 0
+else
+QEMU_MACHINE := -machine pseries
+endif
-+CFLAGS += -mcpu=powerpc64le -mlong-double-64
+else ifeq ($(ARCH),powerpc)
++CHOST := powerpc-linux-musl
+QEMU_ARCH := ppc
+KERNEL_ARCH := powerpc
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage
@@ -44704,26 +44826,57 @@ exit 0
+else
+QEMU_MACHINE := -machine ppce500
+endif
-+CFLAGS += -mcpu=powerpc -mlong-double-64 -msecure-plt
+else ifeq ($(ARCH),m68k)
++CHOST := m68k-linux-musl
+QEMU_ARCH := m68k
+KERNEL_ARCH := m68k
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config)
+ifeq ($(HOST_ARCH),$(ARCH))
-+QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
++QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -append $(KERNEL_CMDLINE)
+else
+QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
+endif
++else ifeq ($(ARCH),s390x)
++CHOST := s390x-linux-musl
++QEMU_ARCH := s390x
++KERNEL_ARCH := s390
++KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/s390/boot/bzImage
++KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/s390x.config)
++QEMU_VPORT_RESULT := virtio-serial-ccw
++ifeq ($(HOST_ARCH),$(ARCH))
++QEMU_MACHINE := -cpu host,accel=kvm -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
++else
++QEMU_MACHINE := -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
++endif
+else
-+$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k)
++$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64, powerpc64le, powerpc, m68k, s390x)
+endif
+
-+REAL_CC := $(CBUILD)-gcc
-+MUSL_CC := $(BUILD_PATH)/musl-gcc
-+export CC := $(MUSL_CC)
-+USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed $(BUILD_PATH)/include/linux/.installed
++TOOLCHAIN_FILENAME := $(CHOST)-cross.tgz
++TOOLCHAIN_TAR := $(DISTFILES_PATH)/$(TOOLCHAIN_FILENAME)
++TOOLCHAIN_PATH := $(BUILD_PATH)/$(CHOST)-cross
++TOOLCHAIN_DIR := https://download.wireguard.com/qemu-test/toolchains/20211123/
++$(eval $(call file_download,toolchain-sha256sums-20211123,$(TOOLCHAIN_DIR)SHA256SUMS#,83da033fd8c798df476c21d9612da2dfb896ec62fbed4ceec5eefc0e56b3f0c8))
++$(eval $(call file_download,$(TOOLCHAIN_FILENAME),$(TOOLCHAIN_DIR),,$(DISTFILES_PATH)/toolchain-sha256sums-20211123))
+
++STRIP := $(CHOST)-strip
++CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
++$(info Building for $(CHOST) using $(CBUILD))
++export CROSS_COMPILE := $(CHOST)-
++export PATH := $(TOOLCHAIN_PATH)/bin:$(PATH)
++export CC := $(CHOST)-gcc
++CCACHE_PATH := $(shell which ccache 2>/dev/null)
++ifneq ($(CCACHE_PATH),)
++export KBUILD_BUILD_TIMESTAMP := Fri Jun 5 15:58:00 CEST 2015
++export PATH := $(TOOLCHAIN_PATH)/bin/ccache:$(PATH)
++export CCACHE_SLOPPINESS := file_macro,time_macros
++export CCACHE_DIR ?= $(PWD)/ccache
++endif
++
++USERSPACE_DEPS := $(TOOLCHAIN_PATH)/.installed $(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed
++
++comma := ,
+build: $(KERNEL_BZIMAGE)
+qemu: $(KERNEL_BZIMAGE)
+ rm -f $(BUILD_PATH)/result
@@ -44734,13 +44887,14 @@ exit 0
+ $(QEMU_MACHINE) \
+ -m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \
+ -serial stdio \
-+ -serial file:$(BUILD_PATH)/result \
++ -chardev file,path=$(BUILD_PATH)/result,id=result \
++ $(if $(QEMU_VPORT_RESULT),-device $(QEMU_VPORT_RESULT) -device virtserialport$(comma)chardev=result,-serial chardev:result) \
+ -no-reboot \
+ -monitor none \
+ -kernel $<
+ grep -Fq success $(BUILD_PATH)/result
+
-+$(BUILD_PATH)/init-cpio-spec.txt:
++$(BUILD_PATH)/init-cpio-spec.txt: $(TOOLCHAIN_PATH)/.installed $(BUILD_PATH)/init
+ mkdir -p $(BUILD_PATH)
+ echo "file /init $(BUILD_PATH)/init 755 0 0" > $@
+ echo "file /init.sh $(PWD)/../netns.sh 755 0 0" >> $@
@@ -44758,10 +44912,10 @@ exit 0
+ echo "slink /bin/iptables xtables-legacy-multi 777 0 0" >> $@
+ echo "slink /bin/ping6 ping 777 0 0" >> $@
+ echo "dir /lib 755 0 0" >> $@
-+ echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@
-+ echo "slink /lib/ld-linux.so.1 libc.so 777 0 0" >> $@
++ echo "file /lib/libc.so $(TOOLCHAIN_PATH)/$(CHOST)/lib/libc.so 755 0 0" >> $@
++ echo "slink $$($(CHOST)-readelf -p .interp '$(BUILD_PATH)/init'| grep -o '/lib/.*') libc.so 777 0 0" >> $@
+
-+$(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config
++$(KERNEL_BUILD_PATH)/.config: $(TOOLCHAIN_PATH)/.installed kernel.config arch/$(ARCH).config
+ mkdir -p $(KERNEL_BUILD_PATH)
+ cp kernel.config $(KERNEL_BUILD_PATH)/minimal.config
+ printf 'CONFIG_NR_CPUS=$(NR_CPUS)\nCONFIG_INITRAMFS_SOURCE="$(BUILD_PATH)/init-cpio-spec.txt"\n' >> $(KERNEL_BUILD_PATH)/minimal.config
@@ -44770,29 +44924,24 @@ exit 0
+ cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config
+ $(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,)
+
-+$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
++$(KERNEL_BZIMAGE): $(TOOLCHAIN_PATH)/.installed $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE)
+
-+$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config
-+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install
++$(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config $(TOOLCHAIN_PATH)/.installed
++ rm -rf $(TOOLCHAIN_PATH)/$(CHOST)/include/linux
++ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(TOOLCHAIN_PATH)/$(CHOST) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install
+ touch $@
+
-+$(MUSL_PATH)/lib/libc.so: $(MUSL_TAR)
++$(TOOLCHAIN_PATH)/.installed: $(TOOLCHAIN_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
-+ cd $(MUSL_PATH) && CC=$(REAL_CC) ./configure --prefix=/ --disable-static --build=$(CBUILD)
-+ $(MAKE) -C $(MUSL_PATH)
-+ $(STRIP) -s $@
-+
-+$(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so
-+ $(MAKE) -C $(MUSL_PATH) DESTDIR=$(BUILD_PATH) install-headers
++ $(STRIP) -s $(TOOLCHAIN_PATH)/$(CHOST)/lib/libc.so
++ifneq ($(CCACHE_PATH),)
++ mkdir -p $(TOOLCHAIN_PATH)/bin/ccache
++ ln -s $(CCACHE_PATH) $(TOOLCHAIN_PATH)/bin/ccache/$(CC)
++endif
+ touch $@
+
-+$(MUSL_CC): $(MUSL_PATH)/lib/libc.so
-+ sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs
-+ printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" "$$@"\n' > $(BUILD_PATH)/musl-gcc
-+ chmod +x $(BUILD_PATH)/musl-gcc
-+
+$(IPERF_PATH)/.installed: $(IPERF_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
@@ -44801,6 +44950,7 @@ exit 0
+ touch $@
+
+$(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS)
++ cd $(IPERF_PATH) && autoreconf -fi
+ cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --with-openssl=no
+ $(MAKE) -C $(IPERF_PATH)
+ $(STRIP) -s $@
@@ -44816,7 +44966,7 @@ exit 0
+
+$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS)
+ mkdir -p $(BUILD_PATH)
-+ $(MUSL_CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $<
++ $(CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $<
+ $(STRIP) -s $@
+
+$(IPUTILS_PATH)/.installed: $(IPUTILS_TAR)
@@ -44835,15 +44985,15 @@ exit 0
+ touch $@
+
+$(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS)
-+ cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-multibyte --disable-progcomp --disable-readline --disable-mem-scramble
++ cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-progcomp --disable-readline --disable-mem-scramble
+ $(MAKE) -C $(BASH_PATH)
+ $(STRIP) -s $@
+
+$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
-+ printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS\n' > $(IPROUTE2_PATH)/config.mk
-+ printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
++ printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_HANDLE_AT\n' > $(IPROUTE2_PATH)/config.mk
++ printf 'libutil.a.done:\n\tflock -x $$@.lock $$(MAKE) -C lib\n\ttouch $$@\nip/ip: libutil.a.done\n\t$$(MAKE) -C ip ip\nmisc/ss: libutil.a.done\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
+ touch $@
+
+$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS)
@@ -44882,60 +45032,78 @@ exit 0
+distclean: clean
+ rm -rf $(DISTFILES_PATH)
+
++cacheclean: clean
++ifneq ($(CCACHE_DIR),)
++ rm -rf $(CCACHE_DIR)
++endif
++
+menuconfig: $(KERNEL_BUILD_PATH)/.config
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) menuconfig
+
-+.PHONY: qemu build clean distclean menuconfig
++.PHONY: qemu build clean distclean cacheclean menuconfig
+.DELETE_ON_ERROR:
---- /dev/null
+--- b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
-@@ -0,0 +1,5 @@
+@@ -0,0 +1,8 @@
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
++CONFIG_VIRTIO_MENU=y
++CONFIG_VIRTIO_MMIO=y
++CONFIG_VIRTIO_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
-+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
++CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
+CONFIG_FRAME_WARN=1280
---- /dev/null
+--- b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
-@@ -0,0 +1,6 @@
+@@ -0,0 +1,9 @@
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
++CONFIG_VIRTIO_MENU=y
++CONFIG_VIRTIO_MMIO=y
++CONFIG_VIRTIO_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
-+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
++CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
+CONFIG_FRAME_WARN=1280
---- /dev/null
+--- b/tools/testing/selftests/wireguard/qemu/arch/arm.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config
-@@ -0,0 +1,9 @@
+@@ -0,0 +1,12 @@
+CONFIG_MMU=y
+CONFIG_ARCH_MULTI_V7=y
+CONFIG_ARCH_VIRT=y
+CONFIG_THUMB2_KERNEL=n
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
++CONFIG_VIRTIO_MENU=y
++CONFIG_VIRTIO_MMIO=y
++CONFIG_VIRTIO_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
-+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
++CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
+CONFIG_FRAME_WARN=1024
---- /dev/null
+--- b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
-@@ -0,0 +1,10 @@
+@@ -0,0 +1,13 @@
+CONFIG_MMU=y
+CONFIG_ARCH_MULTI_V7=y
+CONFIG_ARCH_VIRT=y
+CONFIG_THUMB2_KERNEL=n
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
++CONFIG_VIRTIO_MENU=y
++CONFIG_VIRTIO_MMIO=y
++CONFIG_VIRTIO_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
-+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
++CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_FRAME_WARN=1024
---- /dev/null
+--- b/tools/testing/selftests/wireguard/qemu/arch/i686.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config
-@@ -0,0 +1,5 @@
+@@ -0,0 +1,6 @@
++CONFIG_ACPI=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
-+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
++CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
+CONFIG_FRAME_WARN=1024
--- b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
@@ -44947,9 +45115,9 @@ exit 0
+CONFIG_SERIAL_PMACZILOG=y
+CONFIG_SERIAL_PMACZILOG_TTYS=y
+CONFIG_SERIAL_PMACZILOG_CONSOLE=y
-+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
++CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
+CONFIG_FRAME_WARN=1024
---- /dev/null
+--- b/tools/testing/selftests/wireguard/qemu/arch/mips.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips.config
@@ -0,0 +1,11 @@
+CONFIG_CPU_MIPS32_R2=y
@@ -44961,9 +45129,9 @@ exit 0
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
-+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
++CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
+CONFIG_FRAME_WARN=1024
---- /dev/null
+--- b/tools/testing/selftests/wireguard/qemu/arch/mips64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips64.config
@@ -0,0 +1,14 @@
+CONFIG_64BIT=y
@@ -44978,9 +45146,9 @@ exit 0
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
-+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
++CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
+CONFIG_FRAME_WARN=1280
---- /dev/null
+--- b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
@@ -0,0 +1,15 @@
+CONFIG_64BIT=y
@@ -44996,9 +45164,9 @@ exit 0
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
-+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
++CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
+CONFIG_FRAME_WARN=1280
---- /dev/null
+--- b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
@@ -0,0 +1,12 @@
+CONFIG_CPU_MIPS32_R2=y
@@ -45011,9 +45179,9 @@ exit 0
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
-+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
++CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
+CONFIG_FRAME_WARN=1024
---- /dev/null
+--- b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
@@ -0,0 +1,10 @@
+CONFIG_PPC_QEMU_E500=y
@@ -45024,7 +45192,7 @@ exit 0
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_MATH_EMULATION=y
+CONFIG_CMDLINE_BOOL=y
-+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
++CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
+CONFIG_FRAME_WARN=1024
--- b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
@@ -45038,19 +45206,20 @@ exit 0
+CONFIG_HVC_CONSOLE=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_CMDLINE_BOOL=y
-+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1"
++CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1"
+CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+CONFIG_FRAME_WARN=1280
+CONFIG_THREAD_SHIFT=14
---- /dev/null
+--- b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
-@@ -0,0 +1,5 @@
+@@ -0,0 +1,6 @@
++CONFIG_ACPI=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
-+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
++CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
+CONFIG_FRAME_WARN=1280
---- /dev/null
+--- b/tools/testing/selftests/wireguard/qemu/debug.config
+++ b/tools/testing/selftests/wireguard/qemu/debug.config
@@ -0,0 +1,67 @@
+CONFIG_LOCALVERSION="-debug"
@@ -45105,7 +45274,7 @@ exit 0
+CONFIG_TRACE_IRQFLAGS=y
+CONFIG_DEBUG_BUGVERBOSE=y
+CONFIG_DEBUG_LIST=y
-+CONFIG_DEBUG_PI_LIST=y
++CONFIG_DEBUG_PLIST=y
+CONFIG_PROVE_RCU=y
+CONFIG_SPARSE_RCU_POINTER=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=21
@@ -45122,7 +45291,7 @@ exit 0
+CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
--- b/tools/testing/selftests/wireguard/qemu/init.c
+++ b/tools/testing/selftests/wireguard/qemu/init.c
-@@ -0,0 +1,284 @@
+@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
@@ -45181,26 +45350,14 @@ exit 0
+
+static void seed_rng(void)
+{
-+ int fd;
-+ struct {
-+ int entropy_count;
-+ int buffer_size;
-+ unsigned char buffer[256];
-+ } entropy = {
-+ .entropy_count = sizeof(entropy.buffer) * 8,
-+ .buffer_size = sizeof(entropy.buffer),
-+ .buffer = "Adding real entropy is not actually important for these tests. Don't try this at home, kids!"
-+ };
++ int bits = 256, fd;
+
-+ if (mknod("/dev/urandom", S_IFCHR | 0644, makedev(1, 9)))
-+ panic("mknod(/dev/urandom)");
-+ fd = open("/dev/urandom", O_WRONLY);
++ pretty_message("[+] Fake seeding RNG...");
++ fd = open("/dev/random", O_WRONLY);
+ if (fd < 0)
-+ panic("open(urandom)");
-+ for (int i = 0; i < 256; ++i) {
-+ if (ioctl(fd, RNDADDENTROPY, &entropy) < 0)
-+ panic("ioctl(urandom)");
-+ }
++ panic("open(random)");
++ if (ioctl(fd, RNDADDTOENTCNT, &bits) < 0)
++ panic("ioctl(RNDADDTOENTCNT)");
+ close(fd);
+}
+
@@ -45247,12 +45404,6 @@ exit 0
+ panic("write(exception-trace)");
+ close(fd);
+ }
-+ fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY);
-+ if (fd >= 0) {
-+ if (write(fd, "1\n", 2) != 2)
-+ panic("write(panic_on_warn)");
-+ close(fd);
-+ }
+}
+
+static void kmod_selftests(void)
@@ -45395,10 +45546,10 @@ exit 0
+
+int main(int argc, char *argv[])
+{
-+ seed_rng();
+ ensure_console();
+ print_banner();
+ mount_filesystems();
++ seed_rng();
+ kmod_selftests();
+ enable_logging();
+ clear_leaks();
@@ -45409,7 +45560,7 @@ exit 0
+}
--- b/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
-@@ -0,0 +1,89 @@
+@@ -0,0 +1,90 @@
+CONFIG_LOCALVERSION=""
+CONFIG_NET=y
+CONFIG_NETDEVICES=y
@@ -45479,6 +45630,7 @@ exit 0
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15
++CONFIG_LOG_BUF_SHIFT=18
+CONFIG_PRINTK_TIME=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_LEGACY_VSYSCALL_NONE=y
@@ -45540,3 +45692,73 @@ exit 0
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+poly1305-core.S
+--- a/include/net/dst_cache.h
++++ b/include/net/dst_cache.h
+@@ -79,6 +79,17 @@ static inline void dst_cache_reset(struct dst_cache *dst_cache)
+ dst_cache->reset_ts = jiffies;
+ }
+
++/**
++ * dst_cache_reset_now - invalidate the cache contents immediately
++ * @dst_cache: the cache
++ *
++ * The caller must be sure there are no concurrent users, as this frees
++ * all dst_cache users immediately, rather than waiting for the next
++ * per-cpu usage like dst_cache_reset does. Most callers should use the
++ * higher speed lazily-freed dst_cache_reset function instead.
++ */
++void dst_cache_reset_now(struct dst_cache *dst_cache);
++
+ /**
+ * dst_cache_init - initialize the cache, allocating the required storage
+ * @dst_cache: the cache
+--- a/net/core/dst_cache.c
++++ b/net/core/dst_cache.c
+@@ -162,3 +162,22 @@ void dst_cache_destroy(struct dst_cache *dst_cache)
+ free_percpu(dst_cache->cache);
+ }
+ EXPORT_SYMBOL_GPL(dst_cache_destroy);
++
++void dst_cache_reset_now(struct dst_cache *dst_cache)
++{
++ int i;
++
++ if (!dst_cache->cache)
++ return;
++
++ dst_cache->reset_ts = jiffies;
++ for_each_possible_cpu(i) {
++ struct dst_cache_pcpu *idst = per_cpu_ptr(dst_cache->cache, i);
++ struct dst_entry *dst = idst->dst;
++
++ idst->cookie = 0;
++ idst->dst = NULL;
++ dst_release(dst);
++ }
++}
++EXPORT_SYMBOL_GPL(dst_cache_reset_now);
+--- b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config
++++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config
+@@ -0,0 +1,13 @@
++CONFIG_PPC64=y
++CONFIG_PPC_PSERIES=y
++CONFIG_ALTIVEC=y
++CONFIG_VSX=y
++CONFIG_PPC_OF_BOOT_TRAMPOLINE=y
++CONFIG_PPC_RADIX_MMU=y
++CONFIG_HVC_CONSOLE=y
++CONFIG_CPU_BIG_ENDIAN=y
++CONFIG_CMDLINE_BOOL=y
++CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1"
++CONFIG_SECTION_MISMATCH_WARN_ONLY=y
++CONFIG_FRAME_WARN=1280
++CONFIG_THREAD_SHIFT=14
+--- b/tools/testing/selftests/wireguard/qemu/arch/s390x.config
++++ b/tools/testing/selftests/wireguard/qemu/arch/s390x.config
+@@ -0,0 +1,6 @@
++CONFIG_SCLP_VT220_TTY=y
++CONFIG_SCLP_VT220_CONSOLE=y
++CONFIG_VIRTIO_MENU=y
++CONFIG_VIRTIO_CONSOLE=y
++CONFIG_S390_GUEST=y
++CONFIG_CMDLINE="console=ttysclp0 wg.success=vport0p1 panic_on_warn=1"