kernel-5.4: backport latest patches for wireguard
These are the latest patches that just landed upstream for 5.13, will be backported by Greg into 5.10 (because of stable@), and are now in the 5.4 backport branch of wireguard: https://git.zx2c4.com/wireguard-linux/log/?h=backport-5.4.y

Cc: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Tested-by: Stijn Segers <foss@volatilesystems.org>
This commit is contained in:
parent
79481c71dc
commit
2a3b2f59fe
12 changed files with 1514 additions and 0 deletions
|
@ -0,0 +1,60 @@
|
||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Maciej W. Rozycki" <macro@orcam.me.uk>
|
||||||
|
Date: Thu, 11 Mar 2021 21:50:47 -0700
|
||||||
|
Subject: [PATCH] crypto: mips/poly1305 - enable for all MIPS processors
|
||||||
|
|
||||||
|
commit 6c810cf20feef0d4338e9b424ab7f2644a8b353e upstream.
|
||||||
|
|
||||||
|
The MIPS Poly1305 implementation is generic MIPS code written such as to
|
||||||
|
support down to the original MIPS I and MIPS III ISA for the 32-bit and
|
||||||
|
64-bit variant respectively. Lift the current limitation then to enable
|
||||||
|
code for MIPSr1 ISA or newer processors only and have it available for
|
||||||
|
all MIPS processors.
|
||||||
|
|
||||||
|
Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
|
||||||
|
Fixes: a11d055e7a64 ("crypto: mips/poly1305 - incorporate OpenSSL/CRYPTOGAMS optimized implementation")
|
||||||
|
Cc: stable@vger.kernel.org # v5.5+
|
||||||
|
Acked-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
---
|
||||||
|
arch/mips/crypto/Makefile | 4 ++--
|
||||||
|
crypto/Kconfig | 2 +-
|
||||||
|
drivers/net/Kconfig | 2 +-
|
||||||
|
3 files changed, 4 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
--- a/arch/mips/crypto/Makefile
|
||||||
|
+++ b/arch/mips/crypto/Makefile
|
||||||
|
@@ -12,8 +12,8 @@ AFLAGS_chacha-core.o += -O2 # needed to
|
||||||
|
obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o
|
||||||
|
poly1305-mips-y := poly1305-core.o poly1305-glue.o
|
||||||
|
|
||||||
|
-perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32
|
||||||
|
-perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64
|
||||||
|
+perlasm-flavour-$(CONFIG_32BIT) := o32
|
||||||
|
+perlasm-flavour-$(CONFIG_64BIT) := 64
|
||||||
|
|
||||||
|
quiet_cmd_perlasm = PERLASM $@
|
||||||
|
cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@)
|
||||||
|
--- a/crypto/Kconfig
|
||||||
|
+++ b/crypto/Kconfig
|
||||||
|
@@ -740,7 +740,7 @@ config CRYPTO_POLY1305_X86_64
|
||||||
|
|
||||||
|
config CRYPTO_POLY1305_MIPS
|
||||||
|
tristate "Poly1305 authenticator algorithm (MIPS optimized)"
|
||||||
|
- depends on CPU_MIPS32 || (CPU_MIPS64 && 64BIT)
|
||||||
|
+ depends on MIPS
|
||||||
|
select CRYPTO_ARCH_HAVE_LIB_POLY1305
|
||||||
|
|
||||||
|
config CRYPTO_MD4
|
||||||
|
--- a/drivers/net/Kconfig
|
||||||
|
+++ b/drivers/net/Kconfig
|
||||||
|
@@ -92,7 +92,7 @@ config WIREGUARD
|
||||||
|
select CRYPTO_POLY1305_ARM if ARM
|
||||||
|
select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON
|
||||||
|
select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2
|
||||||
|
- select CRYPTO_POLY1305_MIPS if CPU_MIPS32 || (CPU_MIPS64 && 64BIT)
|
||||||
|
+ select CRYPTO_POLY1305_MIPS if MIPS
|
||||||
|
help
|
||||||
|
WireGuard is a secure, fast, and easy to use replacement for IPSec
|
||||||
|
that uses modern cryptography and clever networking tricks. It's
|
|
@ -0,0 +1,24 @@
|
||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||||
|
Date: Sat, 27 Mar 2021 19:39:43 -0700
|
||||||
|
Subject: [PATCH] crypto: mips: add poly1305-core.S to .gitignore
|
||||||
|
|
||||||
|
commit dc92d0df51dc61de88bf6f4884a17bf73d5c6326 upstream.
|
||||||
|
|
||||||
|
poly1305-core.S is an auto-generated file, so it should be ignored.
|
||||||
|
|
||||||
|
Fixes: a11d055e7a64 ("crypto: mips/poly1305 - incorporate OpenSSL/CRYPTOGAMS optimized implementation")
|
||||||
|
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
|
||||||
|
Cc: Ard Biesheuvel <ardb@kernel.org>
|
||||||
|
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
---
|
||||||
|
arch/mips/crypto/.gitignore | 2 ++
|
||||||
|
1 file changed, 2 insertions(+)
|
||||||
|
create mode 100644 arch/mips/crypto/.gitignore
|
||||||
|
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/arch/mips/crypto/.gitignore
|
||||||
|
@@ -0,0 +1,2 @@
|
||||||
|
+# SPDX-License-Identifier: GPL-2.0-only
|
||||||
|
+poly1305-core.S
|
|
@ -0,0 +1,172 @@
|
||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Arnd Bergmann <arnd@arndb.de>
|
||||||
|
Date: Mon, 22 Mar 2021 18:05:15 +0100
|
||||||
|
Subject: [PATCH] crypto: poly1305 - fix poly1305_core_setkey() declaration
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
commit 8d195e7a8ada68928f2aedb2c18302a4518fe68e upstream.
|
||||||
|
|
||||||
|
gcc-11 points out a mismatch between the declaration and the definition
|
||||||
|
of poly1305_core_setkey():
|
||||||
|
|
||||||
|
lib/crypto/poly1305-donna32.c:13:67: error: argument 2 of type ‘const u8[16]’ {aka ‘const unsigned char[16]’} with mismatched bound [-Werror=array-parameter=]
|
||||||
|
13 | void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16])
|
||||||
|
| ~~~~~~~~~^~~~~~~~~~~
|
||||||
|
In file included from lib/crypto/poly1305-donna32.c:11:
|
||||||
|
include/crypto/internal/poly1305.h:21:68: note: previously declared as ‘const u8 *’ {aka ‘const unsigned char *’}
|
||||||
|
21 | void poly1305_core_setkey(struct poly1305_core_key *key, const u8 *raw_key);
|
||||||
|
|
||||||
|
This is harmless in principle, as the calling conventions are the same,
|
||||||
|
but the more specific prototype allows better type checking in the
|
||||||
|
caller.
|
||||||
|
|
||||||
|
Change the declaration to match the actual function definition.
|
||||||
|
The poly1305_simd_init() is a bit suspicious here, as it previously
|
||||||
|
had a 32-byte argument type, but looks like it needs to take the
|
||||||
|
16-byte POLY1305_BLOCK_SIZE array instead.
|
||||||
|
|
||||||
|
Fixes: 1c08a104360f ("crypto: poly1305 - add new 32 and 64-bit generic versions")
|
||||||
|
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
|
||||||
|
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
|
||||||
|
Reviewed-by: Eric Biggers <ebiggers@google.com>
|
||||||
|
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
---
|
||||||
|
arch/arm/crypto/poly1305-glue.c | 2 +-
|
||||||
|
arch/arm64/crypto/poly1305-glue.c | 2 +-
|
||||||
|
arch/mips/crypto/poly1305-glue.c | 2 +-
|
||||||
|
arch/x86/crypto/poly1305_glue.c | 6 +++---
|
||||||
|
include/crypto/internal/poly1305.h | 3 ++-
|
||||||
|
include/crypto/poly1305.h | 6 ++++--
|
||||||
|
lib/crypto/poly1305-donna32.c | 3 ++-
|
||||||
|
lib/crypto/poly1305-donna64.c | 3 ++-
|
||||||
|
lib/crypto/poly1305.c | 3 ++-
|
||||||
|
9 files changed, 18 insertions(+), 12 deletions(-)
|
||||||
|
|
||||||
|
--- a/arch/arm/crypto/poly1305-glue.c
|
||||||
|
+++ b/arch/arm/crypto/poly1305-glue.c
|
||||||
|
@@ -29,7 +29,7 @@ void __weak poly1305_blocks_neon(void *s
|
||||||
|
|
||||||
|
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
|
||||||
|
|
||||||
|
-void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
|
||||||
|
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
|
||||||
|
{
|
||||||
|
poly1305_init_arm(&dctx->h, key);
|
||||||
|
dctx->s[0] = get_unaligned_le32(key + 16);
|
||||||
|
--- a/arch/arm64/crypto/poly1305-glue.c
|
||||||
|
+++ b/arch/arm64/crypto/poly1305-glue.c
|
||||||
|
@@ -25,7 +25,7 @@ asmlinkage void poly1305_emit(void *stat
|
||||||
|
|
||||||
|
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
|
||||||
|
|
||||||
|
-void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
|
||||||
|
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
|
||||||
|
{
|
||||||
|
poly1305_init_arm64(&dctx->h, key);
|
||||||
|
dctx->s[0] = get_unaligned_le32(key + 16);
|
||||||
|
--- a/arch/mips/crypto/poly1305-glue.c
|
||||||
|
+++ b/arch/mips/crypto/poly1305-glue.c
|
||||||
|
@@ -17,7 +17,7 @@ asmlinkage void poly1305_init_mips(void
|
||||||
|
asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit);
|
||||||
|
asmlinkage void poly1305_emit_mips(void *state, u8 *digest, const u32 *nonce);
|
||||||
|
|
||||||
|
-void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
|
||||||
|
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
|
||||||
|
{
|
||||||
|
poly1305_init_mips(&dctx->h, key);
|
||||||
|
dctx->s[0] = get_unaligned_le32(key + 16);
|
||||||
|
--- a/arch/x86/crypto/poly1305_glue.c
|
||||||
|
+++ b/arch/x86/crypto/poly1305_glue.c
|
||||||
|
@@ -15,7 +15,7 @@
|
||||||
|
#include <asm/simd.h>
|
||||||
|
|
||||||
|
asmlinkage void poly1305_init_x86_64(void *ctx,
|
||||||
|
- const u8 key[POLY1305_KEY_SIZE]);
|
||||||
|
+ const u8 key[POLY1305_BLOCK_SIZE]);
|
||||||
|
asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp,
|
||||||
|
const size_t len, const u32 padbit);
|
||||||
|
asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
|
||||||
|
@@ -80,7 +80,7 @@ static void convert_to_base2_64(void *ct
|
||||||
|
state->is_base2_26 = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_KEY_SIZE])
|
||||||
|
+static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_BLOCK_SIZE])
|
||||||
|
{
|
||||||
|
poly1305_init_x86_64(ctx, key);
|
||||||
|
}
|
||||||
|
@@ -128,7 +128,7 @@ static void poly1305_simd_emit(void *ctx
|
||||||
|
poly1305_emit_avx(ctx, mac, nonce);
|
||||||
|
}
|
||||||
|
|
||||||
|
-void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
|
||||||
|
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
|
||||||
|
{
|
||||||
|
poly1305_simd_init(&dctx->h, key);
|
||||||
|
dctx->s[0] = get_unaligned_le32(&key[16]);
|
||||||
|
--- a/include/crypto/internal/poly1305.h
|
||||||
|
+++ b/include/crypto/internal/poly1305.h
|
||||||
|
@@ -18,7 +18,8 @@
|
||||||
|
* only the ε-almost-∆-universal hash function (not the full MAC) is computed.
|
||||||
|
*/
|
||||||
|
|
||||||
|
-void poly1305_core_setkey(struct poly1305_core_key *key, const u8 *raw_key);
|
||||||
|
+void poly1305_core_setkey(struct poly1305_core_key *key,
|
||||||
|
+ const u8 raw_key[POLY1305_BLOCK_SIZE]);
|
||||||
|
static inline void poly1305_core_init(struct poly1305_state *state)
|
||||||
|
{
|
||||||
|
*state = (struct poly1305_state){};
|
||||||
|
--- a/include/crypto/poly1305.h
|
||||||
|
+++ b/include/crypto/poly1305.h
|
||||||
|
@@ -58,8 +58,10 @@ struct poly1305_desc_ctx {
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
-void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key);
|
||||||
|
-void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key);
|
||||||
|
+void poly1305_init_arch(struct poly1305_desc_ctx *desc,
|
||||||
|
+ const u8 key[POLY1305_KEY_SIZE]);
|
||||||
|
+void poly1305_init_generic(struct poly1305_desc_ctx *desc,
|
||||||
|
+ const u8 key[POLY1305_KEY_SIZE]);
|
||||||
|
|
||||||
|
static inline void poly1305_init(struct poly1305_desc_ctx *desc, const u8 *key)
|
||||||
|
{
|
||||||
|
--- a/lib/crypto/poly1305-donna32.c
|
||||||
|
+++ b/lib/crypto/poly1305-donna32.c
|
||||||
|
@@ -10,7 +10,8 @@
|
||||||
|
#include <asm/unaligned.h>
|
||||||
|
#include <crypto/internal/poly1305.h>
|
||||||
|
|
||||||
|
-void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16])
|
||||||
|
+void poly1305_core_setkey(struct poly1305_core_key *key,
|
||||||
|
+ const u8 raw_key[POLY1305_BLOCK_SIZE])
|
||||||
|
{
|
||||||
|
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
|
||||||
|
key->key.r[0] = (get_unaligned_le32(&raw_key[0])) & 0x3ffffff;
|
||||||
|
--- a/lib/crypto/poly1305-donna64.c
|
||||||
|
+++ b/lib/crypto/poly1305-donna64.c
|
||||||
|
@@ -12,7 +12,8 @@
|
||||||
|
|
||||||
|
typedef __uint128_t u128;
|
||||||
|
|
||||||
|
-void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16])
|
||||||
|
+void poly1305_core_setkey(struct poly1305_core_key *key,
|
||||||
|
+ const u8 raw_key[POLY1305_BLOCK_SIZE])
|
||||||
|
{
|
||||||
|
u64 t0, t1;
|
||||||
|
|
||||||
|
--- a/lib/crypto/poly1305.c
|
||||||
|
+++ b/lib/crypto/poly1305.c
|
||||||
|
@@ -12,7 +12,8 @@
|
||||||
|
#include <linux/module.h>
|
||||||
|
#include <asm/unaligned.h>
|
||||||
|
|
||||||
|
-void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key)
|
||||||
|
+void poly1305_init_generic(struct poly1305_desc_ctx *desc,
|
||||||
|
+ const u8 key[POLY1305_KEY_SIZE])
|
||||||
|
{
|
||||||
|
poly1305_core_setkey(&desc->core_r, key);
|
||||||
|
desc->s[0] = get_unaligned_le32(key + 16);
|
|
@ -0,0 +1,29 @@
|
||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||||
|
Date: Fri, 4 Jun 2021 17:17:30 +0200
|
||||||
|
Subject: [PATCH] wireguard: selftests: remove old conntrack kconfig value
|
||||||
|
|
||||||
|
commit acf2492b51c9a3c4dfb947f4d3477a86d315150f upstream.
|
||||||
|
|
||||||
|
On recent kernels, this config symbol is no longer used.
|
||||||
|
|
||||||
|
Reported-by: Rui Salvaterra <rsalvaterra@gmail.com>
|
||||||
|
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
|
||||||
|
Cc: stable@vger.kernel.org
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
---
|
||||||
|
tools/testing/selftests/wireguard/qemu/kernel.config | 1 -
|
||||||
|
1 file changed, 1 deletion(-)
|
||||||
|
|
||||||
|
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
|
||||||
|
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
|
||||||
|
@@ -19,7 +19,6 @@ CONFIG_NETFILTER_XTABLES=y
|
||||||
|
CONFIG_NETFILTER_XT_NAT=y
|
||||||
|
CONFIG_NETFILTER_XT_MATCH_LENGTH=y
|
||||||
|
CONFIG_NETFILTER_XT_MARK=y
|
||||||
|
-CONFIG_NF_CONNTRACK_IPV4=y
|
||||||
|
CONFIG_NF_NAT_IPV4=y
|
||||||
|
CONFIG_IP_NF_IPTABLES=y
|
||||||
|
CONFIG_IP_NF_FILTER=y
|
|
@ -0,0 +1,31 @@
|
||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||||
|
Date: Fri, 4 Jun 2021 17:17:31 +0200
|
||||||
|
Subject: [PATCH] wireguard: selftests: make sure rp_filter is disabled on
|
||||||
|
vethc
|
||||||
|
|
||||||
|
commit f8873d11d4121aad35024f9379e431e0c83abead upstream.
|
||||||
|
|
||||||
|
Some distros may enable strict rp_filter by default, which will prevent
|
||||||
|
vethc from receiving the packets with an unrouteable reverse path address.
|
||||||
|
|
||||||
|
Reported-by: Hangbin Liu <liuhangbin@gmail.com>
|
||||||
|
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
|
||||||
|
Cc: stable@vger.kernel.org
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
---
|
||||||
|
tools/testing/selftests/wireguard/netns.sh | 1 +
|
||||||
|
1 file changed, 1 insertion(+)
|
||||||
|
|
||||||
|
--- a/tools/testing/selftests/wireguard/netns.sh
|
||||||
|
+++ b/tools/testing/selftests/wireguard/netns.sh
|
||||||
|
@@ -363,6 +363,7 @@ ip1 -6 rule add table main suppress_pref
|
||||||
|
ip1 -4 route add default dev wg0 table 51820
|
||||||
|
ip1 -4 rule add not fwmark 51820 table 51820
|
||||||
|
ip1 -4 rule add table main suppress_prefixlength 0
|
||||||
|
+n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/vethc/rp_filter'
|
||||||
|
# Flood the pings instead of sending just one, to trigger routing table reference counting bugs.
|
||||||
|
n1 ping -W 1 -c 100 -f 192.168.99.7
|
||||||
|
n1 ping -W 1 -c 100 -f abab::1111
|
|
@ -0,0 +1,33 @@
|
||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||||
|
Date: Fri, 4 Jun 2021 17:17:32 +0200
|
||||||
|
Subject: [PATCH] wireguard: do not use -O3
|
||||||
|
|
||||||
|
commit cc5060ca0285efe2728bced399a1955a7ce808b2 upstream.
|
||||||
|
|
||||||
|
Apparently, various versions of gcc have O3-related miscompiles. Looking
|
||||||
|
at the difference between -O2 and -O3 for gcc 11 doesn't indicate
|
||||||
|
miscompiles, but the difference also doesn't seem so significant for
|
||||||
|
performance that it's worth risking.
|
||||||
|
|
||||||
|
Link: https://lore.kernel.org/lkml/CAHk-=wjuoGyxDhAF8SsrTkN0-YfCx7E6jUN3ikC_tn2AKWTTsA@mail.gmail.com/
|
||||||
|
Link: https://lore.kernel.org/lkml/CAHmME9otB5Wwxp7H8bR_i2uH2esEMvoBMC8uEXBMH9p0q1s6Bw@mail.gmail.com/
|
||||||
|
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
|
||||||
|
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
|
||||||
|
Cc: stable@vger.kernel.org
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
---
|
||||||
|
drivers/net/wireguard/Makefile | 3 +--
|
||||||
|
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||||
|
|
||||||
|
--- a/drivers/net/wireguard/Makefile
|
||||||
|
+++ b/drivers/net/wireguard/Makefile
|
||||||
|
@@ -1,5 +1,4 @@
|
||||||
|
-ccflags-y := -O3
|
||||||
|
-ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
|
||||||
|
+ccflags-y := -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
|
||||||
|
ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG
|
||||||
|
wireguard-y := main.o
|
||||||
|
wireguard-y += noise.o
|
|
@ -0,0 +1,66 @@
|
||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||||
|
Date: Fri, 4 Jun 2021 17:17:33 +0200
|
||||||
|
Subject: [PATCH] wireguard: use synchronize_net rather than synchronize_rcu
|
||||||
|
|
||||||
|
commit 24b70eeeb4f46c09487f8155239ebfb1f875774a upstream.
|
||||||
|
|
||||||
|
Many of the synchronization points are sometimes called under the rtnl
|
||||||
|
lock, which means we should use synchronize_net rather than
|
||||||
|
synchronize_rcu. Under the hood, this expands to using the expedited
|
||||||
|
flavor of function in the event that rtnl is held, in order to not stall
|
||||||
|
other concurrent changes.
|
||||||
|
|
||||||
|
This fixes some very, very long delays when removing multiple peers at
|
||||||
|
once, which would cause some operations to take several minutes.
|
||||||
|
|
||||||
|
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
|
||||||
|
Cc: stable@vger.kernel.org
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
---
|
||||||
|
drivers/net/wireguard/peer.c | 6 +++---
|
||||||
|
drivers/net/wireguard/socket.c | 2 +-
|
||||||
|
2 files changed, 4 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
--- a/drivers/net/wireguard/peer.c
|
||||||
|
+++ b/drivers/net/wireguard/peer.c
|
||||||
|
@@ -88,7 +88,7 @@ static void peer_make_dead(struct wg_pee
|
||||||
|
/* Mark as dead, so that we don't allow jumping contexts after. */
|
||||||
|
WRITE_ONCE(peer->is_dead, true);
|
||||||
|
|
||||||
|
- /* The caller must now synchronize_rcu() for this to take effect. */
|
||||||
|
+ /* The caller must now synchronize_net() for this to take effect. */
|
||||||
|
}
|
||||||
|
|
||||||
|
static void peer_remove_after_dead(struct wg_peer *peer)
|
||||||
|
@@ -160,7 +160,7 @@ void wg_peer_remove(struct wg_peer *peer
|
||||||
|
lockdep_assert_held(&peer->device->device_update_lock);
|
||||||
|
|
||||||
|
peer_make_dead(peer);
|
||||||
|
- synchronize_rcu();
|
||||||
|
+ synchronize_net();
|
||||||
|
peer_remove_after_dead(peer);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -178,7 +178,7 @@ void wg_peer_remove_all(struct wg_device
|
||||||
|
peer_make_dead(peer);
|
||||||
|
list_add_tail(&peer->peer_list, &dead_peers);
|
||||||
|
}
|
||||||
|
- synchronize_rcu();
|
||||||
|
+ synchronize_net();
|
||||||
|
list_for_each_entry_safe(peer, temp, &dead_peers, peer_list)
|
||||||
|
peer_remove_after_dead(peer);
|
||||||
|
}
|
||||||
|
--- a/drivers/net/wireguard/socket.c
|
||||||
|
+++ b/drivers/net/wireguard/socket.c
|
||||||
|
@@ -430,7 +430,7 @@ void wg_socket_reinit(struct wg_device *
|
||||||
|
if (new4)
|
||||||
|
wg->incoming_port = ntohs(inet_sk(new4)->inet_sport);
|
||||||
|
mutex_unlock(&wg->socket_update_lock);
|
||||||
|
- synchronize_rcu();
|
||||||
|
+ synchronize_net();
|
||||||
|
sock_free(old4);
|
||||||
|
sock_free(old6);
|
||||||
|
}
|
|
@ -0,0 +1,125 @@
|
||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||||
|
Date: Fri, 4 Jun 2021 17:17:34 +0200
|
||||||
|
Subject: [PATCH] wireguard: peer: allocate in kmem_cache
|
||||||
|
|
||||||
|
commit a4e9f8e3287c9eb6bf70df982870980dd3341863 upstream.
|
||||||
|
|
||||||
|
With deployments having upwards of 600k peers now, this somewhat heavy
|
||||||
|
structure could benefit from more fine-grained allocations.
|
||||||
|
Specifically, instead of using a 2048-byte slab for a 1544-byte object,
|
||||||
|
we can now use 1544-byte objects directly, thus saving almost 25%
|
||||||
|
per-peer, or with 600k peers, that's a savings of 303 MiB. This also
|
||||||
|
makes wireguard's memory usage more transparent in tools like slabtop
|
||||||
|
and /proc/slabinfo.
|
||||||
|
|
||||||
|
Fixes: 8b5553ace83c ("wireguard: queueing: get rid of per-peer ring buffers")
|
||||||
|
Suggested-by: Arnd Bergmann <arnd@arndb.de>
|
||||||
|
Suggested-by: Matthew Wilcox <willy@infradead.org>
|
||||||
|
Cc: stable@vger.kernel.org
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
---
|
||||||
|
drivers/net/wireguard/main.c | 7 +++++++
|
||||||
|
drivers/net/wireguard/peer.c | 21 +++++++++++++++++----
|
||||||
|
drivers/net/wireguard/peer.h | 3 +++
|
||||||
|
3 files changed, 27 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
--- a/drivers/net/wireguard/main.c
|
||||||
|
+++ b/drivers/net/wireguard/main.c
|
||||||
|
@@ -28,6 +28,10 @@ static int __init mod_init(void)
|
||||||
|
#endif
|
||||||
|
wg_noise_init();
|
||||||
|
|
||||||
|
+ ret = wg_peer_init();
|
||||||
|
+ if (ret < 0)
|
||||||
|
+ goto err_peer;
|
||||||
|
+
|
||||||
|
ret = wg_device_init();
|
||||||
|
if (ret < 0)
|
||||||
|
goto err_device;
|
||||||
|
@@ -44,6 +48,8 @@ static int __init mod_init(void)
|
||||||
|
err_netlink:
|
||||||
|
wg_device_uninit();
|
||||||
|
err_device:
|
||||||
|
+ wg_peer_uninit();
|
||||||
|
+err_peer:
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -51,6 +57,7 @@ static void __exit mod_exit(void)
|
||||||
|
{
|
||||||
|
wg_genetlink_uninit();
|
||||||
|
wg_device_uninit();
|
||||||
|
+ wg_peer_uninit();
|
||||||
|
}
|
||||||
|
|
||||||
|
module_init(mod_init);
|
||||||
|
--- a/drivers/net/wireguard/peer.c
|
||||||
|
+++ b/drivers/net/wireguard/peer.c
|
||||||
|
@@ -15,6 +15,7 @@
|
||||||
|
#include <linux/rcupdate.h>
|
||||||
|
#include <linux/list.h>
|
||||||
|
|
||||||
|
+static struct kmem_cache *peer_cache;
|
||||||
|
static atomic64_t peer_counter = ATOMIC64_INIT(0);
|
||||||
|
|
||||||
|
struct wg_peer *wg_peer_create(struct wg_device *wg,
|
||||||
|
@@ -29,10 +30,10 @@ struct wg_peer *wg_peer_create(struct wg
|
||||||
|
if (wg->num_peers >= MAX_PEERS_PER_DEVICE)
|
||||||
|
return ERR_PTR(ret);
|
||||||
|
|
||||||
|
- peer = kzalloc(sizeof(*peer), GFP_KERNEL);
|
||||||
|
+ peer = kmem_cache_zalloc(peer_cache, GFP_KERNEL);
|
||||||
|
if (unlikely(!peer))
|
||||||
|
return ERR_PTR(ret);
|
||||||
|
- if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
|
||||||
|
+ if (unlikely(dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)))
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
peer->device = wg;
|
||||||
|
@@ -64,7 +65,7 @@ struct wg_peer *wg_peer_create(struct wg
|
||||||
|
return peer;
|
||||||
|
|
||||||
|
err:
|
||||||
|
- kfree(peer);
|
||||||
|
+ kmem_cache_free(peer_cache, peer);
|
||||||
|
return ERR_PTR(ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -193,7 +194,8 @@ static void rcu_release(struct rcu_head
|
||||||
|
/* The final zeroing takes care of clearing any remaining handshake key
|
||||||
|
* material and other potentially sensitive information.
|
||||||
|
*/
|
||||||
|
- kzfree(peer);
|
||||||
|
+ memzero_explicit(peer, sizeof(*peer));
|
||||||
|
+ kmem_cache_free(peer_cache, peer);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void kref_release(struct kref *refcount)
|
||||||
|
@@ -225,3 +227,14 @@ void wg_peer_put(struct wg_peer *peer)
|
||||||
|
return;
|
||||||
|
kref_put(&peer->refcount, kref_release);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+int __init wg_peer_init(void)
|
||||||
|
+{
|
||||||
|
+ peer_cache = KMEM_CACHE(wg_peer, 0);
|
||||||
|
+ return peer_cache ? 0 : -ENOMEM;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void wg_peer_uninit(void)
|
||||||
|
+{
|
||||||
|
+ kmem_cache_destroy(peer_cache);
|
||||||
|
+}
|
||||||
|
--- a/drivers/net/wireguard/peer.h
|
||||||
|
+++ b/drivers/net/wireguard/peer.h
|
||||||
|
@@ -80,4 +80,7 @@ void wg_peer_put(struct wg_peer *peer);
|
||||||
|
void wg_peer_remove(struct wg_peer *peer);
|
||||||
|
void wg_peer_remove_all(struct wg_device *wg);
|
||||||
|
|
||||||
|
+int wg_peer_init(void);
|
||||||
|
+void wg_peer_uninit(void);
|
||||||
|
+
|
||||||
|
#endif /* _WG_PEER_H */
|
|
@ -0,0 +1,43 @@
|
||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||||
|
Date: Fri, 4 Jun 2021 17:17:35 +0200
|
||||||
|
Subject: [PATCH] wireguard: allowedips: initialize list head in selftest
|
||||||
|
|
||||||
|
commit 46cfe8eee285cde465b420637507884551f5d7ca upstream.
|
||||||
|
|
||||||
|
The randomized trie tests weren't initializing the dummy peer list head,
|
||||||
|
resulting in a NULL pointer dereference when used. Fix this by
|
||||||
|
initializing it in the randomized trie test, just like we do for the
|
||||||
|
static unit test.
|
||||||
|
|
||||||
|
While we're at it, all of the other strings like this have the word
|
||||||
|
"self-test", so add it to the missing place here.
|
||||||
|
|
||||||
|
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
|
||||||
|
Cc: stable@vger.kernel.org
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
---
|
||||||
|
drivers/net/wireguard/selftest/allowedips.c | 3 ++-
|
||||||
|
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
--- a/drivers/net/wireguard/selftest/allowedips.c
|
||||||
|
+++ b/drivers/net/wireguard/selftest/allowedips.c
|
||||||
|
@@ -296,6 +296,7 @@ static __init bool randomized_test(void)
|
||||||
|
goto free;
|
||||||
|
}
|
||||||
|
kref_init(&peers[i]->refcount);
|
||||||
|
+ INIT_LIST_HEAD(&peers[i]->allowedips_list);
|
||||||
|
}
|
||||||
|
|
||||||
|
mutex_lock(&mutex);
|
||||||
|
@@ -333,7 +334,7 @@ static __init bool randomized_test(void)
|
||||||
|
if (wg_allowedips_insert_v4(&t,
|
||||||
|
(struct in_addr *)mutated,
|
||||||
|
cidr, peer, &mutex) < 0) {
|
||||||
|
- pr_err("allowedips random malloc: FAIL\n");
|
||||||
|
+ pr_err("allowedips random self-test malloc: FAIL\n");
|
||||||
|
goto free_locked;
|
||||||
|
}
|
||||||
|
if (horrible_allowedips_insert_v4(&h,
|
|
@ -0,0 +1,237 @@
|
||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||||
|
Date: Fri, 4 Jun 2021 17:17:36 +0200
|
||||||
|
Subject: [PATCH] wireguard: allowedips: remove nodes in O(1)
|
||||||
|
|
||||||
|
commit f634f418c227c912e7ea95a3299efdc9b10e4022 upstream.
|
||||||
|
|
||||||
|
Previously, deleting peers would require traversing the entire trie in
|
||||||
|
order to rebalance nodes and safely free them. This meant that removing
|
||||||
|
1000 peers from a trie with a half million nodes would take an extremely
|
||||||
|
long time, during which we're holding the rtnl lock. Large-scale users
|
||||||
|
were reporting 200ms latencies added to the networking stack as a whole
|
||||||
|
every time their userspace software would queue up significant removals.
|
||||||
|
That's a serious situation.
|
||||||
|
|
||||||
|
This commit fixes that by maintaining a double pointer to the parent's
|
||||||
|
bit pointer for each node, and then using the already existing node list
|
||||||
|
belonging to each peer to go directly to the node, fix up its pointers,
|
||||||
|
and free it with RCU. This means removal is O(1) instead of O(n), and we
|
||||||
|
don't use gobs of stack.
|
||||||
|
|
||||||
|
The removal algorithm has the same downside as the code that it fixes:
|
||||||
|
it won't collapse needlessly long runs of fillers. We can enhance that
|
||||||
|
in the future if it ever becomes a problem. This commit documents that
|
||||||
|
limitation with a TODO comment in code, a small but meaningful
|
||||||
|
improvement over the prior situation.
|
||||||
|
|
||||||
|
Currently the biggest flaw, which the next commit addresses, is that
|
||||||
|
this increases the node size on 64-bit machines from 60 bytes to
|
||||||
|
68 bytes. 60 rounds up to 64, but 68 rounds up to 128. So we wind up
|
||||||
|
using twice as much memory per node, because of power-of-two
|
||||||
|
allocations, which is a big bummer. We'll need to figure something out
|
||||||
|
there.
|
||||||
|
|
||||||
|
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
|
||||||
|
Cc: stable@vger.kernel.org
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
---
|
||||||
|
drivers/net/wireguard/allowedips.c | 132 ++++++++++++-----------------
|
||||||
|
drivers/net/wireguard/allowedips.h | 9 +-
|
||||||
|
2 files changed, 57 insertions(+), 84 deletions(-)
|
||||||
|
|
||||||
|
--- a/drivers/net/wireguard/allowedips.c
|
||||||
|
+++ b/drivers/net/wireguard/allowedips.c
|
||||||
|
@@ -66,60 +66,6 @@ static void root_remove_peer_lists(struc
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void walk_remove_by_peer(struct allowedips_node __rcu **top,
|
||||||
|
- struct wg_peer *peer, struct mutex *lock)
|
||||||
|
-{
|
||||||
|
-#define REF(p) rcu_access_pointer(p)
|
||||||
|
-#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock))
|
||||||
|
-#define PUSH(p) ({ \
|
||||||
|
- WARN_ON(IS_ENABLED(DEBUG) && len >= 128); \
|
||||||
|
- stack[len++] = p; \
|
||||||
|
- })
|
||||||
|
-
|
||||||
|
- struct allowedips_node __rcu **stack[128], **nptr;
|
||||||
|
- struct allowedips_node *node, *prev;
|
||||||
|
- unsigned int len;
|
||||||
|
-
|
||||||
|
- if (unlikely(!peer || !REF(*top)))
|
||||||
|
- return;
|
||||||
|
-
|
||||||
|
- for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) {
|
||||||
|
- nptr = stack[len - 1];
|
||||||
|
- node = DEREF(nptr);
|
||||||
|
- if (!node) {
|
||||||
|
- --len;
|
||||||
|
- continue;
|
||||||
|
- }
|
||||||
|
- if (!prev || REF(prev->bit[0]) == node ||
|
||||||
|
- REF(prev->bit[1]) == node) {
|
||||||
|
- if (REF(node->bit[0]))
|
||||||
|
- PUSH(&node->bit[0]);
|
||||||
|
- else if (REF(node->bit[1]))
|
||||||
|
- PUSH(&node->bit[1]);
|
||||||
|
- } else if (REF(node->bit[0]) == prev) {
|
||||||
|
- if (REF(node->bit[1]))
|
||||||
|
- PUSH(&node->bit[1]);
|
||||||
|
- } else {
|
||||||
|
- if (rcu_dereference_protected(node->peer,
|
||||||
|
- lockdep_is_held(lock)) == peer) {
|
||||||
|
- RCU_INIT_POINTER(node->peer, NULL);
|
||||||
|
- list_del_init(&node->peer_list);
|
||||||
|
- if (!node->bit[0] || !node->bit[1]) {
|
||||||
|
- rcu_assign_pointer(*nptr, DEREF(
|
||||||
|
- &node->bit[!REF(node->bit[0])]));
|
||||||
|
- kfree_rcu(node, rcu);
|
||||||
|
- node = DEREF(nptr);
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
- --len;
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
-#undef REF
|
||||||
|
-#undef DEREF
|
||||||
|
-#undef PUSH
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static unsigned int fls128(u64 a, u64 b)
|
||||||
|
{
|
||||||
|
return a ? fls64(a) + 64U : fls64(b);
|
||||||
|
@@ -224,6 +170,7 @@ static int add(struct allowedips_node __
|
||||||
|
RCU_INIT_POINTER(node->peer, peer);
|
||||||
|
list_add_tail(&node->peer_list, &peer->allowedips_list);
|
||||||
|
copy_and_assign_cidr(node, key, cidr, bits);
|
||||||
|
+ rcu_assign_pointer(node->parent_bit, trie);
|
||||||
|
rcu_assign_pointer(*trie, node);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
@@ -243,9 +190,9 @@ static int add(struct allowedips_node __
|
||||||
|
if (!node) {
|
||||||
|
down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
|
||||||
|
} else {
|
||||||
|
- down = rcu_dereference_protected(CHOOSE_NODE(node, key),
|
||||||
|
- lockdep_is_held(lock));
|
||||||
|
+ down = rcu_dereference_protected(CHOOSE_NODE(node, key), lockdep_is_held(lock));
|
||||||
|
if (!down) {
|
||||||
|
+ rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, key));
|
||||||
|
rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
@@ -254,29 +201,37 @@ static int add(struct allowedips_node __
|
||||||
|
parent = node;
|
||||||
|
|
||||||
|
if (newnode->cidr == cidr) {
|
||||||
|
+ rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(newnode, down->bits));
|
||||||
|
rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
|
||||||
|
- if (!parent)
|
||||||
|
+ if (!parent) {
|
||||||
|
+ rcu_assign_pointer(newnode->parent_bit, trie);
|
||||||
|
rcu_assign_pointer(*trie, newnode);
|
||||||
|
- else
|
||||||
|
- rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits),
|
||||||
|
- newnode);
|
||||||
|
- } else {
|
||||||
|
- node = kzalloc(sizeof(*node), GFP_KERNEL);
|
||||||
|
- if (unlikely(!node)) {
|
||||||
|
- list_del(&newnode->peer_list);
|
||||||
|
- kfree(newnode);
|
||||||
|
- return -ENOMEM;
|
||||||
|
+ } else {
|
||||||
|
+ rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(parent, newnode->bits));
|
||||||
|
+ rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), newnode);
|
||||||
|
}
|
||||||
|
- INIT_LIST_HEAD(&node->peer_list);
|
||||||
|
- copy_and_assign_cidr(node, newnode->bits, cidr, bits);
|
||||||
|
+ return 0;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
|
||||||
|
+ if (unlikely(!node)) {
|
||||||
|
+ list_del(&newnode->peer_list);
|
||||||
|
+ kfree(newnode);
|
||||||
|
+ return -ENOMEM;
|
||||||
|
+ }
|
||||||
|
+ INIT_LIST_HEAD(&node->peer_list);
|
||||||
|
+ copy_and_assign_cidr(node, newnode->bits, cidr, bits);
|
||||||
|
|
||||||
|
- rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
|
||||||
|
- rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
|
||||||
|
- if (!parent)
|
||||||
|
- rcu_assign_pointer(*trie, node);
|
||||||
|
- else
|
||||||
|
- rcu_assign_pointer(CHOOSE_NODE(parent, node->bits),
|
||||||
|
- node);
|
||||||
|
+ rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(node, down->bits));
|
||||||
|
+ rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
|
||||||
|
+ rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, newnode->bits));
|
||||||
|
+ rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
|
||||||
|
+ if (!parent) {
|
||||||
|
+ rcu_assign_pointer(node->parent_bit, trie);
|
||||||
|
+ rcu_assign_pointer(*trie, node);
|
||||||
|
+ } else {
|
||||||
|
+ rcu_assign_pointer(node->parent_bit, &CHOOSE_NODE(parent, node->bits));
|
||||||
|
+ rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), node);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
@@ -335,9 +290,30 @@ int wg_allowedips_insert_v6(struct allow
|
||||||
|
void wg_allowedips_remove_by_peer(struct allowedips *table,
|
||||||
|
struct wg_peer *peer, struct mutex *lock)
|
||||||
|
{
|
||||||
|
+ struct allowedips_node *node, *child, *tmp;
|
||||||
|
+
|
||||||
|
+ if (list_empty(&peer->allowedips_list))
|
||||||
|
+ return;
|
||||||
|
++table->seq;
|
||||||
|
- walk_remove_by_peer(&table->root4, peer, lock);
|
||||||
|
- walk_remove_by_peer(&table->root6, peer, lock);
|
||||||
|
+ list_for_each_entry_safe(node, tmp, &peer->allowedips_list, peer_list) {
|
||||||
|
+ list_del_init(&node->peer_list);
|
||||||
|
+ RCU_INIT_POINTER(node->peer, NULL);
|
||||||
|
+ if (node->bit[0] && node->bit[1])
|
||||||
|
+ continue;
|
||||||
|
+ child = rcu_dereference_protected(
|
||||||
|
+ node->bit[!rcu_access_pointer(node->bit[0])],
|
||||||
|
+ lockdep_is_held(lock));
|
||||||
|
+ if (child)
|
||||||
|
+ child->parent_bit = node->parent_bit;
|
||||||
|
+ *rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child;
|
||||||
|
+ kfree_rcu(node, rcu);
|
||||||
|
+
|
||||||
|
+ /* TODO: Note that we currently don't walk up and down in order to
|
||||||
|
+ * free any potential filler nodes. This means that this function
|
||||||
|
+ * doesn't free up as much as it could, which could be revisited
|
||||||
|
+ * at some point.
|
||||||
|
+ */
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr)
|
||||||
|
--- a/drivers/net/wireguard/allowedips.h
|
||||||
|
+++ b/drivers/net/wireguard/allowedips.h
|
||||||
|
@@ -15,14 +15,11 @@ struct wg_peer;
|
||||||
|
struct allowedips_node {
|
||||||
|
struct wg_peer __rcu *peer;
|
||||||
|
struct allowedips_node __rcu *bit[2];
|
||||||
|
- /* While it may seem scandalous that we waste space for v4,
|
||||||
|
- * we're alloc'ing to the nearest power of 2 anyway, so this
|
||||||
|
- * doesn't actually make a difference.
|
||||||
|
- */
|
||||||
|
- u8 bits[16] __aligned(__alignof(u64));
|
||||||
|
u8 cidr, bit_at_a, bit_at_b, bitlen;
|
||||||
|
+ u8 bits[16] __aligned(__alignof(u64));
|
||||||
|
|
||||||
|
- /* Keep rarely used list at bottom to be beyond cache line. */
|
||||||
|
+ /* Keep rarely used members at bottom to be beyond cache line. */
|
||||||
|
+ struct allowedips_node *__rcu *parent_bit; /* XXX: this puts us at 68->128 bytes instead of 60->64 bytes!! */
|
||||||
|
union {
|
||||||
|
struct list_head peer_list;
|
||||||
|
struct rcu_head rcu;
|
|
@ -0,0 +1,173 @@
|
||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||||
|
Date: Fri, 4 Jun 2021 17:17:37 +0200
|
||||||
|
Subject: [PATCH] wireguard: allowedips: allocate nodes in kmem_cache
|
||||||
|
|
||||||
|
commit dc680de28ca849dfe589dc15ac56d22505f0ef11 upstream.
|
||||||
|
|
||||||
|
The previous commit moved from O(n) to O(1) for removal, but in the
|
||||||
|
process introduced an additional pointer member to a struct that
|
||||||
|
increased the size from 60 to 68 bytes, putting nodes in the 128-byte
|
||||||
|
slab. With deployed systems having as many as 2 million nodes, this
|
||||||
|
represents a significant doubling in memory usage (128 MiB -> 256 MiB).
|
||||||
|
Fix this by using our own kmem_cache, that's sized exactly right. This
|
||||||
|
also makes wireguard's memory usage more transparent in tools like
|
||||||
|
slabtop and /proc/slabinfo.
|
||||||
|
|
||||||
|
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
|
||||||
|
Suggested-by: Arnd Bergmann <arnd@arndb.de>
|
||||||
|
Suggested-by: Matthew Wilcox <willy@infradead.org>
|
||||||
|
Cc: stable@vger.kernel.org
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
---
|
||||||
|
drivers/net/wireguard/allowedips.c | 31 ++++++++++++++++++++++++------
|
||||||
|
drivers/net/wireguard/allowedips.h | 5 ++++-
|
||||||
|
drivers/net/wireguard/main.c | 10 +++++++++-
|
||||||
|
3 files changed, 38 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
--- a/drivers/net/wireguard/allowedips.c
|
||||||
|
+++ b/drivers/net/wireguard/allowedips.c
|
||||||
|
@@ -6,6 +6,8 @@
|
||||||
|
#include "allowedips.h"
|
||||||
|
#include "peer.h"
|
||||||
|
|
||||||
|
+static struct kmem_cache *node_cache;
|
||||||
|
+
|
||||||
|
static void swap_endian(u8 *dst, const u8 *src, u8 bits)
|
||||||
|
{
|
||||||
|
if (bits == 32) {
|
||||||
|
@@ -40,6 +42,11 @@ static void push_rcu(struct allowedips_n
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+static void node_free_rcu(struct rcu_head *rcu)
|
||||||
|
+{
|
||||||
|
+ kmem_cache_free(node_cache, container_of(rcu, struct allowedips_node, rcu));
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static void root_free_rcu(struct rcu_head *rcu)
|
||||||
|
{
|
||||||
|
struct allowedips_node *node, *stack[128] = {
|
||||||
|
@@ -49,7 +56,7 @@ static void root_free_rcu(struct rcu_hea
|
||||||
|
while (len > 0 && (node = stack[--len])) {
|
||||||
|
push_rcu(stack, node->bit[0], &len);
|
||||||
|
push_rcu(stack, node->bit[1], &len);
|
||||||
|
- kfree(node);
|
||||||
|
+ kmem_cache_free(node_cache, node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -164,7 +171,7 @@ static int add(struct allowedips_node __
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
if (!rcu_access_pointer(*trie)) {
|
||||||
|
- node = kzalloc(sizeof(*node), GFP_KERNEL);
|
||||||
|
+ node = kmem_cache_zalloc(node_cache, GFP_KERNEL);
|
||||||
|
if (unlikely(!node))
|
||||||
|
return -ENOMEM;
|
||||||
|
RCU_INIT_POINTER(node->peer, peer);
|
||||||
|
@@ -180,7 +187,7 @@ static int add(struct allowedips_node __
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
- newnode = kzalloc(sizeof(*newnode), GFP_KERNEL);
|
||||||
|
+ newnode = kmem_cache_zalloc(node_cache, GFP_KERNEL);
|
||||||
|
if (unlikely(!newnode))
|
||||||
|
return -ENOMEM;
|
||||||
|
RCU_INIT_POINTER(newnode->peer, peer);
|
||||||
|
@@ -213,10 +220,10 @@ static int add(struct allowedips_node __
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
- node = kzalloc(sizeof(*node), GFP_KERNEL);
|
||||||
|
+ node = kmem_cache_zalloc(node_cache, GFP_KERNEL);
|
||||||
|
if (unlikely(!node)) {
|
||||||
|
list_del(&newnode->peer_list);
|
||||||
|
- kfree(newnode);
|
||||||
|
+ kmem_cache_free(node_cache, newnode);
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
INIT_LIST_HEAD(&node->peer_list);
|
||||||
|
@@ -306,7 +313,7 @@ void wg_allowedips_remove_by_peer(struct
|
||||||
|
if (child)
|
||||||
|
child->parent_bit = node->parent_bit;
|
||||||
|
*rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child;
|
||||||
|
- kfree_rcu(node, rcu);
|
||||||
|
+ call_rcu(&node->rcu, node_free_rcu);
|
||||||
|
|
||||||
|
/* TODO: Note that we currently don't walk up and down in order to
|
||||||
|
* free any potential filler nodes. This means that this function
|
||||||
|
@@ -350,4 +357,16 @@ struct wg_peer *wg_allowedips_lookup_src
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
+int __init wg_allowedips_slab_init(void)
|
||||||
|
+{
|
||||||
|
+ node_cache = KMEM_CACHE(allowedips_node, 0);
|
||||||
|
+ return node_cache ? 0 : -ENOMEM;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void wg_allowedips_slab_uninit(void)
|
||||||
|
+{
|
||||||
|
+ rcu_barrier();
|
||||||
|
+ kmem_cache_destroy(node_cache);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
#include "selftest/allowedips.c"
|
||||||
|
--- a/drivers/net/wireguard/allowedips.h
|
||||||
|
+++ b/drivers/net/wireguard/allowedips.h
|
||||||
|
@@ -19,7 +19,7 @@ struct allowedips_node {
|
||||||
|
u8 bits[16] __aligned(__alignof(u64));
|
||||||
|
|
||||||
|
/* Keep rarely used members at bottom to be beyond cache line. */
|
||||||
|
- struct allowedips_node *__rcu *parent_bit; /* XXX: this puts us at 68->128 bytes instead of 60->64 bytes!! */
|
||||||
|
+ struct allowedips_node *__rcu *parent_bit;
|
||||||
|
union {
|
||||||
|
struct list_head peer_list;
|
||||||
|
struct rcu_head rcu;
|
||||||
|
@@ -53,4 +53,7 @@ struct wg_peer *wg_allowedips_lookup_src
|
||||||
|
bool wg_allowedips_selftest(void);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
+int wg_allowedips_slab_init(void);
|
||||||
|
+void wg_allowedips_slab_uninit(void);
|
||||||
|
+
|
||||||
|
#endif /* _WG_ALLOWEDIPS_H */
|
||||||
|
--- a/drivers/net/wireguard/main.c
|
||||||
|
+++ b/drivers/net/wireguard/main.c
|
||||||
|
@@ -21,10 +21,15 @@ static int __init mod_init(void)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
+ ret = wg_allowedips_slab_init();
|
||||||
|
+ if (ret < 0)
|
||||||
|
+ goto err_allowedips;
|
||||||
|
+
|
||||||
|
#ifdef DEBUG
|
||||||
|
+ ret = -ENOTRECOVERABLE;
|
||||||
|
if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() ||
|
||||||
|
!wg_ratelimiter_selftest())
|
||||||
|
- return -ENOTRECOVERABLE;
|
||||||
|
+ goto err_peer;
|
||||||
|
#endif
|
||||||
|
wg_noise_init();
|
||||||
|
|
||||||
|
@@ -50,6 +55,8 @@ err_netlink:
|
||||||
|
err_device:
|
||||||
|
wg_peer_uninit();
|
||||||
|
err_peer:
|
||||||
|
+ wg_allowedips_slab_uninit();
|
||||||
|
+err_allowedips:
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -58,6 +65,7 @@ static void __exit mod_exit(void)
|
||||||
|
wg_genetlink_uninit();
|
||||||
|
wg_device_uninit();
|
||||||
|
wg_peer_uninit();
|
||||||
|
+ wg_allowedips_slab_uninit();
|
||||||
|
}
|
||||||
|
|
||||||
|
module_init(mod_init);
|
|
@ -0,0 +1,521 @@
|
||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||||
|
Date: Fri, 4 Jun 2021 17:17:38 +0200
|
||||||
|
Subject: [PATCH] wireguard: allowedips: free empty intermediate nodes when
|
||||||
|
removing single node
|
||||||
|
|
||||||
|
commit bf7b042dc62a31f66d3a41dd4dfc7806f267b307 upstream.
|
||||||
|
|
||||||
|
When removing single nodes, it's possible that that node's parent is an
|
||||||
|
empty intermediate node, in which case, it too should be removed.
|
||||||
|
Otherwise the trie fills up and never is fully emptied, leading to
|
||||||
|
gradual memory leaks over time for tries that are modified often. There
|
||||||
|
was originally code to do this, but it was removed during refactoring in
|
||||||
|
2016 and never reworked. Now that we have proper parent pointers from
|
||||||
|
the previous commits, we can implement this properly.
|
||||||
|
|
||||||
|
In order to reduce branching and expensive comparisons, we want to keep
|
||||||
|
the double pointer for parent assignment (which lets us easily chain up
|
||||||
|
to the root), but we still need to actually get the parent's base
|
||||||
|
address. So encode the bit number into the last two bits of the pointer,
|
||||||
|
and pack and unpack it as needed. This is a little bit clumsy but is the
|
||||||
|
fastest and least memory-wasteful of the compromises. Note that we align
|
||||||
|
the root struct here to a minimum of 4, because it's embedded into a
|
||||||
|
larger struct, and we're relying on having the bottom two bits for our
|
||||||
|
flag, which would only be 16-bit aligned on m68k.
|
||||||
|
|
||||||
|
The existing macro-based helpers were a bit unwieldy for adding the bit
|
||||||
|
packing to, so this commit replaces them with safer and clearer ordinary
|
||||||
|
functions.
|
||||||
|
|
||||||
|
We add a test to the randomized/fuzzer part of the selftests, to free
|
||||||
|
the randomized tries by-peer, refuzz it, and repeat, until it's supposed
|
||||||
|
to be empty, and then see if that actually resulted in the whole
|
||||||
|
thing being emptied. That combined with kmemcheck should hopefully make
|
||||||
|
sure this commit is doing what it should. Along the way this resulted in
|
||||||
|
various other cleanups of the tests and fixes for recent graphviz.
|
||||||
|
|
||||||
|
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
|
||||||
|
Cc: stable@vger.kernel.org
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||||
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||||
|
---
|
||||||
|
drivers/net/wireguard/allowedips.c | 102 ++++++------
|
||||||
|
drivers/net/wireguard/allowedips.h | 4 +-
|
||||||
|
drivers/net/wireguard/selftest/allowedips.c | 162 ++++++++++----------
|
||||||
|
3 files changed, 137 insertions(+), 131 deletions(-)
|
||||||
|
|
||||||
|
--- a/drivers/net/wireguard/allowedips.c
|
||||||
|
+++ b/drivers/net/wireguard/allowedips.c
|
||||||
|
@@ -30,8 +30,11 @@ static void copy_and_assign_cidr(struct
|
||||||
|
node->bitlen = bits;
|
||||||
|
memcpy(node->bits, src, bits / 8U);
|
||||||
|
}
|
||||||
|
-#define CHOOSE_NODE(parent, key) \
|
||||||
|
- parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]
|
||||||
|
+
|
||||||
|
+static inline u8 choose(struct allowedips_node *node, const u8 *key)
|
||||||
|
+{
|
||||||
|
+ return (key[node->bit_at_a] >> node->bit_at_b) & 1;
|
||||||
|
+}
|
||||||
|
|
||||||
|
static void push_rcu(struct allowedips_node **stack,
|
||||||
|
struct allowedips_node __rcu *p, unsigned int *len)
|
||||||
|
@@ -112,7 +115,7 @@ static struct allowedips_node *find_node
|
||||||
|
found = node;
|
||||||
|
if (node->cidr == bits)
|
||||||
|
break;
|
||||||
|
- node = rcu_dereference_bh(CHOOSE_NODE(node, key));
|
||||||
|
+ node = rcu_dereference_bh(node->bit[choose(node, key)]);
|
||||||
|
}
|
||||||
|
return found;
|
||||||
|
}
|
||||||
|
@@ -144,8 +147,7 @@ static bool node_placement(struct allowe
|
||||||
|
u8 cidr, u8 bits, struct allowedips_node **rnode,
|
||||||
|
struct mutex *lock)
|
||||||
|
{
|
||||||
|
- struct allowedips_node *node = rcu_dereference_protected(trie,
|
||||||
|
- lockdep_is_held(lock));
|
||||||
|
+ struct allowedips_node *node = rcu_dereference_protected(trie, lockdep_is_held(lock));
|
||||||
|
struct allowedips_node *parent = NULL;
|
||||||
|
bool exact = false;
|
||||||
|
|
||||||
|
@@ -155,13 +157,24 @@ static bool node_placement(struct allowe
|
||||||
|
exact = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
- node = rcu_dereference_protected(CHOOSE_NODE(parent, key),
|
||||||
|
- lockdep_is_held(lock));
|
||||||
|
+ node = rcu_dereference_protected(parent->bit[choose(parent, key)], lockdep_is_held(lock));
|
||||||
|
}
|
||||||
|
*rnode = parent;
|
||||||
|
return exact;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static inline void connect_node(struct allowedips_node **parent, u8 bit, struct allowedips_node *node)
|
||||||
|
+{
|
||||||
|
+ node->parent_bit_packed = (unsigned long)parent | bit;
|
||||||
|
+ rcu_assign_pointer(*parent, node);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static inline void choose_and_connect_node(struct allowedips_node *parent, struct allowedips_node *node)
|
||||||
|
+{
|
||||||
|
+ u8 bit = choose(parent, node->bits);
|
||||||
|
+ connect_node(&parent->bit[bit], bit, node);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
|
||||||
|
u8 cidr, struct wg_peer *peer, struct mutex *lock)
|
||||||
|
{
|
||||||
|
@@ -177,8 +190,7 @@ static int add(struct allowedips_node __
|
||||||
|
RCU_INIT_POINTER(node->peer, peer);
|
||||||
|
list_add_tail(&node->peer_list, &peer->allowedips_list);
|
||||||
|
copy_and_assign_cidr(node, key, cidr, bits);
|
||||||
|
- rcu_assign_pointer(node->parent_bit, trie);
|
||||||
|
- rcu_assign_pointer(*trie, node);
|
||||||
|
+ connect_node(trie, 2, node);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (node_placement(*trie, key, cidr, bits, &node, lock)) {
|
||||||
|
@@ -197,10 +209,10 @@ static int add(struct allowedips_node __
|
||||||
|
if (!node) {
|
||||||
|
down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
|
||||||
|
} else {
|
||||||
|
- down = rcu_dereference_protected(CHOOSE_NODE(node, key), lockdep_is_held(lock));
|
||||||
|
+ const u8 bit = choose(node, key);
|
||||||
|
+ down = rcu_dereference_protected(node->bit[bit], lockdep_is_held(lock));
|
||||||
|
if (!down) {
|
||||||
|
- rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, key));
|
||||||
|
- rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
|
||||||
|
+ connect_node(&node->bit[bit], bit, newnode);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -208,15 +220,11 @@ static int add(struct allowedips_node __
|
||||||
|
parent = node;
|
||||||
|
|
||||||
|
if (newnode->cidr == cidr) {
|
||||||
|
- rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(newnode, down->bits));
|
||||||
|
- rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
|
||||||
|
- if (!parent) {
|
||||||
|
- rcu_assign_pointer(newnode->parent_bit, trie);
|
||||||
|
- rcu_assign_pointer(*trie, newnode);
|
||||||
|
- } else {
|
||||||
|
- rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(parent, newnode->bits));
|
||||||
|
- rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), newnode);
|
||||||
|
- }
|
||||||
|
+ choose_and_connect_node(newnode, down);
|
||||||
|
+ if (!parent)
|
||||||
|
+ connect_node(trie, 2, newnode);
|
||||||
|
+ else
|
||||||
|
+ choose_and_connect_node(parent, newnode);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -229,17 +237,12 @@ static int add(struct allowedips_node __
|
||||||
|
INIT_LIST_HEAD(&node->peer_list);
|
||||||
|
copy_and_assign_cidr(node, newnode->bits, cidr, bits);
|
||||||
|
|
||||||
|
- rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(node, down->bits));
|
||||||
|
- rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
|
||||||
|
- rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, newnode->bits));
|
||||||
|
- rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
|
||||||
|
- if (!parent) {
|
||||||
|
- rcu_assign_pointer(node->parent_bit, trie);
|
||||||
|
- rcu_assign_pointer(*trie, node);
|
||||||
|
- } else {
|
||||||
|
- rcu_assign_pointer(node->parent_bit, &CHOOSE_NODE(parent, node->bits));
|
||||||
|
- rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), node);
|
||||||
|
- }
|
||||||
|
+ choose_and_connect_node(node, down);
|
||||||
|
+ choose_and_connect_node(node, newnode);
|
||||||
|
+ if (!parent)
|
||||||
|
+ connect_node(trie, 2, node);
|
||||||
|
+ else
|
||||||
|
+ choose_and_connect_node(parent, node);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -297,7 +300,8 @@ int wg_allowedips_insert_v6(struct allow
|
||||||
|
void wg_allowedips_remove_by_peer(struct allowedips *table,
|
||||||
|
struct wg_peer *peer, struct mutex *lock)
|
||||||
|
{
|
||||||
|
- struct allowedips_node *node, *child, *tmp;
|
||||||
|
+ struct allowedips_node *node, *child, **parent_bit, *parent, *tmp;
|
||||||
|
+ bool free_parent;
|
||||||
|
|
||||||
|
if (list_empty(&peer->allowedips_list))
|
||||||
|
return;
|
||||||
|
@@ -307,19 +311,29 @@ void wg_allowedips_remove_by_peer(struct
|
||||||
|
RCU_INIT_POINTER(node->peer, NULL);
|
||||||
|
if (node->bit[0] && node->bit[1])
|
||||||
|
continue;
|
||||||
|
- child = rcu_dereference_protected(
|
||||||
|
- node->bit[!rcu_access_pointer(node->bit[0])],
|
||||||
|
- lockdep_is_held(lock));
|
||||||
|
+ child = rcu_dereference_protected(node->bit[!rcu_access_pointer(node->bit[0])],
|
||||||
|
+ lockdep_is_held(lock));
|
||||||
|
if (child)
|
||||||
|
- child->parent_bit = node->parent_bit;
|
||||||
|
- *rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child;
|
||||||
|
+ child->parent_bit_packed = node->parent_bit_packed;
|
||||||
|
+ parent_bit = (struct allowedips_node **)(node->parent_bit_packed & ~3UL);
|
||||||
|
+ *parent_bit = child;
|
||||||
|
+ parent = (void *)parent_bit -
|
||||||
|
+ offsetof(struct allowedips_node, bit[node->parent_bit_packed & 1]);
|
||||||
|
+ free_parent = !rcu_access_pointer(node->bit[0]) &&
|
||||||
|
+ !rcu_access_pointer(node->bit[1]) &&
|
||||||
|
+ (node->parent_bit_packed & 3) <= 1 &&
|
||||||
|
+ !rcu_access_pointer(parent->peer);
|
||||||
|
+ if (free_parent)
|
||||||
|
+ child = rcu_dereference_protected(
|
||||||
|
+ parent->bit[!(node->parent_bit_packed & 1)],
|
||||||
|
+ lockdep_is_held(lock));
|
||||||
|
call_rcu(&node->rcu, node_free_rcu);
|
||||||
|
-
|
||||||
|
- /* TODO: Note that we currently don't walk up and down in order to
|
||||||
|
- * free any potential filler nodes. This means that this function
|
||||||
|
- * doesn't free up as much as it could, which could be revisited
|
||||||
|
- * at some point.
|
||||||
|
- */
|
||||||
|
+ if (!free_parent)
|
||||||
|
+ continue;
|
||||||
|
+ if (child)
|
||||||
|
+ child->parent_bit_packed = parent->parent_bit_packed;
|
||||||
|
+ *(struct allowedips_node **)(parent->parent_bit_packed & ~3UL) = child;
|
||||||
|
+ call_rcu(&parent->rcu, node_free_rcu);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
--- a/drivers/net/wireguard/allowedips.h
|
||||||
|
+++ b/drivers/net/wireguard/allowedips.h
|
||||||
|
@@ -19,7 +19,7 @@ struct allowedips_node {
|
||||||
|
u8 bits[16] __aligned(__alignof(u64));
|
||||||
|
|
||||||
|
/* Keep rarely used members at bottom to be beyond cache line. */
|
||||||
|
- struct allowedips_node *__rcu *parent_bit;
|
||||||
|
+ unsigned long parent_bit_packed;
|
||||||
|
union {
|
||||||
|
struct list_head peer_list;
|
||||||
|
struct rcu_head rcu;
|
||||||
|
@@ -30,7 +30,7 @@ struct allowedips {
|
||||||
|
struct allowedips_node __rcu *root4;
|
||||||
|
struct allowedips_node __rcu *root6;
|
||||||
|
u64 seq;
|
||||||
|
-};
|
||||||
|
+} __aligned(4); /* We pack the lower 2 bits of &root, but m68k only gives 16-bit alignment. */
|
||||||
|
|
||||||
|
void wg_allowedips_init(struct allowedips *table);
|
||||||
|
void wg_allowedips_free(struct allowedips *table, struct mutex *mutex);
|
||||||
|
--- a/drivers/net/wireguard/selftest/allowedips.c
|
||||||
|
+++ b/drivers/net/wireguard/selftest/allowedips.c
|
||||||
|
@@ -19,32 +19,22 @@
|
||||||
|
|
||||||
|
#include <linux/siphash.h>
|
||||||
|
|
||||||
|
-static __init void swap_endian_and_apply_cidr(u8 *dst, const u8 *src, u8 bits,
|
||||||
|
- u8 cidr)
|
||||||
|
-{
|
||||||
|
- swap_endian(dst, src, bits);
|
||||||
|
- memset(dst + (cidr + 7) / 8, 0, bits / 8 - (cidr + 7) / 8);
|
||||||
|
- if (cidr)
|
||||||
|
- dst[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static __init void print_node(struct allowedips_node *node, u8 bits)
|
||||||
|
{
|
||||||
|
char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n";
|
||||||
|
- char *fmt_declaration = KERN_DEBUG
|
||||||
|
- "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n";
|
||||||
|
+ char *fmt_declaration = KERN_DEBUG "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n";
|
||||||
|
+ u8 ip1[16], ip2[16], cidr1, cidr2;
|
||||||
|
char *style = "dotted";
|
||||||
|
- u8 ip1[16], ip2[16];
|
||||||
|
u32 color = 0;
|
||||||
|
|
||||||
|
+ if (node == NULL)
|
||||||
|
+ return;
|
||||||
|
if (bits == 32) {
|
||||||
|
fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n";
|
||||||
|
- fmt_declaration = KERN_DEBUG
|
||||||
|
- "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n";
|
||||||
|
+ fmt_declaration = KERN_DEBUG "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n";
|
||||||
|
} else if (bits == 128) {
|
||||||
|
fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n";
|
||||||
|
- fmt_declaration = KERN_DEBUG
|
||||||
|
- "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n";
|
||||||
|
+ fmt_declaration = KERN_DEBUG "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n";
|
||||||
|
}
|
||||||
|
if (node->peer) {
|
||||||
|
hsiphash_key_t key = { { 0 } };
|
||||||
|
@@ -55,24 +45,20 @@ static __init void print_node(struct all
|
||||||
|
hsiphash_1u32(0xabad1dea, &key) % 200;
|
||||||
|
style = "bold";
|
||||||
|
}
|
||||||
|
- swap_endian_and_apply_cidr(ip1, node->bits, bits, node->cidr);
|
||||||
|
- printk(fmt_declaration, ip1, node->cidr, style, color);
|
||||||
|
+ wg_allowedips_read_node(node, ip1, &cidr1);
|
||||||
|
+ printk(fmt_declaration, ip1, cidr1, style, color);
|
||||||
|
if (node->bit[0]) {
|
||||||
|
- swap_endian_and_apply_cidr(ip2,
|
||||||
|
- rcu_dereference_raw(node->bit[0])->bits, bits,
|
||||||
|
- node->cidr);
|
||||||
|
- printk(fmt_connection, ip1, node->cidr, ip2,
|
||||||
|
- rcu_dereference_raw(node->bit[0])->cidr);
|
||||||
|
- print_node(rcu_dereference_raw(node->bit[0]), bits);
|
||||||
|
+ wg_allowedips_read_node(rcu_dereference_raw(node->bit[0]), ip2, &cidr2);
|
||||||
|
+ printk(fmt_connection, ip1, cidr1, ip2, cidr2);
|
||||||
|
}
|
||||||
|
if (node->bit[1]) {
|
||||||
|
- swap_endian_and_apply_cidr(ip2,
|
||||||
|
- rcu_dereference_raw(node->bit[1])->bits,
|
||||||
|
- bits, node->cidr);
|
||||||
|
- printk(fmt_connection, ip1, node->cidr, ip2,
|
||||||
|
- rcu_dereference_raw(node->bit[1])->cidr);
|
||||||
|
- print_node(rcu_dereference_raw(node->bit[1]), bits);
|
||||||
|
+ wg_allowedips_read_node(rcu_dereference_raw(node->bit[1]), ip2, &cidr2);
|
||||||
|
+ printk(fmt_connection, ip1, cidr1, ip2, cidr2);
|
||||||
|
}
|
||||||
|
+ if (node->bit[0])
|
||||||
|
+ print_node(rcu_dereference_raw(node->bit[0]), bits);
|
||||||
|
+ if (node->bit[1])
|
||||||
|
+ print_node(rcu_dereference_raw(node->bit[1]), bits);
|
||||||
|
}
|
||||||
|
|
||||||
|
static __init void print_tree(struct allowedips_node __rcu *top, u8 bits)
|
||||||
|
@@ -121,8 +107,8 @@ static __init inline union nf_inet_addr
|
||||||
|
{
|
||||||
|
union nf_inet_addr mask;
|
||||||
|
|
||||||
|
- memset(&mask, 0x00, 128 / 8);
|
||||||
|
- memset(&mask, 0xff, cidr / 8);
|
||||||
|
+ memset(&mask, 0, sizeof(mask));
|
||||||
|
+ memset(&mask.all, 0xff, cidr / 8);
|
||||||
|
if (cidr % 32)
|
||||||
|
mask.all[cidr / 32] = (__force u32)htonl(
|
||||||
|
(0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL);
|
||||||
|
@@ -149,42 +135,36 @@ horrible_mask_self(struct horrible_allow
|
||||||
|
}
|
||||||
|
|
||||||
|
static __init inline bool
|
||||||
|
-horrible_match_v4(const struct horrible_allowedips_node *node,
|
||||||
|
- struct in_addr *ip)
|
||||||
|
+horrible_match_v4(const struct horrible_allowedips_node *node, struct in_addr *ip)
|
||||||
|
{
|
||||||
|
return (ip->s_addr & node->mask.ip) == node->ip.ip;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __init inline bool
|
||||||
|
-horrible_match_v6(const struct horrible_allowedips_node *node,
|
||||||
|
- struct in6_addr *ip)
|
||||||
|
+horrible_match_v6(const struct horrible_allowedips_node *node, struct in6_addr *ip)
|
||||||
|
{
|
||||||
|
- return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) ==
|
||||||
|
- node->ip.ip6[0] &&
|
||||||
|
- (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) ==
|
||||||
|
- node->ip.ip6[1] &&
|
||||||
|
- (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) ==
|
||||||
|
- node->ip.ip6[2] &&
|
||||||
|
+ return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) == node->ip.ip6[0] &&
|
||||||
|
+ (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) == node->ip.ip6[1] &&
|
||||||
|
+ (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) == node->ip.ip6[2] &&
|
||||||
|
(ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3];
|
||||||
|
}
|
||||||
|
|
||||||
|
static __init void
|
||||||
|
-horrible_insert_ordered(struct horrible_allowedips *table,
|
||||||
|
- struct horrible_allowedips_node *node)
|
||||||
|
+horrible_insert_ordered(struct horrible_allowedips *table, struct horrible_allowedips_node *node)
|
||||||
|
{
|
||||||
|
struct horrible_allowedips_node *other = NULL, *where = NULL;
|
||||||
|
u8 my_cidr = horrible_mask_to_cidr(node->mask);
|
||||||
|
|
||||||
|
hlist_for_each_entry(other, &table->head, table) {
|
||||||
|
- if (!memcmp(&other->mask, &node->mask,
|
||||||
|
- sizeof(union nf_inet_addr)) &&
|
||||||
|
- !memcmp(&other->ip, &node->ip,
|
||||||
|
- sizeof(union nf_inet_addr)) &&
|
||||||
|
- other->ip_version == node->ip_version) {
|
||||||
|
+ if (other->ip_version == node->ip_version &&
|
||||||
|
+ !memcmp(&other->mask, &node->mask, sizeof(union nf_inet_addr)) &&
|
||||||
|
+ !memcmp(&other->ip, &node->ip, sizeof(union nf_inet_addr))) {
|
||||||
|
other->value = node->value;
|
||||||
|
kfree(node);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
+ }
|
||||||
|
+ hlist_for_each_entry(other, &table->head, table) {
|
||||||
|
where = other;
|
||||||
|
if (horrible_mask_to_cidr(other->mask) <= my_cidr)
|
||||||
|
break;
|
||||||
|
@@ -201,8 +181,7 @@ static __init int
|
||||||
|
horrible_allowedips_insert_v4(struct horrible_allowedips *table,
|
||||||
|
struct in_addr *ip, u8 cidr, void *value)
|
||||||
|
{
|
||||||
|
- struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
|
||||||
|
- GFP_KERNEL);
|
||||||
|
+ struct horrible_allowedips_node *node = kzalloc(sizeof(*node), GFP_KERNEL);
|
||||||
|
|
||||||
|
if (unlikely(!node))
|
||||||
|
return -ENOMEM;
|
||||||
|
@@ -219,8 +198,7 @@ static __init int
|
||||||
|
horrible_allowedips_insert_v6(struct horrible_allowedips *table,
|
||||||
|
struct in6_addr *ip, u8 cidr, void *value)
|
||||||
|
{
|
||||||
|
- struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
|
||||||
|
- GFP_KERNEL);
|
||||||
|
+ struct horrible_allowedips_node *node = kzalloc(sizeof(*node), GFP_KERNEL);
|
||||||
|
|
||||||
|
if (unlikely(!node))
|
||||||
|
return -ENOMEM;
|
||||||
|
@@ -234,39 +212,43 @@ horrible_allowedips_insert_v6(struct hor
|
||||||
|
}
|
||||||
|
|
||||||
|
static __init void *
|
||||||
|
-horrible_allowedips_lookup_v4(struct horrible_allowedips *table,
|
||||||
|
- struct in_addr *ip)
|
||||||
|
+horrible_allowedips_lookup_v4(struct horrible_allowedips *table, struct in_addr *ip)
|
||||||
|
{
|
||||||
|
struct horrible_allowedips_node *node;
|
||||||
|
- void *ret = NULL;
|
||||||
|
|
||||||
|
hlist_for_each_entry(node, &table->head, table) {
|
||||||
|
- if (node->ip_version != 4)
|
||||||
|
- continue;
|
||||||
|
- if (horrible_match_v4(node, ip)) {
|
||||||
|
- ret = node->value;
|
||||||
|
- break;
|
||||||
|
- }
|
||||||
|
+ if (node->ip_version == 4 && horrible_match_v4(node, ip))
|
||||||
|
+ return node->value;
|
||||||
|
}
|
||||||
|
- return ret;
|
||||||
|
+ return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __init void *
|
||||||
|
-horrible_allowedips_lookup_v6(struct horrible_allowedips *table,
|
||||||
|
- struct in6_addr *ip)
|
||||||
|
+horrible_allowedips_lookup_v6(struct horrible_allowedips *table, struct in6_addr *ip)
|
||||||
|
{
|
||||||
|
struct horrible_allowedips_node *node;
|
||||||
|
- void *ret = NULL;
|
||||||
|
|
||||||
|
hlist_for_each_entry(node, &table->head, table) {
|
||||||
|
- if (node->ip_version != 6)
|
||||||
|
+ if (node->ip_version == 6 && horrible_match_v6(node, ip))
|
||||||
|
+ return node->value;
|
||||||
|
+ }
|
||||||
|
+ return NULL;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+static __init void
|
||||||
|
+horrible_allowedips_remove_by_value(struct horrible_allowedips *table, void *value)
|
||||||
|
+{
|
||||||
|
+ struct horrible_allowedips_node *node;
|
||||||
|
+ struct hlist_node *h;
|
||||||
|
+
|
||||||
|
+ hlist_for_each_entry_safe(node, h, &table->head, table) {
|
||||||
|
+ if (node->value != value)
|
||||||
|
continue;
|
||||||
|
- if (horrible_match_v6(node, ip)) {
|
||||||
|
- ret = node->value;
|
||||||
|
- break;
|
||||||
|
- }
|
||||||
|
+ hlist_del(&node->table);
|
||||||
|
+ kfree(node);
|
||||||
|
}
|
||||||
|
- return ret;
|
||||||
|
+
|
||||||
|
}
|
||||||
|
|
||||||
|
static __init bool randomized_test(void)
|
||||||
|
@@ -397,23 +379,33 @@ static __init bool randomized_test(void)
|
||||||
|
print_tree(t.root6, 128);
|
||||||
|
}
|
||||||
|
|
||||||
|
- for (i = 0; i < NUM_QUERIES; ++i) {
|
||||||
|
- prandom_bytes(ip, 4);
|
||||||
|
- if (lookup(t.root4, 32, ip) !=
|
||||||
|
- horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) {
|
||||||
|
- pr_err("allowedips random self-test: FAIL\n");
|
||||||
|
- goto free;
|
||||||
|
+ for (j = 0;; ++j) {
|
||||||
|
+ for (i = 0; i < NUM_QUERIES; ++i) {
|
||||||
|
+ prandom_bytes(ip, 4);
|
||||||
|
+ if (lookup(t.root4, 32, ip) != horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) {
|
||||||
|
+ horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip);
|
||||||
|
+ pr_err("allowedips random v4 self-test: FAIL\n");
|
||||||
|
+ goto free;
|
||||||
|
+ }
|
||||||
|
+ prandom_bytes(ip, 16);
|
||||||
|
+ if (lookup(t.root6, 128, ip) != horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) {
|
||||||
|
+ pr_err("allowedips random v6 self-test: FAIL\n");
|
||||||
|
+ goto free;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
+ if (j >= NUM_PEERS)
|
||||||
|
+ break;
|
||||||
|
+ mutex_lock(&mutex);
|
||||||
|
+ wg_allowedips_remove_by_peer(&t, peers[j], &mutex);
|
||||||
|
+ mutex_unlock(&mutex);
|
||||||
|
+ horrible_allowedips_remove_by_value(&h, peers[j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
- for (i = 0; i < NUM_QUERIES; ++i) {
|
||||||
|
- prandom_bytes(ip, 16);
|
||||||
|
- if (lookup(t.root6, 128, ip) !=
|
||||||
|
- horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) {
|
||||||
|
- pr_err("allowedips random self-test: FAIL\n");
|
||||||
|
- goto free;
|
||||||
|
- }
|
||||||
|
+ if (t.root4 || t.root6) {
|
||||||
|
+ pr_err("allowedips random self-test removal: FAIL\n");
|
||||||
|
+ goto free;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
ret = true;
|
||||||
|
|
||||||
|
free:
|
Loading…
Reference in a new issue