tailscale/derp/xdp/headers/bpf_helpers.h

/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#ifndef __BPF_HELPERS__
#define __BPF_HELPERS__

/*
 * Note that bpf programs need to include either
 * vmlinux.h (auto-generated from BTF) or linux/types.h
 * in advance since bpf_helper_defs.h uses such types
 * as __u64.
 */
#include "bpf_helper_defs.h"

/*
 * Declaration helpers that carry a value or a type in a member's declared
 * type instead of in stored data.
 *
 * __uint(name, val)  - `name` is a pointer to an array of `val` ints.
 * __type(name, val)  - `name` is a pointer to typeof(val).
 * __array(name, val) - `name` is an unsized array of pointers to typeof(val).
 * __ulong(name, val) - `name` has an anonymous enum type whose only
 *                      enumerator equals `val`.
 *
 * The enumerator name in __ulong() is built with ___bpf_apply() rather than
 * ___bpf_concat(): an operand of ## is pasted without being macro-expanded,
 * so pasting directly would always yield the literal identifier
 * __unique_value__COUNTER__ and a second __ulong() in the same scope would
 * redefine it. The extra expansion level lets __COUNTER__ be replaced by its
 * number before the paste.
 */
#define __uint(name, val) int (*name)[val]
#define __type(name, val) typeof(val) *name
#define __array(name, val) typeof(val) *name[]
#define __ulong(name, val) enum { ___bpf_apply(__unique_value, __COUNTER__) = val } name

/*
 * Helper macro to place programs, maps, license in
 * different sections in elf_bpf file. Section names
 * are interpreted by libbpf depending on the context (BPF programs, BPF maps,
 * extern variables, etc).
 * To allow use of SEC() with externs (e.g., for extern .maps declarations),
 * make sure __attribute__((used)) doesn't trigger a compilation warning.
 */
/*
 * SEC(name) applies the `section(name)` and `used` attributes to the
 * declaration it annotates. Two spellings exist because of the compiler
 * check below.
 */
#if __GNUC__ && !__clang__

/*
 * Pragma macros are broken on GCC
 * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55578
 * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90400
 */
/* GCC (not clang): plain attribute, no diagnostic pragmas. */
#define SEC(name) __attribute__((section(name), used))

#else

/*
 * Every other compiler: the same attribute, wrapped in a diagnostic
 * push/pop pair that ignores -Wignored-attributes only for the attribute
 * itself. Note the last pragma line ends in a line continuation, so the
 * macro body formally extends onto the empty line that follows it.
 */
#define SEC(name) \
	_Pragma("GCC diagnostic push")					    \
	_Pragma("GCC diagnostic ignored \"-Wignored-attributes\"")	    \
	__attribute__((section(name), used))				    \
	_Pragma("GCC diagnostic pop")					    \

#endif

/* Avoid 'linux/stddef.h' definition of '__always_inline'. */
#undef __always_inline
#define __always_inline inline __attribute__((always_inline))

#ifndef __noinline
#define __noinline __attribute__((noinline))
#endif
#ifndef __weak
#define __weak __attribute__((weak))
#endif

/*
 * Use __hidden attribute to mark a non-static BPF subprogram effectively
 * static for BPF verifier's verification algorithm purposes, allowing more
 * extensive and permissive BPF verification process, taking into account
 * subprogram's caller context.
 */
#define __hidden __attribute__((visibility("hidden")))

/* When utilizing vmlinux.h with BPF CO-RE, user BPF programs can't include
 * any system-level headers (such as stddef.h, linux/version.h, etc), and
 * commonly-used macros like NULL and KERNEL_VERSION aren't available through
 * vmlinux.h. This just adds unnecessary hurdles and forces users to re-define
 * them on their own. So as a convenience, provide such definitions here.
 */
/* Fallback null pointer constant; a definition already in effect is kept. */
#if !defined(NULL)
#define NULL ((void *)0)
#endif

/*
 * Pack a three-part version number into one integer: `a` is shifted left by
 * 16 bits, `b` by 8 bits, and `c` is added after being capped at 255.
 * Skipped when KERNEL_VERSION is already defined.
 */
#if !defined(KERNEL_VERSION)
#define KERNEL_VERSION(a, b, c) \
	(((a) << 16) + ((b) << 8) + ((c) <= 255 ? (c) : 255))
#endif

/*
 * Helper macros to manipulate data structures
 */

/*
 * An offsetof() built on __builtin_offsetof() might not preserve the field
 * offset CO-RE relocation properly, so any earlier definition is dropped and
 * the macro is redefined the old-school way: as the address of `member`
 * inside a `type` object notionally located at address 0.
 */
#undef offsetof
#define offsetof(type, member)	((unsigned long)&(((type *)0)->member))

/*
 * container_of() is redefined as well so that it is guaranteed to use the
 * offsetof() above. Given `ptr`, the address of `member`, it steps back by
 * the member's offset and yields a pointer to the enclosing `type` object.
 * `ptr` is evaluated exactly once.
 */
#undef container_of
#define container_of(ptr, type, member)				\
	({							\
		void *___member = (void *)(ptr);		\
		___member -= offsetof(type, member);		\
		(type *)___member;				\
	})

/*
 * Compiler (optimization) barrier.
 *
 * An empty asm statement with a "memory" clobber; defined only when no
 * barrier() macro already exists.
 */
#ifndef barrier
#define barrier() asm volatile("" ::: "memory")
#endif

/* Variable-specific compiler (optimization) barrier. It's a no-op which makes
 * compiler believe that there is some black box modification of a given
 * variable and thus prevents compiler from making extra assumption about its
 * value and potential simplifications and optimizations on this variable.
 *
 * E.g., compiler might often delay or even omit 32-bit to 64-bit casting of
 * a variable, making some code patterns unverifiable. Putting barrier_var()
 * in place will ensure that cast is performed before the barrier_var()
 * invocation, because compiler has to pessimistically assume that embedded
 * asm section might perform some extra operations on that variable.
 *
 * This is a variable-specific variant of more global barrier().
 *
 * Implemented as an empty asm statement that lists `var` as a read-write
 * ("+r") register operand; defined only when no barrier_var() macro already
 * exists.
 */
#ifndef barrier_var
#define barrier_var(var) asm volatile("" : "+r"(var))
#endif

/*
 * Helper macro to throw a compilation error if __bpf_unreachable() gets
 * built into the resulting code. This works given BPF back end does not
 * implement __builtin_trap(). This is useful to assert that certain paths
 * of the program code are never used and hence eliminated by the compiler.
 *
 * For example, consider a switch statement that covers known cases used by
 * the program. __bpf_unreachable() can then reside in the default case. If
 * the program gets extended such that a case is not covered in the switch
 * statement, then it will throw a build error due to the default case not
 * being compiled out.
 */
/* Defined only if absent, so a definition made before this header wins. */
#ifndef __bpf_unreachable
# define __bpf_unreachable()	__builtin_trap()
#endif

/*
 * Helper function to perform a tail call with a constant/immediate map slot.
 *
 * @ctx:  value moved into r1 before the call instruction
 * @map:  value moved into r2 before the call instruction
 * @slot: index moved into r3 as an immediate operand ("i" constraint); when
 *        the compiler cannot prove it is a compile-time constant,
 *        __bpf_unreachable() is invoked
 *
 * The function returns nothing. It is only compiled when targeting BPF
 * (__bpf__ defined) with clang major version 8 or newer.
 */
#if __clang_major__ >= 8 && defined(__bpf__)
static __always_inline void
bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
{
	/* reject non-constant slots at build time (see __bpf_unreachable()) */
	if (!__builtin_constant_p(slot))
		__bpf_unreachable();

	/*
	 * Provide a hard guarantee that LLVM won't optimize setting r2 (map
	 * pointer) and r3 (constant map index) from _different paths_ ending
	 * up at the _same_ call insn as otherwise we won't be able to use the
	 * jmpq/nopl retpoline-free patching by the x86-64 JIT in the kernel
	 * given they mismatch. See also d2e4c1e6c294 ("bpf: Constant map key
	 * tracking for prog array pokes") for details on verifier tracking.
	 *
	 * Note on clobber list: we need to stay in-line with BPF calling
	 * convention, so even if we don't end up using r0, r4, r5, we need
	 * to mark them as clobber so that LLVM doesn't end up using them
	 * before / after the call.
	 */
	/*
	 * NOTE(review): "call 12" calls helper number 12, presumably the
	 * tail-call helper -- confirm against bpf_helper_defs.h.
	 */
	asm volatile("r1 = %[ctx]\n\t"
		     "r2 = %[map]\n\t"
		     "r3 = %[slot]\n\t"
		     "call 12"
		     :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot)
		     : "r0", "r1", "r2", "r3", "r4", "r5");
}
#endif

/* Pinning choices; numeric values are spelled out so they cannot drift. */
enum libbpf_pin_type {
	LIBBPF_PIN_NONE = 0,
	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
	LIBBPF_PIN_BY_NAME = 1,
};

/*
 * Three-valued setting.
 * NOTE(review): presumably mirrors Kconfig n/y/m values -- confirm.
 */
enum libbpf_tristate {
	TRI_NO = 0,		/* lowest value */
	TRI_YES = 1,
	TRI_MODULE = 2,		/* highest value */
};

/*
 * Declaration annotations.
 *
 * __kconfig and __ksym place the annotated declaration in the ".kconfig"
 * and ".ksyms" sections respectively. The three __kptr variants attach a
 * btf_type_tag attribute carrying the tag string shown in each definition.
 */
#define __kconfig __attribute__((section(".kconfig")))
#define __ksym __attribute__((section(".ksyms")))
#define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted")))
#define __kptr __attribute__((btf_type_tag("kptr")))
#define __percpu_kptr __attribute__((btf_type_tag("percpu_kptr")))

/*
 * bpf_ksym_exists(sym) evaluates to !!sym, i.e. 1 when `sym` is non-zero
 * and 0 otherwise.
 *
 * The static assertion rejects uses where the compiler can already fold
 * !!sym to a constant; its message states that such a symbol should have
 * been declared __weak.
 */
#define bpf_ksym_exists(sym) ({									\
	_Static_assert(!__builtin_constant_p(!!sym), #sym " should be marked as __weak");	\
	!!sym;											\
})

/*
 * Parameter annotations. Each macro attaches a btf_decl_tag attribute
 * carrying the "arg:<kind>" string shown in its definition.
 *
 * All five use the same __attribute__ spelling as the rest of this header.
 */
#define __arg_ctx __attribute__((btf_decl_tag("arg:ctx")))
#define __arg_nonnull __attribute__((btf_decl_tag("arg:nonnull")))
#define __arg_nullable __attribute__((btf_decl_tag("arg:nullable")))
#define __arg_trusted __attribute__((btf_decl_tag("arg:trusted")))
#define __arg_arena __attribute__((btf_decl_tag("arg:arena")))

/*
 * Preprocessor plumbing for the variadic wrappers in this header. The first
 * four macros are only defined when no macro of that name exists yet.
 */

/* Paste two tokens together without expanding either of them first. */
#if !defined(___bpf_concat)
#define ___bpf_concat(a, b) a ## b
#endif

/* Expand both arguments, then paste them (e.g. a macro name plus a count). */
#if !defined(___bpf_apply)
#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
#endif

/* Select the 14th argument; everything before it is discarded. */
#if !defined(___bpf_nth)
#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
#endif

/* Count the arguments passed (0 through 12) by sliding a descending list. */
#if !defined(___bpf_narg)
#define ___bpf_narg(...) \
	___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
#endif

/*
 * ___bpf_fillN(arr, p, x, ...) stores its N values into arr[p], arr[p + 1],
 * and so on: each level assigns one element and hands the remaining values
 * to the level below. ___bpf_fill0 assigns nothing.
 */
#define ___bpf_fill0(arr, p, x) do {} while (0)
#define ___bpf_fill1(arr, p, x) arr[p] = x
#define ___bpf_fill2(arr, p, x, ...) arr[p] = x; ___bpf_fill1(arr, p + 1, __VA_ARGS__)
#define ___bpf_fill3(arr, p, x, ...) arr[p] = x; ___bpf_fill2(arr, p + 1, __VA_ARGS__)
#define ___bpf_fill4(arr, p, x, ...) arr[p] = x; ___bpf_fill3(arr, p + 1, __VA_ARGS__)
#define ___bpf_fill5(arr, p, x, ...) arr[p] = x; ___bpf_fill4(arr, p + 1, __VA_ARGS__)
#define ___bpf_fill6(arr, p, x, ...) arr[p] = x; ___bpf_fill5(arr, p + 1, __VA_ARGS__)
#define ___bpf_fill7(arr, p, x, ...) arr[p] = x; ___bpf_fill6(arr, p + 1, __VA_ARGS__)
#define ___bpf_fill8(arr, p, x, ...) arr[p] = x; ___bpf_fill7(arr, p + 1, __VA_ARGS__)
#define ___bpf_fill9(arr, p, x, ...) arr[p] = x; ___bpf_fill8(arr, p + 1, __VA_ARGS__)
#define ___bpf_fill10(arr, p, x, ...) arr[p] = x; ___bpf_fill9(arr, p + 1, __VA_ARGS__)
#define ___bpf_fill11(arr, p, x, ...) arr[p] = x; ___bpf_fill10(arr, p + 1, __VA_ARGS__)
#define ___bpf_fill12(arr, p, x, ...) arr[p] = x; ___bpf_fill11(arr, p + 1, __VA_ARGS__)

/* Entry point: pick ___bpf_fillN by argument count and start at index 0. */
#define ___bpf_fill(arr, ...) \
	___bpf_apply(___bpf_fill, ___bpf_narg(__VA_ARGS__))(arr, 0, __VA_ARGS__)

/*
 * BPF_SEQ_PRINTF wraps bpf_seq_printf, packing the to-be-printed values
 * into an array.
 *
 * Expands to a statement expression that:
 *  - keeps `fmt` in a static const char array;
 *  - copies `args` into a local unsigned long long array sized by the
 *    argument count (___bpf_narg()/___bpf_fill() handle at most 12 values),
 *    with -Wint-conversion ignored only around that copy;
 *  - calls bpf_seq_printf(seq, fmt, sizeof(fmt), values, sizeof(values)),
 *    whose result is the value of the whole expression.
 */
#define BPF_SEQ_PRINTF(seq, fmt, args...)			\
({								\
	static const char ___fmt[] = fmt;			\
	unsigned long long ___param[___bpf_narg(args)];		\
								\
	_Pragma("GCC diagnostic push")				\
	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")	\
	___bpf_fill(___param, args);				\
	_Pragma("GCC diagnostic pop")				\
								\
	bpf_seq_printf(seq, ___fmt, sizeof(___fmt),		\
		       ___param, sizeof(___param));		\
})

/*
 * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of
 * an array of u64.
 *
 * Same structure as BPF_SEQ_PRINTF above, except that the final call is
 * bpf_snprintf(out, out_size, fmt, values, sizeof(values)); note that the
 * size of the format string is not passed here.
 */
#define BPF_SNPRINTF(out, out_size, fmt, args...)		\
({								\
	static const char ___fmt[] = fmt;			\
	unsigned long long ___param[___bpf_narg(args)];		\
								\
	_Pragma("GCC diagnostic push")				\
	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")	\
	___bpf_fill(___param, args);				\
	_Pragma("GCC diagnostic pop")				\
								\
	bpf_snprintf(out, out_size, ___fmt,			\
		     ___param, sizeof(___param));		\
})

/*
 * Storage qualifiers for the format string used by __bpf_printk(): empty
 * when BPF_NO_GLOBAL_DATA is defined (the string is then a plain local
 * array), `static const` otherwise.
 */
#ifdef BPF_NO_GLOBAL_DATA
#define BPF_PRINTK_FMT_MOD
#else
#define BPF_PRINTK_FMT_MOD static const
#endif

/*
 * __bpf_printk copies `fmt` into a char array and forwards it, its size and
 * the remaining arguments unchanged to bpf_trace_printk(). The result of
 * that call is the value of the expression.
 */
#define __bpf_printk(fmt, ...)				\
({							\
	BPF_PRINTK_FMT_MOD char ____fmt[] = fmt;	\
	bpf_trace_printk(____fmt, sizeof(____fmt),	\
			 ##__VA_ARGS__);		\
})

/*
 * __bpf_vprintk wraps the bpf_trace_vprintk helper with variadic arguments
 * instead of an array of u64.
 *
 * The arguments are first copied into a local unsigned long long array
 * (with -Wint-conversion ignored around the copy), then passed together
 * with the array's size.
 */
#define __bpf_vprintk(fmt, args...)				\
({								\
	static const char ___fmt[] = fmt;			\
	unsigned long long ___param[___bpf_narg(args)];		\
								\
	_Pragma("GCC diagnostic push")				\
	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")	\
	___bpf_fill(___param, args);				\
	_Pragma("GCC diagnostic pop")				\
								\
	bpf_trace_vprintk(___fmt, sizeof(___fmt),		\
			  ___param, sizeof(___param));		\
})

/* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args
 * Otherwise use __bpf_vprintk
 *
 * Works like ___bpf_narg(): the arguments shift a list of candidate macro
 * names, and ___bpf_nth() picks the name that lands in the selected slot.
 */
#define ___bpf_pick_printk(...) \
	___bpf_nth(_, ##__VA_ARGS__, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk,	\
		   __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk,		\
		   __bpf_vprintk, __bpf_vprintk, __bpf_printk /*3*/, __bpf_printk /*2*/,\
		   __bpf_printk /*1*/, __bpf_printk /*0*/)

/* Helper macro to print out debug messages */
/* The macro chosen by ___bpf_pick_printk() is invoked with (fmt, args). */
#define bpf_printk(fmt, args...) ___bpf_pick_printk(args)(fmt, ##args)

/*
 * Forward declaration only; the layout of struct bpf_iter_num is not
 * defined in this header.
 */
struct bpf_iter_num;

/*
 * Numeric iterator entry points used by bpf_for() and bpf_repeat() below
 * (and by bpf_for_each(num, ...)). All three are declared extern with the
 * __weak and __ksym annotations.
 */
extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __weak __ksym;
extern int *bpf_iter_num_next(struct bpf_iter_num *it) __weak __ksym;
extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __weak __ksym;

#ifndef bpf_for_each
/* bpf_for_each(iter_type, cur_elem, args...) provides generic construct for
 * using BPF open-coded iterators without having to write mundane explicit
 * low-level loop logic. Instead, it provides for()-like generic construct
 * that can be used pretty naturally. E.g., for some hypothetical cgroup
 * iterator, you'd write:
 *
 * struct cgroup *cg, *parent_cg = <...>;
 *
 * bpf_for_each(cgroup, cg, parent_cg, CG_ITER_CHILDREN) {
 *     bpf_printk("Child cgroup id = %d", cg->cgroup_id);
 *     if (cg->cgroup_id == 123)
 *         break;
 * }
 *
 * I.e., it looks almost like high-level for each loop in other languages,
 * supports continue/break, and is verifiable by BPF verifier.
 *
 * For iterating integers, the difference between bpf_for_each(num, i, N, M)
 * and bpf_for(i, N, M) is in that bpf_for() provides additional proof to
 * verifier that i is in [N, M) range, and in bpf_for_each() case i is `int
 * *`, not just `int`. So for integers bpf_for() is more convenient.
 *
 * Note: this macro relies on C99 feature of allowing to declare variables
 * inside for() loop, bound to for() loop lifetime. It also utilizes GCC
 * extension: __attribute__((cleanup(<func>))), supported by both GCC and
 * Clang.
 *
 * Expansion summary: `type` selects struct bpf_iter_<type> and the functions
 * bpf_iter_<type>_new/_next/_destroy; `args` are forwarded to _new() after
 * the iterator pointer; the loop runs while the pointer returned by _next()
 * and assigned to `cur` is non-NULL. The loop's third clause is empty.
 */
#define bpf_for_each(type, cur, args...) for (							\
	/* initialize and define destructor */							\
	struct bpf_iter_##type ___it __attribute__((aligned(8), /* enforce, just in case */	\
						    cleanup(bpf_iter_##type##_destroy))),	\
	/* ___p pointer is just to call bpf_iter_##type##_new() *once* to init ___it */		\
			       *___p __attribute__((unused)) = (				\
					bpf_iter_##type##_new(&___it, ##args),			\
	/* this is a workaround for Clang bug: it currently doesn't emit BTF */			\
	/* for bpf_iter_##type##_destroy() when used from cleanup() attribute */		\
					(void)bpf_iter_##type##_destroy, (void *)0);		\
	/* iteration and termination check */							\
	(((cur) = bpf_iter_##type##_next(&___it)));						\
)
#endif /* bpf_for_each */

#ifndef bpf_for
/* bpf_for(i, start, end) implements a for()-like looping construct that sets
 * provided integer variable *i* to values starting from *start* through,
 * but not including, *end*. It also proves to BPF verifier that *i* belongs
 * to range [start, end), so this can be used for accessing arrays without
 * extra checks.
 *
 * Note: *start* and *end* are assumed to be expressions with no side effects
 * and whose values do not change throughout bpf_for() loop execution. They do
 * not have to be statically known or constant, though.
 *
 * Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for()
 * loop bound variables and cleanup attribute, supported by GCC and Clang.
 *
 * Expansion summary: *start* and *end* are evaluated once by
 * bpf_iter_num_new() and again on every pass by the range comparison in the
 * loop condition, which is why they must be side-effect free. The loop ends
 * when bpf_iter_num_next() returns NULL or the fetched value falls outside
 * [start, end). The loop's third clause is empty; advancing happens inside
 * the condition.
 */
#define bpf_for(i, start, end) for (								\
	/* initialize and define destructor */							\
	struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */	\
						 cleanup(bpf_iter_num_destroy))),		\
	/* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */		\
			    *___p __attribute__((unused)) = (					\
				bpf_iter_num_new(&___it, (start), (end)),			\
	/* this is a workaround for Clang bug: it currently doesn't emit BTF */			\
	/* for bpf_iter_num_destroy() when used from cleanup() attribute */			\
				(void)bpf_iter_num_destroy, (void *)0);				\
	({											\
		/* iteration step */								\
		int *___t = bpf_iter_num_next(&___it);						\
		/* termination and bounds check */						\
		(___t && ((i) = *___t, (i) >= (start) && (i) < (end)));				\
	});											\
)
#endif /* bpf_for */

#ifndef bpf_repeat
/* bpf_repeat(N) performs N iterations without exposing iteration number
 *
 * Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for()
 * loop bound variables and cleanup attribute, supported by GCC and Clang.
 *
 * Expansion summary: the iterator is created with bpf_iter_num_new(&it, 0, N)
 * and the loop continues for as long as bpf_iter_num_next() returns a
 * non-NULL pointer; the value it points to is never read.
 */
#define bpf_repeat(N) for (									\
	/* initialize and define destructor */							\
	struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */	\
						 cleanup(bpf_iter_num_destroy))),		\
	/* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */		\
			    *___p __attribute__((unused)) = (					\
				bpf_iter_num_new(&___it, 0, (N)),				\
	/* this is a workaround for Clang bug: it currently doesn't emit BTF */			\
	/* for bpf_iter_num_destroy() when used from cleanup() attribute */			\
				(void)bpf_iter_num_destroy, (void *)0);				\
	bpf_iter_num_next(&___it);								\
	/* nothing here  */									\
)
#endif /* bpf_repeat */

#endif
derp/xdp,cmd/xdpderper: initial skeleton (#12390) This commit introduces a userspace program for managing an experimental eBPF XDP STUN server program. derp/xdp contains the eBPF pseudo-C along with a Go pkg for loading it and exporting its metrics. cmd/xdpderper is a package main user of derp/xdp. Updates tailscale/corp#20689 Signed-off-by: Jordan Whited <jordan@tailscale.com> 5 months ago			`/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */`
			`#ifndef __BPF_HELPERS__`
			`#define __BPF_HELPERS__`

			`/*`
			`* Note that bpf programs need to include either`
			`* vmlinux.h (auto-generated from BTF) or linux/types.h`
			`* in advance since bpf_helper_defs.h uses such types`
			`* as __u64.`
			`*/`
			`#include "bpf_helper_defs.h"`

			`#define __uint(name, val) int (*name)[val]`
			`#define __type(name, val) typeof(val) *name`
			`#define __array(name, val) typeof(val) *name[]`
			`#define __ulong(name, val) enum { ___bpf_concat(__unique_value, __COUNTER__) = val } name`

			`/*`
			`* Helper macro to place programs, maps, license in`
			`* different sections in elf_bpf file. Section names`
			`* are interpreted by libbpf depending on the context (BPF programs, BPF maps,`
			`* extern variables, etc).`
			`* To allow use of SEC() with externs (e.g., for extern .maps declarations),`
			`* make sure __attribute__((unused)) doesn't trigger compilation warning.`
			`*/`
			`#if __GNUC__ && !__clang__`

			`/*`
			`* Pragma macros are broken on GCC`
			`* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55578`
			`* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90400`
			`*/`
			`#define SEC(name) __attribute__((section(name), used))`

			`#else`

			`#define SEC(name) \`
			`_Pragma("GCC diagnostic push") \`
			`_Pragma("GCC diagnostic ignored \"-Wignored-attributes\"") \`
			`__attribute__((section(name), used)) \`
			`_Pragma("GCC diagnostic pop") \`

			`#endif`

			`/* Avoid 'linux/stddef.h' definition of '__always_inline'. */`
			`#undef __always_inline`
			`#define __always_inline inline __attribute__((always_inline))`

			`#ifndef __noinline`
			`#define __noinline __attribute__((noinline))`
			`#endif`
			`#ifndef __weak`
			`#define __weak __attribute__((weak))`
			`#endif`

			`/*`
			`* Use __hidden attribute to mark a non-static BPF subprogram effectively`
			`* static for BPF verifier's verification algorithm purposes, allowing more`
			`* extensive and permissive BPF verification process, taking into account`
			`* subprogram's caller context.`
			`*/`
			`#define __hidden __attribute__((visibility("hidden")))`

			`/* When utilizing vmlinux.h with BPF CO-RE, user BPF programs can't include`
			`* any system-level headers (such as stddef.h, linux/version.h, etc), and`
			`* commonly-used macros like NULL and KERNEL_VERSION aren't available through`
			`* vmlinux.h. This just adds unnecessary hurdles and forces users to re-define`
			`* them on their own. So as a convenience, provide such definitions here.`
			`*/`
			`#ifndef NULL`
			`#define NULL ((void *)0)`
			`#endif`

			`#ifndef KERNEL_VERSION`
			`#define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c)))`
			`#endif`

			`/*`
			`* Helper macros to manipulate data structures`
			`*/`

			`/* offsetof() definition that uses __builtin_offset() might not preserve field`
			`* offset CO-RE relocation properly, so force-redefine offsetof() using`
			`* old-school approach which works with CO-RE correctly`
			`*/`
			`#undef offsetof`
			`#define offsetof(type, member) ((unsigned long)&((type *)0)->member)`

			`/* redefined container_of() to ensure we use the above offsetof() macro */`
			`#undef container_of`
			`#define container_of(ptr, type, member) \`
			`({ \`
			`void *__mptr = (void *)(ptr); \`
			`((type *)(__mptr - offsetof(type, member))); \`
			`})`

			`/*`
			`* Compiler (optimization) barrier.`
			`*/`
			`#ifndef barrier`
			`#define barrier() asm volatile("" ::: "memory")`
			`#endif`

			`/* Variable-specific compiler (optimization) barrier. It's a no-op which makes`
			`* compiler believe that there is some black box modification of a given`
			`* variable and thus prevents compiler from making extra assumption about its`
			`* value and potential simplifications and optimizations on this variable.`
			`*`
			`* E.g., compiler might often delay or even omit 32-bit to 64-bit casting of`
			`* a variable, making some code patterns unverifiable. Putting barrier_var()`
			`* in place will ensure that cast is performed before the barrier_var()`
			`* invocation, because compiler has to pessimistically assume that embedded`
			`* asm section might perform some extra operations on that variable.`
			`*`
			`* This is a variable-specific variant of more global barrier().`
			`*/`
			`#ifndef barrier_var`
			`#define barrier_var(var) asm volatile("" : "+r"(var))`
			`#endif`

			`/*`
			`* Helper macro to throw a compilation error if __bpf_unreachable() gets`
			`* built into the resulting code. This works given BPF back end does not`
			`* implement __builtin_trap(). This is useful to assert that certain paths`
			`* of the program code are never used and hence eliminated by the compiler.`
			`*`
			`* For example, consider a switch statement that covers known cases used by`
			`* the program. __bpf_unreachable() can then reside in the default case. If`
			`* the program gets extended such that a case is not covered in the switch`
			`* statement, then it will throw a build error due to the default case not`
			`* being compiled out.`
			`*/`
			`#ifndef __bpf_unreachable`
			`# define __bpf_unreachable() __builtin_trap()`
			`#endif`

			`/*`
			`* Helper function to perform a tail call with a constant/immediate map slot.`
			`*/`
			`#if __clang_major__ >= 8 && defined(__bpf__)`
			`static __always_inline void`
			`bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)`
			`{`
			`if (!__builtin_constant_p(slot))`
			`__bpf_unreachable();`

			`/*`
			`* Provide a hard guarantee that LLVM won't optimize setting r2 (map`
			`* pointer) and r3 (constant map index) from _different paths_ ending`
			`* up at the _same_ call insn as otherwise we won't be able to use the`
			`* jmpq/nopl retpoline-free patching by the x86-64 JIT in the kernel`
			`* given they mismatch. See also d2e4c1e6c294 ("bpf: Constant map key`
			`* tracking for prog array pokes") for details on verifier tracking.`
			`*`
			`* Note on clobber list: we need to stay in-line with BPF calling`
			`* convention, so even if we don't end up using r0, r4, r5, we need`
			`* to mark them as clobber so that LLVM doesn't end up using them`
			`* before / after the call.`
			`*/`
			`asm volatile("r1 = %[ctx]\n\t"`
			`"r2 = %[map]\n\t"`
			`"r3 = %[slot]\n\t"`
			`"call 12"`
			`:: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot)`
			`: "r0", "r1", "r2", "r3", "r4", "r5");`
			`}`
			`#endif`

			`enum libbpf_pin_type {`
			`LIBBPF_PIN_NONE,`
			`/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */`
			`LIBBPF_PIN_BY_NAME,`
			`};`

			`enum libbpf_tristate {`
			`TRI_NO = 0,`
			`TRI_YES = 1,`
			`TRI_MODULE = 2,`
			`};`

			`#define __kconfig __attribute__((section(".kconfig")))`
			`#define __ksym __attribute__((section(".ksyms")))`
			`#define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted")))`
			`#define __kptr __attribute__((btf_type_tag("kptr")))`
			`#define __percpu_kptr __attribute__((btf_type_tag("percpu_kptr")))`

			`#define bpf_ksym_exists(sym) ({ \`
			`_Static_assert(!__builtin_constant_p(!!sym), #sym " should be marked as __weak"); \`
			`!!sym; \`
			`})`

			`#define __arg_ctx __attribute__((btf_decl_tag("arg:ctx")))`
			`#define __arg_nonnull __attribute((btf_decl_tag("arg:nonnull")))`
			`#define __arg_nullable __attribute((btf_decl_tag("arg:nullable")))`
			`#define __arg_trusted __attribute((btf_decl_tag("arg:trusted")))`
			`#define __arg_arena __attribute((btf_decl_tag("arg:arena")))`

			`#ifndef ___bpf_concat`
			`#define ___bpf_concat(a, b) a ## b`
			`#endif`
			`#ifndef ___bpf_apply`
			`#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)`
			`#endif`
			`#ifndef ___bpf_nth`
			`#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N`
			`#endif`
			`#ifndef ___bpf_narg`
			`#define ___bpf_narg(...) \`
			`___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)`
			`#endif`

			`#define ___bpf_fill0(arr, p, x) do {} while (0)`
			`#define ___bpf_fill1(arr, p, x) arr[p] = x`
			`#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args)`
			`#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args)`
			`#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args)`
			`#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args)`
			`#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args)`
			`#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args)`
			`#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args)`
			`#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args)`
			`#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args)`
			`#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args)`
			`#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args)`
			`#define ___bpf_fill(arr, args...) \`
			`___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args)`

			`/*`
			`* BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values`
			`* in a structure.`
			`*/`
			`#define BPF_SEQ_PRINTF(seq, fmt, args...) \`
			`({ \`
			`static const char ___fmt[] = fmt; \`
			`unsigned long long ___param[___bpf_narg(args)]; \`
			`\`
			`_Pragma("GCC diagnostic push") \`
			`_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \`
			`___bpf_fill(___param, args); \`
			`_Pragma("GCC diagnostic pop") \`
			`\`
			`bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \`
			`___param, sizeof(___param)); \`
			`})`

			`/*`
			`* BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of`
			`* an array of u64.`
			`*/`
			`#define BPF_SNPRINTF(out, out_size, fmt, args...) \`
			`({ \`
			`static const char ___fmt[] = fmt; \`
			`unsigned long long ___param[___bpf_narg(args)]; \`
			`\`
			`_Pragma("GCC diagnostic push") \`
			`_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \`
			`___bpf_fill(___param, args); \`
			`_Pragma("GCC diagnostic pop") \`
			`\`
			`bpf_snprintf(out, out_size, ___fmt, \`
			`___param, sizeof(___param)); \`
			`})`

			`#ifdef BPF_NO_GLOBAL_DATA`
			`#define BPF_PRINTK_FMT_MOD`
			`#else`
			`#define BPF_PRINTK_FMT_MOD static const`
			`#endif`

			`#define __bpf_printk(fmt, ...) \`
			`({ \`
			`BPF_PRINTK_FMT_MOD char ____fmt[] = fmt; \`
			`bpf_trace_printk(____fmt, sizeof(____fmt), \`
			`##__VA_ARGS__); \`
			`})`

			`/*`
			`* __bpf_vprintk wraps the bpf_trace_vprintk helper with variadic arguments`
			`* instead of an array of u64.`
			`*/`
			`#define __bpf_vprintk(fmt, args...) \`
			`({ \`
			`static const char ___fmt[] = fmt; \`
			`unsigned long long ___param[___bpf_narg(args)]; \`
			`\`
			`_Pragma("GCC diagnostic push") \`
			`_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \`
			`___bpf_fill(___param, args); \`
			`_Pragma("GCC diagnostic pop") \`
			`\`
			`bpf_trace_vprintk(___fmt, sizeof(___fmt), \`
			`___param, sizeof(___param)); \`
			`})`

			`/* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args`
			`* Otherwise use __bpf_vprintk`
			`*/`
			`#define ___bpf_pick_printk(...) \`
			`___bpf_nth(_, ##__VA_ARGS__, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \`
			`__bpf_vprintk, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \`
			`__bpf_vprintk, __bpf_vprintk, __bpf_printk /*3*/, __bpf_printk /*2*/,\`
			`__bpf_printk /*1*/, __bpf_printk /*0*/)`

			`/* Helper macro to print out debug messages */`
			`#define bpf_printk(fmt, args...) ___bpf_pick_printk(args)(fmt, ##args)`

			`struct bpf_iter_num;`

			`extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __weak __ksym;`
			`extern int *bpf_iter_num_next(struct bpf_iter_num *it) __weak __ksym;`
			`extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __weak __ksym;`

			`#ifndef bpf_for_each`
			`/* bpf_for_each(iter_type, cur_elem, args...) provides generic construct for`
			`* using BPF open-coded iterators without having to write mundane explicit`
			`* low-level loop logic. Instead, it provides for()-like generic construct`
			`* that can be used pretty naturally. E.g., for some hypothetical cgroup`
			`* iterator, you'd write:`
			`*`
			`* struct cgroup cg, parent_cg = <...>;`
			`*`
			`* bpf_for_each(cgroup, cg, parent_cg, CG_ITER_CHILDREN) {`
			`* bpf_printk("Child cgroup id = %d", cg->cgroup_id);`
			`* if (cg->cgroup_id == 123)`
			`* break;`
			`* }`
			`*`
			`* I.e., it looks almost like high-level for each loop in other languages,`
			`* supports continue/break, and is verifiable by BPF verifier.`
			`*`
			`* For iterating integers, the difference betwen bpf_for_each(num, i, N, M)`
			`* and bpf_for(i, N, M) is in that bpf_for() provides additional proof to`
			* verifier that i is in [N, M) range, and in bpf_for_each() case i is `int
			* *`, not just `int`. So for integers bpf_for() is more convenient.
			`*`
			`* Note: this macro relies on C99 feature of allowing to declare variables`
			`* inside for() loop, bound to for() loop lifetime. It also utilizes GCC`
			`* extension: __attribute__((cleanup(<func>))), supported by both GCC and`
			`* Clang.`
			`*/`
			`#define bpf_for_each(type, cur, args...) for ( \`
			`/* initialize and define destructor */ \`
			`struct bpf_iter_##type ___it __attribute__((aligned(8), /* enforce, just in case */, \`
			`cleanup(bpf_iter_##type##_destroy))), \`
			`/* ___p pointer is just to call bpf_iter_##type##_new() once to init ___it */ \`
			`*___p __attribute__((unused)) = ( \`
			`bpf_iter_##type##_new(&___it, ##args), \`
			`/* this is a workaround for Clang bug: it currently doesn't emit BTF */ \`
			`/* for bpf_iter_##type##_destroy() when used from cleanup() attribute */ \`
			`(void)bpf_iter_##type##_destroy, (void *)0); \`
			`/* iteration and termination check */ \`
			`(((cur) = bpf_iter_##type##_next(&___it))); \`
			`)`
			`#endif /* bpf_for_each */`

			`#ifndef bpf_for`
			`/* bpf_for(i, start, end) implements a for()-like looping construct that sets`
			`* provided integer variable i to values starting from start through,`
			`* but not including, end. It also proves to BPF verifier that i belongs`
			`* to range [start, end), so this can be used for accessing arrays without`
			`* extra checks.`
			`*`
			`* Note: start and end are assumed to be expressions with no side effects`
			`* and whose values do not change throughout bpf_for() loop execution. They do`
			`* not have to be statically known or constant, though.`
			`*`
			`* Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for()`
			`* loop bound variables and cleanup attribute, supported by GCC and Clang.`
			`*/`
			`#define bpf_for(i, start, end) for ( \`
			`/* initialize and define destructor */ \`
			`struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */ \`
			`cleanup(bpf_iter_num_destroy))), \`
			`/* ___p pointer is necessary to call bpf_iter_num_new() once to init ___it */ \`
			`*___p __attribute__((unused)) = ( \`
			`bpf_iter_num_new(&___it, (start), (end)), \`
			`/* this is a workaround for Clang bug: it currently doesn't emit BTF */ \`
			`/* for bpf_iter_num_destroy() when used from cleanup() attribute */ \`
			`(void)bpf_iter_num_destroy, (void *)0); \`
			`({ \`
			`/* iteration step */ \`
			`int *___t = bpf_iter_num_next(&___it); \`
			`/* termination and bounds check */ \`
			`(___t && ((i) = *___t, (i) >= (start) && (i) < (end))); \`
			`}); \`
			`)`
			`#endif /* bpf_for */`

			`#ifndef bpf_repeat`
			`/* bpf_repeat(N) performs N iterations without exposing iteration number`
			`*`
			`* Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for()`
			`* loop bound variables and cleanup attribute, supported by GCC and Clang.`
			`*/`
			`#define bpf_repeat(N) for ( \`
			`/* initialize and define destructor */ \`
			`struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */ \`
			`cleanup(bpf_iter_num_destroy))), \`
			`/* ___p pointer is necessary to call bpf_iter_num_new() once to init ___it */ \`
			`*___p __attribute__((unused)) = ( \`
			`bpf_iter_num_new(&___it, 0, (N)), \`
			`/* this is a workaround for Clang bug: it currently doesn't emit BTF */ \`
			`/* for bpf_iter_num_destroy() when used from cleanup() attribute */ \`
			`(void)bpf_iter_num_destroy, (void *)0); \`
			`bpf_iter_num_next(&___it); \`
			`/* nothing here */ \`
			`)`
			`#endif /* bpf_repeat */`

			`#endif`