1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3  * Copyright (c) 2016 Facebook
4  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5  */
6 #include <uapi/linux/btf.h>
7 #include <linux/bpf-cgroup.h>
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/slab.h>
11 #include <linux/bpf.h>
12 #include <linux/btf.h>
13 #include <linux/bpf_verifier.h>
14 #include <linux/filter.h>
15 #include <net/netlink.h>
16 #include <linux/file.h>
17 #include <linux/vmalloc.h>
18 #include <linux/stringify.h>
19 #include <linux/bsearch.h>
20 #include <linux/sort.h>
21 #include <linux/perf_event.h>
22 #include <linux/ctype.h>
23 #include <linux/error-injection.h>
24 #include <linux/bpf_lsm.h>
25 #include <linux/btf_ids.h>
26 #include <linux/poison.h>
27 #include <linux/module.h>
28 #include <linux/cpumask.h>
29 #include <linux/bpf_mem_alloc.h>
30 #include <net/xdp.h>
31 #include <linux/trace_events.h>
32 #include <linux/kallsyms.h>
33 
34 #include "disasm.h"
35 
36 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
37 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
38 	[_id] = & _name ## _verifier_ops,
39 #define BPF_MAP_TYPE(_id, _ops)
40 #define BPF_LINK_TYPE(_id, _name)
41 #include <linux/bpf_types.h>
42 #undef BPF_PROG_TYPE
43 #undef BPF_MAP_TYPE
44 #undef BPF_LINK_TYPE
45 };
46 
47 struct bpf_mem_alloc bpf_global_percpu_ma;
48 static bool bpf_global_percpu_ma_set;
49 
50 /* bpf_check() is a static code analyzer that walks eBPF program
51  * instruction by instruction and updates register/stack state.
52  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
53  *
54  * The first pass is depth-first-search to check that the program is a DAG.
55  * It rejects the following programs:
56  * - larger than BPF_MAXINSNS insns
57  * - if loop is present (detected via back-edge)
58  * - unreachable insns exist (shouldn't be a forest. program = one function)
59  * - out of bounds or malformed jumps
60  * The second pass is all possible path descent from the 1st insn.
61  * Since it's analyzing all paths through the program, the length of the
62  * analysis is limited to 64k insn, which may be hit even if total number of
63  * insn is less than 4K, but there are too many branches that change stack/regs.
64  * Number of 'branches to be analyzed' is limited to 1k.
65  *
66  * On entry to each instruction, each register has a type, and the instruction
67  * changes the types of the registers depending on instruction semantics.
68  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
69  * copied to R1.
70  *
71  * All registers are 64-bit.
72  * R0 - return register
73  * R1-R5 argument passing registers
74  * R6-R9 callee saved registers
75  * R10 - frame pointer read-only
76  *
77  * At the start of BPF program the register R1 contains a pointer to bpf_context
78  * and has type PTR_TO_CTX.
79  *
80  * Verifier tracks arithmetic operations on pointers in case:
81  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
82  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
83  * 1st insn copies R10 (which has FRAME_PTR) type into R1
84  * and 2nd arithmetic instruction is pattern matched to recognize
85  * that it wants to construct a pointer to some element within stack.
86  * So after 2nd insn, the register R1 has type PTR_TO_STACK
87  * (and -20 constant is saved for further stack bounds checking).
88  * Meaning that this reg is a pointer to stack plus known immediate constant.
89  *
90  * Most of the time the registers have SCALAR_VALUE type, which
91  * means the register has some value, but it's not a valid pointer.
92  * (like pointer plus pointer becomes SCALAR_VALUE type)
93  *
94  * When verifier sees load or store instructions the type of base register
95  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
96  * four pointer types recognized by check_mem_access() function.
97  *
98  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
99  * and the range of [ptr, ptr + map's value_size) is accessible.
100  *
101  * Registers used to pass values to function calls are checked against
102  * function argument constraints.
103  *
104  * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
105  * It means that the register type passed to this function must be
106  * PTR_TO_STACK and it will be used inside the function as
107  * 'pointer to map element key'
108  *
109  * For example the argument constraints for bpf_map_lookup_elem():
110  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
111  *   .arg1_type = ARG_CONST_MAP_PTR,
112  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
113  *
114  * ret_type says that this function returns 'pointer to map elem value or null'.
115  * The function expects the 1st argument to be a const pointer to 'struct bpf_map'
116  * and the 2nd argument to be a pointer to stack, which will be used inside
117  * the helper function as a pointer to map element key.
118  *
119  * On the kernel side the helper function looks like:
120  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
121  * {
122  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
123  *    void *key = (void *) (unsigned long) r2;
124  *    void *value;
125  *
126  *    here kernel can access 'key' and 'map' pointers safely, knowing that
127  *    [key, key + map->key_size) bytes are valid and were initialized on
128  *    the stack of eBPF program.
129  * }
130  *
131  * Corresponding eBPF program may look like:
132  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
133  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
134  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
135  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
136  * here the verifier looks at the prototype of map_lookup_elem() and sees:
137  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok.
138  * Now the verifier knows that this map has a key of R1->map_ptr->key_size bytes.
139  *
140  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far.
141  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
142  * and were initialized prior to this call.
143  * If it's ok, then verifier allows this BPF_CALL insn and looks at
144  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
145  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
146  * returns either pointer to map value or NULL.
147  *
148  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
149  * insn, the register holding that pointer in the true branch changes state to
150  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
151  * branch. See check_cond_jmp_op().
152  *
153  * After the call R0 is set to return type of the function and registers R1-R5
154  * are set to NOT_INIT to indicate that they are no longer readable.
155  *
156  * The following reference types represent a potential reference to a kernel
157  * resource which, after first being allocated, must be checked and freed by
158  * the BPF program:
159  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
160  *
161  * When the verifier sees a helper call return a reference type, it allocates a
162  * pointer id for the reference and stores it in the current function state.
163  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
164  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
165  * passes through a NULL-check conditional. For the branch wherein the state is
166  * changed to CONST_IMM, the verifier releases the reference.
167  *
168  * For each helper function that allocates a reference, such as
169  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
170  * bpf_sk_release(). When a reference type passes into the release function,
171  * the verifier also releases the reference. If any unchecked or unreleased
172  * reference remains at the end of the program, the verifier rejects it.
173  */
174 
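/* Editor's note: the type transitions described above, written out as a
 * minimal BPF C program (an illustrative sketch, not verifier code; the map
 * name 'values', its ARRAY type and the SEC() names are assumptions used
 * only for this example). Clang lowers the lookup to the instruction
 * pattern shown in the comment above.
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, __u32);
 *		__type(value, __u64);
 *	} values SEC(".maps");
 *
 *	SEC("tp/syscalls/sys_enter_getpid")
 *	int prog(void *ctx)		// R1 enters with type PTR_TO_CTX
 *	{
 *		__u32 key = 0;		// lives on the stack (PTR_TO_STACK)
 *		__u64 *val;
 *
 *		val = bpf_map_lookup_elem(&values, &key);
 *					// R0 is PTR_TO_MAP_VALUE_OR_NULL
 *		if (!val)		// NULL check splits the state
 *			return 0;	// false branch: val is known NULL
 *		*val += 1;		// true branch: PTR_TO_MAP_VALUE,
 *					// access bounded by value_size
 *		return 0;
 *	}
 */
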
175 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
176 struct bpf_verifier_stack_elem {
177 	/* verifier state is 'st'
178 	 * before processing instruction 'insn_idx'
179 	 * and after processing instruction 'prev_insn_idx'
180 	 */
181 	struct bpf_verifier_state st;
182 	int insn_idx;
183 	int prev_insn_idx;
184 	struct bpf_verifier_stack_elem *next;
185 	/* length of verifier log at the time this state was pushed on stack */
186 	u32 log_pos;
187 };
188 
189 #define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
190 #define BPF_COMPLEXITY_LIMIT_STATES	64
191 
192 #define BPF_MAP_KEY_POISON	(1ULL << 63)
193 #define BPF_MAP_KEY_SEEN	(1ULL << 62)
194 
195 #define BPF_GLOBAL_PERCPU_MA_MAX_SIZE  512
196 
197 #define BPF_PRIV_STACK_MIN_SIZE		64
198 
199 static int acquire_reference(struct bpf_verifier_env *env, int insn_idx);
200 static int release_reference_nomark(struct bpf_verifier_state *state, int ref_obj_id);
201 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
202 static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
203 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
204 static int ref_set_non_owning(struct bpf_verifier_env *env,
205 			      struct bpf_reg_state *reg);
206 static void specialize_kfunc(struct bpf_verifier_env *env,
207 			     u32 func_id, u16 offset, unsigned long *addr);
208 static bool is_trusted_reg(const struct bpf_reg_state *reg);
209 
210 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
211 {
212 	return aux->map_ptr_state.poison;
213 }
214 
215 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
216 {
217 	return aux->map_ptr_state.unpriv;
218 }
219 
220 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
221 			      struct bpf_map *map,
222 			      bool unpriv, bool poison)
223 {
224 	unpriv |= bpf_map_ptr_unpriv(aux);
225 	aux->map_ptr_state.unpriv = unpriv;
226 	aux->map_ptr_state.poison = poison;
227 	aux->map_ptr_state.map_ptr = map;
228 }
229 
230 static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
231 {
232 	return aux->map_key_state & BPF_MAP_KEY_POISON;
233 }
234 
235 static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
236 {
237 	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
238 }
239 
240 static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
241 {
242 	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
243 }
244 
245 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
246 {
247 	bool poisoned = bpf_map_key_poisoned(aux);
248 
249 	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
250 			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
251 }
252 
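/* Editor's sketch of the map_key_state packing implemented by the helpers
 * above (the call sequence is illustrative; the constant 5 is made up):
 *
 *   bit 63: BPF_MAP_KEY_POISON - constant-key tracking has given up
 *   bit 62: BPF_MAP_KEY_SEEN   - at least one call site was recorded
 *   bits 0-61: the constant key/index value itself
 *
 *   bpf_map_key_store(aux, 5);                  // state = SEEN | 5
 *   bpf_map_key_immediate(aux);                 // -> 5
 *   bpf_map_key_store(aux, BPF_MAP_KEY_POISON); // key differed or was unknown
 *   bpf_map_key_poisoned(aux);                  // -> true
 *
 * Once poisoned, later passes know the key/index is not a single known
 * constant and must skip any specialization based on it.
 */
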
253 static bool bpf_helper_call(const struct bpf_insn *insn)
254 {
255 	return insn->code == (BPF_JMP | BPF_CALL) &&
256 	       insn->src_reg == 0;
257 }
258 
259 static bool bpf_pseudo_call(const struct bpf_insn *insn)
260 {
261 	return insn->code == (BPF_JMP | BPF_CALL) &&
262 	       insn->src_reg == BPF_PSEUDO_CALL;
263 }
264 
265 static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
266 {
267 	return insn->code == (BPF_JMP | BPF_CALL) &&
268 	       insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
269 }
270 
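/* Editor's note: the three BPF_CALL flavours the predicates above tell
 * apart, keyed purely on insn->src_reg (imm is interpreted differently in
 * each case; 'insn_off' and 'btf_id' below are illustrative placeholders):
 *
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem)
 *     src_reg == 0                     -> helper call, imm = helper id
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, 0, insn_off)
 *     src_reg == BPF_PSEUDO_CALL       -> bpf-to-bpf call, imm = insn offset
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, btf_id)
 *     src_reg == BPF_PSEUDO_KFUNC_CALL -> kfunc call, imm = BTF id of the
 *                                         kernel function
 */
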
271 struct bpf_call_arg_meta {
272 	struct bpf_map *map_ptr;
273 	bool raw_mode;
274 	bool pkt_access;
275 	u8 release_regno;
276 	int regno;
277 	int access_size;
278 	int mem_size;
279 	u64 msize_max_value;
280 	int ref_obj_id;
281 	int dynptr_id;
282 	int map_uid;
283 	int func_id;
284 	struct btf *btf;
285 	u32 btf_id;
286 	struct btf *ret_btf;
287 	u32 ret_btf_id;
288 	u32 subprogno;
289 	struct btf_field *kptr_field;
290 	s64 const_map_key;
291 };
292 
293 struct bpf_kfunc_call_arg_meta {
294 	/* In parameters */
295 	struct btf *btf;
296 	u32 func_id;
297 	u32 kfunc_flags;
298 	const struct btf_type *func_proto;
299 	const char *func_name;
300 	/* Out parameters */
301 	u32 ref_obj_id;
302 	u8 release_regno;
303 	bool r0_rdonly;
304 	u32 ret_btf_id;
305 	u64 r0_size;
306 	u32 subprogno;
307 	struct {
308 		u64 value;
309 		bool found;
310 	} arg_constant;
311 
312 	/* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
313 	 * generally to pass info about user-defined local kptr types to later
314 	 * verification logic
315 	 *   bpf_obj_drop/bpf_percpu_obj_drop
316 	 *     Record the local kptr type to be drop'd
317 	 *   bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
318 	 *     Record the local kptr type to be refcount_incr'd and use
319 	 *     arg_owning_ref to determine whether refcount_acquire should be
320 	 *     fallible
321 	 */
322 	struct btf *arg_btf;
323 	u32 arg_btf_id;
324 	bool arg_owning_ref;
325 
326 	struct {
327 		struct btf_field *field;
328 	} arg_list_head;
329 	struct {
330 		struct btf_field *field;
331 	} arg_rbtree_root;
332 	struct {
333 		enum bpf_dynptr_type type;
334 		u32 id;
335 		u32 ref_obj_id;
336 	} initialized_dynptr;
337 	struct {
338 		u8 spi;
339 		u8 frameno;
340 	} iter;
341 	struct {
342 		struct bpf_map *ptr;
343 		int uid;
344 	} map;
345 	u64 mem_size;
346 };
347 
348 struct btf *btf_vmlinux;
349 
350 static const char *btf_type_name(const struct btf *btf, u32 id)
351 {
352 	return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
353 }
354 
355 static DEFINE_MUTEX(bpf_verifier_lock);
356 static DEFINE_MUTEX(bpf_percpu_ma_lock);
357 
358 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
359 {
360 	struct bpf_verifier_env *env = private_data;
361 	va_list args;
362 
363 	if (!bpf_verifier_log_needed(&env->log))
364 		return;
365 
366 	va_start(args, fmt);
367 	bpf_verifier_vlog(&env->log, fmt, args);
368 	va_end(args);
369 }
370 
371 static void verbose_invalid_scalar(struct bpf_verifier_env *env,
372 				   struct bpf_reg_state *reg,
373 				   struct bpf_retval_range range, const char *ctx,
374 				   const char *reg_name)
375 {
376 	bool unknown = true;
377 
378 	verbose(env, "%s the register %s has", ctx, reg_name);
379 	if (reg->smin_value > S64_MIN) {
380 		verbose(env, " smin=%lld", reg->smin_value);
381 		unknown = false;
382 	}
383 	if (reg->smax_value < S64_MAX) {
384 		verbose(env, " smax=%lld", reg->smax_value);
385 		unknown = false;
386 	}
387 	if (unknown)
388 		verbose(env, " unknown scalar value");
389 	verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval);
390 }
391 
392 static bool reg_not_null(const struct bpf_reg_state *reg)
393 {
394 	enum bpf_reg_type type;
395 
396 	type = reg->type;
397 	if (type_may_be_null(type))
398 		return false;
399 
400 	type = base_type(type);
401 	return type == PTR_TO_SOCKET ||
402 		type == PTR_TO_TCP_SOCK ||
403 		type == PTR_TO_MAP_VALUE ||
404 		type == PTR_TO_MAP_KEY ||
405 		type == PTR_TO_SOCK_COMMON ||
406 		(type == PTR_TO_BTF_ID && is_trusted_reg(reg)) ||
407 		type == PTR_TO_MEM;
408 }
409 
410 static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
411 {
412 	struct btf_record *rec = NULL;
413 	struct btf_struct_meta *meta;
414 
415 	if (reg->type == PTR_TO_MAP_VALUE) {
416 		rec = reg->map_ptr->record;
417 	} else if (type_is_ptr_alloc_obj(reg->type)) {
418 		meta = btf_find_struct_meta(reg->btf, reg->btf_id);
419 		if (meta)
420 			rec = meta->record;
421 	}
422 	return rec;
423 }
424 
425 static bool subprog_is_global(const struct bpf_verifier_env *env, int subprog)
426 {
427 	struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux;
428 
429 	return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL;
430 }
431 
432 static const char *subprog_name(const struct bpf_verifier_env *env, int subprog)
433 {
434 	struct bpf_func_info *info;
435 
436 	if (!env->prog->aux->func_info)
437 		return "";
438 
439 	info = &env->prog->aux->func_info[subprog];
440 	return btf_type_name(env->prog->aux->btf, info->type_id);
441 }
442 
443 static void mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog)
444 {
445 	struct bpf_subprog_info *info = subprog_info(env, subprog);
446 
447 	info->is_cb = true;
448 	info->is_async_cb = true;
449 	info->is_exception_cb = true;
450 }
451 
452 static bool subprog_is_exc_cb(struct bpf_verifier_env *env, int subprog)
453 {
454 	return subprog_info(env, subprog)->is_exception_cb;
455 }
456 
457 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
458 {
459 	return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK);
460 }
461 
462 static bool type_is_rdonly_mem(u32 type)
463 {
464 	return type & MEM_RDONLY;
465 }
466 
467 static bool is_acquire_function(enum bpf_func_id func_id,
468 				const struct bpf_map *map)
469 {
470 	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
471 
472 	if (func_id == BPF_FUNC_sk_lookup_tcp ||
473 	    func_id == BPF_FUNC_sk_lookup_udp ||
474 	    func_id == BPF_FUNC_skc_lookup_tcp ||
475 	    func_id == BPF_FUNC_ringbuf_reserve ||
476 	    func_id == BPF_FUNC_kptr_xchg)
477 		return true;
478 
479 	if (func_id == BPF_FUNC_map_lookup_elem &&
480 	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
481 	     map_type == BPF_MAP_TYPE_SOCKHASH))
482 		return true;
483 
484 	return false;
485 }
486 
487 static bool is_ptr_cast_function(enum bpf_func_id func_id)
488 {
489 	return func_id == BPF_FUNC_tcp_sock ||
490 		func_id == BPF_FUNC_sk_fullsock ||
491 		func_id == BPF_FUNC_skc_to_tcp_sock ||
492 		func_id == BPF_FUNC_skc_to_tcp6_sock ||
493 		func_id == BPF_FUNC_skc_to_udp6_sock ||
494 		func_id == BPF_FUNC_skc_to_mptcp_sock ||
495 		func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
496 		func_id == BPF_FUNC_skc_to_tcp_request_sock;
497 }
498 
499 static bool is_dynptr_ref_function(enum bpf_func_id func_id)
500 {
501 	return func_id == BPF_FUNC_dynptr_data;
502 }
503 
504 static bool is_sync_callback_calling_kfunc(u32 btf_id);
505 static bool is_async_callback_calling_kfunc(u32 btf_id);
506 static bool is_callback_calling_kfunc(u32 btf_id);
507 static bool is_bpf_throw_kfunc(struct bpf_insn *insn);
508 
509 static bool is_bpf_wq_set_callback_impl_kfunc(u32 btf_id);
510 
511 static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
512 {
513 	return func_id == BPF_FUNC_for_each_map_elem ||
514 	       func_id == BPF_FUNC_find_vma ||
515 	       func_id == BPF_FUNC_loop ||
516 	       func_id == BPF_FUNC_user_ringbuf_drain;
517 }
518 
519 static bool is_async_callback_calling_function(enum bpf_func_id func_id)
520 {
521 	return func_id == BPF_FUNC_timer_set_callback;
522 }
523 
524 static bool is_callback_calling_function(enum bpf_func_id func_id)
525 {
526 	return is_sync_callback_calling_function(func_id) ||
527 	       is_async_callback_calling_function(func_id);
528 }
529 
530 static bool is_sync_callback_calling_insn(struct bpf_insn *insn)
531 {
532 	return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) ||
533 	       (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm));
534 }
535 
536 static bool is_async_callback_calling_insn(struct bpf_insn *insn)
537 {
538 	return (bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm)) ||
539 	       (bpf_pseudo_kfunc_call(insn) && is_async_callback_calling_kfunc(insn->imm));
540 }
541 
542 static bool is_may_goto_insn(struct bpf_insn *insn)
543 {
544 	return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO;
545 }
546 
547 static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx)
548 {
549 	return is_may_goto_insn(&env->prog->insnsi[insn_idx]);
550 }
551 
552 static bool is_storage_get_function(enum bpf_func_id func_id)
553 {
554 	return func_id == BPF_FUNC_sk_storage_get ||
555 	       func_id == BPF_FUNC_inode_storage_get ||
556 	       func_id == BPF_FUNC_task_storage_get ||
557 	       func_id == BPF_FUNC_cgrp_storage_get;
558 }
559 
560 static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
561 					const struct bpf_map *map)
562 {
563 	int ref_obj_uses = 0;
564 
565 	if (is_ptr_cast_function(func_id))
566 		ref_obj_uses++;
567 	if (is_acquire_function(func_id, map))
568 		ref_obj_uses++;
569 	if (is_dynptr_ref_function(func_id))
570 		ref_obj_uses++;
571 
572 	return ref_obj_uses > 1;
573 }
574 
575 static bool is_cmpxchg_insn(const struct bpf_insn *insn)
576 {
577 	return BPF_CLASS(insn->code) == BPF_STX &&
578 	       BPF_MODE(insn->code) == BPF_ATOMIC &&
579 	       insn->imm == BPF_CMPXCHG;
580 }
581 
582 static int __get_spi(s32 off)
583 {
584 	return (-off - 1) / BPF_REG_SIZE;
585 }
586 
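/* Editor's worked example of the off -> spi mapping above (BPF_REG_SIZE is
 * 8): stack offsets are negative and grow down from the frame pointer,
 * while spi counts 8-byte slots starting at the slot closest to it:
 *
 *   off  -1 ..  -8  ->  spi 0
 *   off  -9 .. -16  ->  spi 1
 *   off -17 .. -24  ->  spi 2
 */
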
587 static struct bpf_func_state *func(struct bpf_verifier_env *env,
588 				   const struct bpf_reg_state *reg)
589 {
590 	struct bpf_verifier_state *cur = env->cur_state;
591 
592 	return cur->frame[reg->frameno];
593 }
594 
595 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
596 {
597        int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
598 
599        /* We need to check that slots between [spi - nr_slots + 1, spi] are
600 	* within [0, allocated_slots).
601 	*
602 	* Please note that the spi grows downwards. For example, a dynptr
603 	* takes the size of two stack slots; the first slot will be at
604 	* spi and the second slot will be at spi - 1.
605 	*/
606        return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
607 }
608 
609 static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
610 			          const char *obj_kind, int nr_slots)
611 {
612 	int off, spi;
613 
614 	if (!tnum_is_const(reg->var_off)) {
615 		verbose(env, "%s has to be at a constant offset\n", obj_kind);
616 		return -EINVAL;
617 	}
618 
619 	off = reg->off + reg->var_off.value;
620 	if (off % BPF_REG_SIZE) {
621 		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
622 		return -EINVAL;
623 	}
624 
625 	spi = __get_spi(off);
626 	if (spi + 1 < nr_slots) {
627 		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
628 		return -EINVAL;
629 	}
630 
631 	if (!is_spi_bounds_valid(func(env, reg), spi, nr_slots))
632 		return -ERANGE;
633 	return spi;
634 }
635 
636 static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
637 {
638 	return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
639 }
640 
641 static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
642 {
643 	return stack_slot_obj_get_spi(env, reg, "iter", nr_slots);
644 }
645 
646 static int irq_flag_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
647 {
648 	return stack_slot_obj_get_spi(env, reg, "irq_flag", 1);
649 }
650 
651 static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
652 {
653 	switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
654 	case DYNPTR_TYPE_LOCAL:
655 		return BPF_DYNPTR_TYPE_LOCAL;
656 	case DYNPTR_TYPE_RINGBUF:
657 		return BPF_DYNPTR_TYPE_RINGBUF;
658 	case DYNPTR_TYPE_SKB:
659 		return BPF_DYNPTR_TYPE_SKB;
660 	case DYNPTR_TYPE_XDP:
661 		return BPF_DYNPTR_TYPE_XDP;
662 	default:
663 		return BPF_DYNPTR_TYPE_INVALID;
664 	}
665 }
666 
667 static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
668 {
669 	switch (type) {
670 	case BPF_DYNPTR_TYPE_LOCAL:
671 		return DYNPTR_TYPE_LOCAL;
672 	case BPF_DYNPTR_TYPE_RINGBUF:
673 		return DYNPTR_TYPE_RINGBUF;
674 	case BPF_DYNPTR_TYPE_SKB:
675 		return DYNPTR_TYPE_SKB;
676 	case BPF_DYNPTR_TYPE_XDP:
677 		return DYNPTR_TYPE_XDP;
678 	default:
679 		return 0;
680 	}
681 }
682 
683 static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
684 {
685 	return type == BPF_DYNPTR_TYPE_RINGBUF;
686 }
687 
688 static void __mark_dynptr_reg(struct bpf_reg_state *reg,
689 			      enum bpf_dynptr_type type,
690 			      bool first_slot, int dynptr_id);
691 
692 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
693 				struct bpf_reg_state *reg);
694 
695 static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
696 				   struct bpf_reg_state *sreg1,
697 				   struct bpf_reg_state *sreg2,
698 				   enum bpf_dynptr_type type)
699 {
700 	int id = ++env->id_gen;
701 
702 	__mark_dynptr_reg(sreg1, type, true, id);
703 	__mark_dynptr_reg(sreg2, type, false, id);
704 }
705 
706 static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
707 			       struct bpf_reg_state *reg,
708 			       enum bpf_dynptr_type type)
709 {
710 	__mark_dynptr_reg(reg, type, true, ++env->id_gen);
711 }
712 
713 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
714 				        struct bpf_func_state *state, int spi);
715 
716 static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
717 				   enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id)
718 {
719 	struct bpf_func_state *state = func(env, reg);
720 	enum bpf_dynptr_type type;
721 	int spi, i, err;
722 
723 	spi = dynptr_get_spi(env, reg);
724 	if (spi < 0)
725 		return spi;
726 
727 	/* We cannot assume both spi and spi - 1 belong to the same dynptr,
728 	 * hence we need to call destroy_if_dynptr_stack_slot twice for both,
729 	 * to ensure that for the following example:
730 	 *	[d1][d1][d2][d2]
731 	 * spi    3   2   1   0
732 	 * So marking spi = 2 should lead to destruction of both d1 and d2. In
733 	 * case they do belong to same dynptr, second call won't see slot_type
734 	 * as STACK_DYNPTR and will simply skip destruction.
735 	 */
736 	err = destroy_if_dynptr_stack_slot(env, state, spi);
737 	if (err)
738 		return err;
739 	err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
740 	if (err)
741 		return err;
742 
743 	for (i = 0; i < BPF_REG_SIZE; i++) {
744 		state->stack[spi].slot_type[i] = STACK_DYNPTR;
745 		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
746 	}
747 
748 	type = arg_to_dynptr_type(arg_type);
749 	if (type == BPF_DYNPTR_TYPE_INVALID)
750 		return -EINVAL;
751 
752 	mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
753 			       &state->stack[spi - 1].spilled_ptr, type);
754 
755 	if (dynptr_type_refcounted(type)) {
756 		/* The id is used to track proper releasing */
757 		int id;
758 
759 		if (clone_ref_obj_id)
760 			id = clone_ref_obj_id;
761 		else
762 			id = acquire_reference(env, insn_idx);
763 
764 		if (id < 0)
765 			return id;
766 
767 		state->stack[spi].spilled_ptr.ref_obj_id = id;
768 		state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
769 	}
770 
771 	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
772 	state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
773 
774 	return 0;
775 }
776 
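/* Editor's illustrative sketch (BPF C, libbpf conventions assumed; the
 * ringbuf map 'rb' is made up) of the refcounted dynptr case handled by
 * mark_stack_slots_dynptr() above: the struct bpf_dynptr spans two 8-byte
 * stack slots, both marked STACK_DYNPTR, and both carry the same
 * ref_obj_id so the reservation cannot leak on any path:
 *
 *	struct bpf_dynptr ptr;			// occupies two stack slots
 *
 *	if (!bpf_ringbuf_reserve_dynptr(&rb, 8, 0, &ptr)) {
 *		... write through the dynptr ...
 *		bpf_ringbuf_submit_dynptr(&ptr, 0);	// releases ref_obj_id
 *	} else {
 *		bpf_ringbuf_discard_dynptr(&ptr, 0);	// release on failure too
 *	}
 */
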
777 static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi)
778 {
779 	int i;
780 
781 	for (i = 0; i < BPF_REG_SIZE; i++) {
782 		state->stack[spi].slot_type[i] = STACK_INVALID;
783 		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
784 	}
785 
786 	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
787 	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
788 
789 	/* Why do we need to set REG_LIVE_WRITTEN for STACK_INVALID slot?
790 	 *
791 	 * While we don't allow reading STACK_INVALID, it is still possible to
792 	 * do <8 byte writes marking some but not all slots as STACK_MISC. Then,
793 	 * helpers or insns can do partial read of that part without failing,
794 	 * but check_stack_range_initialized, check_stack_read_var_off, and
795 	 * check_stack_read_fixed_off will do mark_reg_read for all 8-bytes of
796 	 * the slot conservatively. Hence we need to prevent those liveness
797 	 * marking walks.
798 	 *
799 	 * This was not a problem before because STACK_INVALID is only set by
800 	 * default (where the default reg state has its reg->parent as NULL), or
801 	 * in clean_live_states after REG_LIVE_DONE (at which point
802 	 * mark_reg_read won't walk reg->parent chain), but not randomly during
803 	 * verifier state exploration (like we did above). Hence, for our case
804 	 * parentage chain will still be live (i.e. reg->parent may be
805 	 * non-NULL), while earlier reg->parent was NULL, so we need
806 	 * REG_LIVE_WRITTEN to screen off read marker propagation, which is
807 	 * done later on reads or by mark_dynptr_read, so that it does not
808 	 * unnecessarily mark registers in the verifier state.
809 	 */
810 	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
811 	state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
812 }
813 
814 static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
815 {
816 	struct bpf_func_state *state = func(env, reg);
817 	int spi, ref_obj_id, i;
818 
819 	spi = dynptr_get_spi(env, reg);
820 	if (spi < 0)
821 		return spi;
822 
823 	if (!dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
824 		invalidate_dynptr(env, state, spi);
825 		return 0;
826 	}
827 
828 	ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id;
829 
830 	/* If the dynptr has a ref_obj_id, then we need to invalidate
831 	 * two things:
832 	 *
833 	 * 1) Any dynptrs with a matching ref_obj_id (clones)
834 	 * 2) Any slices derived from this dynptr.
835 	 */
836 
837 	/* Invalidate any slices associated with this dynptr */
838 	WARN_ON_ONCE(release_reference(env, ref_obj_id));
839 
840 	/* Invalidate any dynptr clones */
841 	for (i = 1; i < state->allocated_stack / BPF_REG_SIZE; i++) {
842 		if (state->stack[i].spilled_ptr.ref_obj_id != ref_obj_id)
843 			continue;
844 
845 		/* it should always be the case that if the ref obj id
846 		 * matches then the stack slot also belongs to a
847 		 * dynptr
848 		 */
849 		if (state->stack[i].slot_type[0] != STACK_DYNPTR) {
850 			verbose(env, "verifier internal error: misconfigured ref_obj_id\n");
851 			return -EFAULT;
852 		}
853 		if (state->stack[i].spilled_ptr.dynptr.first_slot)
854 			invalidate_dynptr(env, state, i);
855 	}
856 
857 	return 0;
858 }
859 
860 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
861 			       struct bpf_reg_state *reg);
862 
863 static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
864 {
865 	if (!env->allow_ptr_leaks)
866 		__mark_reg_not_init(env, reg);
867 	else
868 		__mark_reg_unknown(env, reg);
869 }
870 
871 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
872 				        struct bpf_func_state *state, int spi)
873 {
874 	struct bpf_func_state *fstate;
875 	struct bpf_reg_state *dreg;
876 	int i, dynptr_id;
877 
878 	/* We always ensure that STACK_DYNPTR is never set partially,
879 	 * hence just checking for slot_type[0] is enough. This is
880 	 * different for STACK_SPILL, where it may be only set for
881 	 * 1 byte, so code has to use is_spilled_reg.
882 	 */
883 	if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
884 		return 0;
885 
886 	/* Reposition spi to first slot */
887 	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
888 		spi = spi + 1;
889 
890 	if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
891 		verbose(env, "cannot overwrite referenced dynptr\n");
892 		return -EINVAL;
893 	}
894 
895 	mark_stack_slot_scratched(env, spi);
896 	mark_stack_slot_scratched(env, spi - 1);
897 
898 	/* Writing partially to one dynptr stack slot destroys both. */
899 	for (i = 0; i < BPF_REG_SIZE; i++) {
900 		state->stack[spi].slot_type[i] = STACK_INVALID;
901 		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
902 	}
903 
904 	dynptr_id = state->stack[spi].spilled_ptr.id;
905 	/* Invalidate any slices associated with this dynptr */
906 	bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({
907 		/* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
908 		if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
909 			continue;
910 		if (dreg->dynptr_id == dynptr_id)
911 			mark_reg_invalid(env, dreg);
912 	}));
913 
914 	/* Do not release reference state, we are destroying dynptr on stack,
915 	 * not using some helper to release it. Just reset register.
916 	 */
917 	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
918 	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
919 
920 	/* Same reason as unmark_stack_slots_dynptr above */
921 	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
922 	state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
923 
924 	return 0;
925 }
926 
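/* Editor's sketch of the situation destroy_if_dynptr_stack_slot() handles
 * (local, unreferenced dynptr; variable names are illustrative):
 *
 *	struct bpf_dynptr ptr;
 *	char buf[16];
 *	__u8 *p;
 *
 *	bpf_dynptr_from_mem(buf, sizeof(buf), 0, &ptr);
 *	p = bpf_dynptr_data(&ptr, 0, 8);	// slice tied to ptr's dynptr_id
 *	if (!p)
 *		return 0;
 *	*(__u64 *)&ptr = 0;	// overwrites one of ptr's stack slots: both
 *				// STACK_DYNPTR slots become STACK_INVALID and
 *				// 'p' is marked invalid above
 *	*p = 0;			// therefore rejected by the verifier
 */
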
927 static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
928 {
929 	int spi;
930 
931 	if (reg->type == CONST_PTR_TO_DYNPTR)
932 		return false;
933 
934 	spi = dynptr_get_spi(env, reg);
935 
936 	/* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
937 	 * error because this just means the stack state hasn't been updated yet.
938 	 * We will do check_mem_access to check and update stack bounds later.
939 	 */
940 	if (spi < 0 && spi != -ERANGE)
941 		return false;
942 
943 	/* We don't need to check if the stack slots are marked by previous
944 	 * dynptr initializations because we allow overwriting existing unreferenced
945 	 * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
946 	 * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
947 	 * touching are completely destructed before we reinitialize them for a new
948 	 * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
949 	 * instead of delaying it until the end where the user will get "Unreleased
950 	 * reference" error.
951 	 */
952 	return true;
953 }
954 
955 static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
956 {
957 	struct bpf_func_state *state = func(env, reg);
958 	int i, spi;
959 
960 	/* This already represents first slot of initialized bpf_dynptr.
961 	 *
962 	 * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
963 	 * check_func_arg_reg_off's logic, so we don't need to check its
964 	 * offset and alignment.
965 	 */
966 	if (reg->type == CONST_PTR_TO_DYNPTR)
967 		return true;
968 
969 	spi = dynptr_get_spi(env, reg);
970 	if (spi < 0)
971 		return false;
972 	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
973 		return false;
974 
975 	for (i = 0; i < BPF_REG_SIZE; i++) {
976 		if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
977 		    state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
978 			return false;
979 	}
980 
981 	return true;
982 }
983 
984 static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
985 				    enum bpf_arg_type arg_type)
986 {
987 	struct bpf_func_state *state = func(env, reg);
988 	enum bpf_dynptr_type dynptr_type;
989 	int spi;
990 
991 	/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
992 	if (arg_type == ARG_PTR_TO_DYNPTR)
993 		return true;
994 
995 	dynptr_type = arg_to_dynptr_type(arg_type);
996 	if (reg->type == CONST_PTR_TO_DYNPTR) {
997 		return reg->dynptr.type == dynptr_type;
998 	} else {
999 		spi = dynptr_get_spi(env, reg);
1000 		if (spi < 0)
1001 			return false;
1002 		return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
1003 	}
1004 }
1005 
1006 static void __mark_reg_known_zero(struct bpf_reg_state *reg);
1007 
1008 static bool in_rcu_cs(struct bpf_verifier_env *env);
1009 
1010 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta);
1011 
1012 static int mark_stack_slots_iter(struct bpf_verifier_env *env,
1013 				 struct bpf_kfunc_call_arg_meta *meta,
1014 				 struct bpf_reg_state *reg, int insn_idx,
1015 				 struct btf *btf, u32 btf_id, int nr_slots)
1016 {
1017 	struct bpf_func_state *state = func(env, reg);
1018 	int spi, i, j, id;
1019 
1020 	spi = iter_get_spi(env, reg, nr_slots);
1021 	if (spi < 0)
1022 		return spi;
1023 
1024 	id = acquire_reference(env, insn_idx);
1025 	if (id < 0)
1026 		return id;
1027 
1028 	for (i = 0; i < nr_slots; i++) {
1029 		struct bpf_stack_state *slot = &state->stack[spi - i];
1030 		struct bpf_reg_state *st = &slot->spilled_ptr;
1031 
1032 		__mark_reg_known_zero(st);
1033 		st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
1034 		if (is_kfunc_rcu_protected(meta)) {
1035 			if (in_rcu_cs(env))
1036 				st->type |= MEM_RCU;
1037 			else
1038 				st->type |= PTR_UNTRUSTED;
1039 		}
1040 		st->live |= REG_LIVE_WRITTEN;
1041 		st->ref_obj_id = i == 0 ? id : 0;
1042 		st->iter.btf = btf;
1043 		st->iter.btf_id = btf_id;
1044 		st->iter.state = BPF_ITER_STATE_ACTIVE;
1045 		st->iter.depth = 0;
1046 
1047 		for (j = 0; j < BPF_REG_SIZE; j++)
1048 			slot->slot_type[j] = STACK_ITER;
1049 
1050 		mark_stack_slot_scratched(env, spi - i);
1051 	}
1052 
1053 	return 0;
1054 }
1055 
1056 static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
1057 				   struct bpf_reg_state *reg, int nr_slots)
1058 {
1059 	struct bpf_func_state *state = func(env, reg);
1060 	int spi, i, j;
1061 
1062 	spi = iter_get_spi(env, reg, nr_slots);
1063 	if (spi < 0)
1064 		return spi;
1065 
1066 	for (i = 0; i < nr_slots; i++) {
1067 		struct bpf_stack_state *slot = &state->stack[spi - i];
1068 		struct bpf_reg_state *st = &slot->spilled_ptr;
1069 
1070 		if (i == 0)
1071 			WARN_ON_ONCE(release_reference(env, st->ref_obj_id));
1072 
1073 		__mark_reg_not_init(env, st);
1074 
1075 		/* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */
1076 		st->live |= REG_LIVE_WRITTEN;
1077 
1078 		for (j = 0; j < BPF_REG_SIZE; j++)
1079 			slot->slot_type[j] = STACK_INVALID;
1080 
1081 		mark_stack_slot_scratched(env, spi - i);
1082 	}
1083 
1084 	return 0;
1085 }
1086 
1087 static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
1088 				     struct bpf_reg_state *reg, int nr_slots)
1089 {
1090 	struct bpf_func_state *state = func(env, reg);
1091 	int spi, i, j;
1092 
1093 	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1094 	 * will do check_mem_access to check and update stack bounds later, so
1095 	 * return true for that case.
1096 	 */
1097 	spi = iter_get_spi(env, reg, nr_slots);
1098 	if (spi == -ERANGE)
1099 		return true;
1100 	if (spi < 0)
1101 		return false;
1102 
1103 	for (i = 0; i < nr_slots; i++) {
1104 		struct bpf_stack_state *slot = &state->stack[spi - i];
1105 
1106 		for (j = 0; j < BPF_REG_SIZE; j++)
1107 			if (slot->slot_type[j] == STACK_ITER)
1108 				return false;
1109 	}
1110 
1111 	return true;
1112 }
1113 
1114 static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1115 				   struct btf *btf, u32 btf_id, int nr_slots)
1116 {
1117 	struct bpf_func_state *state = func(env, reg);
1118 	int spi, i, j;
1119 
1120 	spi = iter_get_spi(env, reg, nr_slots);
1121 	if (spi < 0)
1122 		return -EINVAL;
1123 
1124 	for (i = 0; i < nr_slots; i++) {
1125 		struct bpf_stack_state *slot = &state->stack[spi - i];
1126 		struct bpf_reg_state *st = &slot->spilled_ptr;
1127 
1128 		if (st->type & PTR_UNTRUSTED)
1129 			return -EPROTO;
1130 		/* only main (first) slot has ref_obj_id set */
1131 		if (i == 0 && !st->ref_obj_id)
1132 			return -EINVAL;
1133 		if (i != 0 && st->ref_obj_id)
1134 			return -EINVAL;
1135 		if (st->iter.btf != btf || st->iter.btf_id != btf_id)
1136 			return -EINVAL;
1137 
1138 		for (j = 0; j < BPF_REG_SIZE; j++)
1139 			if (slot->slot_type[j] != STACK_ITER)
1140 				return -EINVAL;
1141 	}
1142 
1143 	return 0;
1144 }
1145 
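/* Editor's illustrative sketch of the open-coded iterator pattern whose
 * stack slots the helpers above manage (numbers iterator, libbpf
 * conventions assumed):
 *
 *	struct bpf_iter_num it;		// nr_slots stack slots, STACK_ITER
 *	int *v, sum = 0;
 *
 *	bpf_iter_num_new(&it, 0, 10);	// acquires ref_obj_id on first slot
 *	while ((v = bpf_iter_num_next(&it)))
 *		sum += *v;		// slots must keep matching btf/btf_id
 *	bpf_iter_num_destroy(&it);	// releases the reference, slots
 *					// revert to STACK_INVALID
 */
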
1146 static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx);
1147 static int release_irq_state(struct bpf_verifier_state *state, int id);
1148 
1149 static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
1150 				     struct bpf_kfunc_call_arg_meta *meta,
1151 				     struct bpf_reg_state *reg, int insn_idx)
1152 {
1153 	struct bpf_func_state *state = func(env, reg);
1154 	struct bpf_stack_state *slot;
1155 	struct bpf_reg_state *st;
1156 	int spi, i, id;
1157 
1158 	spi = irq_flag_get_spi(env, reg);
1159 	if (spi < 0)
1160 		return spi;
1161 
1162 	id = acquire_irq_state(env, insn_idx);
1163 	if (id < 0)
1164 		return id;
1165 
1166 	slot = &state->stack[spi];
1167 	st = &slot->spilled_ptr;
1168 
1169 	__mark_reg_known_zero(st);
1170 	st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
1171 	st->live |= REG_LIVE_WRITTEN;
1172 	st->ref_obj_id = id;
1173 
1174 	for (i = 0; i < BPF_REG_SIZE; i++)
1175 		slot->slot_type[i] = STACK_IRQ_FLAG;
1176 
1177 	mark_stack_slot_scratched(env, spi);
1178 	return 0;
1179 }
1180 
1181 static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1182 {
1183 	struct bpf_func_state *state = func(env, reg);
1184 	struct bpf_stack_state *slot;
1185 	struct bpf_reg_state *st;
1186 	int spi, i, err;
1187 
1188 	spi = irq_flag_get_spi(env, reg);
1189 	if (spi < 0)
1190 		return spi;
1191 
1192 	slot = &state->stack[spi];
1193 	st = &slot->spilled_ptr;
1194 
1195 	err = release_irq_state(env->cur_state, st->ref_obj_id);
1196 	WARN_ON_ONCE(err && err != -EACCES);
1197 	if (err) {
1198 		int insn_idx = 0;
1199 
1200 		for (int i = 0; i < env->cur_state->acquired_refs; i++) {
1201 			if (env->cur_state->refs[i].id == env->cur_state->active_irq_id) {
1202 				insn_idx = env->cur_state->refs[i].insn_idx;
1203 				break;
1204 			}
1205 		}
1206 
1207 		verbose(env, "cannot restore irq state out of order, expected id=%d acquired at insn_idx=%d\n",
1208 			env->cur_state->active_irq_id, insn_idx);
1209 		return err;
1210 	}
1211 
1212 	__mark_reg_not_init(env, st);
1213 
1214 	/* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */
1215 	st->live |= REG_LIVE_WRITTEN;
1216 
1217 	for (i = 0; i < BPF_REG_SIZE; i++)
1218 		slot->slot_type[i] = STACK_INVALID;
1219 
1220 	mark_stack_slot_scratched(env, spi);
1221 	return 0;
1222 }
1223 
1224 static bool is_irq_flag_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1225 {
1226 	struct bpf_func_state *state = func(env, reg);
1227 	struct bpf_stack_state *slot;
1228 	int spi, i;
1229 
1230 	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1231 	 * will do check_mem_access to check and update stack bounds later, so
1232 	 * return true for that case.
1233 	 */
1234 	spi = irq_flag_get_spi(env, reg);
1235 	if (spi == -ERANGE)
1236 		return true;
1237 	if (spi < 0)
1238 		return false;
1239 
1240 	slot = &state->stack[spi];
1241 
1242 	for (i = 0; i < BPF_REG_SIZE; i++)
1243 		if (slot->slot_type[i] == STACK_IRQ_FLAG)
1244 			return false;
1245 	return true;
1246 }
1247 
1248 static int is_irq_flag_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1249 {
1250 	struct bpf_func_state *state = func(env, reg);
1251 	struct bpf_stack_state *slot;
1252 	struct bpf_reg_state *st;
1253 	int spi, i;
1254 
1255 	spi = irq_flag_get_spi(env, reg);
1256 	if (spi < 0)
1257 		return -EINVAL;
1258 
1259 	slot = &state->stack[spi];
1260 	st = &slot->spilled_ptr;
1261 
1262 	if (!st->ref_obj_id)
1263 		return -EINVAL;
1264 
1265 	for (i = 0; i < BPF_REG_SIZE; i++)
1266 		if (slot->slot_type[i] != STACK_IRQ_FLAG)
1267 			return -EINVAL;
1268 	return 0;
1269 }
1270 
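/* Editor's sketch of the usage these STACK_IRQ_FLAG helpers back (assuming
 * the bpf_local_irq_save()/bpf_local_irq_restore() kfuncs; 'flags' lives on
 * the BPF stack and occupies one slot):
 *
 *	unsigned long flags;
 *
 *	bpf_local_irq_save(&flags);	// slot becomes STACK_IRQ_FLAG and its
 *					// ref_obj_id is recorded above
 *	... non-sleepable work with IRQs disabled ...
 *	bpf_local_irq_restore(&flags);	// slot reverts to STACK_INVALID
 */
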
1271 /* Check if given stack slot is "special":
1272  *   - spilled register state (STACK_SPILL);
1273  *   - dynptr state (STACK_DYNPTR);
1274  *   - iter state (STACK_ITER);
1275  *   - irq flag state (STACK_IRQ_FLAG).
1276  */
1277 static bool is_stack_slot_special(const struct bpf_stack_state *stack)
1278 {
1279 	enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1];
1280 
1281 	switch (type) {
1282 	case STACK_SPILL:
1283 	case STACK_DYNPTR:
1284 	case STACK_ITER:
1285 	case STACK_IRQ_FLAG:
1286 		return true;
1287 	case STACK_INVALID:
1288 	case STACK_MISC:
1289 	case STACK_ZERO:
1290 		return false;
1291 	default:
1292 		WARN_ONCE(1, "unknown stack slot type %d\n", type);
1293 		return true;
1294 	}
1295 }
1296 
1297 /* The reg state of a pointer or a bounded scalar was saved when
1298  * it was spilled to the stack.
1299  */
1300 static bool is_spilled_reg(const struct bpf_stack_state *stack)
1301 {
1302 	return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
1303 }
1304 
1305 static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack)
1306 {
1307 	return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL &&
1308 	       stack->spilled_ptr.type == SCALAR_VALUE;
1309 }
1310 
1311 static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack)
1312 {
1313 	return stack->slot_type[0] == STACK_SPILL &&
1314 	       stack->spilled_ptr.type == SCALAR_VALUE;
1315 }
1316 
1317 /* Mark stack slot as STACK_MISC, unless it is already STACK_INVALID, in which
1318  * case they are equivalent, or it's STACK_ZERO, in which case we preserve
1319  * more precise STACK_ZERO.
1320  * Regardless of allow_ptr_leaks setting (i.e., privileged or unprivileged
1321  * mode), we won't promote STACK_INVALID to STACK_MISC. In the privileged case
1322  * it is unnecessary, as both are considered equivalent when loading data and
1323  * pruning; in the unprivileged case it would be incorrect to allow reads of
1324  * invalid slots.
1325  */
1326 static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
1327 {
1328 	if (*stype == STACK_ZERO)
1329 		return;
1330 	if (*stype == STACK_INVALID)
1331 		return;
1332 	*stype = STACK_MISC;
1333 }
1334 
1335 static void scrub_spilled_slot(u8 *stype)
1336 {
1337 	if (*stype != STACK_INVALID)
1338 		*stype = STACK_MISC;
1339 }
1340 
1341 /* copy array src of length n * size bytes to dst. dst is reallocated if it's too
1342  * small to hold src. This is different from krealloc since we don't want to preserve
1343  * the contents of dst.
1344  *
1345  * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
1346  * not be allocated.
1347  */
1348 static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
1349 {
1350 	size_t alloc_bytes;
1351 	void *orig = dst;
1352 	size_t bytes;
1353 
1354 	if (ZERO_OR_NULL_PTR(src))
1355 		goto out;
1356 
1357 	if (unlikely(check_mul_overflow(n, size, &bytes)))
1358 		return NULL;
1359 
1360 	alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
1361 	dst = krealloc(orig, alloc_bytes, flags);
1362 	if (!dst) {
1363 		kfree(orig);
1364 		return NULL;
1365 	}
1366 
1367 	memcpy(dst, src, bytes);
1368 out:
1369 	return dst ? dst : ZERO_SIZE_PTR;
1370 }
1371 
1372 /* resize an array from old_n items to new_n items. the array is reallocated if it's too
1373  * small to hold new_n items. new items are zeroed out if the array grows.
1374  *
1375  * Contrary to krealloc_array, does not free arr if new_n is zero.
1376  */
1377 static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
1378 {
1379 	size_t alloc_size;
1380 	void *new_arr;
1381 
1382 	if (!new_n || old_n == new_n)
1383 		goto out;
1384 
1385 	alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
1386 	new_arr = krealloc(arr, alloc_size, GFP_KERNEL);
1387 	if (!new_arr) {
1388 		kfree(arr);
1389 		return NULL;
1390 	}
1391 	arr = new_arr;
1392 
1393 	if (new_n > old_n)
1394 		memset(arr + old_n * size, 0, (new_n - old_n) * size);
1395 
1396 out:
1397 	return arr ? arr : ZERO_SIZE_PTR;
1398 }
1399 
1400 static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf_verifier_state *src)
1401 {
1402 	dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
1403 			       sizeof(struct bpf_reference_state), GFP_KERNEL);
1404 	if (!dst->refs)
1405 		return -ENOMEM;
1406 
1407 	dst->acquired_refs = src->acquired_refs;
1408 	dst->active_locks = src->active_locks;
1409 	dst->active_preempt_locks = src->active_preempt_locks;
1410 	dst->active_rcu_lock = src->active_rcu_lock;
1411 	dst->active_irq_id = src->active_irq_id;
1412 	return 0;
1413 }
1414 
1415 static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1416 {
1417 	size_t n = src->allocated_stack / BPF_REG_SIZE;
1418 
1419 	dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
1420 				GFP_KERNEL);
1421 	if (!dst->stack)
1422 		return -ENOMEM;
1423 
1424 	dst->allocated_stack = src->allocated_stack;
1425 	return 0;
1426 }
1427 
1428 static int resize_reference_state(struct bpf_verifier_state *state, size_t n)
1429 {
1430 	state->refs = realloc_array(state->refs, state->acquired_refs, n,
1431 				    sizeof(struct bpf_reference_state));
1432 	if (!state->refs)
1433 		return -ENOMEM;
1434 
1435 	state->acquired_refs = n;
1436 	return 0;
1437 }
1438 
1439 /* Possibly update state->allocated_stack to be at least size bytes. Also
1440  * possibly update the function's high-water mark in its bpf_subprog_info.
1441  */
1442 static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size)
1443 {
1444 	size_t old_n = state->allocated_stack / BPF_REG_SIZE, n;
1445 
1446 	/* The stack size is always a multiple of BPF_REG_SIZE. */
1447 	size = round_up(size, BPF_REG_SIZE);
1448 	n = size / BPF_REG_SIZE;
1449 
1450 	if (old_n >= n)
1451 		return 0;
1452 
1453 	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
1454 	if (!state->stack)
1455 		return -ENOMEM;
1456 
1457 	state->allocated_stack = size;
1458 
1459 	/* update known max for given subprogram */
1460 	if (env->subprog_info[state->subprogno].stack_depth < size)
1461 		env->subprog_info[state->subprogno].stack_depth = size;
1462 
1463 	return 0;
1464 }
1465 
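/* Editor's worked example for grow_stack_state(): suppose a frame has
 * allocated_stack == 16 and an instruction writes 4 bytes at fp-20, so the
 * caller asks for size = 20. round_up(20, 8) gives 24, the stack array
 * grows from 2 to 3 slots (the new slot is zeroed by realloc_array()), and
 * the subprogram's stack_depth high-water mark is raised to 24.
 */
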
1466 /* Acquire a new reference state entry from the env and update state->refs
1467  * to include it.
1468  * On success, returns a pointer to the new bpf_reference_state for the
1469  * caller to fill in; on failure, returns NULL.
1470  */
1471 static struct bpf_reference_state *acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
1472 {
1473 	struct bpf_verifier_state *state = env->cur_state;
1474 	int new_ofs = state->acquired_refs;
1475 	int err;
1476 
1477 	err = resize_reference_state(state, state->acquired_refs + 1);
1478 	if (err)
1479 		return NULL;
1480 	state->refs[new_ofs].insn_idx = insn_idx;
1481 
1482 	return &state->refs[new_ofs];
1483 }
1484 
1485 static int acquire_reference(struct bpf_verifier_env *env, int insn_idx)
1486 {
1487 	struct bpf_reference_state *s;
1488 
1489 	s = acquire_reference_state(env, insn_idx);
1490 	if (!s)
1491 		return -ENOMEM;
1492 	s->type = REF_TYPE_PTR;
1493 	s->id = ++env->id_gen;
1494 	return s->id;
1495 }
1496 
1497 static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum ref_state_type type,
1498 			      int id, void *ptr)
1499 {
1500 	struct bpf_verifier_state *state = env->cur_state;
1501 	struct bpf_reference_state *s;
1502 
1503 	s = acquire_reference_state(env, insn_idx);
1504 	if (!s)
1505 		return -ENOMEM;
1506 	s->type = type;
1507 	s->id = id;
1508 	s->ptr = ptr;
1509 
1510 	state->active_locks++;
1511 	return 0;
1512 }
1513 
1514 static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx)
1515 {
1516 	struct bpf_verifier_state *state = env->cur_state;
1517 	struct bpf_reference_state *s;
1518 
1519 	s = acquire_reference_state(env, insn_idx);
1520 	if (!s)
1521 		return -ENOMEM;
1522 	s->type = REF_TYPE_IRQ;
1523 	s->id = ++env->id_gen;
1524 
1525 	state->active_irq_id = s->id;
1526 	return s->id;
1527 }
1528 
1529 static void release_reference_state(struct bpf_verifier_state *state, int idx)
1530 {
1531 	int last_idx;
1532 	size_t rem;
1533 
1534 	/* IRQ state requires the relative ordering of elements to remain the
1535 	 * same, since it relies on the refs array to behave as a stack, so that
1536 	 * it can detect out-of-order IRQ restore. Hence use memmove to shift
1537 	 * the array instead of swapping the final element into the deleted idx.
1538 	 */
1539 	last_idx = state->acquired_refs - 1;
1540 	rem = state->acquired_refs - idx - 1;
1541 	if (last_idx && idx != last_idx)
1542 		memmove(&state->refs[idx], &state->refs[idx + 1], sizeof(*state->refs) * rem);
1543 	memset(&state->refs[last_idx], 0, sizeof(*state->refs));
1544 	state->acquired_refs--;
1545 	return;
1546 }
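/* Worked example of why ordering matters (illustrative ids only): with refs
 * holding [LOCK, IRQ(id=3), IRQ(id=7)], releasing index 0 memmove()s the two
 * IRQ entries left, preserving their acquisition order. Swapping the last
 * element into slot 0 instead would reorder them and defeat the stack-like
 * scan that release_irq_state() performs below.
 */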
1547 
1548 static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr)
1549 {
1550 	int i;
1551 
1552 	for (i = 0; i < state->acquired_refs; i++) {
1553 		if (state->refs[i].type != type)
1554 			continue;
1555 		if (state->refs[i].id == id && state->refs[i].ptr == ptr) {
1556 			release_reference_state(state, i);
1557 			state->active_locks--;
1558 			return 0;
1559 		}
1560 	}
1561 	return -EINVAL;
1562 }
1563 
1564 static int release_irq_state(struct bpf_verifier_state *state, int id)
1565 {
1566 	u32 prev_id = 0;
1567 	int i;
1568 
1569 	if (id != state->active_irq_id)
1570 		return -EACCES;
1571 
1572 	for (i = 0; i < state->acquired_refs; i++) {
1573 		if (state->refs[i].type != REF_TYPE_IRQ)
1574 			continue;
1575 		if (state->refs[i].id == id) {
1576 			release_reference_state(state, i);
1577 			state->active_irq_id = prev_id;
1578 			return 0;
1579 		} else {
1580 			prev_id = state->refs[i].id;
1581 		}
1582 	}
1583 	return -EINVAL;
1584 }
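/* Worked example (illustrative ids only): with IRQ refs acquired in the
 * order id=3 then id=7, active_irq_id is 7. Releasing id 3 first fails the
 * id != active_irq_id check with -EACCES; releasing id 7 succeeds and the
 * prev_id scan above restores active_irq_id to 3.
 */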
1585 
1586 static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *state, enum ref_state_type type,
1587 						   int id, void *ptr)
1588 {
1589 	int i;
1590 
1591 	for (i = 0; i < state->acquired_refs; i++) {
1592 		struct bpf_reference_state *s = &state->refs[i];
1593 
1594 		if (s->type != type)
1595 			continue;
1596 
1597 		if (s->id == id && s->ptr == ptr)
1598 			return s;
1599 	}
1600 	return NULL;
1601 }
1602 
1603 static void free_func_state(struct bpf_func_state *state)
1604 {
1605 	if (!state)
1606 		return;
1607 	kfree(state->stack);
1608 	kfree(state);
1609 }
1610 
1611 static void free_verifier_state(struct bpf_verifier_state *state,
1612 				bool free_self)
1613 {
1614 	int i;
1615 
1616 	for (i = 0; i <= state->curframe; i++) {
1617 		free_func_state(state->frame[i]);
1618 		state->frame[i] = NULL;
1619 	}
1620 	kfree(state->refs);
1621 	if (free_self)
1622 		kfree(state);
1623 }
1624 
1625 /* copy verifier state from src to dst growing dst stack space
1626  * when necessary to accommodate larger src stack
1627  */
1628 static int copy_func_state(struct bpf_func_state *dst,
1629 			   const struct bpf_func_state *src)
1630 {
1631 	memcpy(dst, src, offsetof(struct bpf_func_state, stack));
1632 	return copy_stack_state(dst, src);
1633 }
1634 
1635 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
1636 			       const struct bpf_verifier_state *src)
1637 {
1638 	struct bpf_func_state *dst;
1639 	int i, err;
1640 
1641 	/* if dst has more stack frames than src, free the extra ones; this is
1642 	 * also necessary in case of exceptional exits using bpf_throw.
1643 	 */
1644 	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
1645 		free_func_state(dst_state->frame[i]);
1646 		dst_state->frame[i] = NULL;
1647 	}
1648 	err = copy_reference_state(dst_state, src);
1649 	if (err)
1650 		return err;
1651 	dst_state->speculative = src->speculative;
1652 	dst_state->in_sleepable = src->in_sleepable;
1653 	dst_state->curframe = src->curframe;
1654 	dst_state->branches = src->branches;
1655 	dst_state->parent = src->parent;
1656 	dst_state->first_insn_idx = src->first_insn_idx;
1657 	dst_state->last_insn_idx = src->last_insn_idx;
1658 	dst_state->insn_hist_start = src->insn_hist_start;
1659 	dst_state->insn_hist_end = src->insn_hist_end;
1660 	dst_state->dfs_depth = src->dfs_depth;
1661 	dst_state->callback_unroll_depth = src->callback_unroll_depth;
1662 	dst_state->used_as_loop_entry = src->used_as_loop_entry;
1663 	dst_state->may_goto_depth = src->may_goto_depth;
1664 	for (i = 0; i <= src->curframe; i++) {
1665 		dst = dst_state->frame[i];
1666 		if (!dst) {
1667 			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1668 			if (!dst)
1669 				return -ENOMEM;
1670 			dst_state->frame[i] = dst;
1671 		}
1672 		err = copy_func_state(dst, src->frame[i]);
1673 		if (err)
1674 			return err;
1675 	}
1676 	return 0;
1677 }
1678 
1679 static u32 state_htab_size(struct bpf_verifier_env *env)
1680 {
1681 	return env->prog->len;
1682 }
1683 
1684 static struct bpf_verifier_state_list **explored_state(struct bpf_verifier_env *env, int idx)
1685 {
1686 	struct bpf_verifier_state *cur = env->cur_state;
1687 	struct bpf_func_state *state = cur->frame[cur->curframe];
1688 
1689 	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
1690 }
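/* For example, assuming prog->len == 512, a state at insn 100 whose current
 * frame was entered from a callsite at insn 7 hashes to bucket
 * (100 ^ 7) % 512 == 99; mixing in the callsite spreads states for the same
 * insn reached via different call chains across buckets.
 */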
1691 
1692 static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b)
1693 {
1694 	int fr;
1695 
1696 	if (a->curframe != b->curframe)
1697 		return false;
1698 
1699 	for (fr = a->curframe; fr >= 0; fr--)
1700 		if (a->frame[fr]->callsite != b->frame[fr]->callsite)
1701 			return false;
1702 
1703 	return true;
1704 }
1705 
1706 /* Open coded iterators allow back-edges in the state graph in order to
1707  * check unbounded loops that use iterators.
1708  *
1709  * In is_state_visited() it is necessary to know if explored states are
1710  * part of some loops in order to decide whether non-exact states
1711  * comparison could be used:
1712  * - non-exact states comparison establishes sub-state relation and uses
1713  *   read and precision marks to do so, these marks are propagated from
1714  *   children states and thus are not guaranteed to be final in a loop;
1715  * - exact states comparison just checks if current and explored states
1716  *   are identical (and thus form a back-edge).
1717  *
1718  * Paper "A New Algorithm for Identifying Loops in Decompilation"
1719  * by Tao Wei, Jian Mao, Wei Zou and Yu Chen [1] presents a convenient
1720  * algorithm for loop structure detection and gives an overview of
1721  * relevant terminology. It also has helpful illustrations.
1722  *
1723  * [1] https://api.semanticscholar.org/CorpusID:15784067
1724  *
1725  * We use a similar algorithm but, because loop nesting structure is
1726  * irrelevant for the verifier, ours is significantly simpler and resembles
1727  * the strongly connected components algorithm from Sedgewick's textbook.
1728  *
1729  * Define the topmost loop entry as the first node of the loop traversed in a
1730  * depth first search starting from the initial state. The goal of the loop
1731  * tracking algorithm is to associate topmost loop entries with states
1732  * derived from these entries.
1733  *
1734  * At each step of the DFS states traversal, the algorithm needs to identify
1735  * the following situations:
1736  *
1737  *          initial                     initial                   initial
1738  *            |                           |                         |
1739  *            V                           V                         V
1740  *           ...                         ...           .---------> hdr
1741  *            |                           |            |            |
1742  *            V                           V            |            V
1743  *           cur                     .-> succ          |    .------...
1744  *            |                      |    |            |    |       |
1745  *            V                      |    V            |    V       V
1746  *           succ                    '-- cur           |   ...     ...
1747  *                                                     |    |       |
1748  *                                                     |    V       V
1749  *                                                     |   succ <- cur
1750  *                                                     |    |
1751  *                                                     |    V
1752  *                                                     |   ...
1753  *                                                     |    |
1754  *                                                     '----'
1755  *
1756  *  (A) successor state of cur   (B) successor state of cur or its entry
1757  *      not yet traversed            are in current DFS path, thus cur and succ
1758  *                                   are members of the same outermost loop
1759  *
1760  *                      initial                  initial
1761  *                        |                        |
1762  *                        V                        V
1763  *                       ...                      ...
1764  *                        |                        |
1765  *                        V                        V
1766  *                .------...               .------...
1767  *                |       |                |       |
1768  *                V       V                V       V
1769  *           .-> hdr     ...              ...     ...
1770  *           |    |       |                |       |
1771  *           |    V       V                V       V
1772  *           |   succ <- cur              succ <- cur
1773  *           |    |                        |
1774  *           |    V                        V
1775  *           |   ...                      ...
1776  *           |    |                        |
1777  *           '----'                       exit
1778  *
1779  * (C) successor state of cur is a part of some loop but this loop
1780  *     does not include cur, or the successor state is not in a loop at all.
1781  *
1782  * Algorithm could be described as the following python code:
1783  *
1784  *     traversed = set()   # Set of traversed nodes
1785  *     entries = {}        # Mapping from node to loop entry
1786  *     depths = {}         # Depth level assigned to graph node
1787  *     path = set()        # Current DFS path
1788  *
1789  *     # Find outermost loop entry known for n
1790  *     def get_loop_entry(n):
1791  *         h = entries.get(n, None)
1792  *         while h in entries and entries[h] != h:
1793  *             h = entries[h]
1794  *         return h
1795  *
1796  *     # Update n's loop entry if h's outermost entry comes
1797  *     # before n's outermost entry in current DFS path.
1798  *     def update_loop_entry(n, h):
1799  *         n1 = get_loop_entry(n) or n
1800  *         h1 = get_loop_entry(h) or h
1801  *         if h1 in path and depths[h1] <= depths[n1]:
1802  *             entries[n] = h1
1803  *
1804  *     def dfs(n, depth):
1805  *         traversed.add(n)
1806  *         path.add(n)
1807  *         depths[n] = depth
1808  *         for succ in G.successors(n):
1809  *             if succ not in traversed:
1810  *                 # Case A: explore succ and update cur's loop entry
1811  *                 #         only if succ's entry is in current DFS path.
1812  *                 dfs(succ, depth + 1)
1813  *                 h = get_loop_entry(succ)
1814  *                 update_loop_entry(n, h)
1815  *             else:
1816  *                 # Case B or C depending on `h1 in path` check in update_loop_entry().
1817  *                 update_loop_entry(n, succ)
1818  *         path.remove(n)
1819  *
1820  * To adapt this algorithm for use with the verifier:
1821  * - use st->branches == 0 as a signal that DFS of succ had been finished
1822  *   and cur's loop entry has to be updated (case A), handle this in
1823  *   update_branch_counts();
1824  * - use st->branches > 0 as a signal that st is in the current DFS path;
1825  * - handle cases B and C in is_state_visited();
1826  * - update topmost loop entry for intermediate states in get_loop_entry().
1827  */
1828 static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_state *st)
1829 {
1830 	struct bpf_verifier_state *topmost = st->loop_entry, *old;
1831 
1832 	while (topmost && topmost->loop_entry && topmost != topmost->loop_entry)
1833 		topmost = topmost->loop_entry;
1834 	/* Update loop entries for intermediate states to avoid this
1835 	 * traversal in future get_loop_entry() calls.
1836 	 */
1837 	while (st && st->loop_entry != topmost) {
1838 		old = st->loop_entry;
1839 		st->loop_entry = topmost;
1840 		st = old;
1841 	}
1842 	return topmost;
1843 }
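/* Worked example of the path compression above: for a chain where
 * st->loop_entry == A, A->loop_entry == B and B->loop_entry == B, the first
 * loop resolves topmost to B and the second loop rewrites st->loop_entry
 * from A to B, so the chain does not have to be walked again on later calls.
 */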
1844 
1845 static void update_loop_entry(struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr)
1846 {
1847 	struct bpf_verifier_state *cur1, *hdr1;
1848 
1849 	cur1 = get_loop_entry(cur) ?: cur;
1850 	hdr1 = get_loop_entry(hdr) ?: hdr;
1851 	/* The hdr1->branches check decides between cases B and C in
1852 	 * comment for get_loop_entry(). If hdr1->branches == 0 then
1853 	 * hdr's topmost loop entry is not in current DFS path,
1854 	 * hence 'cur' and 'hdr' are not in the same loop and there is
1855 	 * no need to update cur->loop_entry.
1856 	 */
1857 	if (hdr1->branches && hdr1->dfs_depth <= cur1->dfs_depth) {
1858 		cur->loop_entry = hdr;
1859 		hdr->used_as_loop_entry = true;
1860 	}
1861 }
1862 
1863 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
1864 {
1865 	while (st) {
1866 		u32 br = --st->branches;
1867 
1868 		/* br == 0 signals that DFS exploration for 'st' is finished,
1869 		 * thus it is necessary to update parent's loop entry if it
1870 		 * turned out that st is a part of some loop.
1871 		 * This is a part of 'case A' in get_loop_entry() comment.
1872 		 */
1873 		if (br == 0 && st->parent && st->loop_entry)
1874 			update_loop_entry(st->parent, st->loop_entry);
1875 
1876 		/* WARN_ON(br > 1) technically makes sense here,
1877 		 * but see comment in push_stack(), hence:
1878 		 */
1879 		WARN_ONCE((int)br < 0,
1880 			  "BUG update_branch_counts:branches_to_explore=%d\n",
1881 			  br);
1882 		if (br)
1883 			break;
1884 		st = st->parent;
1885 	}
1886 }
1887 
1888 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
1889 		     int *insn_idx, bool pop_log)
1890 {
1891 	struct bpf_verifier_state *cur = env->cur_state;
1892 	struct bpf_verifier_stack_elem *elem, *head = env->head;
1893 	int err;
1894 
1895 	if (env->head == NULL)
1896 		return -ENOENT;
1897 
1898 	if (cur) {
1899 		err = copy_verifier_state(cur, &head->st);
1900 		if (err)
1901 			return err;
1902 	}
1903 	if (pop_log)
1904 		bpf_vlog_reset(&env->log, head->log_pos);
1905 	if (insn_idx)
1906 		*insn_idx = head->insn_idx;
1907 	if (prev_insn_idx)
1908 		*prev_insn_idx = head->prev_insn_idx;
1909 	elem = head->next;
1910 	free_verifier_state(&head->st, false);
1911 	kfree(head);
1912 	env->head = elem;
1913 	env->stack_size--;
1914 	return 0;
1915 }
1916 
1917 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
1918 					     int insn_idx, int prev_insn_idx,
1919 					     bool speculative)
1920 {
1921 	struct bpf_verifier_state *cur = env->cur_state;
1922 	struct bpf_verifier_stack_elem *elem;
1923 	int err;
1924 
1925 	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1926 	if (!elem)
1927 		goto err;
1928 
1929 	elem->insn_idx = insn_idx;
1930 	elem->prev_insn_idx = prev_insn_idx;
1931 	elem->next = env->head;
1932 	elem->log_pos = env->log.end_pos;
1933 	env->head = elem;
1934 	env->stack_size++;
1935 	err = copy_verifier_state(&elem->st, cur);
1936 	if (err)
1937 		goto err;
1938 	elem->st.speculative |= speculative;
1939 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1940 		verbose(env, "The sequence of %d jumps is too complex.\n",
1941 			env->stack_size);
1942 		goto err;
1943 	}
1944 	if (elem->st.parent) {
1945 		++elem->st.parent->branches;
1946 		/* WARN_ON(branches > 2) technically makes sense here,
1947 		 * but
1948 		 * 1. speculative states will bump 'branches' for non-branch
1949 		 * instructions
1950 		 * 2. is_state_visited() heuristics may decide not to create
1951 		 * a new state for a sequence of branches and all such current
1952 		 * and cloned states will be pointing to a single parent state
1953 		 * which might have large 'branches' count.
1954 		 */
1955 	}
1956 	return &elem->st;
1957 err:
1958 	free_verifier_state(env->cur_state, true);
1959 	env->cur_state = NULL;
1960 	/* pop all elements and return */
1961 	while (!pop_stack(env, NULL, NULL, false));
1962 	return NULL;
1963 }
1964 
1965 #define CALLER_SAVED_REGS 6
1966 static const int caller_saved[CALLER_SAVED_REGS] = {
1967 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
1968 };
1969 
1970 /* This helper doesn't clear reg->id */
1971 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1972 {
1973 	reg->var_off = tnum_const(imm);
1974 	reg->smin_value = (s64)imm;
1975 	reg->smax_value = (s64)imm;
1976 	reg->umin_value = imm;
1977 	reg->umax_value = imm;
1978 
1979 	reg->s32_min_value = (s32)imm;
1980 	reg->s32_max_value = (s32)imm;
1981 	reg->u32_min_value = (u32)imm;
1982 	reg->u32_max_value = (u32)imm;
1983 }
1984 
1985 /* Mark the unknown part of a register (variable offset or scalar value) as
1986  * known to have the value @imm.
1987  */
1988 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1989 {
1990 	/* Clear off and union(map_ptr, range) */
1991 	memset(((u8 *)reg) + sizeof(reg->type), 0,
1992 	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1993 	reg->id = 0;
1994 	reg->ref_obj_id = 0;
1995 	___mark_reg_known(reg, imm);
1996 }
1997 
1998 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1999 {
2000 	reg->var_off = tnum_const_subreg(reg->var_off, imm);
2001 	reg->s32_min_value = (s32)imm;
2002 	reg->s32_max_value = (s32)imm;
2003 	reg->u32_min_value = (u32)imm;
2004 	reg->u32_max_value = (u32)imm;
2005 }
2006 
2007 /* Mark the 'variable offset' part of a register as zero.  This should be
2008  * used only on registers holding a pointer type.
2009  */
2010 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
2011 {
2012 	__mark_reg_known(reg, 0);
2013 }
2014 
2015 static void __mark_reg_const_zero(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
2016 {
2017 	__mark_reg_known(reg, 0);
2018 	reg->type = SCALAR_VALUE;
2019 	/* all scalars are assumed imprecise initially (unless unprivileged,
2020 	 * in which case everything is forced to be precise)
2021 	 */
2022 	reg->precise = !env->bpf_capable;
2023 }
2024 
2025 static void mark_reg_known_zero(struct bpf_verifier_env *env,
2026 				struct bpf_reg_state *regs, u32 regno)
2027 {
2028 	if (WARN_ON(regno >= MAX_BPF_REG)) {
2029 		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
2030 		/* Something bad happened, let's kill all regs */
2031 		for (regno = 0; regno < MAX_BPF_REG; regno++)
2032 			__mark_reg_not_init(env, regs + regno);
2033 		return;
2034 	}
2035 	__mark_reg_known_zero(regs + regno);
2036 }
2037 
2038 static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
2039 			      bool first_slot, int dynptr_id)
2040 {
2041 	/* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
2042 	 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
2043 	 * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
2044 	 */
2045 	__mark_reg_known_zero(reg);
2046 	reg->type = CONST_PTR_TO_DYNPTR;
2047 	/* Give each dynptr a unique id to uniquely associate slices to it. */
2048 	reg->id = dynptr_id;
2049 	reg->dynptr.type = type;
2050 	reg->dynptr.first_slot = first_slot;
2051 }
2052 
2053 static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
2054 {
2055 	if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
2056 		const struct bpf_map *map = reg->map_ptr;
2057 
2058 		if (map->inner_map_meta) {
2059 			reg->type = CONST_PTR_TO_MAP;
2060 			reg->map_ptr = map->inner_map_meta;
2061 			/* transfer reg's id which is unique for every map_lookup_elem
2062 			 * as UID of the inner map.
2063 			 */
2064 			if (btf_record_has_field(map->inner_map_meta->record, BPF_TIMER))
2065 				reg->map_uid = reg->id;
2066 			if (btf_record_has_field(map->inner_map_meta->record, BPF_WORKQUEUE))
2067 				reg->map_uid = reg->id;
2068 		} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
2069 			reg->type = PTR_TO_XDP_SOCK;
2070 		} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
2071 			   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
2072 			reg->type = PTR_TO_SOCKET;
2073 		} else {
2074 			reg->type = PTR_TO_MAP_VALUE;
2075 		}
2076 		return;
2077 	}
2078 
2079 	reg->type &= ~PTR_MAYBE_NULL;
2080 }
2081 
2082 static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
2083 				struct btf_field_graph_root *ds_head)
2084 {
2085 	__mark_reg_known_zero(&regs[regno]);
2086 	regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
2087 	regs[regno].btf = ds_head->btf;
2088 	regs[regno].btf_id = ds_head->value_btf_id;
2089 	regs[regno].off = ds_head->node_offset;
2090 }
2091 
2092 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
2093 {
2094 	return type_is_pkt_pointer(reg->type);
2095 }
2096 
2097 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
2098 {
2099 	return reg_is_pkt_pointer(reg) ||
2100 	       reg->type == PTR_TO_PACKET_END;
2101 }
2102 
2103 static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
2104 {
2105 	return base_type(reg->type) == PTR_TO_MEM &&
2106 		(reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP);
2107 }
2108 
2109 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
2110 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
2111 				    enum bpf_reg_type which)
2112 {
2113 	/* The register can already have a range from prior markings.
2114 	 * This is fine as long as it hasn't been advanced from its
2115 	 * origin.
2116 	 */
2117 	return reg->type == which &&
2118 	       reg->id == 0 &&
2119 	       reg->off == 0 &&
2120 	       tnum_equals_const(reg->var_off, 0);
2121 }
2122 
2123 /* Reset the min/max bounds of a register */
2124 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
2125 {
2126 	reg->smin_value = S64_MIN;
2127 	reg->smax_value = S64_MAX;
2128 	reg->umin_value = 0;
2129 	reg->umax_value = U64_MAX;
2130 
2131 	reg->s32_min_value = S32_MIN;
2132 	reg->s32_max_value = S32_MAX;
2133 	reg->u32_min_value = 0;
2134 	reg->u32_max_value = U32_MAX;
2135 }
2136 
2137 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
2138 {
2139 	reg->smin_value = S64_MIN;
2140 	reg->smax_value = S64_MAX;
2141 	reg->umin_value = 0;
2142 	reg->umax_value = U64_MAX;
2143 }
2144 
2145 static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
2146 {
2147 	reg->s32_min_value = S32_MIN;
2148 	reg->s32_max_value = S32_MAX;
2149 	reg->u32_min_value = 0;
2150 	reg->u32_max_value = U32_MAX;
2151 }
2152 
2153 static void __update_reg32_bounds(struct bpf_reg_state *reg)
2154 {
2155 	struct tnum var32_off = tnum_subreg(reg->var_off);
2156 
2157 	/* min signed is max(sign bit) | min(other bits) */
2158 	reg->s32_min_value = max_t(s32, reg->s32_min_value,
2159 			var32_off.value | (var32_off.mask & S32_MIN));
2160 	/* max signed is min(sign bit) | max(other bits) */
2161 	reg->s32_max_value = min_t(s32, reg->s32_max_value,
2162 			var32_off.value | (var32_off.mask & S32_MAX));
2163 	reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
2164 	reg->u32_max_value = min(reg->u32_max_value,
2165 				 (u32)(var32_off.value | var32_off.mask));
2166 }
2167 
2168 static void __update_reg64_bounds(struct bpf_reg_state *reg)
2169 {
2170 	/* min signed is max(sign bit) | min(other bits) */
2171 	reg->smin_value = max_t(s64, reg->smin_value,
2172 				reg->var_off.value | (reg->var_off.mask & S64_MIN));
2173 	/* max signed is min(sign bit) | max(other bits) */
2174 	reg->smax_value = min_t(s64, reg->smax_value,
2175 				reg->var_off.value | (reg->var_off.mask & S64_MAX));
2176 	reg->umin_value = max(reg->umin_value, reg->var_off.value);
2177 	reg->umax_value = min(reg->umax_value,
2178 			      reg->var_off.value | reg->var_off.mask);
2179 }
2180 
2181 static void __update_reg_bounds(struct bpf_reg_state *reg)
2182 {
2183 	__update_reg32_bounds(reg);
2184 	__update_reg64_bounds(reg);
2185 }
2186 
2187 /* Uses signed min/max values to inform unsigned, and vice-versa */
2188 static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
2189 {
2190 	/* If upper 32 bits of u64/s64 range don't change, we can use lower 32
2191 	 * bits to improve our u32/s32 boundaries.
2192 	 *
2193 	 * E.g., the case where we have upper 32 bits as zero ([10, 20] in
2194 	 * u64) is pretty trivial, it's obvious that in u32 we'll also have
2195 	 * [10, 20] range. But this property holds for any 64-bit range as
2196 	 * long as upper 32 bits in that entire range of values stay the same.
2197 	 *
2198 	 * E.g., u64 range [0x10000000A, 0x10000000F] ([4294967306, 4294967311]
2199 	 * in decimal) has the same upper 32 bits throughout all the values in
2200 	 * that range. As such, lower 32 bits form a valid [0xA, 0xF] ([10, 15])
2201 	 * range.
2202 	 *
2203 	 * Note also, that [0xA, 0xF] is a valid range both in u32 and in s32,
2204 	 * following the rules outlined below about u64/s64 correspondence
2205 	 * (which equally applies to u32 vs s32 correspondence). In general it
2206 	 * depends on actual hexadecimal values of 32-bit range. They can form
2207 	 * only valid u32, or only valid s32 ranges in some cases.
2208 	 *
2209 	 * So we use all these insights to derive bounds for subregisters here.
2210 	 */
2211 	if ((reg->umin_value >> 32) == (reg->umax_value >> 32)) {
2212 		/* u64 to u32 casting preserves validity of low 32 bits as
2213 		 * a range, if upper 32 bits are the same
2214 		 */
2215 		reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->umin_value);
2216 		reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->umax_value);
2217 
2218 		if ((s32)reg->umin_value <= (s32)reg->umax_value) {
2219 			reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value);
2220 			reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value);
2221 		}
2222 	}
2223 	if ((reg->smin_value >> 32) == (reg->smax_value >> 32)) {
2224 		/* low 32 bits should form a proper u32 range */
2225 		if ((u32)reg->smin_value <= (u32)reg->smax_value) {
2226 			reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->smin_value);
2227 			reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->smax_value);
2228 		}
2229 		/* low 32 bits should form a proper s32 range */
2230 		if ((s32)reg->smin_value <= (s32)reg->smax_value) {
2231 			reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value);
2232 			reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value);
2233 		}
2234 	}
2235 	/* Special case where upper bits form a small sequence of two
2236 	 * sequential numbers (in 32-bit unsigned space, so 0xffffffff to
2237 	 * 0x00000000 is also valid), while lower bits form a proper s32 range
2238 	 * going from negative numbers to positive numbers. E.g., let's say we
2239 	 * have s64 range [-1, 1] ([0xffffffffffffffff, 0x0000000000000001]).
2240 	 * Possible s64 values are {-1, 0, 1} ({0xffffffffffffffff,
2241 	 * 0x0000000000000000, 0x0000000000000001}). Ignoring upper 32 bits,
2242 	 * we still get a valid s32 range [-1, 1] ([0xffffffff, 0x00000001]).
2243 	 * Note that it doesn't have to be 0xffffffff going to 0x00000000 in
2244 	 * upper 32 bits. As a random example, s64 range
2245 	 * [0xfffffff0fffffff0; 0xfffffff100000010], forms a valid s32 range
2246 	 * [-16, 16] ([0xfffffff0; 0x00000010]) in its 32 bit subregister.
2247 	 */
2248 	if ((u32)(reg->umin_value >> 32) + 1 == (u32)(reg->umax_value >> 32) &&
2249 	    (s32)reg->umin_value < 0 && (s32)reg->umax_value >= 0) {
2250 		reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value);
2251 		reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value);
2252 	}
2253 	if ((u32)(reg->smin_value >> 32) + 1 == (u32)(reg->smax_value >> 32) &&
2254 	    (s32)reg->smin_value < 0 && (s32)reg->smax_value >= 0) {
2255 		reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value);
2256 		reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value);
2257 	}
2258 	/* if u32 range forms a valid s32 range (due to matching sign bit),
2259 	 * try to learn from that
2260 	 */
2261 	if ((s32)reg->u32_min_value <= (s32)reg->u32_max_value) {
2262 		reg->s32_min_value = max_t(s32, reg->s32_min_value, reg->u32_min_value);
2263 		reg->s32_max_value = min_t(s32, reg->s32_max_value, reg->u32_max_value);
2264 	}
2265 	/* If we cannot cross the sign boundary, then signed and unsigned bounds
2266 	 * are the same, so combine.  This works even in the negative case, e.g.
2267 	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
2268 	 */
2269 	if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) {
2270 		reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value);
2271 		reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value);
2272 	}
2273 }
2274 
2275 static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
2276 {
2277 	/* If u64 range forms a valid s64 range (due to matching sign bit),
2278 	 * try to learn from that. Let's do a bit of ASCII art to see when
2279 	 * this is happening. Let's take u64 range first:
2280 	 *
2281 	 * 0             0x7fffffffffffffff 0x8000000000000000        U64_MAX
2282 	 * |-------------------------------|--------------------------------|
2283 	 *
2284 	 * Valid u64 range is formed when umin and umax are anywhere in the
2285 	 * range [0, U64_MAX], and umin <= umax. u64 case is simple and
2286 	 * straightforward. Let's see how s64 range maps onto the same range
2287 	 * of values, annotated below the line for comparison:
2288 	 *
2289 	 * 0             0x7fffffffffffffff 0x8000000000000000        U64_MAX
2290 	 * |-------------------------------|--------------------------------|
2291 	 * 0                        S64_MAX S64_MIN                        -1
2292 	 *
2293 	 * So s64 values basically start in the middle and they are logically
2294 	 * contiguous to the right of it, wrapping around from -1 to 0, and
2295 	 * then finishing as S64_MAX (0x7fffffffffffffff) right before
2296 	 * S64_MIN. We can try drawing the continuity of u64 vs s64 values
2297 	 * more visually as mapped to sign-agnostic range of hex values.
2298 	 *
2299 	 *  u64 start                                               u64 end
2300 	 *  _______________________________________________________________
2301 	 * /                                                               \
2302 	 * 0             0x7fffffffffffffff 0x8000000000000000        U64_MAX
2303 	 * |-------------------------------|--------------------------------|
2304 	 * 0                        S64_MAX S64_MIN                        -1
2305 	 *                                / \
2306 	 * >------------------------------   ------------------------------->
2307 	 * s64 continues...        s64 end   s64 start          s64 "midpoint"
2308 	 *
2309 	 * What this means is that, in general, we can't always derive
2310 	 * something new about u64 from any random s64 range, and vice versa.
2311 	 *
2312 	 * But we can do that in two particular cases. One is when entire
2313 	 * u64/s64 range is *entirely* contained within left half of the above
2314 	 * diagram or when it is *entirely* contained in the right half. I.e.:
2315 	 *
2316 	 * |-------------------------------|--------------------------------|
2317 	 *     ^                   ^            ^                 ^
2318 	 *     A                   B            C                 D
2319 	 *
2320 	 * [A, B] and [C, D] are contained entirely in their respective halves
2321 	 * and form valid contiguous ranges as both u64 and s64 values. [A, B]
2322 	 * will be non-negative both as u64 and s64 (and in fact it will be
2323 	 * identical ranges no matter the signedness). [C, D] treated as s64
2324 	 * will be a range of negative values, while in u64 it will be
2325 	 * non-negative range of values larger than 0x8000000000000000.
2326 	 *
2327 	 * Now, any other range here can't be represented in both u64 and s64
2328 	 * simultaneously. E.g., [A, C], [A, D], [B, C], [B, D] are valid
2329 	 * contiguous u64 ranges, but they are discontinuous in s64. [B, C]
2330 	 * in s64 would be properly presented as [S64_MIN, C] and [B, S64_MAX],
2331 	 * for example. Similarly, valid s64 range [D, A] (going from negative
2332 	 * to positive values), would be two separate [D, U64_MAX] and [0, A]
2333 	 * ranges as u64. Currently reg_state can't represent two segments per
2334 	 * numeric domain, so in such situations we can only derive maximal
2335 	 * possible range ([0, U64_MAX] for u64, and [S64_MIN, S64_MAX] for s64).
2336 	 *
2337 	 * So we use these facts to derive umin/umax from smin/smax and vice
2338 	 * versa only if they stay within the same "half". This is equivalent
2339 	 * to checking sign bit: lower half will have sign bit as zero, upper
2340 	 * half have sign bit 1. Below in code we simplify this by just
2341 	 * casting umin/umax as smin/smax and checking if they form valid
2342 	 * range, and vice versa. Those are equivalent checks.
2343 	 */
2344 	if ((s64)reg->umin_value <= (s64)reg->umax_value) {
2345 		reg->smin_value = max_t(s64, reg->smin_value, reg->umin_value);
2346 		reg->smax_value = min_t(s64, reg->smax_value, reg->umax_value);
2347 	}
2348 	/* If we cannot cross the sign boundary, then signed and unsigned bounds
2349 	 * are the same, so combine.  This works even in the negative case, e.g.
2350 	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
2351 	 */
2352 	if ((u64)reg->smin_value <= (u64)reg->smax_value) {
2353 		reg->umin_value = max_t(u64, reg->smin_value, reg->umin_value);
2354 		reg->umax_value = min_t(u64, reg->smax_value, reg->umax_value);
2355 	}
2356 }
2357 
2358 static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg)
2359 {
2360 	/* Try to tighten 64-bit bounds from 32-bit knowledge, using 32-bit
2361 	 * values on both sides of 64-bit range in hope to have tighter range.
2362 	 * E.g., if r1 is [0x1'00000000, 0x3'80000000], and we learn from
2363 	 * 32-bit signed > 0 operation that s32 bounds are now [1; 0x7fffffff].
2364 	 * With this, we can substitute 1 as low 32-bits of _low_ 64-bit bound
2365 	 * (0x100000000 -> 0x100000001) and 0x7fffffff as low 32-bits of
2366 	 * _high_ 64-bit bound (0x380000000 -> 0x37fffffff) and arrive at a
2367 	 * better overall bounds for r1 as [0x1'000000001; 0x3'7fffffff].
2368 	 * We just need to make sure that derived bounds we are intersecting
2369 	 * with are well-formed ranges in respective s64 or u64 domain, just
2370 	 * like we do with similar kinds of 32-to-64 or 64-to-32 adjustments.
2371 	 */
2372 	__u64 new_umin, new_umax;
2373 	__s64 new_smin, new_smax;
2374 
2375 	/* u32 -> u64 tightening, it's always well-formed */
2376 	new_umin = (reg->umin_value & ~0xffffffffULL) | reg->u32_min_value;
2377 	new_umax = (reg->umax_value & ~0xffffffffULL) | reg->u32_max_value;
2378 	reg->umin_value = max_t(u64, reg->umin_value, new_umin);
2379 	reg->umax_value = min_t(u64, reg->umax_value, new_umax);
2380 	/* u32 -> s64 tightening, u32 range embedded into s64 preserves range validity */
2381 	new_smin = (reg->smin_value & ~0xffffffffULL) | reg->u32_min_value;
2382 	new_smax = (reg->smax_value & ~0xffffffffULL) | reg->u32_max_value;
2383 	reg->smin_value = max_t(s64, reg->smin_value, new_smin);
2384 	reg->smax_value = min_t(s64, reg->smax_value, new_smax);
2385 
2386 	/* if s32 can be treated as valid u32 range, we can use it as well */
2387 	if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) {
2388 		/* s32 -> u64 tightening */
2389 		new_umin = (reg->umin_value & ~0xffffffffULL) | (u32)reg->s32_min_value;
2390 		new_umax = (reg->umax_value & ~0xffffffffULL) | (u32)reg->s32_max_value;
2391 		reg->umin_value = max_t(u64, reg->umin_value, new_umin);
2392 		reg->umax_value = min_t(u64, reg->umax_value, new_umax);
2393 		/* s32 -> s64 tightening */
2394 		new_smin = (reg->smin_value & ~0xffffffffULL) | (u32)reg->s32_min_value;
2395 		new_smax = (reg->smax_value & ~0xffffffffULL) | (u32)reg->s32_max_value;
2396 		reg->smin_value = max_t(s64, reg->smin_value, new_smin);
2397 		reg->smax_value = min_t(s64, reg->smax_value, new_smax);
2398 	}
2399 
2400 	/* Here we would like to handle a special case after sign extending load,
2401 	 * when upper bits for a 64-bit range are all 1s or all 0s.
2402 	 *
2403 	 * Upper bits are all 1s when register is in a range:
2404 	 *   [0xffff_ffff_0000_0000, 0xffff_ffff_ffff_ffff]
2405 	 * Upper bits are all 0s when register is in a range:
2406 	 *   [0x0000_0000_0000_0000, 0x0000_0000_ffff_ffff]
2407 	 * Together this forms a continuous range:
2408 	 *   [0xffff_ffff_0000_0000, 0x0000_0000_ffff_ffff]
2409 	 *
2410 	 * Now, suppose that register range is in fact tighter:
2411 	 *   [0xffff_ffff_8000_0000, 0x0000_0000_ffff_ffff] (R)
2412 	 * Also suppose that its 32-bit range is positive,
2413 	 * meaning that lower 32-bits of the full 64-bit register
2414 	 * are in the range:
2415 	 *   [0x0000_0000, 0x7fff_ffff] (W)
2416 	 *
2417 	 * If this happens, then any value in a range:
2418 	 *   [0xffff_ffff_0000_0000, 0xffff_ffff_7fff_ffff]
2419 	 * is smaller than the lowest bound of the range (R):
2420 	 *   0xffff_ffff_8000_0000
2421 	 * which means that upper bits of the full 64-bit register
2422 	 * can't be all 1s, when lower bits are in range (W).
2423 	 *
2424 	 * Note that:
2425 	 *  - 0xffff_ffff_8000_0000 == (s64)S32_MIN
2426 	 *  - 0x0000_0000_7fff_ffff == (s64)S32_MAX
2427 	 * These relations are used in the conditions below.
2428 	 */
2429 	if (reg->s32_min_value >= 0 && reg->smin_value >= S32_MIN && reg->smax_value <= S32_MAX) {
2430 		reg->smin_value = reg->s32_min_value;
2431 		reg->smax_value = reg->s32_max_value;
2432 		reg->umin_value = reg->s32_min_value;
2433 		reg->umax_value = reg->s32_max_value;
2434 		reg->var_off = tnum_intersect(reg->var_off,
2435 					      tnum_range(reg->smin_value, reg->smax_value));
2436 	}
2437 }
2438 
2439 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
2440 {
2441 	__reg32_deduce_bounds(reg);
2442 	__reg64_deduce_bounds(reg);
2443 	__reg_deduce_mixed_bounds(reg);
2444 }
2445 
2446 /* Attempts to improve var_off based on unsigned min/max information */
2447 static void __reg_bound_offset(struct bpf_reg_state *reg)
2448 {
2449 	struct tnum var64_off = tnum_intersect(reg->var_off,
2450 					       tnum_range(reg->umin_value,
2451 							  reg->umax_value));
2452 	struct tnum var32_off = tnum_intersect(tnum_subreg(var64_off),
2453 					       tnum_range(reg->u32_min_value,
2454 							  reg->u32_max_value));
2455 
2456 	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
2457 }
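/* For instance, once umin_value == umax_value == 42, tnum_range(42, 42) is a
 * constant tnum, so (provided the old var_off and the 32-bit bounds already
 * admitted 42) var_off collapses to the constant 42 with no unknown bits.
 */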
2458 
2459 static void reg_bounds_sync(struct bpf_reg_state *reg)
2460 {
2461 	/* We might have learned new bounds from the var_off. */
2462 	__update_reg_bounds(reg);
2463 	/* We might have learned something about the sign bit. */
2464 	__reg_deduce_bounds(reg);
2465 	__reg_deduce_bounds(reg);
2466 	/* We might have learned some bits from the bounds. */
2467 	__reg_bound_offset(reg);
2468 	/* Intersecting with the old var_off might have improved our bounds
2469 	 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
2470 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
2471 	 */
2472 	__update_reg_bounds(reg);
2473 }
2474 
2475 static int reg_bounds_sanity_check(struct bpf_verifier_env *env,
2476 				   struct bpf_reg_state *reg, const char *ctx)
2477 {
2478 	const char *msg;
2479 
2480 	if (reg->umin_value > reg->umax_value ||
2481 	    reg->smin_value > reg->smax_value ||
2482 	    reg->u32_min_value > reg->u32_max_value ||
2483 	    reg->s32_min_value > reg->s32_max_value) {
2484 		    msg = "range bounds violation";
2485 		    goto out;
2486 	}
2487 
2488 	if (tnum_is_const(reg->var_off)) {
2489 		u64 uval = reg->var_off.value;
2490 		s64 sval = (s64)uval;
2491 
2492 		if (reg->umin_value != uval || reg->umax_value != uval ||
2493 		    reg->smin_value != sval || reg->smax_value != sval) {
2494 			msg = "const tnum out of sync with range bounds";
2495 			goto out;
2496 		}
2497 	}
2498 
2499 	if (tnum_subreg_is_const(reg->var_off)) {
2500 		u32 uval32 = tnum_subreg(reg->var_off).value;
2501 		s32 sval32 = (s32)uval32;
2502 
2503 		if (reg->u32_min_value != uval32 || reg->u32_max_value != uval32 ||
2504 		    reg->s32_min_value != sval32 || reg->s32_max_value != sval32) {
2505 			msg = "const subreg tnum out of sync with range bounds";
2506 			goto out;
2507 		}
2508 	}
2509 
2510 	return 0;
2511 out:
2512 	verbose(env, "REG INVARIANTS VIOLATION (%s): %s u64=[%#llx, %#llx] "
2513 		"s64=[%#llx, %#llx] u32=[%#x, %#x] s32=[%#x, %#x] var_off=(%#llx, %#llx)\n",
2514 		ctx, msg, reg->umin_value, reg->umax_value,
2515 		reg->smin_value, reg->smax_value,
2516 		reg->u32_min_value, reg->u32_max_value,
2517 		reg->s32_min_value, reg->s32_max_value,
2518 		reg->var_off.value, reg->var_off.mask);
2519 	if (env->test_reg_invariants)
2520 		return -EFAULT;
2521 	__mark_reg_unbounded(reg);
2522 	return 0;
2523 }
2524 
2525 static bool __reg32_bound_s64(s32 a)
2526 {
2527 	return a >= 0 && a <= S32_MAX;
2528 }
2529 
2530 static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
2531 {
2532 	reg->umin_value = reg->u32_min_value;
2533 	reg->umax_value = reg->u32_max_value;
2534 
2535 	/* Attempt to pull 32-bit signed bounds into 64-bit bounds, but they must
2536 	 * be non-negative; otherwise set to worst case bounds and refine later
2537 	 * from the tnum.
2538 	 */
2539 	if (__reg32_bound_s64(reg->s32_min_value) &&
2540 	    __reg32_bound_s64(reg->s32_max_value)) {
2541 		reg->smin_value = reg->s32_min_value;
2542 		reg->smax_value = reg->s32_max_value;
2543 	} else {
2544 		reg->smin_value = 0;
2545 		reg->smax_value = U32_MAX;
2546 	}
2547 }
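/* E.g. with s32 bounds [-5, 10] the check above fails (s32_min is negative):
 * zero-extension maps -5 to 0xfffffffb, so the s32 range does not carry over
 * to s64 and smin/smax fall back to the conservative [0, U32_MAX], to be
 * refined from the tnum later (typically via reg_bounds_sync()).
 */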
2548 
2549 /* Mark a register as having a completely unknown (scalar) value. */
2550 static void __mark_reg_unknown_imprecise(struct bpf_reg_state *reg)
2551 {
2552 	/*
2553 	 * Clear type, off, and union(map_ptr, range) and
2554 	 * padding between 'type' and union
2555 	 */
2556 	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
2557 	reg->type = SCALAR_VALUE;
2558 	reg->id = 0;
2559 	reg->ref_obj_id = 0;
2560 	reg->var_off = tnum_unknown;
2561 	reg->frameno = 0;
2562 	reg->precise = false;
2563 	__mark_reg_unbounded(reg);
2564 }
2565 
2566 /* Mark a register as having a completely unknown (scalar) value,
2567  * initialize .precise as true when not bpf capable.
2568  */
2569 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
2570 			       struct bpf_reg_state *reg)
2571 {
2572 	__mark_reg_unknown_imprecise(reg);
2573 	reg->precise = !env->bpf_capable;
2574 }
2575 
2576 static void mark_reg_unknown(struct bpf_verifier_env *env,
2577 			     struct bpf_reg_state *regs, u32 regno)
2578 {
2579 	if (WARN_ON(regno >= MAX_BPF_REG)) {
2580 		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
2581 		/* Something bad happened, let's kill all regs except FP */
2582 		for (regno = 0; regno < BPF_REG_FP; regno++)
2583 			__mark_reg_not_init(env, regs + regno);
2584 		return;
2585 	}
2586 	__mark_reg_unknown(env, regs + regno);
2587 }
2588 
2589 static int __mark_reg_s32_range(struct bpf_verifier_env *env,
2590 				struct bpf_reg_state *regs,
2591 				u32 regno,
2592 				s32 s32_min,
2593 				s32 s32_max)
2594 {
2595 	struct bpf_reg_state *reg = regs + regno;
2596 
2597 	reg->s32_min_value = max_t(s32, reg->s32_min_value, s32_min);
2598 	reg->s32_max_value = min_t(s32, reg->s32_max_value, s32_max);
2599 
2600 	reg->smin_value = max_t(s64, reg->smin_value, s32_min);
2601 	reg->smax_value = min_t(s64, reg->smax_value, s32_max);
2602 
2603 	reg_bounds_sync(reg);
2604 
2605 	return reg_bounds_sanity_check(env, reg, "s32_range");
2606 }
2607 
2608 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
2609 				struct bpf_reg_state *reg)
2610 {
2611 	__mark_reg_unknown(env, reg);
2612 	reg->type = NOT_INIT;
2613 }
2614 
2615 static void mark_reg_not_init(struct bpf_verifier_env *env,
2616 			      struct bpf_reg_state *regs, u32 regno)
2617 {
2618 	if (WARN_ON(regno >= MAX_BPF_REG)) {
2619 		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
2620 		/* Something bad happened, let's kill all regs except FP */
2621 		for (regno = 0; regno < BPF_REG_FP; regno++)
2622 			__mark_reg_not_init(env, regs + regno);
2623 		return;
2624 	}
2625 	__mark_reg_not_init(env, regs + regno);
2626 }
2627 
2628 static void mark_btf_ld_reg(struct bpf_verifier_env *env,
2629 			    struct bpf_reg_state *regs, u32 regno,
2630 			    enum bpf_reg_type reg_type,
2631 			    struct btf *btf, u32 btf_id,
2632 			    enum bpf_type_flag flag)
2633 {
2634 	if (reg_type == SCALAR_VALUE) {
2635 		mark_reg_unknown(env, regs, regno);
2636 		return;
2637 	}
2638 	mark_reg_known_zero(env, regs, regno);
2639 	regs[regno].type = PTR_TO_BTF_ID | flag;
2640 	regs[regno].btf = btf;
2641 	regs[regno].btf_id = btf_id;
2642 	if (type_may_be_null(flag))
2643 		regs[regno].id = ++env->id_gen;
2644 }
2645 
2646 #define DEF_NOT_SUBREG	(0)
2647 static void init_reg_state(struct bpf_verifier_env *env,
2648 			   struct bpf_func_state *state)
2649 {
2650 	struct bpf_reg_state *regs = state->regs;
2651 	int i;
2652 
2653 	for (i = 0; i < MAX_BPF_REG; i++) {
2654 		mark_reg_not_init(env, regs, i);
2655 		regs[i].live = REG_LIVE_NONE;
2656 		regs[i].parent = NULL;
2657 		regs[i].subreg_def = DEF_NOT_SUBREG;
2658 	}
2659 
2660 	/* frame pointer */
2661 	regs[BPF_REG_FP].type = PTR_TO_STACK;
2662 	mark_reg_known_zero(env, regs, BPF_REG_FP);
2663 	regs[BPF_REG_FP].frameno = state->frameno;
2664 }
2665 
2666 static struct bpf_retval_range retval_range(s32 minval, s32 maxval)
2667 {
2668 	return (struct bpf_retval_range){ minval, maxval };
2669 }
2670 
2671 #define BPF_MAIN_FUNC (-1)
2672 static void init_func_state(struct bpf_verifier_env *env,
2673 			    struct bpf_func_state *state,
2674 			    int callsite, int frameno, int subprogno)
2675 {
2676 	state->callsite = callsite;
2677 	state->frameno = frameno;
2678 	state->subprogno = subprogno;
2679 	state->callback_ret_range = retval_range(0, 0);
2680 	init_reg_state(env, state);
2681 	mark_verifier_state_scratched(env);
2682 }
2683 
2684 /* Similar to push_stack(), but for async callbacks */
2685 static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
2686 						int insn_idx, int prev_insn_idx,
2687 						int subprog, bool is_sleepable)
2688 {
2689 	struct bpf_verifier_stack_elem *elem;
2690 	struct bpf_func_state *frame;
2691 
2692 	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
2693 	if (!elem)
2694 		goto err;
2695 
2696 	elem->insn_idx = insn_idx;
2697 	elem->prev_insn_idx = prev_insn_idx;
2698 	elem->next = env->head;
2699 	elem->log_pos = env->log.end_pos;
2700 	env->head = elem;
2701 	env->stack_size++;
2702 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
2703 		verbose(env,
2704 			"The sequence of %d jumps is too complex for async cb.\n",
2705 			env->stack_size);
2706 		goto err;
2707 	}
2708 	/* Unlike push_stack() do not copy_verifier_state().
2709 	 * The caller state doesn't matter.
2710 	 * This is async callback. It starts in a fresh stack.
2711 	 * Initialize it similar to do_check_common().
2712 	 * But we do need to make sure to not clobber insn_hist, so we keep
2713 	 * chaining insn_hist_start/insn_hist_end indices as for a normal
2714 	 * child state.
2715 	 */
2716 	elem->st.branches = 1;
2717 	elem->st.in_sleepable = is_sleepable;
2718 	elem->st.insn_hist_start = env->cur_state->insn_hist_end;
2719 	elem->st.insn_hist_end = elem->st.insn_hist_start;
2720 	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
2721 	if (!frame)
2722 		goto err;
2723 	init_func_state(env, frame,
2724 			BPF_MAIN_FUNC /* callsite */,
2725 			0 /* frameno within this callchain */,
2726 			subprog /* subprog number within this prog */);
2727 	elem->st.frame[0] = frame;
2728 	return &elem->st;
2729 err:
2730 	free_verifier_state(env->cur_state, true);
2731 	env->cur_state = NULL;
2732 	/* pop all elements and return */
2733 	while (!pop_stack(env, NULL, NULL, false));
2734 	return NULL;
2735 }
2736 
2737 
2738 enum reg_arg_type {
2739 	SRC_OP,		/* register is used as source operand */
2740 	DST_OP,		/* register is used as destination operand */
2741 	DST_OP_NO_MARK	/* same as above, check only, don't mark */
2742 };
2743 
2744 static int cmp_subprogs(const void *a, const void *b)
2745 {
2746 	return ((struct bpf_subprog_info *)a)->start -
2747 	       ((struct bpf_subprog_info *)b)->start;
2748 }
2749 
2750 /* Find subprogram that contains instruction at 'off' */
2751 static struct bpf_subprog_info *find_containing_subprog(struct bpf_verifier_env *env, int off)
2752 {
2753 	struct bpf_subprog_info *vals = env->subprog_info;
2754 	int l, r, m;
2755 
2756 	if (off >= env->prog->len || off < 0 || env->subprog_cnt == 0)
2757 		return NULL;
2758 
2759 	l = 0;
2760 	r = env->subprog_cnt - 1;
2761 	while (l < r) {
2762 		m = l + (r - l + 1) / 2;
2763 		if (vals[m].start <= off)
2764 			l = m;
2765 		else
2766 			r = m - 1;
2767 	}
2768 	return &vals[l];
2769 }
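/* Worked example of the binary search above (illustrative starts only): with
 * subprog starts [0, 10, 25] and off == 17, the loop converges on the entry
 * starting at 10, i.e. the last subprog whose start is <= off;
 * find_subprog() below additionally requires start == off.
 */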
2770 
2771 /* Find subprogram that starts exactly at 'off' */
2772 static int find_subprog(struct bpf_verifier_env *env, int off)
2773 {
2774 	struct bpf_subprog_info *p;
2775 
2776 	p = find_containing_subprog(env, off);
2777 	if (!p || p->start != off)
2778 		return -ENOENT;
2779 	return p - env->subprog_info;
2780 }
2781 
2782 static int add_subprog(struct bpf_verifier_env *env, int off)
2783 {
2784 	int insn_cnt = env->prog->len;
2785 	int ret;
2786 
2787 	if (off >= insn_cnt || off < 0) {
2788 		verbose(env, "call to invalid destination\n");
2789 		return -EINVAL;
2790 	}
2791 	ret = find_subprog(env, off);
2792 	if (ret >= 0)
2793 		return ret;
2794 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
2795 		verbose(env, "too many subprograms\n");
2796 		return -E2BIG;
2797 	}
2798 	/* determine subprog starts. The end is one before the next starts */
2799 	env->subprog_info[env->subprog_cnt++].start = off;
2800 	sort(env->subprog_info, env->subprog_cnt,
2801 	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
2802 	return env->subprog_cnt - 1;
2803 }
2804 
2805 static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env)
2806 {
2807 	struct bpf_prog_aux *aux = env->prog->aux;
2808 	struct btf *btf = aux->btf;
2809 	const struct btf_type *t;
2810 	u32 main_btf_id, id;
2811 	const char *name;
2812 	int ret, i;
2813 
2814 	/* Non-zero func_info_cnt implies valid btf */
2815 	if (!aux->func_info_cnt)
2816 		return 0;
2817 	main_btf_id = aux->func_info[0].type_id;
2818 
2819 	t = btf_type_by_id(btf, main_btf_id);
2820 	if (!t) {
2821 		verbose(env, "invalid btf id for main subprog in func_info\n");
2822 		return -EINVAL;
2823 	}
2824 
2825 	name = btf_find_decl_tag_value(btf, t, -1, "exception_callback:");
2826 	if (IS_ERR(name)) {
2827 		ret = PTR_ERR(name);
2828 		/* If there is no tag present, there is no exception callback */
2829 		if (ret == -ENOENT)
2830 			ret = 0;
2831 		else if (ret == -EEXIST)
2832 			verbose(env, "multiple exception callback tags for main subprog\n");
2833 		return ret;
2834 	}
2835 
2836 	ret = btf_find_by_name_kind(btf, name, BTF_KIND_FUNC);
2837 	if (ret < 0) {
2838 		verbose(env, "exception callback '%s' could not be found in BTF\n", name);
2839 		return ret;
2840 	}
2841 	id = ret;
2842 	t = btf_type_by_id(btf, id);
2843 	if (btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
2844 		verbose(env, "exception callback '%s' must have global linkage\n", name);
2845 		return -EINVAL;
2846 	}
2847 	ret = 0;
2848 	for (i = 0; i < aux->func_info_cnt; i++) {
2849 		if (aux->func_info[i].type_id != id)
2850 			continue;
2851 		ret = aux->func_info[i].insn_off;
2852 		/* Further func_info and subprog checks will also happen
2853 		 * later, so assume this is the right insn_off for now.
2854 		 */
2855 		if (!ret) {
2856 			verbose(env, "invalid exception callback insn_off in func_info: 0\n");
2857 			ret = -EINVAL;
2858 		}
2859 	}
2860 	if (!ret) {
2861 		verbose(env, "exception callback type id not found in func_info\n");
2862 		ret = -EINVAL;
2863 	}
2864 	return ret;
2865 }
2866 
2867 #define MAX_KFUNC_DESCS 256
2868 #define MAX_KFUNC_BTFS	256
2869 
2870 struct bpf_kfunc_desc {
2871 	struct btf_func_model func_model;
2872 	u32 func_id;
2873 	s32 imm;
2874 	u16 offset;
2875 	unsigned long addr;
2876 };
2877 
2878 struct bpf_kfunc_btf {
2879 	struct btf *btf;
2880 	struct module *module;
2881 	u16 offset;
2882 };
2883 
2884 struct bpf_kfunc_desc_tab {
2885 	/* Sorted by func_id (BTF ID) and offset (fd_array offset) during
2886 	 * verification. JITs do lookups by bpf_insn, where func_id may not be
2887 	 * available, therefore at the end of verification do_misc_fixups()
2888 	 * sorts this by imm and offset.
2889 	 */
2890 	struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
2891 	u32 nr_descs;
2892 };
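
/* For example, during verification descs[] could look like (in sort order):
 *	{ .func_id =  311, .offset = 2 }	// kfunc from module BTF at fd_array[2]
 *	{ .func_id = 1042, .offset = 0 }	// kfunc from vmlinux BTF
 *	{ .func_id = 5713, .offset = 0 }	// kfunc from vmlinux BTF
 * i.e. ordered by (func_id, offset).  After do_misc_fixups() the same entries
 * are re-sorted by (imm, offset), letting bpf_jit_find_kfunc_model() bsearch
 * with only insn->imm and insn->off.  BTF IDs above are illustrative.
 */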
2893 
2894 struct bpf_kfunc_btf_tab {
2895 	struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
2896 	u32 nr_descs;
2897 };
2898 
2899 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
2900 {
2901 	const struct bpf_kfunc_desc *d0 = a;
2902 	const struct bpf_kfunc_desc *d1 = b;
2903 
2904 	/* func_id is not greater than BTF_MAX_TYPE */
2905 	return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
2906 }
2907 
2908 static int kfunc_btf_cmp_by_off(const void *a, const void *b)
2909 {
2910 	const struct bpf_kfunc_btf *d0 = a;
2911 	const struct bpf_kfunc_btf *d1 = b;
2912 
2913 	return d0->offset - d1->offset;
2914 }
2915 
2916 static const struct bpf_kfunc_desc *
2917 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
2918 {
2919 	struct bpf_kfunc_desc desc = {
2920 		.func_id = func_id,
2921 		.offset = offset,
2922 	};
2923 	struct bpf_kfunc_desc_tab *tab;
2924 
2925 	tab = prog->aux->kfunc_tab;
2926 	return bsearch(&desc, tab->descs, tab->nr_descs,
2927 		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
2928 }
2929 
2930 int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
2931 		       u16 btf_fd_idx, u8 **func_addr)
2932 {
2933 	const struct bpf_kfunc_desc *desc;
2934 
2935 	desc = find_kfunc_desc(prog, func_id, btf_fd_idx);
2936 	if (!desc)
2937 		return -EFAULT;
2938 
2939 	*func_addr = (u8 *)desc->addr;
2940 	return 0;
2941 }
2942 
2943 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
2944 					 s16 offset)
2945 {
2946 	struct bpf_kfunc_btf kf_btf = { .offset = offset };
2947 	struct bpf_kfunc_btf_tab *tab;
2948 	struct bpf_kfunc_btf *b;
2949 	struct module *mod;
2950 	struct btf *btf;
2951 	int btf_fd;
2952 
2953 	tab = env->prog->aux->kfunc_btf_tab;
2954 	b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
2955 		    sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
2956 	if (!b) {
2957 		if (tab->nr_descs == MAX_KFUNC_BTFS) {
2958 			verbose(env, "too many different module BTFs\n");
2959 			return ERR_PTR(-E2BIG);
2960 		}
2961 
2962 		if (bpfptr_is_null(env->fd_array)) {
2963 			verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
2964 			return ERR_PTR(-EPROTO);
2965 		}
2966 
2967 		if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
2968 					    offset * sizeof(btf_fd),
2969 					    sizeof(btf_fd)))
2970 			return ERR_PTR(-EFAULT);
2971 
2972 		btf = btf_get_by_fd(btf_fd);
2973 		if (IS_ERR(btf)) {
2974 			verbose(env, "invalid module BTF fd specified\n");
2975 			return btf;
2976 		}
2977 
2978 		if (!btf_is_module(btf)) {
2979 			verbose(env, "BTF fd for kfunc is not a module BTF\n");
2980 			btf_put(btf);
2981 			return ERR_PTR(-EINVAL);
2982 		}
2983 
2984 		mod = btf_try_get_module(btf);
2985 		if (!mod) {
2986 			btf_put(btf);
2987 			return ERR_PTR(-ENXIO);
2988 		}
2989 
2990 		b = &tab->descs[tab->nr_descs++];
2991 		b->btf = btf;
2992 		b->module = mod;
2993 		b->offset = offset;
2994 
2995 		/* sort() reorders entries by value, so b may no longer point
2996 		 * to the right entry after this
2997 		 */
2998 		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2999 		     kfunc_btf_cmp_by_off, NULL);
3000 	} else {
3001 		btf = b->btf;
3002 	}
3003 
3004 	return btf;
3005 }
3006 
3007 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
3008 {
3009 	if (!tab)
3010 		return;
3011 
3012 	while (tab->nr_descs--) {
3013 		module_put(tab->descs[tab->nr_descs].module);
3014 		btf_put(tab->descs[tab->nr_descs].btf);
3015 	}
3016 	kfree(tab);
3017 }
3018 
3019 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
3020 {
3021 	if (offset) {
3022 		if (offset < 0) {
3023 			/* In the future, this can be allowed to increase limit
3024 			 * of fd index into fd_array, interpreted as u16.
3025 			 */
3026 			verbose(env, "negative offset disallowed for kernel module function call\n");
3027 			return ERR_PTR(-EINVAL);
3028 		}
3029 
3030 		return __find_kfunc_desc_btf(env, offset);
3031 	}
3032 	return btf_vmlinux ?: ERR_PTR(-ENOENT);
3033 }
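
/* For example, a kfunc call insn with insn->off == 0 resolves its BTF ID
 * against vmlinux BTF, while insn->off == 3 requires fd_array[3] (supplied at
 * prog load time) to hold a module BTF fd, which __find_kfunc_desc_btf()
 * looks up, pins via btf_try_get_module() and caches in kfunc_btf_tab.
 * Index value is illustrative.
 */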
3034 
3035 static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
3036 {
3037 	const struct btf_type *func, *func_proto;
3038 	struct bpf_kfunc_btf_tab *btf_tab;
3039 	struct bpf_kfunc_desc_tab *tab;
3040 	struct bpf_prog_aux *prog_aux;
3041 	struct bpf_kfunc_desc *desc;
3042 	const char *func_name;
3043 	struct btf *desc_btf;
3044 	unsigned long call_imm;
3045 	unsigned long addr;
3046 	int err;
3047 
3048 	prog_aux = env->prog->aux;
3049 	tab = prog_aux->kfunc_tab;
3050 	btf_tab = prog_aux->kfunc_btf_tab;
3051 	if (!tab) {
3052 		if (!btf_vmlinux) {
3053 			verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
3054 			return -ENOTSUPP;
3055 		}
3056 
3057 		if (!env->prog->jit_requested) {
3058 			verbose(env, "JIT is required for calling kernel function\n");
3059 			return -ENOTSUPP;
3060 		}
3061 
3062 		if (!bpf_jit_supports_kfunc_call()) {
3063 			verbose(env, "JIT does not support calling kernel function\n");
3064 			return -ENOTSUPP;
3065 		}
3066 
3067 		if (!env->prog->gpl_compatible) {
3068 			verbose(env, "cannot call kernel function from non-GPL compatible program\n");
3069 			return -EINVAL;
3070 		}
3071 
3072 		tab = kzalloc(sizeof(*tab), GFP_KERNEL);
3073 		if (!tab)
3074 			return -ENOMEM;
3075 		prog_aux->kfunc_tab = tab;
3076 	}
3077 
3078 	/* func_id == 0 is always invalid, but instead of returning an error, be
3079 	 * conservative and wait until the code elimination pass before returning
3080 	 * error, so that invalid calls that get pruned out can be in BPF programs
3081 	 * loaded from userspace.  It is also required that offset be untouched
3082 	 * for such calls.
3083 	 */
3084 	if (!func_id && !offset)
3085 		return 0;
3086 
3087 	if (!btf_tab && offset) {
3088 		btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
3089 		if (!btf_tab)
3090 			return -ENOMEM;
3091 		prog_aux->kfunc_btf_tab = btf_tab;
3092 	}
3093 
3094 	desc_btf = find_kfunc_desc_btf(env, offset);
3095 	if (IS_ERR(desc_btf)) {
3096 		verbose(env, "failed to find BTF for kernel function\n");
3097 		return PTR_ERR(desc_btf);
3098 	}
3099 
3100 	if (find_kfunc_desc(env->prog, func_id, offset))
3101 		return 0;
3102 
3103 	if (tab->nr_descs == MAX_KFUNC_DESCS) {
3104 		verbose(env, "too many different kernel function calls\n");
3105 		return -E2BIG;
3106 	}
3107 
3108 	func = btf_type_by_id(desc_btf, func_id);
3109 	if (!func || !btf_type_is_func(func)) {
3110 		verbose(env, "kernel btf_id %u is not a function\n",
3111 			func_id);
3112 		return -EINVAL;
3113 	}
3114 	func_proto = btf_type_by_id(desc_btf, func->type);
3115 	if (!func_proto || !btf_type_is_func_proto(func_proto)) {
3116 		verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
3117 			func_id);
3118 		return -EINVAL;
3119 	}
3120 
3121 	func_name = btf_name_by_offset(desc_btf, func->name_off);
3122 	addr = kallsyms_lookup_name(func_name);
3123 	if (!addr) {
3124 		verbose(env, "cannot find address for kernel function %s\n",
3125 			func_name);
3126 		return -EINVAL;
3127 	}
3128 	specialize_kfunc(env, func_id, offset, &addr);
3129 
3130 	if (bpf_jit_supports_far_kfunc_call()) {
3131 		call_imm = func_id;
3132 	} else {
3133 		call_imm = BPF_CALL_IMM(addr);
3134 		/* Check whether the relative offset overflows desc->imm */
3135 		if ((unsigned long)(s32)call_imm != call_imm) {
3136 			verbose(env, "address of kernel function %s is out of range\n",
3137 				func_name);
3138 			return -EINVAL;
3139 		}
3140 	}
3141 
3142 	if (bpf_dev_bound_kfunc_id(func_id)) {
3143 		err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
3144 		if (err)
3145 			return err;
3146 	}
3147 
3148 	desc = &tab->descs[tab->nr_descs++];
3149 	desc->func_id = func_id;
3150 	desc->imm = call_imm;
3151 	desc->offset = offset;
3152 	desc->addr = addr;
3153 	err = btf_distill_func_proto(&env->log, desc_btf,
3154 				     func_proto, func_name,
3155 				     &desc->func_model);
3156 	if (!err)
3157 		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
3158 		     kfunc_desc_cmp_by_id_off, NULL);
3159 	return err;
3160 }
3161 
3162 static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
3163 {
3164 	const struct bpf_kfunc_desc *d0 = a;
3165 	const struct bpf_kfunc_desc *d1 = b;
3166 
3167 	if (d0->imm != d1->imm)
3168 		return d0->imm < d1->imm ? -1 : 1;
3169 	if (d0->offset != d1->offset)
3170 		return d0->offset < d1->offset ? -1 : 1;
3171 	return 0;
3172 }
3173 
3174 static void sort_kfunc_descs_by_imm_off(struct bpf_prog *prog)
3175 {
3176 	struct bpf_kfunc_desc_tab *tab;
3177 
3178 	tab = prog->aux->kfunc_tab;
3179 	if (!tab)
3180 		return;
3181 
3182 	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
3183 	     kfunc_desc_cmp_by_imm_off, NULL);
3184 }
3185 
3186 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
3187 {
3188 	return !!prog->aux->kfunc_tab;
3189 }
3190 
3191 const struct btf_func_model *
3192 bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
3193 			 const struct bpf_insn *insn)
3194 {
3195 	const struct bpf_kfunc_desc desc = {
3196 		.imm = insn->imm,
3197 		.offset = insn->off,
3198 	};
3199 	const struct bpf_kfunc_desc *res;
3200 	struct bpf_kfunc_desc_tab *tab;
3201 
3202 	tab = prog->aux->kfunc_tab;
3203 	res = bsearch(&desc, tab->descs, tab->nr_descs,
3204 		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);
3205 
3206 	return res ? &res->func_model : NULL;
3207 }
3208 
3209 static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
3210 {
3211 	struct bpf_subprog_info *subprog = env->subprog_info;
3212 	int i, ret, insn_cnt = env->prog->len, ex_cb_insn;
3213 	struct bpf_insn *insn = env->prog->insnsi;
3214 
3215 	/* Add entry function. */
3216 	ret = add_subprog(env, 0);
3217 	if (ret)
3218 		return ret;
3219 
3220 	for (i = 0; i < insn_cnt; i++, insn++) {
3221 		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
3222 		    !bpf_pseudo_kfunc_call(insn))
3223 			continue;
3224 
3225 		if (!env->bpf_capable) {
3226 			verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
3227 			return -EPERM;
3228 		}
3229 
3230 		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
3231 			ret = add_subprog(env, i + insn->imm + 1);
3232 		else
3233 			ret = add_kfunc_call(env, insn->imm, insn->off);
3234 
3235 		if (ret < 0)
3236 			return ret;
3237 	}
3238 
3239 	ret = bpf_find_exception_callback_insn_off(env);
3240 	if (ret < 0)
3241 		return ret;
3242 	ex_cb_insn = ret;
3243 
3244 	/* If ex_cb_insn > 0, this means that the main program has a subprog
3245 	 * marked using BTF decl tag to serve as the exception callback.
3246 	 */
3247 	if (ex_cb_insn) {
3248 		ret = add_subprog(env, ex_cb_insn);
3249 		if (ret < 0)
3250 			return ret;
3251 		for (i = 1; i < env->subprog_cnt; i++) {
3252 			if (env->subprog_info[i].start != ex_cb_insn)
3253 				continue;
3254 			env->exception_callback_subprog = i;
3255 			mark_subprog_exc_cb(env, i);
3256 			break;
3257 		}
3258 	}
3259 
3260 	/* Add a fake 'exit' subprog which could simplify subprog iteration
3261 	 * logic. 'subprog_cnt' should not be increased.
3262 	 */
3263 	subprog[env->subprog_cnt].start = insn_cnt;
3264 
3265 	if (env->log.level & BPF_LOG_LEVEL2)
3266 		for (i = 0; i < env->subprog_cnt; i++)
3267 			verbose(env, "func#%d @%d\n", i, subprog[i].start);
3268 
3269 	return 0;
3270 }
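
/* Example subprog_info layout for a 20-insn program whose only bpf-to-bpf
 * call targets insn 12 (illustrative numbers):
 *	subprog_info[0].start = 0	main
 *	subprog_info[1].start = 12	callee
 *	subprog_info[2].start = 20	fake 'exit' subprog, subprog_cnt stays 2
 * so subprog i covers insns [start, subprog_info[i + 1].start).
 */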
3271 
3272 static int check_subprogs(struct bpf_verifier_env *env)
3273 {
3274 	int i, subprog_start, subprog_end, off, cur_subprog = 0;
3275 	struct bpf_subprog_info *subprog = env->subprog_info;
3276 	struct bpf_insn *insn = env->prog->insnsi;
3277 	int insn_cnt = env->prog->len;
3278 
3279 	/* now check that all jumps are within the same subprog */
3280 	subprog_start = subprog[cur_subprog].start;
3281 	subprog_end = subprog[cur_subprog + 1].start;
3282 	for (i = 0; i < insn_cnt; i++) {
3283 		u8 code = insn[i].code;
3284 
3285 		if (code == (BPF_JMP | BPF_CALL) &&
3286 		    insn[i].src_reg == 0 &&
3287 		    insn[i].imm == BPF_FUNC_tail_call) {
3288 			subprog[cur_subprog].has_tail_call = true;
3289 			subprog[cur_subprog].tail_call_reachable = true;
3290 		}
3291 		if (BPF_CLASS(code) == BPF_LD &&
3292 		    (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
3293 			subprog[cur_subprog].has_ld_abs = true;
3294 		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
3295 			goto next;
3296 		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
3297 			goto next;
3298 		if (code == (BPF_JMP32 | BPF_JA))
3299 			off = i + insn[i].imm + 1;
3300 		else
3301 			off = i + insn[i].off + 1;
3302 		if (off < subprog_start || off >= subprog_end) {
3303 			verbose(env, "jump out of range from insn %d to %d\n", i, off);
3304 			return -EINVAL;
3305 		}
3306 next:
3307 		if (i == subprog_end - 1) {
3308 			/* to avoid fall-through from one subprog into another
3309 			 * the last insn of the subprog should be either exit
3310 			 * or unconditional jump back or bpf_throw call
3311 			 */
3312 			if (code != (BPF_JMP | BPF_EXIT) &&
3313 			    code != (BPF_JMP32 | BPF_JA) &&
3314 			    code != (BPF_JMP | BPF_JA)) {
3315 				verbose(env, "last insn is not an exit or jmp\n");
3316 				return -EINVAL;
3317 			}
3318 			subprog_start = subprog_end;
3319 			cur_subprog++;
3320 			if (cur_subprog < env->subprog_cnt)
3321 				subprog_end = subprog[cur_subprog + 1].start;
3322 		}
3323 	}
3324 	return 0;
3325 }
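
/* Example: with subprog starts {0, 12, 20} as above, a jump at insn 5 whose
 * target resolves to insn 14 would cross from subprog 0 into subprog 1 and is
 * rejected with "jump out of range from insn 5 to 14".  Illustrative numbers.
 */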
3326 
3327 /* Parentage chain of this register (or stack slot) should take care of all
3328  * issues like callee-saved registers, stack slot allocation time, etc.
3329  */
3330 static int mark_reg_read(struct bpf_verifier_env *env,
3331 			 const struct bpf_reg_state *state,
3332 			 struct bpf_reg_state *parent, u8 flag)
3333 {
3334 	bool writes = parent == state->parent; /* Observe write marks */
3335 	int cnt = 0;
3336 
3337 	while (parent) {
3338 		/* if read wasn't screened by an earlier write ... */
3339 		if (writes && state->live & REG_LIVE_WRITTEN)
3340 			break;
3341 		if (parent->live & REG_LIVE_DONE) {
3342 			verbose(env, "verifier BUG type %s var_off %lld off %d\n",
3343 				reg_type_str(env, parent->type),
3344 				parent->var_off.value, parent->off);
3345 			return -EFAULT;
3346 		}
3347 		/* The first condition is more likely to be true than the
3348 		 * second, so check it first.
3349 		 */
3350 		if ((parent->live & REG_LIVE_READ) == flag ||
3351 		    parent->live & REG_LIVE_READ64)
3352 			/* The parentage chain never changes and
3353 			 * this parent was already marked as LIVE_READ.
3354 			 * There is no need to keep walking the chain again and
3355 			 * keep re-marking all parents as LIVE_READ.
3356 			 * This case happens when the same register is read
3357 			 * multiple times without writes into it in-between.
3358 			 * Also, if parent has the stronger REG_LIVE_READ64 set,
3359 			 * then no need to set the weak REG_LIVE_READ32.
3360 			 */
3361 			break;
3362 		/* ... then we depend on parent's value */
3363 		parent->live |= flag;
3364 		/* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
3365 		if (flag == REG_LIVE_READ64)
3366 			parent->live &= ~REG_LIVE_READ32;
3367 		state = parent;
3368 		parent = state->parent;
3369 		writes = true;
3370 		cnt++;
3371 	}
3372 
3373 	if (env->longest_mark_read_walk < cnt)
3374 		env->longest_mark_read_walk = cnt;
3375 	return 0;
3376 }
3377 
3378 static int mark_stack_slot_obj_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
3379 				    int spi, int nr_slots)
3380 {
3381 	struct bpf_func_state *state = func(env, reg);
3382 	int err, i;
3383 
3384 	for (i = 0; i < nr_slots; i++) {
3385 		struct bpf_reg_state *st = &state->stack[spi - i].spilled_ptr;
3386 
3387 		err = mark_reg_read(env, st, st->parent, REG_LIVE_READ64);
3388 		if (err)
3389 			return err;
3390 
3391 		mark_stack_slot_scratched(env, spi - i);
3392 	}
3393 	return 0;
3394 }
3395 
3396 static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
3397 {
3398 	int spi;
3399 
3400 	/* For CONST_PTR_TO_DYNPTR, it must have already been done by
3401 	 * check_reg_arg in check_helper_call and mark_btf_func_reg_size in
3402 	 * check_kfunc_call.
3403 	 */
3404 	if (reg->type == CONST_PTR_TO_DYNPTR)
3405 		return 0;
3406 	spi = dynptr_get_spi(env, reg);
3407 	if (spi < 0)
3408 		return spi;
3409 	/* Caller ensures dynptr is valid and initialized, which means spi is in
3410 	 * bounds and spi is the first dynptr slot. Simply mark stack slot as
3411 	 * read.
3412 	 */
3413 	return mark_stack_slot_obj_read(env, reg, spi, BPF_DYNPTR_NR_SLOTS);
3414 }
3415 
3416 static int mark_iter_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
3417 			  int spi, int nr_slots)
3418 {
3419 	return mark_stack_slot_obj_read(env, reg, spi, nr_slots);
3420 }
3421 
3422 static int mark_irq_flag_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
3423 {
3424 	int spi;
3425 
3426 	spi = irq_flag_get_spi(env, reg);
3427 	if (spi < 0)
3428 		return spi;
3429 	return mark_stack_slot_obj_read(env, reg, spi, 1);
3430 }
3431 
3432 /* This function is supposed to be used by the following 32-bit optimization
3433  * code only. It returns TRUE if the source or destination register operates
3434  * on 64 bits, otherwise FALSE.
3435  */
3436 static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
3437 		     u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
3438 {
3439 	u8 code, class, op;
3440 
3441 	code = insn->code;
3442 	class = BPF_CLASS(code);
3443 	op = BPF_OP(code);
3444 	if (class == BPF_JMP) {
3445 		/* BPF_EXIT for "main" will reach here. Return TRUE
3446 		 * conservatively.
3447 		 */
3448 		if (op == BPF_EXIT)
3449 			return true;
3450 		if (op == BPF_CALL) {
3451 			/* BPF to BPF call will reach here because of marking
3452 			 * caller saved clobber with DST_OP_NO_MARK for which we
3453 			 * don't care about the register def because they are
3454 			 * already marked as NOT_INIT.
3455 			 */
3456 			if (insn->src_reg == BPF_PSEUDO_CALL)
3457 				return false;
3458 			/* Helper call will reach here because of arg type
3459 			 * check, conservatively return TRUE.
3460 			 */
3461 			if (t == SRC_OP)
3462 				return true;
3463 
3464 			return false;
3465 		}
3466 	}
3467 
3468 	if (class == BPF_ALU64 && op == BPF_END && (insn->imm == 16 || insn->imm == 32))
3469 		return false;
3470 
3471 	if (class == BPF_ALU64 || class == BPF_JMP ||
3472 	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
3473 		return true;
3474 
3475 	if (class == BPF_ALU || class == BPF_JMP32)
3476 		return false;
3477 
3478 	if (class == BPF_LDX) {
3479 		if (t != SRC_OP)
3480 			return BPF_SIZE(code) == BPF_DW || BPF_MODE(code) == BPF_MEMSX;
3481 		/* LDX source must be ptr. */
3482 		return true;
3483 	}
3484 
3485 	if (class == BPF_STX) {
3486 		/* BPF_STX (including atomic variants) has multiple source
3487 		 * operands, one of which is a ptr. Check whether the caller is
3488 		 * asking about it.
3489 		 */
3490 		if (t == SRC_OP && reg->type != SCALAR_VALUE)
3491 			return true;
3492 		return BPF_SIZE(code) == BPF_DW;
3493 	}
3494 
3495 	if (class == BPF_LD) {
3496 		u8 mode = BPF_MODE(code);
3497 
3498 		/* LD_IMM64 */
3499 		if (mode == BPF_IMM)
3500 			return true;
3501 
3502 		/* Both LD_IND and LD_ABS return 32-bit data. */
3503 		if (t != SRC_OP)
3504 			return  false;
3505 
3506 		/* Implicit ctx ptr. */
3507 		if (regno == BPF_REG_6)
3508 			return true;
3509 
3510 		/* Explicit source could be any width. */
3511 		return true;
3512 	}
3513 
3514 	if (class == BPF_ST)
3515 		/* The only source register for BPF_ST is a ptr. */
3516 		return true;
3517 
3518 	/* Conservatively return true by default. */
3519 	return true;
3520 }
3521 
3522 /* Return the regno defined by the insn, or -1. */
3523 static int insn_def_regno(const struct bpf_insn *insn)
3524 {
3525 	switch (BPF_CLASS(insn->code)) {
3526 	case BPF_JMP:
3527 	case BPF_JMP32:
3528 	case BPF_ST:
3529 		return -1;
3530 	case BPF_STX:
3531 		if ((BPF_MODE(insn->code) == BPF_ATOMIC ||
3532 		     BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) &&
3533 		    (insn->imm & BPF_FETCH)) {
3534 			if (insn->imm == BPF_CMPXCHG)
3535 				return BPF_REG_0;
3536 			else
3537 				return insn->src_reg;
3538 		} else {
3539 			return -1;
3540 		}
3541 	default:
3542 		return insn->dst_reg;
3543 	}
3544 }
3545 
3546 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
3547 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
3548 {
3549 	int dst_reg = insn_def_regno(insn);
3550 
3551 	if (dst_reg == -1)
3552 		return false;
3553 
3554 	return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
3555 }
3556 
3557 static void mark_insn_zext(struct bpf_verifier_env *env,
3558 			   struct bpf_reg_state *reg)
3559 {
3560 	s32 def_idx = reg->subreg_def;
3561 
3562 	if (def_idx == DEF_NOT_SUBREG)
3563 		return;
3564 
3565 	env->insn_aux_data[def_idx - 1].zext_dst = true;
3566 	/* The dst will be zero extended, so won't be sub-register anymore. */
3567 	reg->subreg_def = DEF_NOT_SUBREG;
3568 }
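
/* Example: a 32-bit load "r2 = *(u32 *)(r1 + 0)" at insn 7 records
 * r2.subreg_def = 8 in __check_reg_arg().  If r2 is later read as a full
 * 64-bit value, mark_insn_zext() sets insn_aux_data[7].zext_dst so that an
 * explicit zero-extension can be patched in after insn 7 for JITs that need
 * it.  Instruction index is illustrative.
 */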
3569 
3570 static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
3571 			   enum reg_arg_type t)
3572 {
3573 	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
3574 	struct bpf_reg_state *reg;
3575 	bool rw64;
3576 
3577 	if (regno >= MAX_BPF_REG) {
3578 		verbose(env, "R%d is invalid\n", regno);
3579 		return -EINVAL;
3580 	}
3581 
3582 	mark_reg_scratched(env, regno);
3583 
3584 	reg = &regs[regno];
3585 	rw64 = is_reg64(env, insn, regno, reg, t);
3586 	if (t == SRC_OP) {
3587 		/* check whether register used as source operand can be read */
3588 		if (reg->type == NOT_INIT) {
3589 			verbose(env, "R%d !read_ok\n", regno);
3590 			return -EACCES;
3591 		}
3592 		/* We don't need to worry about FP liveness because it's read-only */
3593 		if (regno == BPF_REG_FP)
3594 			return 0;
3595 
3596 		if (rw64)
3597 			mark_insn_zext(env, reg);
3598 
3599 		return mark_reg_read(env, reg, reg->parent,
3600 				     rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
3601 	} else {
3602 		/* check whether register used as dest operand can be written to */
3603 		if (regno == BPF_REG_FP) {
3604 			verbose(env, "frame pointer is read only\n");
3605 			return -EACCES;
3606 		}
3607 		reg->live |= REG_LIVE_WRITTEN;
3608 		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
3609 		if (t == DST_OP)
3610 			mark_reg_unknown(env, regs, regno);
3611 	}
3612 	return 0;
3613 }
3614 
3615 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
3616 			 enum reg_arg_type t)
3617 {
3618 	struct bpf_verifier_state *vstate = env->cur_state;
3619 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3620 
3621 	return __check_reg_arg(env, state->regs, regno, t);
3622 }
3623 
3624 static int insn_stack_access_flags(int frameno, int spi)
3625 {
3626 	return INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno;
3627 }
3628 
3629 static int insn_stack_access_spi(int insn_flags)
3630 {
3631 	return (insn_flags >> INSN_F_SPI_SHIFT) & INSN_F_SPI_MASK;
3632 }
3633 
3634 static int insn_stack_access_frameno(int insn_flags)
3635 {
3636 	return insn_flags & INSN_F_FRAMENO_MASK;
3637 }
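
/* Example: an 8-byte spill to fp-16 (spi == 1) in call frame 2 is recorded
 * in the insn history as INSN_F_STACK_ACCESS | (1 << INSN_F_SPI_SHIFT) | 2,
 * and the two accessors above recover spi and frameno from those flags
 * during precision backtracking.
 */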
3638 
3639 static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
3640 {
3641 	env->insn_aux_data[idx].jmp_point = true;
3642 }
3643 
3644 static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
3645 {
3646 	return env->insn_aux_data[insn_idx].jmp_point;
3647 }
3648 
3649 #define LR_FRAMENO_BITS	3
3650 #define LR_SPI_BITS	6
3651 #define LR_ENTRY_BITS	(LR_SPI_BITS + LR_FRAMENO_BITS + 1)
3652 #define LR_SIZE_BITS	4
3653 #define LR_FRAMENO_MASK	((1ull << LR_FRAMENO_BITS) - 1)
3654 #define LR_SPI_MASK	((1ull << LR_SPI_BITS)     - 1)
3655 #define LR_SIZE_MASK	((1ull << LR_SIZE_BITS)    - 1)
3656 #define LR_SPI_OFF	LR_FRAMENO_BITS
3657 #define LR_IS_REG_OFF	(LR_SPI_BITS + LR_FRAMENO_BITS)
3658 #define LINKED_REGS_MAX	6
3659 
3660 struct linked_reg {
3661 	u8 frameno;
3662 	union {
3663 		u8 spi;
3664 		u8 regno;
3665 	};
3666 	bool is_reg;
3667 };
3668 
3669 struct linked_regs {
3670 	int cnt;
3671 	struct linked_reg entries[LINKED_REGS_MAX];
3672 };
3673 
3674 static struct linked_reg *linked_regs_push(struct linked_regs *s)
3675 {
3676 	if (s->cnt < LINKED_REGS_MAX)
3677 		return &s->entries[s->cnt++];
3678 
3679 	return NULL;
3680 }
3681 
3682 /* Use u64 as a vector of 6 10-bit values, with the low 4 bits tracking the
3683  * number of elements currently in the stack.
3684  * Pack one history entry for linked registers as 10 bits in the following format:
3685  * - 3-bits frameno
3686  * - 6-bits spi_or_reg
3687  * - 1-bit  is_reg
3688  */
3689 static u64 linked_regs_pack(struct linked_regs *s)
3690 {
3691 	u64 val = 0;
3692 	int i;
3693 
3694 	for (i = 0; i < s->cnt; ++i) {
3695 		struct linked_reg *e = &s->entries[i];
3696 		u64 tmp = 0;
3697 
3698 		tmp |= e->frameno;
3699 		tmp |= e->spi << LR_SPI_OFF;
3700 		tmp |= (e->is_reg ? 1 : 0) << LR_IS_REG_OFF;
3701 
3702 		val <<= LR_ENTRY_BITS;
3703 		val |= tmp;
3704 	}
3705 	val <<= LR_SIZE_BITS;
3706 	val |= s->cnt;
3707 	return val;
3708 }
3709 
3710 static void linked_regs_unpack(u64 val, struct linked_regs *s)
3711 {
3712 	int i;
3713 
3714 	s->cnt = val & LR_SIZE_MASK;
3715 	val >>= LR_SIZE_BITS;
3716 
3717 	for (i = 0; i < s->cnt; ++i) {
3718 		struct linked_reg *e = &s->entries[i];
3719 
3720 		e->frameno =  val & LR_FRAMENO_MASK;
3721 		e->spi     = (val >> LR_SPI_OFF) & LR_SPI_MASK;
3722 		e->is_reg  = (val >> LR_IS_REG_OFF) & 0x1;
3723 		val >>= LR_ENTRY_BITS;
3724 	}
3725 }
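
/* Example bit layout produced by linked_regs_pack() for cnt == 2, from the
 * least significant bit up:
 *	bits  [3:0]	- cnt (here 2)
 *	bits [13:4]	- one entry: 3-bit frameno, 6-bit spi/regno, 1-bit is_reg
 *	bits [23:14]	- the other entry, same 10-bit layout
 * Entry order within the word is not significant; linked_regs_unpack()
 * recovers the same set of (frameno, spi/regno, is_reg) triples.
 */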
3726 
3727 /* for any branch, call, exit record the history of jmps in the given state */
3728 static int push_insn_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
3729 			     int insn_flags, u64 linked_regs)
3730 {
3731 	struct bpf_insn_hist_entry *p;
3732 	size_t alloc_size;
3733 
3734 	/* combine instruction flags if we already recorded this instruction */
3735 	if (env->cur_hist_ent) {
3736 		/* atomic instructions push insn_flags twice, for READ and
3737 		 * WRITE sides, but they should agree on stack slot
3738 		 */
3739 		WARN_ONCE((env->cur_hist_ent->flags & insn_flags) &&
3740 			  (env->cur_hist_ent->flags & insn_flags) != insn_flags,
3741 			  "verifier insn history bug: insn_idx %d cur flags %x new flags %x\n",
3742 			  env->insn_idx, env->cur_hist_ent->flags, insn_flags);
3743 		env->cur_hist_ent->flags |= insn_flags;
3744 		WARN_ONCE(env->cur_hist_ent->linked_regs != 0,
3745 			  "verifier insn history bug: insn_idx %d linked_regs != 0: %#llx\n",
3746 			  env->insn_idx, env->cur_hist_ent->linked_regs);
3747 		env->cur_hist_ent->linked_regs = linked_regs;
3748 		return 0;
3749 	}
3750 
3751 	if (cur->insn_hist_end + 1 > env->insn_hist_cap) {
3752 		alloc_size = size_mul(cur->insn_hist_end + 1, sizeof(*p));
3753 		p = kvrealloc(env->insn_hist, alloc_size, GFP_USER);
3754 		if (!p)
3755 			return -ENOMEM;
3756 		env->insn_hist = p;
3757 		env->insn_hist_cap = alloc_size / sizeof(*p);
3758 	}
3759 
3760 	p = &env->insn_hist[cur->insn_hist_end];
3761 	p->idx = env->insn_idx;
3762 	p->prev_idx = env->prev_insn_idx;
3763 	p->flags = insn_flags;
3764 	p->linked_regs = linked_regs;
3765 
3766 	cur->insn_hist_end++;
3767 	env->cur_hist_ent = p;
3768 
3769 	return 0;
3770 }
3771 
3772 static struct bpf_insn_hist_entry *get_insn_hist_entry(struct bpf_verifier_env *env,
3773 						       u32 hist_start, u32 hist_end, int insn_idx)
3774 {
3775 	if (hist_end > hist_start && env->insn_hist[hist_end - 1].idx == insn_idx)
3776 		return &env->insn_hist[hist_end - 1];
3777 	return NULL;
3778 }
3779 
3780 /* Backtrack one insn at a time. If idx is not at the top of recorded
3781  * history then previous instruction came from straight line execution.
3782  * Return -ENOENT if we exhausted all instructions within given state.
3783  *
3784  * It's legal to have a bit of a looping with the same starting and ending
3785  * insn index within the same state, e.g.: 3->4->5->3, so just because current
3786  * instruction index is the same as state's first_idx doesn't mean we are
3787  * done. If there is still some jump history left, we should keep going. We
3788  * need to take into account that we might have a jump history between given
3789  * state's parent and itself, due to checkpointing. In this case, we'll have
3790  * history entry recording a jump from last instruction of parent state and
3791  * first instruction of given state.
3792  */
3793 static int get_prev_insn_idx(const struct bpf_verifier_env *env,
3794 			     struct bpf_verifier_state *st,
3795 			     int insn_idx, u32 hist_start, u32 *hist_endp)
3796 {
3797 	u32 hist_end = *hist_endp;
3798 	u32 cnt = hist_end - hist_start;
3799 
3800 	if (insn_idx == st->first_insn_idx) {
3801 		if (cnt == 0)
3802 			return -ENOENT;
3803 		if (cnt == 1 && env->insn_hist[hist_start].idx == insn_idx)
3804 			return -ENOENT;
3805 	}
3806 
3807 	if (cnt && env->insn_hist[hist_end - 1].idx == insn_idx) {
3808 		(*hist_endp)--;
3809 		return env->insn_hist[hist_end - 1].prev_idx;
3810 	} else {
3811 		return insn_idx - 1;
3812 	}
3813 }
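
/* Example: with a single history entry { .idx = 8, .prev_idx = 3 } and
 * insn_idx == 8, this consumes the entry and returns 3 (a jump led into
 * insn 8); for insn_idx == 7 with no matching entry it simply returns 6,
 * i.e. straight-line execution.  Indices are illustrative.
 */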
3814 
3815 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
3816 {
3817 	const struct btf_type *func;
3818 	struct btf *desc_btf;
3819 
3820 	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
3821 		return NULL;
3822 
3823 	desc_btf = find_kfunc_desc_btf(data, insn->off);
3824 	if (IS_ERR(desc_btf))
3825 		return "<error>";
3826 
3827 	func = btf_type_by_id(desc_btf, insn->imm);
3828 	return btf_name_by_offset(desc_btf, func->name_off);
3829 }
3830 
3831 static inline void bt_init(struct backtrack_state *bt, u32 frame)
3832 {
3833 	bt->frame = frame;
3834 }
3835 
3836 static inline void bt_reset(struct backtrack_state *bt)
3837 {
3838 	struct bpf_verifier_env *env = bt->env;
3839 
3840 	memset(bt, 0, sizeof(*bt));
3841 	bt->env = env;
3842 }
3843 
3844 static inline u32 bt_empty(struct backtrack_state *bt)
3845 {
3846 	u64 mask = 0;
3847 	int i;
3848 
3849 	for (i = 0; i <= bt->frame; i++)
3850 		mask |= bt->reg_masks[i] | bt->stack_masks[i];
3851 
3852 	return mask == 0;
3853 }
3854 
3855 static inline int bt_subprog_enter(struct backtrack_state *bt)
3856 {
3857 	if (bt->frame == MAX_CALL_FRAMES - 1) {
3858 		verbose(bt->env, "BUG subprog enter from frame %d\n", bt->frame);
3859 		WARN_ONCE(1, "verifier backtracking bug");
3860 		return -EFAULT;
3861 	}
3862 	bt->frame++;
3863 	return 0;
3864 }
3865 
3866 static inline int bt_subprog_exit(struct backtrack_state *bt)
3867 {
3868 	if (bt->frame == 0) {
3869 		verbose(bt->env, "BUG subprog exit from frame 0\n");
3870 		WARN_ONCE(1, "verifier backtracking bug");
3871 		return -EFAULT;
3872 	}
3873 	bt->frame--;
3874 	return 0;
3875 }
3876 
3877 static inline void bt_set_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
3878 {
3879 	bt->reg_masks[frame] |= 1 << reg;
3880 }
3881 
3882 static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
3883 {
3884 	bt->reg_masks[frame] &= ~(1 << reg);
3885 }
3886 
3887 static inline void bt_set_reg(struct backtrack_state *bt, u32 reg)
3888 {
3889 	bt_set_frame_reg(bt, bt->frame, reg);
3890 }
3891 
3892 static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg)
3893 {
3894 	bt_clear_frame_reg(bt, bt->frame, reg);
3895 }
3896 
3897 static inline void bt_set_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
3898 {
3899 	bt->stack_masks[frame] |= 1ull << slot;
3900 }
3901 
3902 static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
3903 {
3904 	bt->stack_masks[frame] &= ~(1ull << slot);
3905 }
3906 
3907 static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame)
3908 {
3909 	return bt->reg_masks[frame];
3910 }
3911 
3912 static inline u32 bt_reg_mask(struct backtrack_state *bt)
3913 {
3914 	return bt->reg_masks[bt->frame];
3915 }
3916 
3917 static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame)
3918 {
3919 	return bt->stack_masks[frame];
3920 }
3921 
3922 static inline u64 bt_stack_mask(struct backtrack_state *bt)
3923 {
3924 	return bt->stack_masks[bt->frame];
3925 }
3926 
3927 static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg)
3928 {
3929 	return bt->reg_masks[bt->frame] & (1 << reg);
3930 }
3931 
3932 static inline bool bt_is_frame_reg_set(struct backtrack_state *bt, u32 frame, u32 reg)
3933 {
3934 	return bt->reg_masks[frame] & (1 << reg);
3935 }
3936 
3937 static inline bool bt_is_frame_slot_set(struct backtrack_state *bt, u32 frame, u32 slot)
3938 {
3939 	return bt->stack_masks[frame] & (1ull << slot);
3940 }
3941 
3942 /* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */
3943 static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask)
3944 {
3945 	DECLARE_BITMAP(mask, 64);
3946 	bool first = true;
3947 	int i, n;
3948 
3949 	buf[0] = '\0';
3950 
3951 	bitmap_from_u64(mask, reg_mask);
3952 	for_each_set_bit(i, mask, 32) {
3953 		n = snprintf(buf, buf_sz, "%sr%d", first ? "" : ",", i);
3954 		first = false;
3955 		buf += n;
3956 		buf_sz -= n;
3957 		if (buf_sz < 0)
3958 			break;
3959 	}
3960 }
3961 /* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */
3962 static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
3963 {
3964 	DECLARE_BITMAP(mask, 64);
3965 	bool first = true;
3966 	int i, n;
3967 
3968 	buf[0] = '\0';
3969 
3970 	bitmap_from_u64(mask, stack_mask);
3971 	for_each_set_bit(i, mask, 64) {
3972 		n = snprintf(buf, buf_sz, "%s%d", first ? "" : ",", -(i + 1) * 8);
3973 		first = false;
3974 		buf += n;
3975 		buf_sz -= n;
3976 		if (buf_sz < 0)
3977 			break;
3978 	}
3979 }
3980 
3981 /* If any register R in hist->linked_regs is marked as precise in bt,
3982  * do bt_set_frame_{reg,slot}(bt, R) for all registers in hist->linked_regs.
3983  */
3984 static void bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_insn_hist_entry *hist)
3985 {
3986 	struct linked_regs linked_regs;
3987 	bool some_precise = false;
3988 	int i;
3989 
3990 	if (!hist || hist->linked_regs == 0)
3991 		return;
3992 
3993 	linked_regs_unpack(hist->linked_regs, &linked_regs);
3994 	for (i = 0; i < linked_regs.cnt; ++i) {
3995 		struct linked_reg *e = &linked_regs.entries[i];
3996 
3997 		if ((e->is_reg && bt_is_frame_reg_set(bt, e->frameno, e->regno)) ||
3998 		    (!e->is_reg && bt_is_frame_slot_set(bt, e->frameno, e->spi))) {
3999 			some_precise = true;
4000 			break;
4001 		}
4002 	}
4003 
4004 	if (!some_precise)
4005 		return;
4006 
4007 	for (i = 0; i < linked_regs.cnt; ++i) {
4008 		struct linked_reg *e = &linked_regs.entries[i];
4009 
4010 		if (e->is_reg)
4011 			bt_set_frame_reg(bt, e->frameno, e->regno);
4012 		else
4013 			bt_set_frame_slot(bt, e->frameno, e->spi);
4014 	}
4015 }
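
/* Example: after "if r2 > 10 goto ..." where r2, r7 and a spill at fp-8 all
 * hold copies of the same scalar (same id), the history entry for that insn
 * records them as linked.  If backtracking later needs any one of them to be
 * precise there, bt_sync_linked_regs() marks all of them, keeping the shared
 * range refinement sound.  Register/slot choice is illustrative.
 */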
4016 
4017 static bool calls_callback(struct bpf_verifier_env *env, int insn_idx);
4018 
4019 /* For given verifier state backtrack_insn() is called from the last insn to
4020  * the first insn. Its purpose is to compute a bitmask of registers and
4021  * stack slots that needs precision in the parent verifier state.
4022  *
4023  * @idx is an index of the instruction we are currently processing;
4024  * @subseq_idx is an index of the subsequent instruction that:
4025  *   - *would be* executed next, if jump history is viewed in forward order;
4026  *   - *was* processed previously during backtracking.
4027  */
4028 static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
4029 			  struct bpf_insn_hist_entry *hist, struct backtrack_state *bt)
4030 {
4031 	const struct bpf_insn_cbs cbs = {
4032 		.cb_call	= disasm_kfunc_name,
4033 		.cb_print	= verbose,
4034 		.private_data	= env,
4035 	};
4036 	struct bpf_insn *insn = env->prog->insnsi + idx;
4037 	u8 class = BPF_CLASS(insn->code);
4038 	u8 opcode = BPF_OP(insn->code);
4039 	u8 mode = BPF_MODE(insn->code);
4040 	u32 dreg = insn->dst_reg;
4041 	u32 sreg = insn->src_reg;
4042 	u32 spi, i, fr;
4043 
4044 	if (insn->code == 0)
4045 		return 0;
4046 	if (env->log.level & BPF_LOG_LEVEL2) {
4047 		fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt));
4048 		verbose(env, "mark_precise: frame%d: regs=%s ",
4049 			bt->frame, env->tmp_str_buf);
4050 		fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
4051 		verbose(env, "stack=%s before ", env->tmp_str_buf);
4052 		verbose(env, "%d: ", idx);
4053 		print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
4054 	}
4055 
4056 	/* If there is a history record that some registers gained range at this insn,
4057 	 * propagate precision marks to those registers, so that bt_is_reg_set()
4058 	 * accounts for these registers.
4059 	 */
4060 	bt_sync_linked_regs(bt, hist);
4061 
4062 	if (class == BPF_ALU || class == BPF_ALU64) {
4063 		if (!bt_is_reg_set(bt, dreg))
4064 			return 0;
4065 		if (opcode == BPF_END || opcode == BPF_NEG) {
4066 			/* sreg is reserved and unused
4067 			 * dreg still need precision before this insn
4068 			 */
4069 			return 0;
4070 		} else if (opcode == BPF_MOV) {
4071 			if (BPF_SRC(insn->code) == BPF_X) {
4072 				/* dreg = sreg or dreg = (s8, s16, s32)sreg
4073 				 * dreg needs precision after this insn
4074 				 * sreg needs precision before this insn
4075 				 */
4076 				bt_clear_reg(bt, dreg);
4077 				if (sreg != BPF_REG_FP)
4078 					bt_set_reg(bt, sreg);
4079 			} else {
4080 				/* dreg = K
4081 				 * dreg needs precision after this insn.
4082 				 * Corresponding register is already marked
4083 				 * as precise=true in this verifier state.
4084 				 * No further markings in parent are necessary
4085 				 */
4086 				bt_clear_reg(bt, dreg);
4087 			}
4088 		} else {
4089 			if (BPF_SRC(insn->code) == BPF_X) {
4090 				/* dreg += sreg
4091 				 * both dreg and sreg need precision
4092 				 * before this insn
4093 				 */
4094 				if (sreg != BPF_REG_FP)
4095 					bt_set_reg(bt, sreg);
4096 			} /* else dreg += K
4097 			   * dreg still needs precision before this insn
4098 			   */
4099 		}
4100 	} else if (class == BPF_LDX) {
4101 		if (!bt_is_reg_set(bt, dreg))
4102 			return 0;
4103 		bt_clear_reg(bt, dreg);
4104 
4105 		/* scalars can only be spilled into stack w/o losing precision.
4106 		 * Load from any other memory can be zero extended.
4107 		 * The desire to keep that precision is already indicated
4108 		 * by 'precise' mark in corresponding register of this state.
4109 		 * No further tracking necessary.
4110 		 */
4111 		if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
4112 			return 0;
4113 		/* dreg = *(u64 *)[fp - off] was a fill from the stack.
4114 		 * that [fp - off] slot contains scalar that needs to be
4115 		 * tracked with precision
4116 		 */
4117 		spi = insn_stack_access_spi(hist->flags);
4118 		fr = insn_stack_access_frameno(hist->flags);
4119 		bt_set_frame_slot(bt, fr, spi);
4120 	} else if (class == BPF_STX || class == BPF_ST) {
4121 		if (bt_is_reg_set(bt, dreg))
4122 			/* stx & st shouldn't be using _scalar_ dst_reg
4123 			 * to access memory. It means backtracking
4124 			 * encountered a case of pointer subtraction.
4125 			 */
4126 			return -ENOTSUPP;
4127 		/* scalars can only be spilled into stack */
4128 		if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
4129 			return 0;
4130 		spi = insn_stack_access_spi(hist->flags);
4131 		fr = insn_stack_access_frameno(hist->flags);
4132 		if (!bt_is_frame_slot_set(bt, fr, spi))
4133 			return 0;
4134 		bt_clear_frame_slot(bt, fr, spi);
4135 		if (class == BPF_STX)
4136 			bt_set_reg(bt, sreg);
4137 	} else if (class == BPF_JMP || class == BPF_JMP32) {
4138 		if (bpf_pseudo_call(insn)) {
4139 			int subprog_insn_idx, subprog;
4140 
4141 			subprog_insn_idx = idx + insn->imm + 1;
4142 			subprog = find_subprog(env, subprog_insn_idx);
4143 			if (subprog < 0)
4144 				return -EFAULT;
4145 
4146 			if (subprog_is_global(env, subprog)) {
4147 				/* check that jump history doesn't have any
4148 				 * extra instructions from subprog; the next
4149 				 * instruction after call to global subprog
4150 				 * should be literally next instruction in
4151 				 * caller program
4152 				 */
4153 				WARN_ONCE(idx + 1 != subseq_idx, "verifier backtracking bug");
4154 				/* r1-r5 are invalidated after subprog call,
4155 				 * so for global func call it shouldn't be set
4156 				 * anymore
4157 				 */
4158 				if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
4159 					verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
4160 					WARN_ONCE(1, "verifier backtracking bug");
4161 					return -EFAULT;
4162 				}
4163 				/* global subprog always sets R0 */
4164 				bt_clear_reg(bt, BPF_REG_0);
4165 				return 0;
4166 			} else {
4167 				/* static subprog call instruction, which
4168 				 * means that we are exiting current subprog,
4169 				 * so only r1-r5 could be still requested as
4170 				 * precise, r0 and r6-r10 or any stack slot in
4171 				 * the current frame should be zero by now
4172 				 */
4173 				if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
4174 					verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
4175 					WARN_ONCE(1, "verifier backtracking bug");
4176 					return -EFAULT;
4177 				}
4178 				/* we are now tracking register spills correctly,
4179 				 * so any instance of leftover slots is a bug
4180 				 */
4181 				if (bt_stack_mask(bt) != 0) {
4182 					verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt));
4183 					WARN_ONCE(1, "verifier backtracking bug (subprog leftover stack slots)");
4184 					return -EFAULT;
4185 				}
4186 				/* propagate r1-r5 to the caller */
4187 				for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
4188 					if (bt_is_reg_set(bt, i)) {
4189 						bt_clear_reg(bt, i);
4190 						bt_set_frame_reg(bt, bt->frame - 1, i);
4191 					}
4192 				}
4193 				if (bt_subprog_exit(bt))
4194 					return -EFAULT;
4195 				return 0;
4196 			}
4197 		} else if (is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) {
4198 			/* exit from callback subprog to callback-calling helper or
4199 			 * kfunc call. Use idx/subseq_idx check to discern it from
4200 			 * straight line code backtracking.
4201 			 * Unlike the subprog call handling above, we shouldn't
4202 			 * propagate precision of r1-r5 (if any requested), as they are
4203 			 * not actually arguments passed directly to callback subprogs
4204 			 */
4205 			if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
4206 				verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
4207 				WARN_ONCE(1, "verifier backtracking bug");
4208 				return -EFAULT;
4209 			}
4210 			if (bt_stack_mask(bt) != 0) {
4211 				verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt));
4212 				WARN_ONCE(1, "verifier backtracking bug (callback leftover stack slots)");
4213 				return -EFAULT;
4214 			}
4215 			/* clear r1-r5 in callback subprog's mask */
4216 			for (i = BPF_REG_1; i <= BPF_REG_5; i++)
4217 				bt_clear_reg(bt, i);
4218 			if (bt_subprog_exit(bt))
4219 				return -EFAULT;
4220 			return 0;
4221 		} else if (opcode == BPF_CALL) {
4222 			/* kfunc with imm==0 is invalid and fixup_kfunc_call will
4223 			 * catch this error later. Make backtracking conservative
4224 			 * with ENOTSUPP.
4225 			 */
4226 			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
4227 				return -ENOTSUPP;
4228 			/* regular helper call sets R0 */
4229 			bt_clear_reg(bt, BPF_REG_0);
4230 			if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
4231 				/* if backtracking was looking for registers R1-R5
4232 				 * they should have been found already.
4233 				 */
4234 				verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
4235 				WARN_ONCE(1, "verifier backtracking bug");
4236 				return -EFAULT;
4237 			}
4238 		} else if (opcode == BPF_EXIT) {
4239 			bool r0_precise;
4240 
4241 			/* Backtracking to a nested function call, 'idx' is a part of
4242 			 * the inner frame, 'subseq_idx' is a part of the outer frame.
4243 			 * In case of a regular function call, instructions giving
4244 			 * precision to registers R1-R5 should have been found already.
4245 			 * In case of a callback, it is ok to have R1-R5 marked for
4246 			 * backtracking, as these registers are set by the function
4247 			 * invoking callback.
4248 			 */
4249 			if (subseq_idx >= 0 && calls_callback(env, subseq_idx))
4250 				for (i = BPF_REG_1; i <= BPF_REG_5; i++)
4251 					bt_clear_reg(bt, i);
4252 			if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
4253 				verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
4254 				WARN_ONCE(1, "verifier backtracking bug");
4255 				return -EFAULT;
4256 			}
4257 
4258 			/* BPF_EXIT in subprog or callback always returns
4259 			 * right after the call instruction, so by checking
4260 			 * whether the instruction at subseq_idx-1 is subprog
4261 			 * call or not we can distinguish actual exit from
4262 			 * *subprog* from exit from *callback*. In the former
4263 			 * case, we need to propagate r0 precision, if
4264 			 * necessary. In the latter we never do that.
4265 			 */
4266 			r0_precise = subseq_idx - 1 >= 0 &&
4267 				     bpf_pseudo_call(&env->prog->insnsi[subseq_idx - 1]) &&
4268 				     bt_is_reg_set(bt, BPF_REG_0);
4269 
4270 			bt_clear_reg(bt, BPF_REG_0);
4271 			if (bt_subprog_enter(bt))
4272 				return -EFAULT;
4273 
4274 			if (r0_precise)
4275 				bt_set_reg(bt, BPF_REG_0);
4276 			/* r6-r9 and stack slots will stay set in caller frame
4277 			 * bitmasks until we return back from callee(s)
4278 			 */
4279 			return 0;
4280 		} else if (BPF_SRC(insn->code) == BPF_X) {
4281 			if (!bt_is_reg_set(bt, dreg) && !bt_is_reg_set(bt, sreg))
4282 				return 0;
4283 			/* dreg <cond> sreg
4284 			 * Both dreg and sreg need precision before
4285 			 * this insn. If only sreg was marked precise
4286 			 * before it would be equally necessary to
4287 			 * propagate it to dreg.
4288 			 */
4289 			bt_set_reg(bt, dreg);
4290 			bt_set_reg(bt, sreg);
4291 		} else if (BPF_SRC(insn->code) == BPF_K) {
4292 			 /* dreg <cond> K
4293 			  * Only dreg still needs precision before
4294 			  * this insn, so for the K-based conditional
4295 			  * there is nothing new to be marked.
4296 			  */
4297 		}
4298 	} else if (class == BPF_LD) {
4299 		if (!bt_is_reg_set(bt, dreg))
4300 			return 0;
4301 		bt_clear_reg(bt, dreg);
4302 		/* It's ld_imm64 or ld_abs or ld_ind.
4303 		 * For ld_imm64 no further tracking of precision
4304 		 * into parent is necessary
4305 		 */
4306 		if (mode == BPF_IND || mode == BPF_ABS)
4307 			/* to be analyzed */
4308 			return -ENOTSUPP;
4309 	}
4310 	/* Propagate precision marks to linked registers, to account for
4311 	 * registers marked as precise in this function.
4312 	 */
4313 	bt_sync_linked_regs(bt, hist);
4314 	return 0;
4315 }
4316 
4317 /* the scalar precision tracking algorithm:
4318  * . at the start all registers have precise=false.
4319  * . scalar ranges are tracked as normal through alu and jmp insns.
4320  * . once precise value of the scalar register is used in:
4321  *   .  ptr + scalar alu
4322  *   . if (scalar cond K|scalar)
4323  *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
4324  *   backtrack through the verifier states and mark as precise all
4325  *   registers and stack slots with spilled constants that these
4326  *   scalar registers were derived from.
4327  * . during state pruning two registers (or spilled stack slots)
4328  *   are equivalent if both are not precise.
4329  *
4330  * Note the verifier cannot simply walk register parentage chain,
4331  * since many different registers and stack slots could have been
4332  * used to compute single precise scalar.
4333  *
4334  * The approach of starting with precise=true for all registers and then
4335  * backtrack to mark a register as not precise when the verifier detects
4336  * that program doesn't care about specific value (e.g., when helper
4337  * takes register as ARG_ANYTHING parameter) is not safe.
4338  *
4339  * It's ok to walk single parentage chain of the verifier states.
4340  * It's possible that this backtracking will go all the way till 1st insn.
4341  * All other branches will be explored for needing precision later.
4342  *
4343  * The backtracking needs to deal with cases like:
4344  *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
4345  * r9 -= r8
4346  * r5 = r9
4347  * if r5 > 0x79f goto pc+7
4348  *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
4349  * r5 += 1
4350  * ...
4351  * call bpf_perf_event_output#25
4352  *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
4353  *
4354  * and this case:
4355  * r6 = 1
4356  * call foo // uses callee's r6 inside to compute r0
4357  * r0 += r6
4358  * if r0 == 0 goto
4359  *
4360  * to track above reg_mask/stack_mask needs to be independent for each frame.
4361  *
4362  * Also if parent's curframe > frame where backtracking started,
4363  * the verifier needs to mark registers in both frames, otherwise callees
4364  * may incorrectly prune callers. This is similar to
4365  * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
4366  *
4367  * For now backtracking falls back into conservative marking.
4368  */
4369 static void mark_all_scalars_precise(struct bpf_verifier_env *env,
4370 				     struct bpf_verifier_state *st)
4371 {
4372 	struct bpf_func_state *func;
4373 	struct bpf_reg_state *reg;
4374 	int i, j;
4375 
4376 	if (env->log.level & BPF_LOG_LEVEL2) {
4377 		verbose(env, "mark_precise: frame%d: falling back to forcing all scalars precise\n",
4378 			st->curframe);
4379 	}
4380 
4381 	/* big hammer: mark all scalars precise in this path.
4382 	 * pop_stack may still get !precise scalars.
4383 	 * We also skip current state and go straight to first parent state,
4384 	 * because precision markings in current non-checkpointed state are
4385 	 * not needed. See why in the comment in __mark_chain_precision below.
4386 	 */
4387 	for (st = st->parent; st; st = st->parent) {
4388 		for (i = 0; i <= st->curframe; i++) {
4389 			func = st->frame[i];
4390 			for (j = 0; j < BPF_REG_FP; j++) {
4391 				reg = &func->regs[j];
4392 				if (reg->type != SCALAR_VALUE || reg->precise)
4393 					continue;
4394 				reg->precise = true;
4395 				if (env->log.level & BPF_LOG_LEVEL2) {
4396 					verbose(env, "force_precise: frame%d: forcing r%d to be precise\n",
4397 						i, j);
4398 				}
4399 			}
4400 			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
4401 				if (!is_spilled_reg(&func->stack[j]))
4402 					continue;
4403 				reg = &func->stack[j].spilled_ptr;
4404 				if (reg->type != SCALAR_VALUE || reg->precise)
4405 					continue;
4406 				reg->precise = true;
4407 				if (env->log.level & BPF_LOG_LEVEL2) {
4408 					verbose(env, "force_precise: frame%d: forcing fp%d to be precise\n",
4409 						i, -(j + 1) * 8);
4410 				}
4411 			}
4412 		}
4413 	}
4414 }
4415 
4416 static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
4417 {
4418 	struct bpf_func_state *func;
4419 	struct bpf_reg_state *reg;
4420 	int i, j;
4421 
4422 	for (i = 0; i <= st->curframe; i++) {
4423 		func = st->frame[i];
4424 		for (j = 0; j < BPF_REG_FP; j++) {
4425 			reg = &func->regs[j];
4426 			if (reg->type != SCALAR_VALUE)
4427 				continue;
4428 			reg->precise = false;
4429 		}
4430 		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
4431 			if (!is_spilled_reg(&func->stack[j]))
4432 				continue;
4433 			reg = &func->stack[j].spilled_ptr;
4434 			if (reg->type != SCALAR_VALUE)
4435 				continue;
4436 			reg->precise = false;
4437 		}
4438 	}
4439 }
4440 
4441 /*
4442  * __mark_chain_precision() backtracks BPF program instruction sequence and
4443  * chain of verifier states making sure that register *regno* (if regno >= 0)
4444  * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
4445  * SCALARS, as well as any other registers and slots that contribute to
4446  * a tracked state of given registers/stack slots, depending on specific BPF
4447  * assembly instructions (see backtrack_insn() for exact instruction handling
4448  * logic). This backtracking relies on recorded insn_hist and is able to
4449  * traverse entire chain of parent states. This process ends only when all the
4450  * necessary registers/slots and their transitive dependencies are marked as
4451  * precise.
4452  *
4453  * One important and subtle aspect is that precise marks *do not matter* in
4454  * the currently verified state (current state). It is important to understand
4455  * why this is the case.
4456  *
4457  * First, note that current state is the state that is not yet "checkpointed",
4458  * i.e., it is not yet put into env->explored_states, and it has no children
4459  * states as well. It's ephemeral, and can end up either a) being discarded if
4460  * compatible explored state is found at some point or BPF_EXIT instruction is
4461  * reached or b) checkpointed and put into env->explored_states, branching out
4462  * into one or more children states.
4463  *
4464  * In the former case, precise markings in current state are completely
4465  * ignored by state comparison code (see regsafe() for details). Only
4466  * checkpointed ("old") state precise markings are important, and if old
4467  * state's register/slot is precise, regsafe() assumes current state's
4468  * register/slot as precise and checks value ranges exactly and precisely. If
4469  * states turn out to be compatible, current state's necessary precise
4470  * markings and any required parent states' precise markings are enforced
4471  * after the fact with propagate_precision() logic. But it's
4472  * important to realize that in this case, even after marking current state
4473  * registers/slots as precise, we immediately discard current state. So what
4474  * actually matters is any of the precise markings propagated into current
4475  * state's parent states, which are always checkpointed (due to b) case above).
4476  * As such, for scenario a) it doesn't matter if current state has precise
4477  * markings set or not.
4478  *
4479  * Now, for the scenario b), checkpointing and forking into child(ren)
4480  * state(s). Note that before current state gets to checkpointing step, any
4481  * processed instruction always assumes precise SCALAR register/slot
4482  * knowledge: if a precise value or range is useful to prune a jump branch, the
4483  * BPF verifier takes this opportunity enthusiastically. Similarly, when a
4484  * register's value is used to calculate an offset or memory address, exact
4485  * knowledge of the SCALAR range is assumed, checked, and enforced. So, similar
4486  * to what we mentioned above about state comparison ignoring precise markings,
4487  * the BPF verifier ignores and also assumes precise markings *at will* during
4488  * the instruction verification process. But as the verifier
4489  * assumes precision, it also propagates any precision dependencies across
4490  * parent states, which are not yet finalized, so can be further restricted
4491  * based on new knowledge gained from restrictions enforced by their children
4492  * states. This is so that once those parent states are finalized, i.e., when
4493  * they have no more active child states, the state comparison logic in
4494  * is_state_visited() would enforce strict and precise SCALAR ranges, if
4495  * required for correctness.
4496  *
4497  * To build a bit more intuition, note also that once a state is checkpointed,
4498  * the path we took to get to that state is not important. This is a crucial
4499  * property for state pruning. When state is checkpointed and finalized at
4500  * some instruction index, it can be correctly and safely used to "short
4501  * circuit" any *compatible* state that reaches exactly the same instruction
4502  * index. I.e., even if we jumped to that instruction from a completely
4503  * different code path than the one the original finalized state was derived
4504  * from, it doesn't matter: the current state can be discarded, because from
4505  * that instruction forward having a compatible state ensures we safely reach the
4506  * exit. States describe preconditions for further exploration, but completely
4507  * forget the history of how we got here.
4508  *
4509  * This also means that even if we needed precise SCALAR range to get to
4510  * finalized state, but from that point forward *that same* SCALAR register is
4511  * never used in a precise context (i.e., its precise value is not needed for
4512  * correctness), it's correct and safe to mark such register as "imprecise"
4513  * (i.e., precise marking set to false). This is what we rely on when we do
4514  * not set precise marking in current state. If no child state requires
4515  * precision for any given SCALAR register, it's safe to dictate that it can
4516  * be imprecise. If any child state does require this register to be precise,
4517  * we'll mark it precise later retroactively during precise markings
4518  * propagation from child state to parent states.
4519  *
4520  * Skipping precise marking setting in current state is a mild version of
4521  * relying on the above observation. But we can utilize this property even
4522  * more aggressively by proactively forgetting any precise marking in the
4523  * current state (which we inherited from the parent state), right before we
4524  * checkpoint it and branch off into new child state. This is done by
4525  * mark_all_scalars_imprecise() to hopefully get more permissive and generic
4526  * finalized states which help in short circuiting more future states.
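 *
 * To make the backtracking side concrete, consider this hypothetical
 * instruction sequence (an editorial illustration, not taken from any real
 * program):
 *
 *    r6 = *(u32 *)(r1 + 0)   // r6 is SCALAR_VALUE, imprecise
 *    r7 = 20                 // r7 is a known constant, also imprecise
 *    r6 &= 7                 // r6 is now in [0, 7], still imprecise
 *    r8 = r10
 *    r8 += r6                // r6 participates in stack pointer arithmetic,
 *                            // so precision of r6 is requested
 *
 * Backtracking then walks the recorded instruction history in reverse,
 * following every instruction that contributed to r6's value ('r6 &= 7' and
 * the initial load), and marks r6 precise in the parent (checkpointed)
 * states. r7 never contributed to r6, so it stays imprecise and does not
 * constrain future state pruning.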
4527  */
4528 static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
4529 {
4530 	struct backtrack_state *bt = &env->bt;
4531 	struct bpf_verifier_state *st = env->cur_state;
4532 	int first_idx = st->first_insn_idx;
4533 	int last_idx = env->insn_idx;
4534 	int subseq_idx = -1;
4535 	struct bpf_func_state *func;
4536 	struct bpf_reg_state *reg;
4537 	bool skip_first = true;
4538 	int i, fr, err;
4539 
4540 	if (!env->bpf_capable)
4541 		return 0;
4542 
4543 	/* set frame number from which we are starting to backtrack */
4544 	bt_init(bt, env->cur_state->curframe);
4545 
4546 	/* Do sanity checks against current state of register and/or stack
4547 	 * slot, but don't set precise flag in current state, as precision
4548 	 * tracking in the current state is unnecessary.
4549 	 */
4550 	func = st->frame[bt->frame];
4551 	if (regno >= 0) {
4552 		reg = &func->regs[regno];
4553 		if (reg->type != SCALAR_VALUE) {
4554 			WARN_ONCE(1, "backtracing misuse");
4555 			return -EFAULT;
4556 		}
4557 		bt_set_reg(bt, regno);
4558 	}
4559 
4560 	if (bt_empty(bt))
4561 		return 0;
4562 
4563 	for (;;) {
4564 		DECLARE_BITMAP(mask, 64);
4565 		u32 hist_start = st->insn_hist_start;
4566 		u32 hist_end = st->insn_hist_end;
4567 		struct bpf_insn_hist_entry *hist;
4568 
4569 		if (env->log.level & BPF_LOG_LEVEL2) {
4570 			verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n",
4571 				bt->frame, last_idx, first_idx, subseq_idx);
4572 		}
4573 
4574 		if (last_idx < 0) {
4575 			/* we are at the entry into subprog, which
4576 			 * is expected for global funcs, but only if
4577 			 * requested precise registers are R1-R5
4578 			 * (which are global func's input arguments)
4579 			 */
4580 			if (st->curframe == 0 &&
4581 			    st->frame[0]->subprogno > 0 &&
4582 			    st->frame[0]->callsite == BPF_MAIN_FUNC &&
4583 			    bt_stack_mask(bt) == 0 &&
4584 			    (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == 0) {
4585 				bitmap_from_u64(mask, bt_reg_mask(bt));
4586 				for_each_set_bit(i, mask, 32) {
4587 					reg = &st->frame[0]->regs[i];
4588 					bt_clear_reg(bt, i);
4589 					if (reg->type == SCALAR_VALUE)
4590 						reg->precise = true;
4591 				}
4592 				return 0;
4593 			}
4594 
4595 			verbose(env, "BUG backtracking func entry subprog %d reg_mask %x stack_mask %llx\n",
4596 				st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
4597 			WARN_ONCE(1, "verifier backtracking bug");
4598 			return -EFAULT;
4599 		}
4600 
4601 		for (i = last_idx;;) {
4602 			if (skip_first) {
4603 				err = 0;
4604 				skip_first = false;
4605 			} else {
4606 				hist = get_insn_hist_entry(env, hist_start, hist_end, i);
4607 				err = backtrack_insn(env, i, subseq_idx, hist, bt);
4608 			}
4609 			if (err == -ENOTSUPP) {
4610 				mark_all_scalars_precise(env, env->cur_state);
4611 				bt_reset(bt);
4612 				return 0;
4613 			} else if (err) {
4614 				return err;
4615 			}
4616 			if (bt_empty(bt))
4617 				/* Found assignment(s) into tracked register in this state.
4618 				 * Since this state is already marked, just return.
4619 				 * Nothing to be tracked further in the parent state.
4620 				 */
4621 				return 0;
4622 			subseq_idx = i;
4623 			i = get_prev_insn_idx(env, st, i, hist_start, &hist_end);
4624 			if (i == -ENOENT)
4625 				break;
4626 			if (i >= env->prog->len) {
4627 				/* This can happen if backtracking reached insn 0
4628 				 * and there are still reg_mask or stack_mask
4629 				 * to backtrack.
4630 				 * It means the backtracking missed the spot where
4631 				 * a particular register was initialized with a constant.
4632 				 */
4633 				verbose(env, "BUG backtracking idx %d\n", i);
4634 				WARN_ONCE(1, "verifier backtracking bug");
4635 				return -EFAULT;
4636 			}
4637 		}
4638 		st = st->parent;
4639 		if (!st)
4640 			break;
4641 
4642 		for (fr = bt->frame; fr >= 0; fr--) {
4643 			func = st->frame[fr];
4644 			bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
4645 			for_each_set_bit(i, mask, 32) {
4646 				reg = &func->regs[i];
4647 				if (reg->type != SCALAR_VALUE) {
4648 					bt_clear_frame_reg(bt, fr, i);
4649 					continue;
4650 				}
4651 				if (reg->precise)
4652 					bt_clear_frame_reg(bt, fr, i);
4653 				else
4654 					reg->precise = true;
4655 			}
4656 
4657 			bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
4658 			for_each_set_bit(i, mask, 64) {
4659 				if (i >= func->allocated_stack / BPF_REG_SIZE) {
4660 					verbose(env, "BUG backtracking (stack slot %d, total slots %d)\n",
4661 						i, func->allocated_stack / BPF_REG_SIZE);
4662 					WARN_ONCE(1, "verifier backtracking bug (stack slot out of bounds)");
4663 					return -EFAULT;
4664 				}
4665 
4666 				if (!is_spilled_scalar_reg(&func->stack[i])) {
4667 					bt_clear_frame_slot(bt, fr, i);
4668 					continue;
4669 				}
4670 				reg = &func->stack[i].spilled_ptr;
4671 				if (reg->precise)
4672 					bt_clear_frame_slot(bt, fr, i);
4673 				else
4674 					reg->precise = true;
4675 			}
4676 			if (env->log.level & BPF_LOG_LEVEL2) {
4677 				fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
4678 					     bt_frame_reg_mask(bt, fr));
4679 				verbose(env, "mark_precise: frame%d: parent state regs=%s ",
4680 					fr, env->tmp_str_buf);
4681 				fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
4682 					       bt_frame_stack_mask(bt, fr));
4683 				verbose(env, "stack=%s: ", env->tmp_str_buf);
4684 				print_verifier_state(env, st, fr, true);
4685 			}
4686 		}
4687 
4688 		if (bt_empty(bt))
4689 			return 0;
4690 
4691 		subseq_idx = first_idx;
4692 		last_idx = st->last_insn_idx;
4693 		first_idx = st->first_insn_idx;
4694 	}
4695 
4696 	/* if we still have requested precise regs or slots, we missed
4697 	 * something (e.g., stack access through non-r10 register), so
4698 	 * fallback to marking all precise
4699 	 */
4700 	if (!bt_empty(bt)) {
4701 		mark_all_scalars_precise(env, env->cur_state);
4702 		bt_reset(bt);
4703 	}
4704 
4705 	return 0;
4706 }
4707 
4708 int mark_chain_precision(struct bpf_verifier_env *env, int regno)
4709 {
4710 	return __mark_chain_precision(env, regno);
4711 }
4712 
4713 /* mark_chain_precision_batch() assumes that env->bt is set in the caller to
4714  * desired reg and stack masks across all relevant frames
4715  */
4716 static int mark_chain_precision_batch(struct bpf_verifier_env *env)
4717 {
4718 	return __mark_chain_precision(env, -1);
4719 }
4720 
4721 static bool is_spillable_regtype(enum bpf_reg_type type)
4722 {
4723 	switch (base_type(type)) {
4724 	case PTR_TO_MAP_VALUE:
4725 	case PTR_TO_STACK:
4726 	case PTR_TO_CTX:
4727 	case PTR_TO_PACKET:
4728 	case PTR_TO_PACKET_META:
4729 	case PTR_TO_PACKET_END:
4730 	case PTR_TO_FLOW_KEYS:
4731 	case CONST_PTR_TO_MAP:
4732 	case PTR_TO_SOCKET:
4733 	case PTR_TO_SOCK_COMMON:
4734 	case PTR_TO_TCP_SOCK:
4735 	case PTR_TO_XDP_SOCK:
4736 	case PTR_TO_BTF_ID:
4737 	case PTR_TO_BUF:
4738 	case PTR_TO_MEM:
4739 	case PTR_TO_FUNC:
4740 	case PTR_TO_MAP_KEY:
4741 	case PTR_TO_ARENA:
4742 		return true;
4743 	default:
4744 		return false;
4745 	}
4746 }
4747 
4748 /* Does this register contain a constant zero? */
4749 static bool register_is_null(struct bpf_reg_state *reg)
4750 {
4751 	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
4752 }
4753 
4754 /* check if register is a constant scalar value */
4755 static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32)
4756 {
4757 	return reg->type == SCALAR_VALUE &&
4758 	       tnum_is_const(subreg32 ? tnum_subreg(reg->var_off) : reg->var_off);
4759 }
4760 
4761 /* assuming is_reg_const() is true, return constant value of a register */
4762 static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32)
4763 {
4764 	return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value;
4765 }
4766 
4767 static bool __is_pointer_value(bool allow_ptr_leaks,
4768 			       const struct bpf_reg_state *reg)
4769 {
4770 	if (allow_ptr_leaks)
4771 		return false;
4772 
4773 	return reg->type != SCALAR_VALUE;
4774 }
4775 
4776 static void assign_scalar_id_before_mov(struct bpf_verifier_env *env,
4777 					struct bpf_reg_state *src_reg)
4778 {
4779 	if (src_reg->type != SCALAR_VALUE)
4780 		return;
4781 
4782 	if (src_reg->id & BPF_ADD_CONST) {
4783 		/*
4784 		 * The verifier is processing rX = rY insn and
4785 		 * rY->id already has the special linked register flag set.
4786 		 * Clear it, since multiple rX += const are not supported.
4787 		 */
4788 		src_reg->id = 0;
4789 		src_reg->off = 0;
4790 	}
4791 
4792 	if (!src_reg->id && !tnum_is_const(src_reg->var_off))
4793 		/* Ensure that src_reg has a valid ID that will be copied to
4794 		 * dst_reg and then will be used by sync_linked_regs() to
4795 		 * propagate min/max range.
4796 		 */
4797 		src_reg->id = ++env->id_gen;
4798 }
4799 
4800 /* Copy src state preserving dst->parent and dst->live fields */
4801 static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
4802 {
4803 	struct bpf_reg_state *parent = dst->parent;
4804 	enum bpf_reg_liveness live = dst->live;
4805 
4806 	*dst = *src;
4807 	dst->parent = parent;
4808 	dst->live = live;
4809 }
4810 
4811 static void save_register_state(struct bpf_verifier_env *env,
4812 				struct bpf_func_state *state,
4813 				int spi, struct bpf_reg_state *reg,
4814 				int size)
4815 {
4816 	int i;
4817 
4818 	copy_register_state(&state->stack[spi].spilled_ptr, reg);
4819 	if (size == BPF_REG_SIZE)
4820 		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
4821 
4822 	for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
4823 		state->stack[spi].slot_type[i - 1] = STACK_SPILL;
4824 
4825 	/* size < 8 bytes spill */
4826 	for (; i; i--)
4827 		mark_stack_slot_misc(env, &state->stack[spi].slot_type[i - 1]);
4828 }
4829 
4830 static bool is_bpf_st_mem(struct bpf_insn *insn)
4831 {
4832 	return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
4833 }
4834 
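/* fls64() returns the 1-based index of the most significant set bit (0 for an
 * all-zero value), so this is the minimum number of bits needed to represent
 * any value the register may hold according to its umax bound.
 */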
4835 static int get_reg_width(struct bpf_reg_state *reg)
4836 {
4837 	return fls64(reg->umax_value);
4838 }
4839 
4840 /* See comment for mark_fastcall_pattern_for_call() */
4841 static void check_fastcall_stack_contract(struct bpf_verifier_env *env,
4842 					  struct bpf_func_state *state, int insn_idx, int off)
4843 {
4844 	struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
4845 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
4846 	int i;
4847 
4848 	if (subprog->fastcall_stack_off <= off || aux[insn_idx].fastcall_pattern)
4849 		return;
4850 	/* This is an access to the region [max_stack_depth .. fastcall_stack_off)
4851 	 * from something that is not a part of the fastcall pattern;
4852 	 * disable fastcall rewrites for the current subprogram by setting
4853 	 * fastcall_stack_off to a value smaller than any possible offset.
4854 	 */
4855 	subprog->fastcall_stack_off = S16_MIN;
4856 	/* reset fastcall aux flags within subprogram,
4857 	 * happens at most once per subprogram
4858 	 */
4859 	for (i = subprog->start; i < (subprog + 1)->start; ++i) {
4860 		aux[i].fastcall_spills_num = 0;
4861 		aux[i].fastcall_pattern = 0;
4862 	}
4863 }
4864 
4865 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
4866  * stack boundary and alignment are checked in check_mem_access()
4867  */
4868 static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
4869 				       /* stack frame we're writing to */
4870 				       struct bpf_func_state *state,
4871 				       int off, int size, int value_regno,
4872 				       int insn_idx)
4873 {
4874 	struct bpf_func_state *cur; /* state of the current function */
4875 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
4876 	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
4877 	struct bpf_reg_state *reg = NULL;
4878 	int insn_flags = insn_stack_access_flags(state->frameno, spi);
4879 
4880 	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
4881 	 * so it's an aligned access and [off, off + size) is within stack limits
4882 	 */
4883 	if (!env->allow_ptr_leaks &&
4884 	    is_spilled_reg(&state->stack[spi]) &&
4885 	    !is_spilled_scalar_reg(&state->stack[spi]) &&
4886 	    size != BPF_REG_SIZE) {
4887 		verbose(env, "attempt to corrupt spilled pointer on stack\n");
4888 		return -EACCES;
4889 	}
4890 
4891 	cur = env->cur_state->frame[env->cur_state->curframe];
4892 	if (value_regno >= 0)
4893 		reg = &cur->regs[value_regno];
4894 	if (!env->bypass_spec_v4) {
4895 		bool sanitize = reg && is_spillable_regtype(reg->type);
4896 
4897 		for (i = 0; i < size; i++) {
4898 			u8 type = state->stack[spi].slot_type[i];
4899 
4900 			if (type != STACK_MISC && type != STACK_ZERO) {
4901 				sanitize = true;
4902 				break;
4903 			}
4904 		}
4905 
4906 		if (sanitize)
4907 			env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
4908 	}
4909 
4910 	err = destroy_if_dynptr_stack_slot(env, state, spi);
4911 	if (err)
4912 		return err;
4913 
4914 	check_fastcall_stack_contract(env, state, insn_idx, off);
4915 	mark_stack_slot_scratched(env, spi);
4916 	if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
4917 		bool reg_value_fits;
4918 
4919 		reg_value_fits = get_reg_width(reg) <= BITS_PER_BYTE * size;
4920 		/* Make sure that reg had an ID to build a relation on spill. */
4921 		if (reg_value_fits)
4922 			assign_scalar_id_before_mov(env, reg);
4923 		save_register_state(env, state, spi, reg, size);
4924 		/* Break the relation on a narrowing spill. */
4925 		if (!reg_value_fits)
4926 			state->stack[spi].spilled_ptr.id = 0;
4927 	} else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
4928 		   env->bpf_capable) {
4929 		struct bpf_reg_state *tmp_reg = &env->fake_reg[0];
4930 
4931 		memset(tmp_reg, 0, sizeof(*tmp_reg));
4932 		__mark_reg_known(tmp_reg, insn->imm);
4933 		tmp_reg->type = SCALAR_VALUE;
4934 		save_register_state(env, state, spi, tmp_reg, size);
4935 	} else if (reg && is_spillable_regtype(reg->type)) {
4936 		/* register containing pointer is being spilled into stack */
4937 		if (size != BPF_REG_SIZE) {
4938 			verbose_linfo(env, insn_idx, "; ");
4939 			verbose(env, "invalid size of register spill\n");
4940 			return -EACCES;
4941 		}
4942 		if (state != cur && reg->type == PTR_TO_STACK) {
4943 			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
4944 			return -EINVAL;
4945 		}
4946 		save_register_state(env, state, spi, reg, size);
4947 	} else {
4948 		u8 type = STACK_MISC;
4949 
4950 		/* regular write of data into stack destroys any spilled ptr */
4951 		state->stack[spi].spilled_ptr.type = NOT_INIT;
4952 		/* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */
4953 		if (is_stack_slot_special(&state->stack[spi]))
4954 			for (i = 0; i < BPF_REG_SIZE; i++)
4955 				scrub_spilled_slot(&state->stack[spi].slot_type[i]);
4956 
4957 		/* only mark the slot as written if all 8 bytes were written
4958 		 * otherwise read propagation may incorrectly stop too soon
4959 		 * when stack slots are partially written.
4960 		 * This heuristic means that read propagation will be
4961 		 * conservative, since it will add reg_live_read marks
4962 		 * to stack slots all the way to the first state when a program
4963 		 * writes+reads less than 8 bytes
4964 		 */
4965 		if (size == BPF_REG_SIZE)
4966 			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
4967 
4968 		/* when we zero initialize stack slots mark them as such */
4969 		if ((reg && register_is_null(reg)) ||
4970 		    (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
4971 			/* STACK_ZERO case happened because register spill
4972 			 * wasn't properly aligned at the stack slot boundary,
4973 			 * so it's not a register spill anymore; force
4974 			 * originating register to be precise to make
4975 			 * STACK_ZERO correct for subsequent states
4976 			 */
4977 			err = mark_chain_precision(env, value_regno);
4978 			if (err)
4979 				return err;
4980 			type = STACK_ZERO;
4981 		}
4982 
4983 		/* Mark slots affected by this stack write. */
4984 		for (i = 0; i < size; i++)
4985 			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type;
4986 		insn_flags = 0; /* not a register spill */
4987 	}
4988 
4989 	if (insn_flags)
4990 		return push_insn_history(env, env->cur_state, insn_flags, 0);
4991 	return 0;
4992 }
4993 
4994 /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
4995  * known to contain a variable offset.
4996  * This function checks whether the write is permitted and conservatively
4997  * tracks the effects of the write, considering that each stack slot in the
4998  * dynamic range is potentially written to.
4999  *
5000  * 'off' includes 'regno->off'.
5001  * 'value_regno' can be -1, meaning that an unknown value is being written to
5002  * the stack.
5003  *
5004  * Spilled pointers in range are not marked as written because we don't know
5005  * what's going to be actually written. This means that read propagation for
5006  * future reads cannot be terminated by this write.
5007  *
5008  * For privileged programs, uninitialized stack slots are considered
5009  * initialized by this write (even though we don't know exactly what offsets
5010  * are going to be written to). The idea is that we don't want the verifier to
5011  * reject future reads that access slots written to through variable offsets.
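 *
 * As a hypothetical illustration: if the pointer register is fp-based with a
 * variable part known to be in [-16, -8] and an 8-byte zero is written
 * through it, every stack byte in [-16, -1] is treated as potentially
 * written; any spilled pointer state in that range is destroyed and the slots
 * become STACK_MISC (or remain STACK_ZERO where a zero is being written), yet
 * none of them is marked REG_LIVE_WRITTEN.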
5012  */
5013 static int check_stack_write_var_off(struct bpf_verifier_env *env,
5014 				     /* func where register points to */
5015 				     struct bpf_func_state *state,
5016 				     int ptr_regno, int off, int size,
5017 				     int value_regno, int insn_idx)
5018 {
5019 	struct bpf_func_state *cur; /* state of the current function */
5020 	int min_off, max_off;
5021 	int i, err;
5022 	struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
5023 	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
5024 	bool writing_zero = false;
5025 	/* set if the fact that we're writing a zero is used to let any
5026 	 * stack slots remain STACK_ZERO
5027 	 */
5028 	bool zero_used = false;
5029 
5030 	cur = env->cur_state->frame[env->cur_state->curframe];
5031 	ptr_reg = &cur->regs[ptr_regno];
5032 	min_off = ptr_reg->smin_value + off;
5033 	max_off = ptr_reg->smax_value + off + size;
5034 	if (value_regno >= 0)
5035 		value_reg = &cur->regs[value_regno];
5036 	if ((value_reg && register_is_null(value_reg)) ||
5037 	    (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
5038 		writing_zero = true;
5039 
5040 	for (i = min_off; i < max_off; i++) {
5041 		int spi;
5042 
5043 		spi = __get_spi(i);
5044 		err = destroy_if_dynptr_stack_slot(env, state, spi);
5045 		if (err)
5046 			return err;
5047 	}
5048 
5049 	check_fastcall_stack_contract(env, state, insn_idx, min_off);
5050 	/* Variable offset writes destroy any spilled pointers in range. */
5051 	for (i = min_off; i < max_off; i++) {
5052 		u8 new_type, *stype;
5053 		int slot, spi;
5054 
5055 		slot = -i - 1;
5056 		spi = slot / BPF_REG_SIZE;
5057 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
5058 		mark_stack_slot_scratched(env, spi);
5059 
5060 		if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
5061 			/* Reject the write if range we may write to has not
5062 			 * been initialized beforehand. If we didn't reject
5063 			 * here, the ptr status would be erased below (even
5064 			 * though not all slots are actually overwritten),
5065 			 * possibly opening the door to leaks.
5066 			 *
5067 			 * We do however catch STACK_INVALID case below, and
5068 			 * only allow reading possibly uninitialized memory
5069 			 * later for CAP_PERFMON, as the write may not happen to
5070 			 * that slot.
5071 			 */
5072 			verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
5073 				insn_idx, i);
5074 			return -EINVAL;
5075 		}
5076 
5077 		/* If writing_zero and the spi slot contains a spill of value 0,
5078 		 * maintain the spill type.
5079 		 */
5080 		if (writing_zero && *stype == STACK_SPILL &&
5081 		    is_spilled_scalar_reg(&state->stack[spi])) {
5082 			struct bpf_reg_state *spill_reg = &state->stack[spi].spilled_ptr;
5083 
5084 			if (tnum_is_const(spill_reg->var_off) && spill_reg->var_off.value == 0) {
5085 				zero_used = true;
5086 				continue;
5087 			}
5088 		}
5089 
5090 		/* Erase all other spilled pointers. */
5091 		state->stack[spi].spilled_ptr.type = NOT_INIT;
5092 
5093 		/* Update the slot type. */
5094 		new_type = STACK_MISC;
5095 		if (writing_zero && *stype == STACK_ZERO) {
5096 			new_type = STACK_ZERO;
5097 			zero_used = true;
5098 		}
5099 		/* If the slot is STACK_INVALID, we check whether it's OK to
5100 		 * pretend that it will be initialized by this write. The slot
5101 		 * might not actually be written to, and so if we mark it as
5102 		 * initialized future reads might leak uninitialized memory.
5103 		 * For privileged programs, we will accept such reads to slots
5104 		 * that may or may not be written because, if we rejected
5105 		 * them, the error would be too confusing.
5106 		 */
5107 		if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
5108 			verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
5109 					insn_idx, i);
5110 			return -EINVAL;
5111 		}
5112 		*stype = new_type;
5113 	}
5114 	if (zero_used) {
5115 		/* backtracking doesn't work for STACK_ZERO yet. */
5116 		err = mark_chain_precision(env, value_regno);
5117 		if (err)
5118 			return err;
5119 	}
5120 	return 0;
5121 }
5122 
5123 /* When register 'dst_regno' is assigned some values from stack[min_off,
5124  * max_off), we set the register's type according to the types of the
5125  * respective stack slots. If all the stack values are known to be zeros, then
5126  * so is the destination reg. Otherwise, the register is considered to be
5127  * SCALAR. This function does not deal with register filling; the caller must
5128  * ensure that all spilled registers in the stack range have been marked as
5129  * read.
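 *
 * For example, a 4-byte read where all four stack bytes are STACK_ZERO yields
 * a known-zero destination register (stack reads are zero-extended into the
 * full register), while any non-zero byte (e.g. STACK_MISC) yields an unknown
 * SCALAR.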
5130  */
5131 static void mark_reg_stack_read(struct bpf_verifier_env *env,
5132 				/* func where src register points to */
5133 				struct bpf_func_state *ptr_state,
5134 				int min_off, int max_off, int dst_regno)
5135 {
5136 	struct bpf_verifier_state *vstate = env->cur_state;
5137 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
5138 	int i, slot, spi;
5139 	u8 *stype;
5140 	int zeros = 0;
5141 
5142 	for (i = min_off; i < max_off; i++) {
5143 		slot = -i - 1;
5144 		spi = slot / BPF_REG_SIZE;
5145 		mark_stack_slot_scratched(env, spi);
5146 		stype = ptr_state->stack[spi].slot_type;
5147 		if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
5148 			break;
5149 		zeros++;
5150 	}
5151 	if (zeros == max_off - min_off) {
5152 		/* Any access_size read into register is zero extended,
5153 		 * so the whole register == const_zero.
5154 		 */
5155 		__mark_reg_const_zero(env, &state->regs[dst_regno]);
5156 	} else {
5157 		/* have read misc data from the stack */
5158 		mark_reg_unknown(env, state->regs, dst_regno);
5159 	}
5160 	state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
5161 }
5162 
5163 /* Read the stack at 'off' and put the results into the register indicated by
5164  * 'dst_regno'. It handles reg filling if the addressed stack slot is a
5165  * spilled reg.
5166  *
5167  * 'dst_regno' can be -1, meaning that the read value is not going to a
5168  * register.
5169  *
5170  * The access is assumed to be within the current stack bounds.
5171  */
5172 static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
5173 				      /* func where src register points to */
5174 				      struct bpf_func_state *reg_state,
5175 				      int off, int size, int dst_regno)
5176 {
5177 	struct bpf_verifier_state *vstate = env->cur_state;
5178 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
5179 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
5180 	struct bpf_reg_state *reg;
5181 	u8 *stype, type;
5182 	int insn_flags = insn_stack_access_flags(reg_state->frameno, spi);
5183 
5184 	stype = reg_state->stack[spi].slot_type;
5185 	reg = &reg_state->stack[spi].spilled_ptr;
5186 
5187 	mark_stack_slot_scratched(env, spi);
5188 	check_fastcall_stack_contract(env, state, env->insn_idx, off);
5189 
5190 	if (is_spilled_reg(&reg_state->stack[spi])) {
5191 		u8 spill_size = 1;
5192 
5193 		for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
5194 			spill_size++;
5195 
5196 		if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
5197 			if (reg->type != SCALAR_VALUE) {
5198 				verbose_linfo(env, env->insn_idx, "; ");
5199 				verbose(env, "invalid size of register fill\n");
5200 				return -EACCES;
5201 			}
5202 
5203 			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
5204 			if (dst_regno < 0)
5205 				return 0;
5206 
5207 			if (size <= spill_size &&
5208 			    bpf_stack_narrow_access_ok(off, size, spill_size)) {
5209 				/* The earlier check_reg_arg() has decided the
5210 				 * subreg_def for this insn.  Save it first.
5211 				 */
5212 				s32 subreg_def = state->regs[dst_regno].subreg_def;
5213 
5214 				copy_register_state(&state->regs[dst_regno], reg);
5215 				state->regs[dst_regno].subreg_def = subreg_def;
5216 
5217 				/* Break the relation on a narrowing fill.
5218 				 * coerce_reg_to_size will adjust the boundaries.
5219 				 */
5220 				if (get_reg_width(reg) > size * BITS_PER_BYTE)
5221 					state->regs[dst_regno].id = 0;
5222 			} else {
5223 				int spill_cnt = 0, zero_cnt = 0;
5224 
5225 				for (i = 0; i < size; i++) {
5226 					type = stype[(slot - i) % BPF_REG_SIZE];
5227 					if (type == STACK_SPILL) {
5228 						spill_cnt++;
5229 						continue;
5230 					}
5231 					if (type == STACK_MISC)
5232 						continue;
5233 					if (type == STACK_ZERO) {
5234 						zero_cnt++;
5235 						continue;
5236 					}
5237 					if (type == STACK_INVALID && env->allow_uninit_stack)
5238 						continue;
5239 					verbose(env, "invalid read from stack off %d+%d size %d\n",
5240 						off, i, size);
5241 					return -EACCES;
5242 				}
5243 
5244 				if (spill_cnt == size &&
5245 				    tnum_is_const(reg->var_off) && reg->var_off.value == 0) {
5246 					__mark_reg_const_zero(env, &state->regs[dst_regno]);
5247 					/* this IS register fill, so keep insn_flags */
5248 				} else if (zero_cnt == size) {
5249 					/* similarly to mark_reg_stack_read(), preserve zeroes */
5250 					__mark_reg_const_zero(env, &state->regs[dst_regno]);
5251 					insn_flags = 0; /* not restoring original register state */
5252 				} else {
5253 					mark_reg_unknown(env, state->regs, dst_regno);
5254 					insn_flags = 0; /* not restoring original register state */
5255 				}
5256 			}
5257 			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
5258 		} else if (dst_regno >= 0) {
5259 			/* restore register state from stack */
5260 			copy_register_state(&state->regs[dst_regno], reg);
5261 			/* mark reg as written since spilled pointer state likely
5262 			 * has its liveness marks cleared by is_state_visited()
5263 			 * which resets stack/reg liveness for state transitions
5264 			 */
5265 			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
5266 		} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
5267 			/* If dst_regno==-1, the caller is asking us whether
5268 			 * it is acceptable to use this value as a SCALAR_VALUE
5269 			 * (e.g. for XADD).
5270 			 * We must not allow unprivileged callers to do that
5271 			 * with spilled pointers.
5272 			 */
5273 			verbose(env, "leaking pointer from stack off %d\n",
5274 				off);
5275 			return -EACCES;
5276 		}
5277 		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
5278 	} else {
5279 		for (i = 0; i < size; i++) {
5280 			type = stype[(slot - i) % BPF_REG_SIZE];
5281 			if (type == STACK_MISC)
5282 				continue;
5283 			if (type == STACK_ZERO)
5284 				continue;
5285 			if (type == STACK_INVALID && env->allow_uninit_stack)
5286 				continue;
5287 			verbose(env, "invalid read from stack off %d+%d size %d\n",
5288 				off, i, size);
5289 			return -EACCES;
5290 		}
5291 		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
5292 		if (dst_regno >= 0)
5293 			mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
5294 		insn_flags = 0; /* we are not restoring spilled register */
5295 	}
5296 	if (insn_flags)
5297 		return push_insn_history(env, env->cur_state, insn_flags, 0);
5298 	return 0;
5299 }
5300 
5301 enum bpf_access_src {
5302 	ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
5303 	ACCESS_HELPER = 2,  /* the access is performed by a helper */
5304 };
5305 
5306 static int check_stack_range_initialized(struct bpf_verifier_env *env,
5307 					 int regno, int off, int access_size,
5308 					 bool zero_size_allowed,
5309 					 enum bpf_access_type type,
5310 					 struct bpf_call_arg_meta *meta);
5311 
5312 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
5313 {
5314 	return cur_regs(env) + regno;
5315 }
5316 
5317 /* Read the stack at 'ptr_regno + off' and put the result into the register
5318  * 'dst_regno'.
5319  * 'off' includes the pointer register's fixed offset (i.e. 'ptr_regno.off'),
5320  * but not its variable offset.
5321  * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
5322  *
5323  * As opposed to check_stack_read_fixed_off, this function doesn't deal with
5324  * filling registers (i.e. reads of spilled register cannot be detected when
5325  * the offset is not fixed). We conservatively mark 'dst_regno' as containing
5326  * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
5327  * offset; for a fixed offset check_stack_read_fixed_off should be used
5328  * instead.
5329  */
5330 static int check_stack_read_var_off(struct bpf_verifier_env *env,
5331 				    int ptr_regno, int off, int size, int dst_regno)
5332 {
5333 	/* The state of the source register. */
5334 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
5335 	struct bpf_func_state *ptr_state = func(env, reg);
5336 	int err;
5337 	int min_off, max_off;
5338 
5339 	/* Note that we pass a NULL meta, so raw access will not be permitted.
5340 	 */
5341 	err = check_stack_range_initialized(env, ptr_regno, off, size,
5342 					    false, BPF_READ, NULL);
5343 	if (err)
5344 		return err;
5345 
5346 	min_off = reg->smin_value + off;
5347 	max_off = reg->smax_value + off;
5348 	mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
5349 	check_fastcall_stack_contract(env, ptr_state, env->insn_idx, min_off);
5350 	return 0;
5351 }
5352 
5353 /* check_stack_read dispatches to check_stack_read_fixed_off or
5354  * check_stack_read_var_off.
5355  *
5356  * The caller must ensure that the offset falls within the allocated stack
5357  * bounds.
5358  *
5359  * 'dst_regno' is a register which will receive the value from the stack. It
5360  * can be -1, meaning that the read value is not going to a register.
5361  */
5362 static int check_stack_read(struct bpf_verifier_env *env,
5363 			    int ptr_regno, int off, int size,
5364 			    int dst_regno)
5365 {
5366 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
5367 	struct bpf_func_state *state = func(env, reg);
5368 	int err;
5369 	/* Some accesses are only permitted with a static offset. */
5370 	bool var_off = !tnum_is_const(reg->var_off);
5371 
5372 	/* The offset is required to be static when reads don't go to a
5373 	 * register, in order to not leak pointers (see
5374 	 * check_stack_read_fixed_off).
5375 	 */
5376 	if (dst_regno < 0 && var_off) {
5377 		char tn_buf[48];
5378 
5379 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5380 		verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
5381 			tn_buf, off, size);
5382 		return -EACCES;
5383 	}
5384 	/* Variable offset is prohibited for unprivileged mode for simplicity
5385 	 * since it requires corresponding support in Spectre masking for stack
5386 	 * ALU. See also retrieve_ptr_limit(). The check in
5387 	 * check_stack_access_for_ptr_arithmetic() called by
5388 	 * adjust_ptr_min_max_vals() prevents users from creating stack pointers
5389 	 * with variable offsets, therefore no check is required here. Further,
5390 	 * just checking it here would be insufficient as speculative stack
5391 	 * writes could still lead to unsafe speculative behaviour.
5392 	 */
5393 	if (!var_off) {
5394 		off += reg->var_off.value;
5395 		err = check_stack_read_fixed_off(env, state, off, size,
5396 						 dst_regno);
5397 	} else {
5398 		/* Variable offset stack reads need more conservative handling
5399 		 * than fixed offset ones. Note that dst_regno >= 0 on this
5400 		 * branch.
5401 		 */
5402 		err = check_stack_read_var_off(env, ptr_regno, off, size,
5403 					       dst_regno);
5404 	}
5405 	return err;
5406 }
5407 
5408 
5409 /* check_stack_write dispatches to check_stack_write_fixed_off or
5410  * check_stack_write_var_off.
5411  *
5412  * 'ptr_regno' is the register used as a pointer into the stack.
5413  * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
5414  * 'value_regno' is the register whose value we're writing to the stack. It can
5415  * be -1, meaning that we're not writing from a register.
5416  *
5417  * The caller must ensure that the offset falls within the maximum stack size.
5418  */
5419 static int check_stack_write(struct bpf_verifier_env *env,
5420 			     int ptr_regno, int off, int size,
5421 			     int value_regno, int insn_idx)
5422 {
5423 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
5424 	struct bpf_func_state *state = func(env, reg);
5425 	int err;
5426 
5427 	if (tnum_is_const(reg->var_off)) {
5428 		off += reg->var_off.value;
5429 		err = check_stack_write_fixed_off(env, state, off, size,
5430 						  value_regno, insn_idx);
5431 	} else {
5432 		/* Variable offset stack writes need more conservative handling
5433 		 * than fixed offset ones.
5434 		 */
5435 		err = check_stack_write_var_off(env, state,
5436 						ptr_regno, off, size,
5437 						value_regno, insn_idx);
5438 	}
5439 	return err;
5440 }
5441 
5442 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
5443 				 int off, int size, enum bpf_access_type type)
5444 {
5445 	struct bpf_reg_state *regs = cur_regs(env);
5446 	struct bpf_map *map = regs[regno].map_ptr;
5447 	u32 cap = bpf_map_flags_to_cap(map);
5448 
5449 	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
5450 		verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
5451 			map->value_size, off, size);
5452 		return -EACCES;
5453 	}
5454 
5455 	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
5456 		verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
5457 			map->value_size, off, size);
5458 		return -EACCES;
5459 	}
5460 
5461 	return 0;
5462 }
5463 
5464 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
5465 static int __check_mem_access(struct bpf_verifier_env *env, int regno,
5466 			      int off, int size, u32 mem_size,
5467 			      bool zero_size_allowed)
5468 {
5469 	bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
5470 	struct bpf_reg_state *reg;
5471 
5472 	if (off >= 0 && size_ok && (u64)off + size <= mem_size)
5473 		return 0;
5474 
5475 	reg = &cur_regs(env)[regno];
5476 	switch (reg->type) {
5477 	case PTR_TO_MAP_KEY:
5478 		verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
5479 			mem_size, off, size);
5480 		break;
5481 	case PTR_TO_MAP_VALUE:
5482 		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
5483 			mem_size, off, size);
5484 		break;
5485 	case PTR_TO_PACKET:
5486 	case PTR_TO_PACKET_META:
5487 	case PTR_TO_PACKET_END:
5488 		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
5489 			off, size, regno, reg->id, off, mem_size);
5490 		break;
5491 	case PTR_TO_MEM:
5492 	default:
5493 		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
5494 			mem_size, off, size);
5495 	}
5496 
5497 	return -EACCES;
5498 }
5499 
5500 /* check read/write into a memory region with possible variable offset */
5501 static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
5502 				   int off, int size, u32 mem_size,
5503 				   bool zero_size_allowed)
5504 {
5505 	struct bpf_verifier_state *vstate = env->cur_state;
5506 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
5507 	struct bpf_reg_state *reg = &state->regs[regno];
5508 	int err;
5509 
5510 	/* We may have adjusted the register pointing to memory region, so we
5511 	 * need to try adding each of min_value and max_value to off
5512 	 * to make sure our theoretical access will be safe.
5513 	 *
5514 	 * The minimum value is only important with signed
5515 	 * comparisons where we can't assume the floor of a
5516 	 * value is 0.  If we are using signed variables for our
5517 	 * indexes, we need to make sure that whatever we use
5518 	 * will have a set floor within our range.
5519 	 */
5520 	if (reg->smin_value < 0 &&
5521 	    (reg->smin_value == S64_MIN ||
5522 	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
5523 	      reg->smin_value + off < 0)) {
5524 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
5525 			regno);
5526 		return -EACCES;
5527 	}
5528 	err = __check_mem_access(env, regno, reg->smin_value + off, size,
5529 				 mem_size, zero_size_allowed);
5530 	if (err) {
5531 		verbose(env, "R%d min value is outside of the allowed memory range\n",
5532 			regno);
5533 		return err;
5534 	}
5535 
5536 	/* If we haven't set a max value then we need to bail since we can't be
5537 	 * sure we won't do bad things.
5538 	 * If reg->umax_value + off could overflow, treat that as unbounded too.
5539 	 */
5540 	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
5541 		verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
5542 			regno);
5543 		return -EACCES;
5544 	}
5545 	err = __check_mem_access(env, regno, reg->umax_value + off, size,
5546 				 mem_size, zero_size_allowed);
5547 	if (err) {
5548 		verbose(env, "R%d max value is outside of the allowed memory range\n",
5549 			regno);
5550 		return err;
5551 	}
5552 
5553 	return 0;
5554 }
5555 
5556 static int __check_ptr_off_reg(struct bpf_verifier_env *env,
5557 			       const struct bpf_reg_state *reg, int regno,
5558 			       bool fixed_off_ok)
5559 {
5560 	/* Access to this pointer-typed register or passing it to a helper
5561 	 * is only allowed in its original, unmodified form.
5562 	 */
5563 
5564 	if (reg->off < 0) {
5565 		verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
5566 			reg_type_str(env, reg->type), regno, reg->off);
5567 		return -EACCES;
5568 	}
5569 
5570 	if (!fixed_off_ok && reg->off) {
5571 		verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
5572 			reg_type_str(env, reg->type), regno, reg->off);
5573 		return -EACCES;
5574 	}
5575 
5576 	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
5577 		char tn_buf[48];
5578 
5579 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5580 		verbose(env, "variable %s access var_off=%s disallowed\n",
5581 			reg_type_str(env, reg->type), tn_buf);
5582 		return -EACCES;
5583 	}
5584 
5585 	return 0;
5586 }
5587 
5588 static int check_ptr_off_reg(struct bpf_verifier_env *env,
5589 		             const struct bpf_reg_state *reg, int regno)
5590 {
5591 	return __check_ptr_off_reg(env, reg, regno, false);
5592 }
5593 
5594 static int map_kptr_match_type(struct bpf_verifier_env *env,
5595 			       struct btf_field *kptr_field,
5596 			       struct bpf_reg_state *reg, u32 regno)
5597 {
5598 	const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
5599 	int perm_flags;
5600 	const char *reg_name = "";
5601 
5602 	if (btf_is_kernel(reg->btf)) {
5603 		perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;
5604 
5605 		/* Only unreferenced case accepts untrusted pointers */
5606 		if (kptr_field->type == BPF_KPTR_UNREF)
5607 			perm_flags |= PTR_UNTRUSTED;
5608 	} else {
5609 		perm_flags = PTR_MAYBE_NULL | MEM_ALLOC;
5610 		if (kptr_field->type == BPF_KPTR_PERCPU)
5611 			perm_flags |= MEM_PERCPU;
5612 	}
5613 
5614 	if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
5615 		goto bad_type;
5616 
5617 	/* We need to verify reg->type and reg->btf, before accessing reg->btf */
5618 	reg_name = btf_type_name(reg->btf, reg->btf_id);
5619 
5620 	/* For ref_ptr case, release function check should ensure we get one
5621 	 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
5622 	 * normal store of unreferenced kptr, we must ensure var_off is zero.
5623 	 * Since ref_ptr cannot be accessed directly by BPF insns, checks for
5624 	 * reg->off and reg->ref_obj_id are not needed here.
5625 	 */
5626 	if (__check_ptr_off_reg(env, reg, regno, true))
5627 		return -EACCES;
5628 
5629 	/* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and
5630 	 * we also need to take into account the reg->off.
5631 	 *
5632 	 * We want to support cases like:
5633 	 *
5634 	 * struct foo {
5635 	 *         struct bar br;
5636 	 *         struct baz bz;
5637 	 * };
5638 	 *
5639 	 * struct foo *v;
5640 	 * v = func();	      // PTR_TO_BTF_ID
5641 	 * val->foo = v;      // reg->off is zero, btf and btf_id match type
5642 	 * val->bar = &v->br; // reg->off is still zero, but we need to retry with
5643 	 *                    // first member type of struct after comparison fails
5644 	 * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
5645 	 *                    // to match type
5646 	 *
5647 	 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
5648 	 * is zero. We must also ensure that btf_struct_ids_match does not walk
5649 	 * the struct to match type against first member of struct, i.e. reject
5650 	 * second case from above. Hence, when type is BPF_KPTR_REF, we set
5651 	 * strict mode to true for type match.
5652 	 */
5653 	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
5654 				  kptr_field->kptr.btf, kptr_field->kptr.btf_id,
5655 				  kptr_field->type != BPF_KPTR_UNREF))
5656 		goto bad_type;
5657 	return 0;
5658 bad_type:
5659 	verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
5660 		reg_type_str(env, reg->type), reg_name);
5661 	verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
5662 	if (kptr_field->type == BPF_KPTR_UNREF)
5663 		verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
5664 			targ_name);
5665 	else
5666 		verbose(env, "\n");
5667 	return -EINVAL;
5668 }
5669 
5670 static bool in_sleepable(struct bpf_verifier_env *env)
5671 {
5672 	return env->prog->sleepable ||
5673 	       (env->cur_state && env->cur_state->in_sleepable);
5674 }
5675 
5676 /* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
5677  * can dereference RCU protected pointers and the result is PTR_TRUSTED.
5678  */
5679 static bool in_rcu_cs(struct bpf_verifier_env *env)
5680 {
5681 	return env->cur_state->active_rcu_lock ||
5682 	       env->cur_state->active_locks ||
5683 	       !in_sleepable(env);
5684 }
5685 
5686 /* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */
5687 BTF_SET_START(rcu_protected_types)
5688 #ifdef CONFIG_NET
5689 BTF_ID(struct, prog_test_ref_kfunc)
5690 #endif
5691 #ifdef CONFIG_CGROUPS
5692 BTF_ID(struct, cgroup)
5693 #endif
5694 #ifdef CONFIG_BPF_JIT
5695 BTF_ID(struct, bpf_cpumask)
5696 #endif
5697 BTF_ID(struct, task_struct)
5698 #ifdef CONFIG_CRYPTO
5699 BTF_ID(struct, bpf_crypto_ctx)
5700 #endif
5701 BTF_SET_END(rcu_protected_types)
5702 
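/* Presumably, the types listed in rcu_protected_types are those whose objects
 * are only freed after an RCU grace period, which is what makes it safe to
 * dereference a kptr to them under RCU protection without holding a
 * reference. Objects backed by program BTF (!btf_is_kernel()) are always
 * treated as RCU protected here.
 */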
5703 static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
5704 {
5705 	if (!btf_is_kernel(btf))
5706 		return true;
5707 	return btf_id_set_contains(&rcu_protected_types, btf_id);
5708 }
5709 
5710 static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field)
5711 {
5712 	struct btf_struct_meta *meta;
5713 
5714 	if (btf_is_kernel(kptr_field->kptr.btf))
5715 		return NULL;
5716 
5717 	meta = btf_find_struct_meta(kptr_field->kptr.btf,
5718 				    kptr_field->kptr.btf_id);
5719 
5720 	return meta ? meta->record : NULL;
5721 }
5722 
5723 static bool rcu_safe_kptr(const struct btf_field *field)
5724 {
5725 	const struct btf_field_kptr *kptr = &field->kptr;
5726 
5727 	return field->type == BPF_KPTR_PERCPU ||
5728 	       (field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id));
5729 }
5730 
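/* Compute the type flags for a pointer loaded from a kptr field: inside an
 * RCU critical section an RCU-safe kptr yields MEM_RCU (plus MEM_PERCPU for
 * per-CPU kptrs, MEM_ALLOC for prog-BTF objects, and NON_OWN_REF when the
 * pointee embeds a graph node); otherwise the result is PTR_UNTRUSTED. In all
 * cases the pointer is PTR_MAYBE_NULL.
 */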
5731 static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
5732 {
5733 	struct btf_record *rec;
5734 	u32 ret;
5735 
5736 	ret = PTR_MAYBE_NULL;
5737 	if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) {
5738 		ret |= MEM_RCU;
5739 		if (kptr_field->type == BPF_KPTR_PERCPU)
5740 			ret |= MEM_PERCPU;
5741 		else if (!btf_is_kernel(kptr_field->kptr.btf))
5742 			ret |= MEM_ALLOC;
5743 
5744 		rec = kptr_pointee_btf_record(kptr_field);
5745 		if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE))
5746 			ret |= NON_OWN_REF;
5747 	} else {
5748 		ret |= PTR_UNTRUSTED;
5749 	}
5750 
5751 	return ret;
5752 }
5753 
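/* A load of a BPF_UPTR field yields a PTR_TO_MEM of the pointee's BTF size,
 * marked PTR_MAYBE_NULL since the uptr may not be populated at runtime.
 */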
5754 static int mark_uptr_ld_reg(struct bpf_verifier_env *env, u32 regno,
5755 			    struct btf_field *field)
5756 {
5757 	struct bpf_reg_state *reg;
5758 	const struct btf_type *t;
5759 
5760 	t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id);
5761 	mark_reg_known_zero(env, cur_regs(env), regno);
5762 	reg = reg_state(env, regno);
5763 	reg->type = PTR_TO_MEM | PTR_MAYBE_NULL;
5764 	reg->mem_size = t->size;
5765 	reg->id = ++env->id_gen;
5766 
5767 	return 0;
5768 }
5769 
5770 static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
5771 				 int value_regno, int insn_idx,
5772 				 struct btf_field *kptr_field)
5773 {
5774 	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
5775 	int class = BPF_CLASS(insn->code);
5776 	struct bpf_reg_state *val_reg;
5777 
5778 	/* Things we already checked for in check_map_access and caller:
5779 	 *  - Reject cases where variable offset may touch kptr
5780 	 *  - size of access (must be BPF_DW)
5781 	 *  - tnum_is_const(reg->var_off)
5782 	 *  - kptr_field->offset == off + reg->var_off.value
5783 	 */
5784 	/* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
5785 	if (BPF_MODE(insn->code) != BPF_MEM) {
5786 		verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
5787 		return -EACCES;
5788 	}
5789 
5790 	/* We only allow loading referenced kptr, since it will be marked as
5791 	 * untrusted, similar to unreferenced kptr.
5792 	 */
5793 	if (class != BPF_LDX &&
5794 	    (kptr_field->type == BPF_KPTR_REF || kptr_field->type == BPF_KPTR_PERCPU)) {
5795 		verbose(env, "store to referenced kptr disallowed\n");
5796 		return -EACCES;
5797 	}
5798 	if (class != BPF_LDX && kptr_field->type == BPF_UPTR) {
5799 		verbose(env, "store to uptr disallowed\n");
5800 		return -EACCES;
5801 	}
5802 
5803 	if (class == BPF_LDX) {
5804 		if (kptr_field->type == BPF_UPTR)
5805 			return mark_uptr_ld_reg(env, value_regno, kptr_field);
5806 
5807 		/* We can simply mark the value_regno receiving the pointer
5808 		 * value from map as PTR_TO_BTF_ID, with the correct type.
5809 		 */
5810 		mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
5811 				kptr_field->kptr.btf_id, btf_ld_kptr_type(env, kptr_field));
5812 	} else if (class == BPF_STX) {
5813 		val_reg = reg_state(env, value_regno);
5814 		if (!register_is_null(val_reg) &&
5815 		    map_kptr_match_type(env, kptr_field, val_reg, value_regno))
5816 			return -EACCES;
5817 	} else if (class == BPF_ST) {
5818 		if (insn->imm) {
5819 			verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
5820 				kptr_field->offset);
5821 			return -EACCES;
5822 		}
5823 	} else {
5824 		verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
5825 		return -EACCES;
5826 	}
5827 	return 0;
5828 }
5829 
5830 /* check read/write into a map element with possible variable offset */
5831 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
5832 			    int off, int size, bool zero_size_allowed,
5833 			    enum bpf_access_src src)
5834 {
5835 	struct bpf_verifier_state *vstate = env->cur_state;
5836 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
5837 	struct bpf_reg_state *reg = &state->regs[regno];
5838 	struct bpf_map *map = reg->map_ptr;
5839 	struct btf_record *rec;
5840 	int err, i;
5841 
5842 	err = check_mem_region_access(env, regno, off, size, map->value_size,
5843 				      zero_size_allowed);
5844 	if (err)
5845 		return err;
5846 
5847 	if (IS_ERR_OR_NULL(map->record))
5848 		return 0;
5849 	rec = map->record;
5850 	for (i = 0; i < rec->cnt; i++) {
5851 		struct btf_field *field = &rec->fields[i];
5852 		u32 p = field->offset;
5853 
5854 		/* If any part of a field can be touched by load/store, reject
5855 		 * this program. To check that [x1, x2) overlaps with [y1, y2),
5856 		 * it is sufficient to check x1 < y2 && y1 < x2.
5857 		 */
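		/* For example, a field occupying [16, 24) and an access covering
		 * [20, 28) overlap, since 20 < 24 && 16 < 28, so the checks
		 * below are applied to that access.
		 */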
5858 		if (reg->smin_value + off < p + field->size &&
5859 		    p < reg->umax_value + off + size) {
5860 			switch (field->type) {
5861 			case BPF_KPTR_UNREF:
5862 			case BPF_KPTR_REF:
5863 			case BPF_KPTR_PERCPU:
5864 			case BPF_UPTR:
5865 				if (src != ACCESS_DIRECT) {
5866 					verbose(env, "%s cannot be accessed indirectly by helper\n",
5867 						btf_field_type_name(field->type));
5868 					return -EACCES;
5869 				}
5870 				if (!tnum_is_const(reg->var_off)) {
5871 					verbose(env, "%s access cannot have variable offset\n",
5872 						btf_field_type_name(field->type));
5873 					return -EACCES;
5874 				}
5875 				if (p != off + reg->var_off.value) {
5876 					verbose(env, "%s access misaligned expected=%u off=%llu\n",
5877 						btf_field_type_name(field->type),
5878 						p, off + reg->var_off.value);
5879 					return -EACCES;
5880 				}
5881 				if (size != bpf_size_to_bytes(BPF_DW)) {
5882 					verbose(env, "%s access size must be BPF_DW\n",
5883 						btf_field_type_name(field->type));
5884 					return -EACCES;
5885 				}
5886 				break;
5887 			default:
5888 				verbose(env, "%s cannot be accessed directly by load/store\n",
5889 					btf_field_type_name(field->type));
5890 				return -EACCES;
5891 			}
5892 		}
5893 	}
5894 	return 0;
5895 }
5896 
5897 #define MAX_PACKET_OFF 0xffff
5898 
5899 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
5900 				       const struct bpf_call_arg_meta *meta,
5901 				       enum bpf_access_type t)
5902 {
5903 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
5904 
5905 	switch (prog_type) {
5906 	/* Program types only with direct read access go here! */
5907 	case BPF_PROG_TYPE_LWT_IN:
5908 	case BPF_PROG_TYPE_LWT_OUT:
5909 	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
5910 	case BPF_PROG_TYPE_SK_REUSEPORT:
5911 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
5912 	case BPF_PROG_TYPE_CGROUP_SKB:
5913 		if (t == BPF_WRITE)
5914 			return false;
5915 		fallthrough;
5916 
5917 	/* Program types with direct read + write access go here! */
5918 	case BPF_PROG_TYPE_SCHED_CLS:
5919 	case BPF_PROG_TYPE_SCHED_ACT:
5920 	case BPF_PROG_TYPE_XDP:
5921 	case BPF_PROG_TYPE_LWT_XMIT:
5922 	case BPF_PROG_TYPE_SK_SKB:
5923 	case BPF_PROG_TYPE_SK_MSG:
5924 		if (meta)
5925 			return meta->pkt_access;
5926 
5927 		env->seen_direct_write = true;
5928 		return true;
5929 
5930 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
5931 		if (t == BPF_WRITE)
5932 			env->seen_direct_write = true;
5933 
5934 		return true;
5935 
5936 	default:
5937 		return false;
5938 	}
5939 }
5940 
5941 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
5942 			       int size, bool zero_size_allowed)
5943 {
5944 	struct bpf_reg_state *regs = cur_regs(env);
5945 	struct bpf_reg_state *reg = &regs[regno];
5946 	int err;
5947 
5948 	/* We may have added a variable offset to the packet pointer; but any
5949 	 * reg->range we have comes after that.  We are only checking the fixed
5950 	 * offset.
5951 	 */
5952 
5953 	/* We don't allow negative numbers, because we aren't tracking enough
5954 	 * detail to prove they're safe.
5955 	 */
5956 	if (reg->smin_value < 0) {
5957 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
5958 			regno);
5959 		return -EACCES;
5960 	}
5961 
5962 	err = reg->range < 0 ? -EINVAL :
5963 	      __check_mem_access(env, regno, off, size, reg->range,
5964 				 zero_size_allowed);
5965 	if (err) {
5966 		verbose(env, "R%d offset is outside of the packet\n", regno);
5967 		return err;
5968 	}
5969 
5970 	/* __check_mem_access has made sure "off + size - 1" is within u16.
5971 	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
5972 	 * otherwise find_good_pkt_pointers would have refused to set the range
5973 	 * info, and __check_mem_access would have rejected this pkt access.
5974 	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
5975 	 */
5976 	env->prog->aux->max_pkt_offset =
5977 		max_t(u32, env->prog->aux->max_pkt_offset,
5978 		      off + reg->umax_value + size - 1);
5979 
5980 	return err;
5981 }
5982 
5983 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
5984 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
5985 			    enum bpf_access_type t, enum bpf_reg_type *reg_type,
5986 			    struct btf **btf, u32 *btf_id, bool *is_retval, bool is_ldsx)
5987 {
5988 	struct bpf_insn_access_aux info = {
5989 		.reg_type = *reg_type,
5990 		.log = &env->log,
5991 		.is_retval = false,
5992 		.is_ldsx = is_ldsx,
5993 	};
5994 
5995 	if (env->ops->is_valid_access &&
5996 	    env->ops->is_valid_access(off, size, t, env->prog, &info)) {
5997 		/* A non zero info.ctx_field_size indicates that this field is a
5998 		 * candidate for later verifier transformation to load the whole
5999 		 * field and then apply a mask when accessed with a narrower
6000 		 * access than actual ctx access size. A zero info.ctx_field_size
6001 		 * will only allow for whole field access and rejects any other
6002 		 * type of narrower access.
6003 		 */
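		/* E.g. a 1-byte read of a 4-byte ctx field may later be rewritten
		 * by the verifier into a full 4-byte load followed by a
		 * shift/mask of the requested byte.
		 */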
6004 		*reg_type = info.reg_type;
6005 		*is_retval = info.is_retval;
6006 
6007 		if (base_type(*reg_type) == PTR_TO_BTF_ID) {
6008 			*btf = info.btf;
6009 			*btf_id = info.btf_id;
6010 		} else {
6011 			env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
6012 		}
6013 		/* remember the offset of last byte accessed in ctx */
6014 		if (env->prog->aux->max_ctx_offset < off + size)
6015 			env->prog->aux->max_ctx_offset = off + size;
6016 		return 0;
6017 	}
6018 
6019 	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
6020 	return -EACCES;
6021 }
6022 
6023 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
6024 				  int size)
6025 {
6026 	if (size < 0 || off < 0 ||
6027 	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
6028 		verbose(env, "invalid access to flow keys off=%d size=%d\n",
6029 			off, size);
6030 		return -EACCES;
6031 	}
6032 	return 0;
6033 }
6034 
6035 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
6036 			     u32 regno, int off, int size,
6037 			     enum bpf_access_type t)
6038 {
6039 	struct bpf_reg_state *regs = cur_regs(env);
6040 	struct bpf_reg_state *reg = &regs[regno];
6041 	struct bpf_insn_access_aux info = {};
6042 	bool valid;
6043 
6044 	if (reg->smin_value < 0) {
6045 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
6046 			regno);
6047 		return -EACCES;
6048 	}
6049 
6050 	switch (reg->type) {
6051 	case PTR_TO_SOCK_COMMON:
6052 		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
6053 		break;
6054 	case PTR_TO_SOCKET:
6055 		valid = bpf_sock_is_valid_access(off, size, t, &info);
6056 		break;
6057 	case PTR_TO_TCP_SOCK:
6058 		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
6059 		break;
6060 	case PTR_TO_XDP_SOCK:
6061 		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
6062 		break;
6063 	default:
6064 		valid = false;
6065 	}
6066 
6067 
6068 	if (valid) {
6069 		env->insn_aux_data[insn_idx].ctx_field_size =
6070 			info.ctx_field_size;
6071 		return 0;
6072 	}
6073 
6074 	verbose(env, "R%d invalid %s access off=%d size=%d\n",
6075 		regno, reg_type_str(env, reg->type), off, size);
6076 
6077 	return -EACCES;
6078 }
6079 
6080 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
6081 {
6082 	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
6083 }
6084 
6085 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
6086 {
6087 	const struct bpf_reg_state *reg = reg_state(env, regno);
6088 
6089 	return reg->type == PTR_TO_CTX;
6090 }
6091 
6092 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
6093 {
6094 	const struct bpf_reg_state *reg = reg_state(env, regno);
6095 
6096 	return type_is_sk_pointer(reg->type);
6097 }
6098 
6099 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
6100 {
6101 	const struct bpf_reg_state *reg = reg_state(env, regno);
6102 
6103 	return type_is_pkt_pointer(reg->type);
6104 }
6105 
6106 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
6107 {
6108 	const struct bpf_reg_state *reg = reg_state(env, regno);
6109 
6110 	/* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
6111 	return reg->type == PTR_TO_FLOW_KEYS;
6112 }
6113 
6114 static bool is_arena_reg(struct bpf_verifier_env *env, int regno)
6115 {
6116 	const struct bpf_reg_state *reg = reg_state(env, regno);
6117 
6118 	return reg->type == PTR_TO_ARENA;
6119 }
6120 
6121 static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
6122 #ifdef CONFIG_NET
6123 	[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
6124 	[PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
6125 	[PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
6126 #endif
6127 	[CONST_PTR_TO_MAP] = btf_bpf_map_id,
6128 };
6129 
6130 static bool is_trusted_reg(const struct bpf_reg_state *reg)
6131 {
6132 	/* A referenced register is always trusted. */
6133 	if (reg->ref_obj_id)
6134 		return true;
6135 
6136 	/* Types listed in the reg2btf_ids are always trusted */
6137 	if (reg2btf_ids[base_type(reg->type)] &&
6138 	    !bpf_type_has_unsafe_modifiers(reg->type))
6139 		return true;
6140 
6141 	/* If a register is not referenced, it is trusted if it has the
6142 	 * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
6143 	 * other type modifiers may be safe, but we elect to take an opt-in
6144 	 * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
6145 	 * not.
6146 	 *
6147 	 * Eventually, we should make PTR_TRUSTED the single source of truth
6148 	 * for whether a register is trusted.
6149 	 */
6150 	return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
6151 	       !bpf_type_has_unsafe_modifiers(reg->type);
6152 }
6153 
6154 static bool is_rcu_reg(const struct bpf_reg_state *reg)
6155 {
6156 	return reg->type & MEM_RCU;
6157 }
6158 
6159 static void clear_trusted_flags(enum bpf_type_flag *flag)
6160 {
6161 	*flag &= ~(BPF_REG_TRUSTED_MODIFIERS | MEM_RCU);
6162 }
6163 
6164 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
6165 				   const struct bpf_reg_state *reg,
6166 				   int off, int size, bool strict)
6167 {
6168 	struct tnum reg_off;
6169 	int ip_align;
6170 
6171 	/* Byte size accesses are always allowed. */
6172 	if (!strict || size == 1)
6173 		return 0;
6174 
6175 	/* For platforms that do not have a Kconfig enabling
6176 	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
6177 	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
6178 	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
6179 	 * to this code only in strict mode where we want to emulate
6180 	 * the NET_IP_ALIGN==2 checking.  Therefore use an
6181 	 * unconditional IP align value of '2'.
6182 	 */
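	/* Worked example: with a constant var_off, reg->off == 14 (just past
	 * an Ethernet header), off == 0 and size == 4, the sum 2 + 14 + 0 is
	 * a multiple of 4, so the access passes; with reg->off == 12 it is
	 * rejected as misaligned.
	 */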
6183 	ip_align = 2;
6184 
6185 	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
6186 	if (!tnum_is_aligned(reg_off, size)) {
6187 		char tn_buf[48];
6188 
6189 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6190 		verbose(env,
6191 			"misaligned packet access off %d+%s+%d+%d size %d\n",
6192 			ip_align, tn_buf, reg->off, off, size);
6193 		return -EACCES;
6194 	}
6195 
6196 	return 0;
6197 }
6198 
6199 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
6200 				       const struct bpf_reg_state *reg,
6201 				       const char *pointer_desc,
6202 				       int off, int size, bool strict)
6203 {
6204 	struct tnum reg_off;
6205 
6206 	/* Byte size accesses are always allowed. */
6207 	if (!strict || size == 1)
6208 		return 0;
6209 
6210 	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
6211 	if (!tnum_is_aligned(reg_off, size)) {
6212 		char tn_buf[48];
6213 
6214 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6215 		verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
6216 			pointer_desc, tn_buf, reg->off, off, size);
6217 		return -EACCES;
6218 	}
6219 
6220 	return 0;
6221 }
6222 
6223 static int check_ptr_alignment(struct bpf_verifier_env *env,
6224 			       const struct bpf_reg_state *reg, int off,
6225 			       int size, bool strict_alignment_once)
6226 {
6227 	bool strict = env->strict_alignment || strict_alignment_once;
6228 	const char *pointer_desc = "";
6229 
6230 	switch (reg->type) {
6231 	case PTR_TO_PACKET:
6232 	case PTR_TO_PACKET_META:
6233 		/* Special case, because of NET_IP_ALIGN. Given metadata sits
6234 		 * right in front, treat it the very same way.
6235 		 */
6236 		return check_pkt_ptr_alignment(env, reg, off, size, strict);
6237 	case PTR_TO_FLOW_KEYS:
6238 		pointer_desc = "flow keys ";
6239 		break;
6240 	case PTR_TO_MAP_KEY:
6241 		pointer_desc = "key ";
6242 		break;
6243 	case PTR_TO_MAP_VALUE:
6244 		pointer_desc = "value ";
6245 		break;
6246 	case PTR_TO_CTX:
6247 		pointer_desc = "context ";
6248 		break;
6249 	case PTR_TO_STACK:
6250 		pointer_desc = "stack ";
6251 		/* The stack spill tracking logic in check_stack_write_fixed_off()
6252 		 * and check_stack_read_fixed_off() relies on stack accesses being
6253 		 * aligned.
6254 		 */
6255 		strict = true;
6256 		break;
6257 	case PTR_TO_SOCKET:
6258 		pointer_desc = "sock ";
6259 		break;
6260 	case PTR_TO_SOCK_COMMON:
6261 		pointer_desc = "sock_common ";
6262 		break;
6263 	case PTR_TO_TCP_SOCK:
6264 		pointer_desc = "tcp_sock ";
6265 		break;
6266 	case PTR_TO_XDP_SOCK:
6267 		pointer_desc = "xdp_sock ";
6268 		break;
6269 	case PTR_TO_ARENA:
6270 		return 0;
6271 	default:
6272 		break;
6273 	}
6274 	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
6275 					   strict);
6276 }
6277 
6278 static enum priv_stack_mode bpf_enable_priv_stack(struct bpf_prog *prog)
6279 {
6280 	if (!bpf_jit_supports_private_stack())
6281 		return NO_PRIV_STACK;
6282 
6283 	/* bpf_prog_check_recur() checks all prog types that use bpf trampoline
6284 	 * while kprobe/tp/perf_event/raw_tp don't use trampoline hence checked
6285 	 * explicitly.
6286 	 */
6287 	switch (prog->type) {
6288 	case BPF_PROG_TYPE_KPROBE:
6289 	case BPF_PROG_TYPE_TRACEPOINT:
6290 	case BPF_PROG_TYPE_PERF_EVENT:
6291 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
6292 		return PRIV_STACK_ADAPTIVE;
6293 	case BPF_PROG_TYPE_TRACING:
6294 	case BPF_PROG_TYPE_LSM:
6295 	case BPF_PROG_TYPE_STRUCT_OPS:
6296 		if (prog->aux->priv_stack_requested || bpf_prog_check_recur(prog))
6297 			return PRIV_STACK_ADAPTIVE;
6298 		fallthrough;
6299 	default:
6300 		break;
6301 	}
6302 
6303 	return NO_PRIV_STACK;
6304 }
6305 
6306 static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
6307 {
6308 	if (env->prog->jit_requested)
6309 		return round_up(stack_depth, 16);
6310 
6311 	/* round up to 32-bytes, since this is granularity
6312 	 * of interpreter stack size
6313 	 */
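	/* E.g. a declared stack depth of 40 bytes is rounded to 48 when JITed
	 * (16-byte granularity above) and to 64 here for the interpreter.
	 */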
6314 	return round_up(max_t(u32, stack_depth, 1), 32);
6315 }
6316 
6317 /* starting from main bpf function walk all instructions of the function
6318  * and recursively walk all callees that given function can call.
6319  * Ignore jump and exit insns.
6320  * Since recursion is prevented by check_cfg() this algorithm
6321  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
6322  */
6323 static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
6324 					 bool priv_stack_supported)
6325 {
6326 	struct bpf_subprog_info *subprog = env->subprog_info;
6327 	struct bpf_insn *insn = env->prog->insnsi;
6328 	int depth = 0, frame = 0, i, subprog_end, subprog_depth;
6329 	bool tail_call_reachable = false;
6330 	int ret_insn[MAX_CALL_FRAMES];
6331 	int ret_prog[MAX_CALL_FRAMES];
6332 	int j;
6333 
6334 	i = subprog[idx].start;
6335 	if (!priv_stack_supported)
6336 		subprog[idx].priv_stack_mode = NO_PRIV_STACK;
6337 process_func:
6338 	/* protect against potential stack overflow that might happen when
6339 	 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
6340 	 * depth for such case down to 256 so that the worst case scenario
6341 	 * would result in 8k stack size (32 which is tailcall limit * 256 =
6342 	 * 8k).
6343 	 *
6344 	 * To get the idea what might happen, see an example:
6345 	 * func1 -> sub rsp, 128
6346 	 *  subfunc1 -> sub rsp, 256
6347 	 *  tailcall1 -> add rsp, 256
6348 	 *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
6349 	 *   subfunc2 -> sub rsp, 64
6350 	 *   subfunc22 -> sub rsp, 128
6351 	 *   tailcall2 -> add rsp, 128
6352 	 *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
6353 	 *
6354 	 * tailcall will unwind the current stack frame but it will not get rid
6355 	 * of caller's stack as shown on the example above.
6356 	 */
6357 	if (idx && subprog[idx].has_tail_call && depth >= 256) {
6358 		verbose(env,
6359 			"tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
6360 			depth);
6361 		return -EACCES;
6362 	}
6363 
6364 	subprog_depth = round_up_stack_depth(env, subprog[idx].stack_depth);
6365 	if (priv_stack_supported) {
6366 		/* Request private stack support only if the subprog stack
6367 		 * depth is no less than BPF_PRIV_STACK_MIN_SIZE. This is to
6368 		 * avoid jit penalty if the stack usage is small.
6369 		 */
6370 		if (subprog[idx].priv_stack_mode == PRIV_STACK_UNKNOWN &&
6371 		    subprog_depth >= BPF_PRIV_STACK_MIN_SIZE)
6372 			subprog[idx].priv_stack_mode = PRIV_STACK_ADAPTIVE;
6373 	}
6374 
6375 	if (subprog[idx].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
6376 		if (subprog_depth > MAX_BPF_STACK) {
6377 			verbose(env, "stack size of subprog %d is %d. Too large\n",
6378 				idx, subprog_depth);
6379 			return -EACCES;
6380 		}
6381 	} else {
6382 		depth += subprog_depth;
6383 		if (depth > MAX_BPF_STACK) {
6384 			verbose(env, "combined stack size of %d calls is %d. Too large\n",
6385 				frame + 1, depth);
6386 			return -EACCES;
6387 		}
6388 	}
6389 continue_func:
6390 	subprog_end = subprog[idx + 1].start;
6391 	for (; i < subprog_end; i++) {
6392 		int next_insn, sidx;
6393 
6394 		if (bpf_pseudo_kfunc_call(insn + i) && !insn[i].off) {
6395 			bool err = false;
6396 
6397 			if (!is_bpf_throw_kfunc(insn + i))
6398 				continue;
6399 			if (subprog[idx].is_cb)
6400 				err = true;
6401 			for (int c = 0; c < frame && !err; c++) {
6402 				if (subprog[ret_prog[c]].is_cb) {
6403 					err = true;
6404 					break;
6405 				}
6406 			}
6407 			if (!err)
6408 				continue;
6409 			verbose(env,
6410 				"bpf_throw kfunc (insn %d) cannot be called from callback subprog %d\n",
6411 				i, idx);
6412 			return -EINVAL;
6413 		}
6414 
6415 		if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
6416 			continue;
6417 		/* remember insn and function to return to */
6418 		ret_insn[frame] = i + 1;
6419 		ret_prog[frame] = idx;
6420 
6421 		/* find the callee */
6422 		next_insn = i + insn[i].imm + 1;
6423 		sidx = find_subprog(env, next_insn);
6424 		if (sidx < 0) {
6425 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
6426 				  next_insn);
6427 			return -EFAULT;
6428 		}
6429 		if (subprog[sidx].is_async_cb) {
6430 			if (subprog[sidx].has_tail_call) {
6431 				verbose(env, "verifier bug. subprog has tail_call and async cb\n");
6432 				return -EFAULT;
6433 			}
6434 			/* async callbacks don't increase bpf prog stack size unless called directly */
6435 			if (!bpf_pseudo_call(insn + i))
6436 				continue;
6437 			if (subprog[sidx].is_exception_cb) {
6438 				verbose(env, "insn %d cannot call exception cb directly\n", i);
6439 				return -EINVAL;
6440 			}
6441 		}
6442 		i = next_insn;
6443 		idx = sidx;
6444 		if (!priv_stack_supported)
6445 			subprog[idx].priv_stack_mode = NO_PRIV_STACK;
6446 
6447 		if (subprog[idx].has_tail_call)
6448 			tail_call_reachable = true;
6449 
6450 		frame++;
6451 		if (frame >= MAX_CALL_FRAMES) {
6452 			verbose(env, "the call stack of %d frames is too deep !\n",
6453 				frame);
6454 			return -E2BIG;
6455 		}
6456 		goto process_func;
6457 	}
6458 	/* if tail call got detected across bpf2bpf calls then mark each of the
6459 	 * currently present subprog frames as tail call reachable subprogs;
6460 	 * this info will be utilized by JIT so that we will be preserving the
6461 	 * tail call counter throughout bpf2bpf calls combined with tailcalls
6462 	 */
6463 	if (tail_call_reachable)
6464 		for (j = 0; j < frame; j++) {
6465 			if (subprog[ret_prog[j]].is_exception_cb) {
6466 				verbose(env, "cannot tail call within exception cb\n");
6467 				return -EINVAL;
6468 			}
6469 			subprog[ret_prog[j]].tail_call_reachable = true;
6470 		}
6471 	if (subprog[0].tail_call_reachable)
6472 		env->prog->aux->tail_call_reachable = true;
6473 
6474 	/* end of for() loop means the last insn of the 'subprog'
6475 	 * was reached. Doesn't matter whether it was JA or EXIT
6476 	 */
6477 	if (frame == 0)
6478 		return 0;
6479 	if (subprog[idx].priv_stack_mode != PRIV_STACK_ADAPTIVE)
6480 		depth -= round_up_stack_depth(env, subprog[idx].stack_depth);
6481 	frame--;
6482 	i = ret_insn[frame];
6483 	idx = ret_prog[frame];
6484 	goto continue_func;
6485 }
6486 
6487 static int check_max_stack_depth(struct bpf_verifier_env *env)
6488 {
6489 	enum priv_stack_mode priv_stack_mode = PRIV_STACK_UNKNOWN;
6490 	struct bpf_subprog_info *si = env->subprog_info;
6491 	bool priv_stack_supported;
6492 	int ret;
6493 
6494 	for (int i = 0; i < env->subprog_cnt; i++) {
6495 		if (si[i].has_tail_call) {
6496 			priv_stack_mode = NO_PRIV_STACK;
6497 			break;
6498 		}
6499 	}
6500 
6501 	if (priv_stack_mode == PRIV_STACK_UNKNOWN)
6502 		priv_stack_mode = bpf_enable_priv_stack(env->prog);
6503 
6504 	/* All async_cb subprogs use normal kernel stack. If a particular
6505 	 * subprog appears in both main prog and async_cb subtree, that
6506 	 * subprog will use normal kernel stack to avoid potential nesting.
6507 	 * The reverse subprog traversal ensures when main prog subtree is
6508 	 * checked, the subprogs appearing in async_cb subtrees are already
6509 	 * marked as using normal kernel stack, so stack size checking can
6510 	 * be done properly.
6511 	 */
6512 	for (int i = env->subprog_cnt - 1; i >= 0; i--) {
6513 		if (!i || si[i].is_async_cb) {
6514 			priv_stack_supported = !i && priv_stack_mode == PRIV_STACK_ADAPTIVE;
6515 			ret = check_max_stack_depth_subprog(env, i, priv_stack_supported);
6516 			if (ret < 0)
6517 				return ret;
6518 		}
6519 	}
6520 
6521 	for (int i = 0; i < env->subprog_cnt; i++) {
6522 		if (si[i].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
6523 			env->prog->aux->jits_use_priv_stack = true;
6524 			break;
6525 		}
6526 	}
6527 
6528 	return 0;
6529 }
6530 
6531 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
6532 static int get_callee_stack_depth(struct bpf_verifier_env *env,
6533 				  const struct bpf_insn *insn, int idx)
6534 {
6535 	int start = idx + insn->imm + 1, subprog;
6536 
6537 	subprog = find_subprog(env, start);
6538 	if (subprog < 0) {
6539 		WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
6540 			  start);
6541 		return -EFAULT;
6542 	}
6543 	return env->subprog_info[subprog].stack_depth;
6544 }
6545 #endif
6546 
6547 static int __check_buffer_access(struct bpf_verifier_env *env,
6548 				 const char *buf_info,
6549 				 const struct bpf_reg_state *reg,
6550 				 int regno, int off, int size)
6551 {
6552 	if (off < 0) {
6553 		verbose(env,
6554 			"R%d invalid %s buffer access: off=%d, size=%d\n",
6555 			regno, buf_info, off, size);
6556 		return -EACCES;
6557 	}
6558 	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
6559 		char tn_buf[48];
6560 
6561 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6562 		verbose(env,
6563 			"R%d invalid variable buffer offset: off=%d, var_off=%s\n",
6564 			regno, off, tn_buf);
6565 		return -EACCES;
6566 	}
6567 
6568 	return 0;
6569 }
6570 
6571 static int check_tp_buffer_access(struct bpf_verifier_env *env,
6572 				  const struct bpf_reg_state *reg,
6573 				  int regno, int off, int size)
6574 {
6575 	int err;
6576 
6577 	err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
6578 	if (err)
6579 		return err;
6580 
6581 	if (off + size > env->prog->aux->max_tp_access)
6582 		env->prog->aux->max_tp_access = off + size;
6583 
6584 	return 0;
6585 }
6586 
6587 static int check_buffer_access(struct bpf_verifier_env *env,
6588 			       const struct bpf_reg_state *reg,
6589 			       int regno, int off, int size,
6590 			       bool zero_size_allowed,
6591 			       u32 *max_access)
6592 {
6593 	const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
6594 	int err;
6595 
6596 	err = __check_buffer_access(env, buf_info, reg, regno, off, size);
6597 	if (err)
6598 		return err;
6599 
6600 	if (off + size > *max_access)
6601 		*max_access = off + size;
6602 
6603 	return 0;
6604 }
6605 
6606 /* BPF architecture zero extends alu32 ops into 64-bit registers */
6607 static void zext_32_to_64(struct bpf_reg_state *reg)
6608 {
6609 	reg->var_off = tnum_subreg(reg->var_off);
6610 	__reg_assign_32_into_64(reg);
6611 }
6612 
6613 /* truncate register to smaller size (in bytes)
6614  * must be called with size < BPF_REG_SIZE
6615  */
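/* Example of the bounds fixup below: with size == 2 the mask is 0xffff;
 * if umin == 0x10005 and umax == 0x100ff share the same upper bits, the
 * bounds become [0x5, 0xff]; if the upper bits differ, the bounds are
 * reset to [0, 0xffff].
 */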
6616 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
6617 {
6618 	u64 mask;
6619 
6620 	/* clear high bits in bit representation */
6621 	reg->var_off = tnum_cast(reg->var_off, size);
6622 
6623 	/* fix arithmetic bounds */
6624 	mask = ((u64)1 << (size * 8)) - 1;
6625 	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
6626 		reg->umin_value &= mask;
6627 		reg->umax_value &= mask;
6628 	} else {
6629 		reg->umin_value = 0;
6630 		reg->umax_value = mask;
6631 	}
6632 	reg->smin_value = reg->umin_value;
6633 	reg->smax_value = reg->umax_value;
6634 
6635 	/* If size is smaller than 32bit register the 32bit register
6636 	 * values are also truncated so we push 64-bit bounds into
6637 	 * 32-bit bounds. Above were truncated < 32-bits already.
6638 	 */
6639 	if (size < 4)
6640 		__mark_reg32_unbounded(reg);
6641 
6642 	reg_bounds_sync(reg);
6643 }
6644 
6645 static void set_sext64_default_val(struct bpf_reg_state *reg, int size)
6646 {
6647 	if (size == 1) {
6648 		reg->smin_value = reg->s32_min_value = S8_MIN;
6649 		reg->smax_value = reg->s32_max_value = S8_MAX;
6650 	} else if (size == 2) {
6651 		reg->smin_value = reg->s32_min_value = S16_MIN;
6652 		reg->smax_value = reg->s32_max_value = S16_MAX;
6653 	} else {
6654 		/* size == 4 */
6655 		reg->smin_value = reg->s32_min_value = S32_MIN;
6656 		reg->smax_value = reg->s32_max_value = S32_MAX;
6657 	}
6658 	reg->umin_value = reg->u32_min_value = 0;
6659 	reg->umax_value = U64_MAX;
6660 	reg->u32_max_value = U32_MAX;
6661 	reg->var_off = tnum_unknown;
6662 }
6663 
6664 static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size)
6665 {
6666 	s64 init_s64_max, init_s64_min, s64_max, s64_min, u64_cval;
6667 	u64 top_smax_value, top_smin_value;
6668 	u64 num_bits = size * 8;
6669 
6670 	if (tnum_is_const(reg->var_off)) {
6671 		u64_cval = reg->var_off.value;
6672 		if (size == 1)
6673 			reg->var_off = tnum_const((s8)u64_cval);
6674 		else if (size == 2)
6675 			reg->var_off = tnum_const((s16)u64_cval);
6676 		else
6677 			/* size == 4 */
6678 			reg->var_off = tnum_const((s32)u64_cval);
6679 
6680 		u64_cval = reg->var_off.value;
6681 		reg->smax_value = reg->smin_value = u64_cval;
6682 		reg->umax_value = reg->umin_value = u64_cval;
6683 		reg->s32_max_value = reg->s32_min_value = u64_cval;
6684 		reg->u32_max_value = reg->u32_min_value = u64_cval;
6685 		return;
6686 	}
6687 
6688 	top_smax_value = ((u64)reg->smax_value >> num_bits) << num_bits;
6689 	top_smin_value = ((u64)reg->smin_value >> num_bits) << num_bits;
6690 
6691 	if (top_smax_value != top_smin_value)
6692 		goto out;
6693 
6694 	/* find the s64_min and s64_max after sign extension */
6695 	if (size == 1) {
6696 		init_s64_max = (s8)reg->smax_value;
6697 		init_s64_min = (s8)reg->smin_value;
6698 	} else if (size == 2) {
6699 		init_s64_max = (s16)reg->smax_value;
6700 		init_s64_min = (s16)reg->smin_value;
6701 	} else {
6702 		init_s64_max = (s32)reg->smax_value;
6703 		init_s64_min = (s32)reg->smin_value;
6704 	}
6705 
6706 	s64_max = max(init_s64_max, init_s64_min);
6707 	s64_min = min(init_s64_max, init_s64_min);
6708 
6709 	/* both of s64_max/s64_min positive or negative */
6710 	if ((s64_max >= 0) == (s64_min >= 0)) {
6711 		reg->s32_min_value = reg->smin_value = s64_min;
6712 		reg->s32_max_value = reg->smax_value = s64_max;
6713 		reg->u32_min_value = reg->umin_value = s64_min;
6714 		reg->u32_max_value = reg->umax_value = s64_max;
6715 		reg->var_off = tnum_range(s64_min, s64_max);
6716 		return;
6717 	}
6718 
6719 out:
6720 	set_sext64_default_val(reg, size);
6721 }
6722 
6723 static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
6724 {
6725 	if (size == 1) {
6726 		reg->s32_min_value = S8_MIN;
6727 		reg->s32_max_value = S8_MAX;
6728 	} else {
6729 		/* size == 2 */
6730 		reg->s32_min_value = S16_MIN;
6731 		reg->s32_max_value = S16_MAX;
6732 	}
6733 	reg->u32_min_value = 0;
6734 	reg->u32_max_value = U32_MAX;
6735 	reg->var_off = tnum_subreg(tnum_unknown);
6736 }
6737 
6738 static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
6739 {
6740 	s32 init_s32_max, init_s32_min, s32_max, s32_min, u32_val;
6741 	u32 top_smax_value, top_smin_value;
6742 	u32 num_bits = size * 8;
6743 
6744 	if (tnum_is_const(reg->var_off)) {
6745 		u32_val = reg->var_off.value;
6746 		if (size == 1)
6747 			reg->var_off = tnum_const((s8)u32_val);
6748 		else
6749 			reg->var_off = tnum_const((s16)u32_val);
6750 
6751 		u32_val = reg->var_off.value;
6752 		reg->s32_min_value = reg->s32_max_value = u32_val;
6753 		reg->u32_min_value = reg->u32_max_value = u32_val;
6754 		return;
6755 	}
6756 
6757 	top_smax_value = ((u32)reg->s32_max_value >> num_bits) << num_bits;
6758 	top_smin_value = ((u32)reg->s32_min_value >> num_bits) << num_bits;
6759 
6760 	if (top_smax_value != top_smin_value)
6761 		goto out;
6762 
6763 	/* find the s32_min and s32_max after sign extension */
6764 	if (size == 1) {
6765 		init_s32_max = (s8)reg->s32_max_value;
6766 		init_s32_min = (s8)reg->s32_min_value;
6767 	} else {
6768 		/* size == 2 */
6769 		init_s32_max = (s16)reg->s32_max_value;
6770 		init_s32_min = (s16)reg->s32_min_value;
6771 	}
6772 	s32_max = max(init_s32_max, init_s32_min);
6773 	s32_min = min(init_s32_max, init_s32_min);
6774 
6775 	if ((s32_min >= 0) == (s32_max >= 0)) {
6776 		reg->s32_min_value = s32_min;
6777 		reg->s32_max_value = s32_max;
6778 		reg->u32_min_value = (u32)s32_min;
6779 		reg->u32_max_value = (u32)s32_max;
6780 		reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max));
6781 		return;
6782 	}
6783 
6784 out:
6785 	set_sext32_default_val(reg, size);
6786 }
6787 
6788 static bool bpf_map_is_rdonly(const struct bpf_map *map)
6789 {
6790 	/* A map is considered read-only if the following conditions are true:
6791 	 *
6792 	 * 1) BPF program side cannot change any of the map content. The
6793 	 *    BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
6794 	 *    and was set at map creation time.
6795 	 * 2) The map value(s) have been initialized from user space by a
6796 	 *    loader and then "frozen", such that no new map update/delete
6797 	 *    operations from syscall side are possible for the rest of
6798 	 *    the map's lifetime from that point onwards.
6799 	 * 3) Any parallel/pending map update/delete operations from syscall
6800 	 *    side have been completed. Only after that point, it's safe to
6801 	 *    assume that map value(s) are immutable.
6802 	 */
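	/* Typical user-space sequence satisfying 1) and 2): create the map
	 * with BPF_F_RDONLY_PROG, populate it via BPF_MAP_UPDATE_ELEM, then
	 * issue the BPF_MAP_FREEZE command so no further syscall-side writes
	 * are possible.
	 */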
6803 	return (map->map_flags & BPF_F_RDONLY_PROG) &&
6804 	       READ_ONCE(map->frozen) &&
6805 	       !bpf_map_write_active(map);
6806 }
6807 
6808 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
6809 			       bool is_ldsx)
6810 {
6811 	void *ptr;
6812 	u64 addr;
6813 	int err;
6814 
6815 	err = map->ops->map_direct_value_addr(map, &addr, off);
6816 	if (err)
6817 		return err;
6818 	ptr = (void *)(long)addr + off;
6819 
6820 	switch (size) {
6821 	case sizeof(u8):
6822 		*val = is_ldsx ? (s64)*(s8 *)ptr : (u64)*(u8 *)ptr;
6823 		break;
6824 	case sizeof(u16):
6825 		*val = is_ldsx ? (s64)*(s16 *)ptr : (u64)*(u16 *)ptr;
6826 		break;
6827 	case sizeof(u32):
6828 		*val = is_ldsx ? (s64)*(s32 *)ptr : (u64)*(u32 *)ptr;
6829 		break;
6830 	case sizeof(u64):
6831 		*val = *(u64 *)ptr;
6832 		break;
6833 	default:
6834 		return -EINVAL;
6835 	}
6836 	return 0;
6837 }
6838 
6839 #define BTF_TYPE_SAFE_RCU(__type)  __PASTE(__type, __safe_rcu)
6840 #define BTF_TYPE_SAFE_RCU_OR_NULL(__type)  __PASTE(__type, __safe_rcu_or_null)
6841 #define BTF_TYPE_SAFE_TRUSTED(__type)  __PASTE(__type, __safe_trusted)
6842 #define BTF_TYPE_SAFE_TRUSTED_OR_NULL(__type)  __PASTE(__type, __safe_trusted_or_null)
6843 
6844 /*
6845  * Allow-list a few fields as RCU trusted or fully trusted.
6846  * This logic doesn't allow mixed tagging and will be removed once GCC supports
6847  * btf_type_tag.
6848  */
6849 
6850 /* RCU trusted: these fields are trusted in RCU CS and never NULL */
6851 BTF_TYPE_SAFE_RCU(struct task_struct) {
6852 	const cpumask_t *cpus_ptr;
6853 	struct css_set __rcu *cgroups;
6854 	struct task_struct __rcu *real_parent;
6855 	struct task_struct *group_leader;
6856 };
6857 
6858 BTF_TYPE_SAFE_RCU(struct cgroup) {
6859 	/* cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c */
6860 	struct kernfs_node *kn;
6861 };
6862 
6863 BTF_TYPE_SAFE_RCU(struct css_set) {
6864 	struct cgroup *dfl_cgrp;
6865 };
6866 
6867 /* RCU trusted: these fields are trusted in RCU CS and can be NULL */
6868 BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) {
6869 	struct file __rcu *exe_file;
6870 };
6871 
6872 /* skb->sk, req->sk are not RCU protected, but we mark them as such
6873  * because bpf prog accessible sockets are SOCK_RCU_FREE.
6874  */
6875 BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) {
6876 	struct sock *sk;
6877 };
6878 
6879 BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) {
6880 	struct sock *sk;
6881 };
6882 
6883 /* full trusted: these fields are trusted even outside of RCU CS and never NULL */
6884 BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
6885 	struct seq_file *seq;
6886 };
6887 
6888 BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
6889 	struct bpf_iter_meta *meta;
6890 	struct task_struct *task;
6891 };
6892 
6893 BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
6894 	struct file *file;
6895 };
6896 
6897 BTF_TYPE_SAFE_TRUSTED(struct file) {
6898 	struct inode *f_inode;
6899 };
6900 
6901 BTF_TYPE_SAFE_TRUSTED(struct dentry) {
6902 	/* no negative dentry-s in places where bpf can see it */
6903 	struct inode *d_inode;
6904 };
6905 
6906 BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) {
6907 	struct sock *sk;
6908 };
6909 
6910 static bool type_is_rcu(struct bpf_verifier_env *env,
6911 			struct bpf_reg_state *reg,
6912 			const char *field_name, u32 btf_id)
6913 {
6914 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
6915 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup));
6916 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
6917 
6918 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu");
6919 }
6920 
6921 static bool type_is_rcu_or_null(struct bpf_verifier_env *env,
6922 				struct bpf_reg_state *reg,
6923 				const char *field_name, u32 btf_id)
6924 {
6925 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct));
6926 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff));
6927 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock));
6928 
6929 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu_or_null");
6930 }
6931 
6932 static bool type_is_trusted(struct bpf_verifier_env *env,
6933 			    struct bpf_reg_state *reg,
6934 			    const char *field_name, u32 btf_id)
6935 {
6936 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
6937 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
6938 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
6939 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
6940 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry));
6941 
6942 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted");
6943 }
6944 
6945 static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
6946 				    struct bpf_reg_state *reg,
6947 				    const char *field_name, u32 btf_id)
6948 {
6949 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
6950 
6951 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
6952 					  "__safe_trusted_or_null");
6953 }
6954 
6955 static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
6956 				   struct bpf_reg_state *regs,
6957 				   int regno, int off, int size,
6958 				   enum bpf_access_type atype,
6959 				   int value_regno)
6960 {
6961 	struct bpf_reg_state *reg = regs + regno;
6962 	const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
6963 	const char *tname = btf_name_by_offset(reg->btf, t->name_off);
6964 	const char *field_name = NULL;
6965 	enum bpf_type_flag flag = 0;
6966 	u32 btf_id = 0;
6967 	int ret;
6968 
6969 	if (!env->allow_ptr_leaks) {
6970 		verbose(env,
6971 			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
6972 			tname);
6973 		return -EPERM;
6974 	}
6975 	if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
6976 		verbose(env,
6977 			"Cannot access kernel 'struct %s' from non-GPL compatible program\n",
6978 			tname);
6979 		return -EINVAL;
6980 	}
6981 	if (off < 0) {
6982 		verbose(env,
6983 			"R%d is ptr_%s invalid negative access: off=%d\n",
6984 			regno, tname, off);
6985 		return -EACCES;
6986 	}
6987 	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
6988 		char tn_buf[48];
6989 
6990 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6991 		verbose(env,
6992 			"R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
6993 			regno, tname, off, tn_buf);
6994 		return -EACCES;
6995 	}
6996 
6997 	if (reg->type & MEM_USER) {
6998 		verbose(env,
6999 			"R%d is ptr_%s access user memory: off=%d\n",
7000 			regno, tname, off);
7001 		return -EACCES;
7002 	}
7003 
7004 	if (reg->type & MEM_PERCPU) {
7005 		verbose(env,
7006 			"R%d is ptr_%s access percpu memory: off=%d\n",
7007 			regno, tname, off);
7008 		return -EACCES;
7009 	}
7010 
7011 	if (env->ops->btf_struct_access && !type_is_alloc(reg->type) && atype == BPF_WRITE) {
7012 		if (!btf_is_kernel(reg->btf)) {
7013 			verbose(env, "verifier internal error: reg->btf must be kernel btf\n");
7014 			return -EFAULT;
7015 		}
7016 		ret = env->ops->btf_struct_access(&env->log, reg, off, size);
7017 	} else {
7018 		/* Writes are permitted with default btf_struct_access for
7019 		 * program allocated objects (which always have ref_obj_id > 0),
7020 		 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
7021 		 */
7022 		if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) {
7023 			verbose(env, "only read is supported\n");
7024 			return -EACCES;
7025 		}
7026 
7027 		if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
7028 		    !(reg->type & MEM_RCU) && !reg->ref_obj_id) {
7029 			verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
7030 			return -EFAULT;
7031 		}
7032 
7033 		ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag, &field_name);
7034 	}
7035 
7036 	if (ret < 0)
7037 		return ret;
7038 
7039 	if (ret != PTR_TO_BTF_ID) {
7040 		/* just mark; */
7041 
7042 	} else if (type_flag(reg->type) & PTR_UNTRUSTED) {
7043 		/* If this is an untrusted pointer, all pointers formed by walking it
7044 		 * also inherit the untrusted flag.
7045 		 */
7046 		flag = PTR_UNTRUSTED;
7047 
7048 	} else if (is_trusted_reg(reg) || is_rcu_reg(reg)) {
7049 		/* By default any pointer obtained from walking a trusted pointer is no
7050 		 * longer trusted, unless the field being accessed has explicitly been
7051 		 * marked as inheriting its parent's state of trust (either full or RCU).
7052 		 * For example:
7053 		 * 'cgroups' pointer is untrusted if task->cgroups dereference
7054 		 * happened in a sleepable program outside of bpf_rcu_read_lock()
7055 		 * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
7056 		 * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
7057 		 *
7058 		 * A regular RCU-protected pointer with __rcu tag can also be deemed
7059 		 * trusted if we are in an RCU CS. Such pointer can be NULL.
7060 		 */
7061 		if (type_is_trusted(env, reg, field_name, btf_id)) {
7062 			flag |= PTR_TRUSTED;
7063 		} else if (type_is_trusted_or_null(env, reg, field_name, btf_id)) {
7064 			flag |= PTR_TRUSTED | PTR_MAYBE_NULL;
7065 		} else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
7066 			if (type_is_rcu(env, reg, field_name, btf_id)) {
7067 				/* ignore __rcu tag and mark it MEM_RCU */
7068 				flag |= MEM_RCU;
7069 			} else if (flag & MEM_RCU ||
7070 				   type_is_rcu_or_null(env, reg, field_name, btf_id)) {
7071 				/* __rcu tagged pointers can be NULL */
7072 				flag |= MEM_RCU | PTR_MAYBE_NULL;
7073 
7074 				/* We always trust them */
7075 				if (type_is_rcu_or_null(env, reg, field_name, btf_id) &&
7076 				    flag & PTR_UNTRUSTED)
7077 					flag &= ~PTR_UNTRUSTED;
7078 			} else if (flag & (MEM_PERCPU | MEM_USER)) {
7079 				/* keep as-is */
7080 			} else {
7081 				/* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */
7082 				clear_trusted_flags(&flag);
7083 			}
7084 		} else {
7085 			/*
7086 			 * If not in RCU CS or MEM_RCU pointer can be NULL then
7087 			 * aggressively mark as untrusted otherwise such
7088 			 * pointers will be plain PTR_TO_BTF_ID without flags
7089 			 * and will be allowed to be passed into helpers for
7090 			 * compat reasons.
7091 			 */
7092 			flag = PTR_UNTRUSTED;
7093 		}
7094 	} else {
7095 		/* Old compat. Deprecated */
7096 		clear_trusted_flags(&flag);
7097 	}
7098 
7099 	if (atype == BPF_READ && value_regno >= 0)
7100 		mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
7101 
7102 	return 0;
7103 }
7104 
7105 static int check_ptr_to_map_access(struct bpf_verifier_env *env,
7106 				   struct bpf_reg_state *regs,
7107 				   int regno, int off, int size,
7108 				   enum bpf_access_type atype,
7109 				   int value_regno)
7110 {
7111 	struct bpf_reg_state *reg = regs + regno;
7112 	struct bpf_map *map = reg->map_ptr;
7113 	struct bpf_reg_state map_reg;
7114 	enum bpf_type_flag flag = 0;
7115 	const struct btf_type *t;
7116 	const char *tname;
7117 	u32 btf_id;
7118 	int ret;
7119 
7120 	if (!btf_vmlinux) {
7121 		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
7122 		return -ENOTSUPP;
7123 	}
7124 
7125 	if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
7126 		verbose(env, "map_ptr access not supported for map type %d\n",
7127 			map->map_type);
7128 		return -ENOTSUPP;
7129 	}
7130 
7131 	t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
7132 	tname = btf_name_by_offset(btf_vmlinux, t->name_off);
7133 
7134 	if (!env->allow_ptr_leaks) {
7135 		verbose(env,
7136 			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
7137 			tname);
7138 		return -EPERM;
7139 	}
7140 
7141 	if (off < 0) {
7142 		verbose(env, "R%d is %s invalid negative access: off=%d\n",
7143 			regno, tname, off);
7144 		return -EACCES;
7145 	}
7146 
7147 	if (atype != BPF_READ) {
7148 		verbose(env, "only read from %s is supported\n", tname);
7149 		return -EACCES;
7150 	}
7151 
7152 	/* Simulate access to a PTR_TO_BTF_ID */
7153 	memset(&map_reg, 0, sizeof(map_reg));
7154 	mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID, btf_vmlinux, *map->ops->map_btf_id, 0);
7155 	ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag, NULL);
7156 	if (ret < 0)
7157 		return ret;
7158 
7159 	if (value_regno >= 0)
7160 		mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
7161 
7162 	return 0;
7163 }
7164 
7165 /* Check that the stack access at the given offset is within bounds. The
7166  * maximum valid offset is -1.
7167  *
7168  * The minimum valid offset is -MAX_BPF_STACK for writes, and
7169  * -state->allocated_stack for reads.
7170  */
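/* For example, with MAX_BPF_STACK == 512 a write at off == -520 is out of
 * bounds, and a read at off == -16 is only accepted once at least 16 bytes
 * of stack have been allocated (unless uninitialized stack reads are
 * allowed).
 */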
7171 static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
7172                                           s64 off,
7173                                           struct bpf_func_state *state,
7174                                           enum bpf_access_type t)
7175 {
7176 	int min_valid_off;
7177 
7178 	if (t == BPF_WRITE || env->allow_uninit_stack)
7179 		min_valid_off = -MAX_BPF_STACK;
7180 	else
7181 		min_valid_off = -state->allocated_stack;
7182 
7183 	if (off < min_valid_off || off > -1)
7184 		return -EACCES;
7185 	return 0;
7186 }
7187 
7188 /* Check that the stack access at 'regno + off' falls within the maximum stack
7189  * bounds.
7190  *
7191  * 'off' includes `regno->offset`, but not its dynamic part (if any).
7192  */
7193 static int check_stack_access_within_bounds(
7194 		struct bpf_verifier_env *env,
7195 		int regno, int off, int access_size,
7196 		enum bpf_access_type type)
7197 {
7198 	struct bpf_reg_state *regs = cur_regs(env);
7199 	struct bpf_reg_state *reg = regs + regno;
7200 	struct bpf_func_state *state = func(env, reg);
7201 	s64 min_off, max_off;
7202 	int err;
7203 	char *err_extra;
7204 
7205 	if (type == BPF_READ)
7206 		err_extra = " read from";
7207 	else
7208 		err_extra = " write to";
7209 
7210 	if (tnum_is_const(reg->var_off)) {
7211 		min_off = (s64)reg->var_off.value + off;
7212 		max_off = min_off + access_size;
7213 	} else {
7214 		if (reg->smax_value >= BPF_MAX_VAR_OFF ||
7215 		    reg->smin_value <= -BPF_MAX_VAR_OFF) {
7216 			verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
7217 				err_extra, regno);
7218 			return -EACCES;
7219 		}
7220 		min_off = reg->smin_value + off;
7221 		max_off = reg->smax_value + off + access_size;
7222 	}
7223 
7224 	err = check_stack_slot_within_bounds(env, min_off, state, type);
7225 	if (!err && max_off > 0)
7226 		err = -EINVAL; /* out of stack access into non-negative offsets */
7227 	if (!err && access_size < 0)
7228 		/* access_size should not be negative (or overflow an int); other checks
7229 		 * along the way should have prevented such an access.
7230 		 */
7231 		err = -EFAULT; /* invalid negative access size; integer overflow? */
7232 
7233 	if (err) {
7234 		if (tnum_is_const(reg->var_off)) {
7235 			verbose(env, "invalid%s stack R%d off=%d size=%d\n",
7236 				err_extra, regno, off, access_size);
7237 		} else {
7238 			char tn_buf[48];
7239 
7240 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
7241 			verbose(env, "invalid variable-offset%s stack R%d var_off=%s off=%d size=%d\n",
7242 				err_extra, regno, tn_buf, off, access_size);
7243 		}
7244 		return err;
7245 	}
7246 
7247 	/* Note that there is no stack access with offset zero, so the needed stack
7248 	 * size is -min_off, not -min_off+1.
7249 	 */
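	/* E.g. an 8-byte access with min_off == -8 touches bytes [-8, -1] and
	 * therefore needs exactly 8 bytes of stack.
	 */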
7250 	return grow_stack_state(env, state, -min_off /* size */);
7251 }
7252 
7253 static bool get_func_retval_range(struct bpf_prog *prog,
7254 				  struct bpf_retval_range *range)
7255 {
7256 	if (prog->type == BPF_PROG_TYPE_LSM &&
7257 		prog->expected_attach_type == BPF_LSM_MAC &&
7258 		!bpf_lsm_get_retval_range(prog, range)) {
7259 		return true;
7260 	}
7261 	return false;
7262 }
7263 
7264 /* check whether memory at (regno + off) is accessible for t = (read | write)
7265  * if t==write, value_regno is a register which value is stored into memory
7266  * if t==read, value_regno is a register which will receive the value from memory
7267  * if t==write && value_regno==-1, some unknown value is stored into memory
7268  * if t==read && value_regno==-1, don't care what we read from memory
7269  */
7270 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
7271 			    int off, int bpf_size, enum bpf_access_type t,
7272 			    int value_regno, bool strict_alignment_once, bool is_ldsx)
7273 {
7274 	struct bpf_reg_state *regs = cur_regs(env);
7275 	struct bpf_reg_state *reg = regs + regno;
7276 	int size, err = 0;
7277 
7278 	size = bpf_size_to_bytes(bpf_size);
7279 	if (size < 0)
7280 		return size;
7281 
7282 	/* alignment checks will add in reg->off themselves */
7283 	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
7284 	if (err)
7285 		return err;
7286 
7287 	/* for access checks, reg->off is just part of off */
7288 	off += reg->off;
7289 
7290 	if (reg->type == PTR_TO_MAP_KEY) {
7291 		if (t == BPF_WRITE) {
7292 			verbose(env, "write to change key R%d not allowed\n", regno);
7293 			return -EACCES;
7294 		}
7295 
7296 		err = check_mem_region_access(env, regno, off, size,
7297 					      reg->map_ptr->key_size, false);
7298 		if (err)
7299 			return err;
7300 		if (value_regno >= 0)
7301 			mark_reg_unknown(env, regs, value_regno);
7302 	} else if (reg->type == PTR_TO_MAP_VALUE) {
7303 		struct btf_field *kptr_field = NULL;
7304 
7305 		if (t == BPF_WRITE && value_regno >= 0 &&
7306 		    is_pointer_value(env, value_regno)) {
7307 			verbose(env, "R%d leaks addr into map\n", value_regno);
7308 			return -EACCES;
7309 		}
7310 		err = check_map_access_type(env, regno, off, size, t);
7311 		if (err)
7312 			return err;
7313 		err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
7314 		if (err)
7315 			return err;
7316 		if (tnum_is_const(reg->var_off))
7317 			kptr_field = btf_record_find(reg->map_ptr->record,
7318 						     off + reg->var_off.value, BPF_KPTR | BPF_UPTR);
7319 		if (kptr_field) {
7320 			err = check_map_kptr_access(env, regno, value_regno, insn_idx, kptr_field);
7321 		} else if (t == BPF_READ && value_regno >= 0) {
7322 			struct bpf_map *map = reg->map_ptr;
7323 
7324 			/* if map is read-only, track its contents as scalars */
7325 			if (tnum_is_const(reg->var_off) &&
7326 			    bpf_map_is_rdonly(map) &&
7327 			    map->ops->map_direct_value_addr) {
7328 				int map_off = off + reg->var_off.value;
7329 				u64 val = 0;
7330 
7331 				err = bpf_map_direct_read(map, map_off, size,
7332 							  &val, is_ldsx);
7333 				if (err)
7334 					return err;
7335 
7336 				regs[value_regno].type = SCALAR_VALUE;
7337 				__mark_reg_known(&regs[value_regno], val);
7338 			} else {
7339 				mark_reg_unknown(env, regs, value_regno);
7340 			}
7341 		}
7342 	} else if (base_type(reg->type) == PTR_TO_MEM) {
7343 		bool rdonly_mem = type_is_rdonly_mem(reg->type);
7344 
7345 		if (type_may_be_null(reg->type)) {
7346 			verbose(env, "R%d invalid mem access '%s'\n", regno,
7347 				reg_type_str(env, reg->type));
7348 			return -EACCES;
7349 		}
7350 
7351 		if (t == BPF_WRITE && rdonly_mem) {
7352 			verbose(env, "R%d cannot write into %s\n",
7353 				regno, reg_type_str(env, reg->type));
7354 			return -EACCES;
7355 		}
7356 
7357 		if (t == BPF_WRITE && value_regno >= 0 &&
7358 		    is_pointer_value(env, value_regno)) {
7359 			verbose(env, "R%d leaks addr into mem\n", value_regno);
7360 			return -EACCES;
7361 		}
7362 
7363 		err = check_mem_region_access(env, regno, off, size,
7364 					      reg->mem_size, false);
7365 		if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
7366 			mark_reg_unknown(env, regs, value_regno);
7367 	} else if (reg->type == PTR_TO_CTX) {
7368 		bool is_retval = false;
7369 		struct bpf_retval_range range;
7370 		enum bpf_reg_type reg_type = SCALAR_VALUE;
7371 		struct btf *btf = NULL;
7372 		u32 btf_id = 0;
7373 
7374 		if (t == BPF_WRITE && value_regno >= 0 &&
7375 		    is_pointer_value(env, value_regno)) {
7376 			verbose(env, "R%d leaks addr into ctx\n", value_regno);
7377 			return -EACCES;
7378 		}
7379 
7380 		err = check_ptr_off_reg(env, reg, regno);
7381 		if (err < 0)
7382 			return err;
7383 
7384 		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
7385 				       &btf_id, &is_retval, is_ldsx);
7386 		if (err)
7387 			verbose_linfo(env, insn_idx, "; ");
7388 		if (!err && t == BPF_READ && value_regno >= 0) {
7389 			/* ctx access returns either a scalar, or a
7390 			 * PTR_TO_PACKET[_META,_END]. In the latter
7391 			 * case, we know the offset is zero.
7392 			 */
7393 			if (reg_type == SCALAR_VALUE) {
7394 				if (is_retval && get_func_retval_range(env->prog, &range)) {
7395 					err = __mark_reg_s32_range(env, regs, value_regno,
7396 								   range.minval, range.maxval);
7397 					if (err)
7398 						return err;
7399 				} else {
7400 					mark_reg_unknown(env, regs, value_regno);
7401 				}
7402 			} else {
7403 				mark_reg_known_zero(env, regs,
7404 						    value_regno);
7405 				if (type_may_be_null(reg_type))
7406 					regs[value_regno].id = ++env->id_gen;
7407 				/* A load of ctx field could have different
7408 				 * actual load size with the one encoded in the
7409 				 * insn. When the dst is PTR, it is for sure not
7410 				 * a sub-register.
7411 				 */
7412 				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
7413 				if (base_type(reg_type) == PTR_TO_BTF_ID) {
7414 					regs[value_regno].btf = btf;
7415 					regs[value_regno].btf_id = btf_id;
7416 				}
7417 			}
7418 			regs[value_regno].type = reg_type;
7419 		}
7420 
7421 	} else if (reg->type == PTR_TO_STACK) {
7422 		/* Basic bounds checks. */
7423 		err = check_stack_access_within_bounds(env, regno, off, size, t);
7424 		if (err)
7425 			return err;
7426 
7427 		if (t == BPF_READ)
7428 			err = check_stack_read(env, regno, off, size,
7429 					       value_regno);
7430 		else
7431 			err = check_stack_write(env, regno, off, size,
7432 						value_regno, insn_idx);
7433 	} else if (reg_is_pkt_pointer(reg)) {
7434 		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
7435 			verbose(env, "cannot write into packet\n");
7436 			return -EACCES;
7437 		}
7438 		if (t == BPF_WRITE && value_regno >= 0 &&
7439 		    is_pointer_value(env, value_regno)) {
7440 			verbose(env, "R%d leaks addr into packet\n",
7441 				value_regno);
7442 			return -EACCES;
7443 		}
7444 		err = check_packet_access(env, regno, off, size, false);
7445 		if (!err && t == BPF_READ && value_regno >= 0)
7446 			mark_reg_unknown(env, regs, value_regno);
7447 	} else if (reg->type == PTR_TO_FLOW_KEYS) {
7448 		if (t == BPF_WRITE && value_regno >= 0 &&
7449 		    is_pointer_value(env, value_regno)) {
7450 			verbose(env, "R%d leaks addr into flow keys\n",
7451 				value_regno);
7452 			return -EACCES;
7453 		}
7454 
7455 		err = check_flow_keys_access(env, off, size);
7456 		if (!err && t == BPF_READ && value_regno >= 0)
7457 			mark_reg_unknown(env, regs, value_regno);
7458 	} else if (type_is_sk_pointer(reg->type)) {
7459 		if (t == BPF_WRITE) {
7460 			verbose(env, "R%d cannot write into %s\n",
7461 				regno, reg_type_str(env, reg->type));
7462 			return -EACCES;
7463 		}
7464 		err = check_sock_access(env, insn_idx, regno, off, size, t);
7465 		if (!err && value_regno >= 0)
7466 			mark_reg_unknown(env, regs, value_regno);
7467 	} else if (reg->type == PTR_TO_TP_BUFFER) {
7468 		err = check_tp_buffer_access(env, reg, regno, off, size);
7469 		if (!err && t == BPF_READ && value_regno >= 0)
7470 			mark_reg_unknown(env, regs, value_regno);
7471 	} else if (base_type(reg->type) == PTR_TO_BTF_ID &&
7472 		   !type_may_be_null(reg->type)) {
7473 		err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
7474 					      value_regno);
7475 	} else if (reg->type == CONST_PTR_TO_MAP) {
7476 		err = check_ptr_to_map_access(env, regs, regno, off, size, t,
7477 					      value_regno);
7478 	} else if (base_type(reg->type) == PTR_TO_BUF) {
7479 		bool rdonly_mem = type_is_rdonly_mem(reg->type);
7480 		u32 *max_access;
7481 
7482 		if (rdonly_mem) {
7483 			if (t == BPF_WRITE) {
7484 				verbose(env, "R%d cannot write into %s\n",
7485 					regno, reg_type_str(env, reg->type));
7486 				return -EACCES;
7487 			}
7488 			max_access = &env->prog->aux->max_rdonly_access;
7489 		} else {
7490 			max_access = &env->prog->aux->max_rdwr_access;
7491 		}
7492 
7493 		err = check_buffer_access(env, reg, regno, off, size, false,
7494 					  max_access);
7495 
7496 		if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
7497 			mark_reg_unknown(env, regs, value_regno);
7498 	} else if (reg->type == PTR_TO_ARENA) {
7499 		if (t == BPF_READ && value_regno >= 0)
7500 			mark_reg_unknown(env, regs, value_regno);
7501 	} else {
7502 		verbose(env, "R%d invalid mem access '%s'\n", regno,
7503 			reg_type_str(env, reg->type));
7504 		return -EACCES;
7505 	}
7506 
7507 	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
7508 	    regs[value_regno].type == SCALAR_VALUE) {
7509 		if (!is_ldsx)
7510 			/* b/h/w load zero-extends, mark upper bits as known 0 */
7511 			coerce_reg_to_size(&regs[value_regno], size);
7512 		else
7513 			coerce_reg_to_size_sx(&regs[value_regno], size);
7514 	}
7515 	return err;
7516 }
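
/* Illustrative sketch, not part of the original source: how the
 * zero-/sign-extension handling above behaves for a narrow load, assuming
 * a hypothetical array map 'm' whose value starts with a byte holding 0xff
 * and a cpu v4 capable compiler for the sign-extending load:
 *
 *	u8 *p = bpf_map_lookup_elem(&m, &key);
 *	if (!p)
 *		return 0;
 *	int a = *p;			// BPF_LDX | BPF_B: zero-extended,
 *					// a == 0xff, upper bits known zero
 *	int b = *(signed char *)p;	// BPF_LDSX: sign-extended, b == -1
 *
 * coerce_reg_to_size() models the first case, coerce_reg_to_size_sx() the
 * second.
 */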
7517 
7518 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
7519 			     bool allow_trust_mismatch);
7520 
7521 static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
7522 {
7523 	int load_reg;
7524 	int err;
7525 
7526 	switch (insn->imm) {
7527 	case BPF_ADD:
7528 	case BPF_ADD | BPF_FETCH:
7529 	case BPF_AND:
7530 	case BPF_AND | BPF_FETCH:
7531 	case BPF_OR:
7532 	case BPF_OR | BPF_FETCH:
7533 	case BPF_XOR:
7534 	case BPF_XOR | BPF_FETCH:
7535 	case BPF_XCHG:
7536 	case BPF_CMPXCHG:
7537 		break;
7538 	default:
7539 		verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
7540 		return -EINVAL;
7541 	}
7542 
7543 	if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
7544 		verbose(env, "invalid atomic operand size\n");
7545 		return -EINVAL;
7546 	}
7547 
7548 	/* check src1 operand */
7549 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
7550 	if (err)
7551 		return err;
7552 
7553 	/* check src2 operand */
7554 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7555 	if (err)
7556 		return err;
7557 
7558 	if (insn->imm == BPF_CMPXCHG) {
7559 		/* Check comparison of R0 with memory location */
7560 		const u32 aux_reg = BPF_REG_0;
7561 
7562 		err = check_reg_arg(env, aux_reg, SRC_OP);
7563 		if (err)
7564 			return err;
7565 
7566 		if (is_pointer_value(env, aux_reg)) {
7567 			verbose(env, "R%d leaks addr into mem\n", aux_reg);
7568 			return -EACCES;
7569 		}
7570 	}
7571 
7572 	if (is_pointer_value(env, insn->src_reg)) {
7573 		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
7574 		return -EACCES;
7575 	}
7576 
7577 	if (is_ctx_reg(env, insn->dst_reg) ||
7578 	    is_pkt_reg(env, insn->dst_reg) ||
7579 	    is_flow_key_reg(env, insn->dst_reg) ||
7580 	    is_sk_reg(env, insn->dst_reg) ||
7581 	    (is_arena_reg(env, insn->dst_reg) && !bpf_jit_supports_insn(insn, true))) {
7582 		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
7583 			insn->dst_reg,
7584 			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
7585 		return -EACCES;
7586 	}
7587 
7588 	if (insn->imm & BPF_FETCH) {
7589 		if (insn->imm == BPF_CMPXCHG)
7590 			load_reg = BPF_REG_0;
7591 		else
7592 			load_reg = insn->src_reg;
7593 
7594 		/* check and record load of old value */
7595 		err = check_reg_arg(env, load_reg, DST_OP);
7596 		if (err)
7597 			return err;
7598 	} else {
7599 		/* This instruction accesses a memory location but doesn't
7600 		 * actually load it into a register.
7601 		 */
7602 		load_reg = -1;
7603 	}
7604 
7605 	/* Check whether we can read the memory, with second call for fetch
7606 	 * case to simulate the register fill.
7607 	 */
7608 	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
7609 			       BPF_SIZE(insn->code), BPF_READ, -1, true, false);
7610 	if (!err && load_reg >= 0)
7611 		err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
7612 				       BPF_SIZE(insn->code), BPF_READ, load_reg,
7613 				       true, false);
7614 	if (err)
7615 		return err;
7616 
7617 	if (is_arena_reg(env, insn->dst_reg)) {
7618 		err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
7619 		if (err)
7620 			return err;
7621 	}
7622 	/* Check whether we can write into the same memory. */
7623 	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
7624 			       BPF_SIZE(insn->code), BPF_WRITE, -1, true, false);
7625 	if (err)
7626 		return err;
7627 	return 0;
7628 }
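
/* Illustrative sketch, not part of the original source: the kind of source
 * construct check_atomic() above deals with. With val pointing to writable
 * map-value or arena memory (names are hypothetical):
 *
 *	old = __sync_fetch_and_add(&val->counter, 1);
 *
 * clang emits a BPF_ATOMIC | BPF_ADD insn with BPF_FETCH set when the old
 * value is used. The verifier checks it as a read of the memory (plus a
 * second read to simulate filling the fetch destination register) and then
 * as a write to the same location, matching the check_mem_access() calls
 * above.
 */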
7629 
7630 /* When register 'regno' is used to read the stack (either directly or through
7631  * a helper function) make sure that it's within stack boundary and, depending
7632  * on the access type and privileges, that all elements of the stack are
7633  * initialized.
7634  *
7635  * 'off' includes 'regno->off', but not its dynamic part (if any).
7636  *
7637  * All registers that have been spilled on the stack in the slots within the
7638  * read offsets are marked as read.
7639  */
7640 static int check_stack_range_initialized(
7641 		struct bpf_verifier_env *env, int regno, int off,
7642 		int access_size, bool zero_size_allowed,
7643 		enum bpf_access_type type, struct bpf_call_arg_meta *meta)
7644 {
7645 	struct bpf_reg_state *reg = reg_state(env, regno);
7646 	struct bpf_func_state *state = func(env, reg);
7647 	int err, min_off, max_off, i, j, slot, spi;
7648 	/* Some accesses can write anything into the stack, others are
7649 	 * read-only.
7650 	 */
7651 	bool clobber = false;
7652 
7653 	if (access_size == 0 && !zero_size_allowed) {
7654 		verbose(env, "invalid zero-sized read\n");
7655 		return -EACCES;
7656 	}
7657 
7658 	if (type == BPF_WRITE)
7659 		clobber = true;
7660 
7661 	err = check_stack_access_within_bounds(env, regno, off, access_size, type);
7662 	if (err)
7663 		return err;
7664 
7665 
7666 	if (tnum_is_const(reg->var_off)) {
7667 		min_off = max_off = reg->var_off.value + off;
7668 	} else {
7669 		/* Variable offset is prohibited for unprivileged mode for
7670 		 * simplicity since it requires corresponding support in
7671 		 * Spectre masking for stack ALU.
7672 		 * See also retrieve_ptr_limit().
7673 		 */
7674 		if (!env->bypass_spec_v1) {
7675 			char tn_buf[48];
7676 
7677 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
7678 			verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
7679 				regno, tn_buf);
7680 			return -EACCES;
7681 		}
7682 		/* Only initialized buffer on stack is allowed to be accessed
7683 		 * with variable offset. With uninitialized buffer it's hard to
7684 		 * guarantee that the whole memory is marked as initialized on
7685 		 * helper return, since the specific bounds are unknown, which
7686 		 * may lead to leaking uninitialized stack memory.
7687 		 */
7688 		if (meta && meta->raw_mode)
7689 			meta = NULL;
7690 
7691 		min_off = reg->smin_value + off;
7692 		max_off = reg->smax_value + off;
7693 	}
7694 
7695 	if (meta && meta->raw_mode) {
7696 		/* Ensure we won't be overwriting dynptrs when simulating byte
7697 		 * by byte access in check_helper_call using meta.access_size.
7698 		 * This would be a problem if we have a helper in the future
7699 		 * which takes:
7700 		 *
7701 		 *	helper(uninit_mem, len, dynptr)
7702 		 *
7703 		 * Now, uninit_mem may overlap with dynptr pointer. Hence, it
7704 		 * may end up writing to dynptr itself when touching memory from
7705 		 * arg 1. This can be relaxed on a case by case basis for known
7706 		 * safe cases, but reject due to the possibility of aliasing by
7707 		 * default.
7708 		 */
7709 		for (i = min_off; i < max_off + access_size; i++) {
7710 			int stack_off = -i - 1;
7711 
7712 			spi = __get_spi(i);
7713 			/* raw_mode may write past allocated_stack */
7714 			if (state->allocated_stack <= stack_off)
7715 				continue;
7716 			if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
7717 				verbose(env, "potential write to dynptr at off=%d disallowed\n", i);
7718 				return -EACCES;
7719 			}
7720 		}
7721 		meta->access_size = access_size;
7722 		meta->regno = regno;
7723 		return 0;
7724 	}
7725 
7726 	for (i = min_off; i < max_off + access_size; i++) {
7727 		u8 *stype;
7728 
7729 		slot = -i - 1;
7730 		spi = slot / BPF_REG_SIZE;
7731 		if (state->allocated_stack <= slot) {
7732 			verbose(env, "verifier bug: allocated_stack too small\n");
7733 			return -EFAULT;
7734 		}
7735 
7736 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
7737 		if (*stype == STACK_MISC)
7738 			goto mark;
7739 		if ((*stype == STACK_ZERO) ||
7740 		    (*stype == STACK_INVALID && env->allow_uninit_stack)) {
7741 			if (clobber) {
7742 				/* helper can write anything into the stack */
7743 				*stype = STACK_MISC;
7744 			}
7745 			goto mark;
7746 		}
7747 
7748 		if (is_spilled_reg(&state->stack[spi]) &&
7749 		    (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
7750 		     env->allow_ptr_leaks)) {
7751 			if (clobber) {
7752 				__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
7753 				for (j = 0; j < BPF_REG_SIZE; j++)
7754 					scrub_spilled_slot(&state->stack[spi].slot_type[j]);
7755 			}
7756 			goto mark;
7757 		}
7758 
7759 		if (tnum_is_const(reg->var_off)) {
7760 			verbose(env, "invalid read from stack R%d off %d+%d size %d\n",
7761 				regno, min_off, i - min_off, access_size);
7762 		} else {
7763 			char tn_buf[48];
7764 
7765 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
7766 			verbose(env, "invalid read from stack R%d var_off %s+%d size %d\n",
7767 				regno, tn_buf, i - min_off, access_size);
7768 		}
7769 		return -EACCES;
7770 mark:
7771 		/* reading any byte out of 8-byte 'spill_slot' will cause
7772 		 * the whole slot to be marked as 'read'
7773 		 */
7774 		mark_reg_read(env, &state->stack[spi].spilled_ptr,
7775 			      state->stack[spi].spilled_ptr.parent,
7776 			      REG_LIVE_READ64);
7777 		/* We do not set REG_LIVE_WRITTEN for stack slot, as we cannot
7778 		 * be sure whether the stack slot is written to or not. Hence,
7779 		 * we must still conservatively propagate reads upwards even if
7780 		 * helper may write to the entire memory range.
7781 		 */
7782 	}
7783 	return 0;
7784 }
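
/* Illustrative sketch, not part of the original source: two stack buffer
 * uses that exercise check_stack_range_initialized() above (the map, key
 * and src are hypothetical):
 *
 *	char buf[16];
 *
 *	bpf_probe_read_kernel(buf, sizeof(buf), src);
 *		// raw_mode argument: buf may be uninitialized, the helper
 *		// overwrites it, so slots are only scanned for dynptrs
 *
 *	bpf_map_update_elem(&m, &key, buf, BPF_ANY);
 *		// read argument: every byte of buf must be STACK_MISC,
 *		// STACK_ZERO or a spilled register, otherwise the
 *		// "invalid read from stack" error above is reported
 */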
7785 
7786 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
7787 				   int access_size, enum bpf_access_type access_type,
7788 				   bool zero_size_allowed,
7789 				   struct bpf_call_arg_meta *meta)
7790 {
7791 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7792 	u32 *max_access;
7793 
7794 	switch (base_type(reg->type)) {
7795 	case PTR_TO_PACKET:
7796 	case PTR_TO_PACKET_META:
7797 		return check_packet_access(env, regno, reg->off, access_size,
7798 					   zero_size_allowed);
7799 	case PTR_TO_MAP_KEY:
7800 		if (access_type == BPF_WRITE) {
7801 			verbose(env, "R%d cannot write into %s\n", regno,
7802 				reg_type_str(env, reg->type));
7803 			return -EACCES;
7804 		}
7805 		return check_mem_region_access(env, regno, reg->off, access_size,
7806 					       reg->map_ptr->key_size, false);
7807 	case PTR_TO_MAP_VALUE:
7808 		if (check_map_access_type(env, regno, reg->off, access_size, access_type))
7809 			return -EACCES;
7810 		return check_map_access(env, regno, reg->off, access_size,
7811 					zero_size_allowed, ACCESS_HELPER);
7812 	case PTR_TO_MEM:
7813 		if (type_is_rdonly_mem(reg->type)) {
7814 			if (access_type == BPF_WRITE) {
7815 				verbose(env, "R%d cannot write into %s\n", regno,
7816 					reg_type_str(env, reg->type));
7817 				return -EACCES;
7818 			}
7819 		}
7820 		return check_mem_region_access(env, regno, reg->off,
7821 					       access_size, reg->mem_size,
7822 					       zero_size_allowed);
7823 	case PTR_TO_BUF:
7824 		if (type_is_rdonly_mem(reg->type)) {
7825 			if (access_type == BPF_WRITE) {
7826 				verbose(env, "R%d cannot write into %s\n", regno,
7827 					reg_type_str(env, reg->type));
7828 				return -EACCES;
7829 			}
7830 
7831 			max_access = &env->prog->aux->max_rdonly_access;
7832 		} else {
7833 			max_access = &env->prog->aux->max_rdwr_access;
7834 		}
7835 		return check_buffer_access(env, reg, regno, reg->off,
7836 					   access_size, zero_size_allowed,
7837 					   max_access);
7838 	case PTR_TO_STACK:
7839 		return check_stack_range_initialized(
7840 				env,
7841 				regno, reg->off, access_size,
7842 				zero_size_allowed, access_type, meta);
7843 	case PTR_TO_BTF_ID:
7844 		return check_ptr_to_btf_access(env, regs, regno, reg->off,
7845 					       access_size, BPF_READ, -1);
7846 	case PTR_TO_CTX:
7847 		/* in case the function doesn't know how to access the context
7848 		 * (because we are in a program of type SYSCALL, for example),
7849 		 * we cannot statically check its size.
7850 		 * Dynamically check it now.
7851 		 */
7852 		if (!env->ops->convert_ctx_access) {
7853 			int offset = access_size - 1;
7854 
7855 			/* Allow zero-byte read from PTR_TO_CTX */
7856 			if (access_size == 0)
7857 				return zero_size_allowed ? 0 : -EACCES;
7858 
7859 			return check_mem_access(env, env->insn_idx, regno, offset, BPF_B,
7860 						access_type, -1, false, false);
7861 		}
7862 
7863 		fallthrough;
7864 	default: /* scalar_value or invalid ptr */
7865 		/* Allow zero-byte read from NULL, regardless of pointer type */
7866 		if (zero_size_allowed && access_size == 0 &&
7867 		    register_is_null(reg))
7868 			return 0;
7869 
7870 		verbose(env, "R%d type=%s ", regno,
7871 			reg_type_str(env, reg->type));
7872 		verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
7873 		return -EACCES;
7874 	}
7875 }
7876 
7877 /* verify arguments to helpers or kfuncs consisting of a pointer and an access
7878  * size.
7879  *
7880  * @regno is the register containing the access size. regno-1 is the register
7881  * containing the pointer.
7882  */
7883 static int check_mem_size_reg(struct bpf_verifier_env *env,
7884 			      struct bpf_reg_state *reg, u32 regno,
7885 			      enum bpf_access_type access_type,
7886 			      bool zero_size_allowed,
7887 			      struct bpf_call_arg_meta *meta)
7888 {
7889 	int err;
7890 
7891 	/* This is used to refine r0 return value bounds for helpers
7892 	 * that enforce this value as an upper bound on return values.
7893 	 * See do_refine_retval_range() for helpers that can refine
7894 	 * the return value. The C type of the helper argument is u32, so we
7895 	 * pull the register bound from umax_value; if it is negative, the
7896 	 * verifier errors out. Only upper bounds can be learned because
7897 	 * retval is an int type and negative retvals are allowed.
7898 	 */
7899 	meta->msize_max_value = reg->umax_value;
7900 
7901 	/* The register is SCALAR_VALUE; the access check happens using
7902 	 * its boundaries. For unprivileged variable accesses, disable
7903 	 * raw mode so that the program is required to initialize all
7904 	 * the memory that the helper could just partially fill up.
7905 	 */
7906 	if (!tnum_is_const(reg->var_off))
7907 		meta = NULL;
7908 
7909 	if (reg->smin_value < 0) {
7910 		verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
7911 			regno);
7912 		return -EACCES;
7913 	}
7914 
7915 	if (reg->umin_value == 0 && !zero_size_allowed) {
7916 		verbose(env, "R%d invalid zero-sized read: u64=[%lld,%lld]\n",
7917 			regno, reg->umin_value, reg->umax_value);
7918 		return -EACCES;
7919 	}
7920 
7921 	if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
7922 		verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
7923 			regno);
7924 		return -EACCES;
7925 	}
7926 	err = check_helper_mem_access(env, regno - 1, reg->umax_value,
7927 				      access_type, zero_size_allowed, meta);
7928 	if (!err)
7929 		err = mark_chain_precision(env, regno);
7930 	return err;
7931 }
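
/* Illustrative sketch, not part of the original source: why the bounds
 * checks above matter for a variable size argument (buf, src and the
 * length source are hypothetical):
 *
 *	u32 len = ctx->len;	// unknown scalar
 *	char buf[16];
 *
 *	bpf_probe_read_kernel(buf, len, src);	// rejected: negative or
 *						// unbounded size register
 *	len &= 0xf;				// umax_value becomes 15
 *	bpf_probe_read_kernel(buf, len, src);	// accepted: buf can hold
 *						// the maximum 15 bytes
 */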
7932 
7933 static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
7934 			 u32 regno, u32 mem_size)
7935 {
7936 	bool may_be_null = type_may_be_null(reg->type);
7937 	struct bpf_reg_state saved_reg;
7938 	int err;
7939 
7940 	if (register_is_null(reg))
7941 		return 0;
7942 
7943 	/* Assuming that the register contains a value, check if the memory
7944 	 * access is safe. Temporarily save and restore the register's state as
7945 	 * the conversion shouldn't be visible to a caller.
7946 	 */
7947 	if (may_be_null) {
7948 		saved_reg = *reg;
7949 		mark_ptr_not_null_reg(reg);
7950 	}
7951 
7952 	err = check_helper_mem_access(env, regno, mem_size, BPF_READ, true, NULL);
7953 	err = err ?: check_helper_mem_access(env, regno, mem_size, BPF_WRITE, true, NULL);
7954 
7955 	if (may_be_null)
7956 		*reg = saved_reg;
7957 
7958 	return err;
7959 }
7960 
7961 static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
7962 				    u32 regno)
7963 {
7964 	struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
7965 	bool may_be_null = type_may_be_null(mem_reg->type);
7966 	struct bpf_reg_state saved_reg;
7967 	struct bpf_call_arg_meta meta;
7968 	int err;
7969 
7970 	WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
7971 
7972 	memset(&meta, 0, sizeof(meta));
7973 
7974 	if (may_be_null) {
7975 		saved_reg = *mem_reg;
7976 		mark_ptr_not_null_reg(mem_reg);
7977 	}
7978 
7979 	err = check_mem_size_reg(env, reg, regno, BPF_READ, true, &meta);
7980 	err = err ?: check_mem_size_reg(env, reg, regno, BPF_WRITE, true, &meta);
7981 
7982 	if (may_be_null)
7983 		*mem_reg = saved_reg;
7984 
7985 	return err;
7986 }
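
/* Illustrative sketch, not part of the original source: kfunc arguments
 * whose name ends in "__sz" describe the size of the preceding pointer
 * argument and are routed through check_kfunc_mem_size_reg() above. For a
 * hypothetical kfunc prototype:
 *
 *	int bpf_copy_blob(void *dst, u32 dst__sz);
 *
 * dst__sz's umax_value bounds the checked region of dst, and both read and
 * write accessibility are validated since the kfunc may do either.
 */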
7987 
7988 /* Implementation details:
7989  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
7990  * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
7991  * Two bpf_map_lookups (even with the same key) will have different reg->id.
7992  * Two separate bpf_obj_new will also have different reg->id.
7993  * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
7994  * clears reg->id after value_or_null->value transition, since the verifier only
7995  * cares about the range of access to valid map value pointer and doesn't care
7996  * about actual address of the map element.
7997  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
7998  * reg->id > 0 after value_or_null->value transition. By doing so
7999  * two bpf_map_lookups will be considered two different pointers that
8000  * point to different bpf_spin_locks. Likewise for pointers to allocated objects
8001  * returned from bpf_obj_new.
8002  * The verifier allows taking only one bpf_spin_lock at a time to avoid
8003  * dead-locks.
8004  * Since only one bpf_spin_lock is allowed the checks are simpler than
8005  * reg_is_refcounted() logic. The verifier needs to remember only
8006  * one spin_lock instead of array of acquired_refs.
8007  * env->cur_state->active_locks remembers which map value element or allocated
8008  * object got locked and clears it after bpf_spin_unlock.
8009  */
8010 static int process_spin_lock(struct bpf_verifier_env *env, int regno,
8011 			     bool is_lock)
8012 {
8013 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8014 	struct bpf_verifier_state *cur = env->cur_state;
8015 	bool is_const = tnum_is_const(reg->var_off);
8016 	u64 val = reg->var_off.value;
8017 	struct bpf_map *map = NULL;
8018 	struct btf *btf = NULL;
8019 	struct btf_record *rec;
8020 	int err;
8021 
8022 	if (!is_const) {
8023 		verbose(env,
8024 			"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
8025 			regno);
8026 		return -EINVAL;
8027 	}
8028 	if (reg->type == PTR_TO_MAP_VALUE) {
8029 		map = reg->map_ptr;
8030 		if (!map->btf) {
8031 			verbose(env,
8032 				"map '%s' has to have BTF in order to use bpf_spin_lock\n",
8033 				map->name);
8034 			return -EINVAL;
8035 		}
8036 	} else {
8037 		btf = reg->btf;
8038 	}
8039 
8040 	rec = reg_btf_record(reg);
8041 	if (!btf_record_has_field(rec, BPF_SPIN_LOCK)) {
8042 		verbose(env, "%s '%s' has no valid bpf_spin_lock\n", map ? "map" : "local",
8043 			map ? map->name : "kptr");
8044 		return -EINVAL;
8045 	}
8046 	if (rec->spin_lock_off != val + reg->off) {
8047 		verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock' that is at %d\n",
8048 			val + reg->off, rec->spin_lock_off);
8049 		return -EINVAL;
8050 	}
8051 	if (is_lock) {
8052 		void *ptr;
8053 
8054 		if (map)
8055 			ptr = map;
8056 		else
8057 			ptr = btf;
8058 
8059 		if (cur->active_locks) {
8060 			verbose(env,
8061 				"Locking two bpf_spin_locks are not allowed\n");
8062 			return -EINVAL;
8063 		}
8064 		err = acquire_lock_state(env, env->insn_idx, REF_TYPE_LOCK, reg->id, ptr);
8065 		if (err < 0) {
8066 			verbose(env, "Failed to acquire lock state\n");
8067 			return err;
8068 		}
8069 	} else {
8070 		void *ptr;
8071 
8072 		if (map)
8073 			ptr = map;
8074 		else
8075 			ptr = btf;
8076 
8077 		if (!cur->active_locks) {
8078 			verbose(env, "bpf_spin_unlock without taking a lock\n");
8079 			return -EINVAL;
8080 		}
8081 
8082 		if (release_lock_state(env->cur_state, REF_TYPE_LOCK, reg->id, ptr)) {
8083 			verbose(env, "bpf_spin_unlock of different lock\n");
8084 			return -EINVAL;
8085 		}
8086 
8087 		invalidate_non_owning_refs(env);
8088 	}
8089 	return 0;
8090 }
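
/* Illustrative sketch, not part of the original source: the usage pattern
 * validated by process_spin_lock() above (struct and map names are
 * hypothetical):
 *
 *	struct elem {
 *		struct bpf_spin_lock lock;
 *		int data;
 *	};
 *
 *	struct elem *e = bpf_map_lookup_elem(&m, &key);
 *	if (!e)
 *		return 0;
 *	bpf_spin_lock(&e->lock);	// offset must equal rec->spin_lock_off
 *	e->data++;
 *	bpf_spin_unlock(&e->lock);	// must release the same lock
 */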
8091 
8092 static int process_timer_func(struct bpf_verifier_env *env, int regno,
8093 			      struct bpf_call_arg_meta *meta)
8094 {
8095 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8096 	bool is_const = tnum_is_const(reg->var_off);
8097 	struct bpf_map *map = reg->map_ptr;
8098 	u64 val = reg->var_off.value;
8099 
8100 	if (!is_const) {
8101 		verbose(env,
8102 			"R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
8103 			regno);
8104 		return -EINVAL;
8105 	}
8106 	if (!map->btf) {
8107 		verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
8108 			map->name);
8109 		return -EINVAL;
8110 	}
8111 	if (!btf_record_has_field(map->record, BPF_TIMER)) {
8112 		verbose(env, "map '%s' has no valid bpf_timer\n", map->name);
8113 		return -EINVAL;
8114 	}
8115 	if (map->record->timer_off != val + reg->off) {
8116 		verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
8117 			val + reg->off, map->record->timer_off);
8118 		return -EINVAL;
8119 	}
8120 	if (meta->map_ptr) {
8121 		verbose(env, "verifier bug. Two map pointers in a timer helper\n");
8122 		return -EFAULT;
8123 	}
8124 	meta->map_uid = reg->map_uid;
8125 	meta->map_ptr = map;
8126 	return 0;
8127 }
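
/* Illustrative sketch, not part of the original source: a map value
 * embedding a struct bpf_timer, as required by process_timer_func() above
 * (names are hypothetical):
 *
 *	struct elem {
 *		struct bpf_timer t;	// offset must equal record->timer_off
 *	};
 *
 *	struct elem *e = bpf_map_lookup_elem(&timer_map, &key);
 *	if (!e)
 *		return 0;
 *	bpf_timer_init(&e->t, &timer_map, CLOCK_MONOTONIC);
 *	bpf_timer_set_callback(&e->t, timer_cb);
 *	bpf_timer_start(&e->t, 0, 0);
 */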
8128 
8129 static int process_wq_func(struct bpf_verifier_env *env, int regno,
8130 			   struct bpf_kfunc_call_arg_meta *meta)
8131 {
8132 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8133 	struct bpf_map *map = reg->map_ptr;
8134 	u64 val = reg->var_off.value;
8135 
8136 	if (map->record->wq_off != val + reg->off) {
8137 		verbose(env, "off %lld doesn't point to 'struct bpf_wq' that is at %d\n",
8138 			val + reg->off, map->record->wq_off);
8139 		return -EINVAL;
8140 	}
8141 	meta->map.uid = reg->map_uid;
8142 	meta->map.ptr = map;
8143 	return 0;
8144 }
8145 
8146 static int process_kptr_func(struct bpf_verifier_env *env, int regno,
8147 			     struct bpf_call_arg_meta *meta)
8148 {
8149 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8150 	struct btf_field *kptr_field;
8151 	struct bpf_map *map_ptr;
8152 	struct btf_record *rec;
8153 	u32 kptr_off;
8154 
8155 	if (type_is_ptr_alloc_obj(reg->type)) {
8156 		rec = reg_btf_record(reg);
8157 	} else { /* PTR_TO_MAP_VALUE */
8158 		map_ptr = reg->map_ptr;
8159 		if (!map_ptr->btf) {
8160 			verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
8161 				map_ptr->name);
8162 			return -EINVAL;
8163 		}
8164 		rec = map_ptr->record;
8165 		meta->map_ptr = map_ptr;
8166 	}
8167 
8168 	if (!tnum_is_const(reg->var_off)) {
8169 		verbose(env,
8170 			"R%d doesn't have constant offset. kptr has to be at the constant offset\n",
8171 			regno);
8172 		return -EINVAL;
8173 	}
8174 
8175 	if (!btf_record_has_field(rec, BPF_KPTR)) {
8176 		verbose(env, "R%d has no valid kptr\n", regno);
8177 		return -EINVAL;
8178 	}
8179 
8180 	kptr_off = reg->off + reg->var_off.value;
8181 	kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR);
8182 	if (!kptr_field) {
8183 		verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
8184 		return -EACCES;
8185 	}
8186 	if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) {
8187 		verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
8188 		return -EACCES;
8189 	}
8190 	meta->kptr_field = kptr_field;
8191 	return 0;
8192 }
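
/* Illustrative sketch, not part of the original source: a referenced kptr
 * field exchanged with bpf_kptr_xchg(), assuming a kernel and map layout
 * that support local kptrs (all names are hypothetical):
 *
 *	struct node_data { int v; };
 *	struct elem {
 *		struct node_data __kptr *node;
 *	};
 *
 *	struct elem *e = bpf_map_lookup_elem(&m, &key);
 *	if (!e)
 *		return 0;
 *	struct node_data *n = bpf_obj_new(typeof(*n));
 *	if (!n)
 *		return 0;
 *	n = bpf_kptr_xchg(&e->node, n);	// &e->node must land exactly on a
 *					// BPF_KPTR_REF/BPF_KPTR_PERCPU field
 *	if (n)
 *		bpf_obj_drop(n);	// release the swapped-out object
 */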
8193 
8194 /* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
8195  * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
8196  *
8197  * In both cases we deal with the first 8 bytes, but need to mark the next 8
8198  * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
8199  * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
8200  *
8201  * Mutability of bpf_dynptr is at two levels, one is at the level of struct
8202  * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct
8203  * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can
8204  * mutate the view of the dynptr and also possibly destroy it. In the latter
8205  * case, it cannot mutate the bpf_dynptr itself but it can still mutate the
8206  * memory that dynptr points to.
8207  *
8208  * The verifier will keep track both levels of mutation (bpf_dynptr's in
8209  * reg->type and the memory's in reg->dynptr.type), but there is no support for
8210  * readonly dynptr view yet, hence only the first case is tracked and checked.
8211  *
8212  * This is consistent with how C applies the const modifier to a struct object,
8213  * where the pointer itself inside bpf_dynptr becomes const but not what it
8214  * points to.
8215  *
8216  * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
8217  * type, and declare it as 'const struct bpf_dynptr *' in their prototype.
8218  */
8219 static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx,
8220 			       enum bpf_arg_type arg_type, int clone_ref_obj_id)
8221 {
8222 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8223 	int err;
8224 
8225 	if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) {
8226 		verbose(env,
8227 			"arg#%d expected pointer to stack or const struct bpf_dynptr\n",
8228 			regno - 1);
8229 		return -EINVAL;
8230 	}
8231 
8232 	/* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
8233 	 * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
8234 	 */
8235 	if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) {
8236 		verbose(env, "verifier internal error: misconfigured dynptr helper type flags\n");
8237 		return -EFAULT;
8238 	}
8239 
8240 	/*  MEM_UNINIT - Points to memory that is an appropriate candidate for
8241 	 *		 constructing a mutable bpf_dynptr object.
8242 	 *
8243 	 *		 Currently, this is only possible with PTR_TO_STACK
8244 	 *		 pointing to a region of at least 16 bytes which doesn't
8245 	 *		 contain an existing bpf_dynptr.
8246 	 *
8247 	 *  MEM_RDONLY - Points to an initialized bpf_dynptr that will not be
8248 	 *		 mutated or destroyed. However, the memory it points to
8249 	 *		 may be mutated.
8250 	 *
8251 	 *  None       - Points to an initialized dynptr that can be mutated and
8252 	 *		 destroyed, including mutation of the memory it points
8253 	 *		 to.
8254 	 */
8255 	if (arg_type & MEM_UNINIT) {
8256 		int i;
8257 
8258 		if (!is_dynptr_reg_valid_uninit(env, reg)) {
8259 			verbose(env, "Dynptr has to be an uninitialized dynptr\n");
8260 			return -EINVAL;
8261 		}
8262 
8263 		/* we write BPF_DW bits (8 bytes) at a time */
8264 		for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
8265 			err = check_mem_access(env, insn_idx, regno,
8266 					       i, BPF_DW, BPF_WRITE, -1, false, false);
8267 			if (err)
8268 				return err;
8269 		}
8270 
8271 		err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, clone_ref_obj_id);
8272 	} else /* MEM_RDONLY and None case from above */ {
8273 		/* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
8274 		if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
8275 			verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
8276 			return -EINVAL;
8277 		}
8278 
8279 		if (!is_dynptr_reg_valid_init(env, reg)) {
8280 			verbose(env,
8281 				"Expected an initialized dynptr as arg #%d\n",
8282 				regno - 1);
8283 			return -EINVAL;
8284 		}
8285 
8286 		/* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
8287 		if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
8288 			verbose(env,
8289 				"Expected a dynptr of type %s as arg #%d\n",
8290 				dynptr_type_str(arg_to_dynptr_type(arg_type)), regno - 1);
8291 			return -EINVAL;
8292 		}
8293 
8294 		err = mark_dynptr_read(env, reg);
8295 	}
8296 	return err;
8297 }
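
/* Illustrative sketch, not part of the original source: roughly how the
 * three dynptr argument flavours above map onto helper usage (rb and buf
 * are hypothetical):
 *
 *	struct bpf_dynptr d;
 *	char buf[16];
 *
 *	bpf_ringbuf_reserve_dynptr(&rb, 16, 0, &d);
 *		// MEM_UNINIT: &d must be uninitialized stack slots, which
 *		// become STACK_DYNPTR on success
 *	bpf_dynptr_read(buf, sizeof(buf), &d, 0, 0);
 *		// MEM_RDONLY: the dynptr itself is not mutated
 *	bpf_ringbuf_submit_dynptr(&d, 0);
 *		// no flag (plus OBJ_RELEASE): the dynptr is consumed
 */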
8298 
8299 static u32 iter_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi)
8300 {
8301 	struct bpf_func_state *state = func(env, reg);
8302 
8303 	return state->stack[spi].spilled_ptr.ref_obj_id;
8304 }
8305 
8306 static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta)
8307 {
8308 	return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
8309 }
8310 
8311 static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
8312 {
8313 	return meta->kfunc_flags & KF_ITER_NEW;
8314 }
8315 
8316 static bool is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta *meta)
8317 {
8318 	return meta->kfunc_flags & KF_ITER_NEXT;
8319 }
8320 
8321 static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
8322 {
8323 	return meta->kfunc_flags & KF_ITER_DESTROY;
8324 }
8325 
8326 static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx,
8327 			      const struct btf_param *arg)
8328 {
8329 	/* btf_check_iter_kfuncs() guarantees that first argument of any iter
8330 	 * kfunc is iter state pointer
8331 	 */
8332 	if (is_iter_kfunc(meta))
8333 		return arg_idx == 0;
8334 
8335 	/* iter passed as an argument to a generic kfunc */
8336 	return btf_param_match_suffix(meta->btf, arg, "__iter");
8337 }
8338 
8339 static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx,
8340 			    struct bpf_kfunc_call_arg_meta *meta)
8341 {
8342 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8343 	const struct btf_type *t;
8344 	int spi, err, i, nr_slots, btf_id;
8345 
8346 	if (reg->type != PTR_TO_STACK) {
8347 		verbose(env, "arg#%d expected pointer to an iterator on stack\n", regno - 1);
8348 		return -EINVAL;
8349 	}
8350 
8351 	/* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs()
8352 	 * ensures struct convention, so we wouldn't need to do any BTF
8353 	 * validation here. But given iter state can be passed as a parameter
8354 	 * to any kfunc, if arg has "__iter" suffix, we need to be a bit more
8355 	 * conservative here.
8356 	 */
8357 	btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, regno - 1);
8358 	if (btf_id < 0) {
8359 		verbose(env, "expected valid iter pointer as arg #%d\n", regno - 1);
8360 		return -EINVAL;
8361 	}
8362 	t = btf_type_by_id(meta->btf, btf_id);
8363 	nr_slots = t->size / BPF_REG_SIZE;
8364 
8365 	if (is_iter_new_kfunc(meta)) {
8366 		/* bpf_iter_<type>_new() expects pointer to uninit iter state */
8367 		if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) {
8368 			verbose(env, "expected uninitialized iter_%s as arg #%d\n",
8369 				iter_type_str(meta->btf, btf_id), regno - 1);
8370 			return -EINVAL;
8371 		}
8372 
8373 		for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) {
8374 			err = check_mem_access(env, insn_idx, regno,
8375 					       i, BPF_DW, BPF_WRITE, -1, false, false);
8376 			if (err)
8377 				return err;
8378 		}
8379 
8380 		err = mark_stack_slots_iter(env, meta, reg, insn_idx, meta->btf, btf_id, nr_slots);
8381 		if (err)
8382 			return err;
8383 	} else {
8384 		/* iter_next() or iter_destroy(), as well as any kfunc
8385 		 * accepting iter argument, expect initialized iter state
8386 		 */
8387 		err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots);
8388 		switch (err) {
8389 		case 0:
8390 			break;
8391 		case -EINVAL:
8392 			verbose(env, "expected an initialized iter_%s as arg #%d\n",
8393 				iter_type_str(meta->btf, btf_id), regno - 1);
8394 			return err;
8395 		case -EPROTO:
8396 			verbose(env, "expected an RCU CS when using %s\n", meta->func_name);
8397 			return err;
8398 		default:
8399 			return err;
8400 		}
8401 
8402 		spi = iter_get_spi(env, reg, nr_slots);
8403 		if (spi < 0)
8404 			return spi;
8405 
8406 		err = mark_iter_read(env, reg, spi, nr_slots);
8407 		if (err)
8408 			return err;
8409 
8410 		/* remember meta->iter info for process_iter_next_call() */
8411 		meta->iter.spi = spi;
8412 		meta->iter.frameno = reg->frameno;
8413 		meta->ref_obj_id = iter_ref_obj_id(env, reg, spi);
8414 
8415 		if (is_iter_destroy_kfunc(meta)) {
8416 			err = unmark_stack_slots_iter(env, reg, nr_slots);
8417 			if (err)
8418 				return err;
8419 		}
8420 	}
8421 
8422 	return 0;
8423 }
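
/* Illustrative sketch, not part of the original source: the open-coded
 * iterator pattern whose new/next/destroy arguments are validated by
 * process_iter_arg() above:
 *
 *	struct bpf_iter_num it;
 *	int sum = 0, *v;
 *
 *	bpf_iter_num_new(&it, 0, 10);		// KF_ITER_NEW: &it must be
 *						// uninitialized iter slots
 *	while ((v = bpf_iter_num_next(&it)))	// KF_ITER_NEXT: initialized
 *		sum += *v;
 *	bpf_iter_num_destroy(&it);		// KF_ITER_DESTROY: slots freed
 */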
8424 
8425 /* Look for a previous loop entry at insn_idx: nearest parent state
8426  * stopped at insn_idx with callsites matching those in cur->frame.
8427  */
8428 static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
8429 						  struct bpf_verifier_state *cur,
8430 						  int insn_idx)
8431 {
8432 	struct bpf_verifier_state_list *sl;
8433 	struct bpf_verifier_state *st;
8434 
8435 	/* Explored states are pushed in stack order, most recent states come first */
8436 	sl = *explored_state(env, insn_idx);
8437 	for (; sl; sl = sl->next) {
8438 		/* If st->branches != 0, the state is part of the current DFS
8439 		 * verification path, hence cur & st form a loop.
8440 		 */
8441 		st = &sl->state;
8442 		if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) &&
8443 		    st->dfs_depth < cur->dfs_depth)
8444 			return st;
8445 	}
8446 
8447 	return NULL;
8448 }
8449 
8450 static void reset_idmap_scratch(struct bpf_verifier_env *env);
8451 static bool regs_exact(const struct bpf_reg_state *rold,
8452 		       const struct bpf_reg_state *rcur,
8453 		       struct bpf_idmap *idmap);
8454 
8455 static void maybe_widen_reg(struct bpf_verifier_env *env,
8456 			    struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
8457 			    struct bpf_idmap *idmap)
8458 {
8459 	if (rold->type != SCALAR_VALUE)
8460 		return;
8461 	if (rold->type != rcur->type)
8462 		return;
8463 	if (rold->precise || rcur->precise || regs_exact(rold, rcur, idmap))
8464 		return;
8465 	__mark_reg_unknown(env, rcur);
8466 }
8467 
8468 static int widen_imprecise_scalars(struct bpf_verifier_env *env,
8469 				   struct bpf_verifier_state *old,
8470 				   struct bpf_verifier_state *cur)
8471 {
8472 	struct bpf_func_state *fold, *fcur;
8473 	int i, fr;
8474 
8475 	reset_idmap_scratch(env);
8476 	for (fr = old->curframe; fr >= 0; fr--) {
8477 		fold = old->frame[fr];
8478 		fcur = cur->frame[fr];
8479 
8480 		for (i = 0; i < MAX_BPF_REG; i++)
8481 			maybe_widen_reg(env,
8482 					&fold->regs[i],
8483 					&fcur->regs[i],
8484 					&env->idmap_scratch);
8485 
8486 		for (i = 0; i < fold->allocated_stack / BPF_REG_SIZE; i++) {
8487 			if (!is_spilled_reg(&fold->stack[i]) ||
8488 			    !is_spilled_reg(&fcur->stack[i]))
8489 				continue;
8490 
8491 			maybe_widen_reg(env,
8492 					&fold->stack[i].spilled_ptr,
8493 					&fcur->stack[i].spilled_ptr,
8494 					&env->idmap_scratch);
8495 		}
8496 	}
8497 	return 0;
8498 }
8499 
8500 static struct bpf_reg_state *get_iter_from_state(struct bpf_verifier_state *cur_st,
8501 						 struct bpf_kfunc_call_arg_meta *meta)
8502 {
8503 	int iter_frameno = meta->iter.frameno;
8504 	int iter_spi = meta->iter.spi;
8505 
8506 	return &cur_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
8507 }
8508 
8509 /* process_iter_next_call() is called when verifier gets to iterator's next
8510  * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
8511  * to it as just "iter_next()" in comments below.
8512  *
8513  * BPF verifier relies on a crucial contract for any iter_next()
8514  * implementation: it should *eventually* return NULL, and once that happens
8515  * it should keep returning NULL. That is, once iterator exhausts elements to
8516  * iterate, it should never reset or spuriously return new elements.
8517  *
8518  * With the assumption of such contract, process_iter_next_call() simulates
8519  * a fork in the verifier state to validate loop logic correctness and safety
8520  * without having to simulate infinite amount of iterations.
8521  *
8522  * In current state, we first assume that iter_next() returned NULL and
8523  * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such
8524  * conditions we should not form an infinite loop and should eventually reach
8525  * exit.
8526  *
8527  * Besides that, we also fork current state and enqueue it for later
8528  * verification. In a forked state we keep iterator state as ACTIVE
8529  * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We
8530  * also bump iteration depth to prevent erroneous infinite loop detection
8531  * later on (see iter_active_depths_differ() comment for details). In this
8532  * state we assume that we'll eventually loop back to another iter_next()
8533  * calls (it could be in exactly same location or in some other instruction,
8534  * it doesn't matter, we don't make any unnecessary assumptions about this,
8535  * everything revolves around iterator state in a stack slot, not which
8536  * instruction is calling iter_next()). When that happens, we either will come
8537  * to iter_next() with equivalent state and can conclude that next iteration
8538  * will proceed in exactly the same way as we just verified, so it's safe to
8539  * assume that loop converges. If not, we'll go on another iteration
8540  * simulation with a different input state, until all possible starting states
8541  * are validated or we reach maximum number of instructions limit.
8542  *
8543  * This way, we will either exhaustively discover all possible input states
8544  * that iterator loop can start with and eventually will converge, or we'll
8545  * effectively regress into bounded loop simulation logic and either reach
8546  * maximum number of instructions if loop is not provably convergent, or there
8547  * is some statically known limit on number of iterations (e.g., if there is
8548  * an explicit `if n > 100 then break;` statement somewhere in the loop).
8549  *
8550  * Iteration convergence logic in is_state_visited() relies on exact
8551  * states comparison, which ignores read and precision marks.
8552  * This is necessary because read and precision marks are not finalized
8553  * while in the loop. Exact comparison might preclude convergence for
8554  * simple programs like below:
8555  *
8556  *     i = 0;
8557  *     while(iter_next(&it))
8558  *       i++;
8559  *
8560  * At each iteration step i++ would produce a new distinct state and
8561  * eventually instruction processing limit would be reached.
8562  *
8563  * To avoid such behavior speculatively forget (widen) range for
8564  * imprecise scalar registers, if those registers were not precise at the
8565  * end of the previous iteration and do not match exactly.
8566  *
8567  * This is a conservative heuristic that allows verifying a wide range of
8568  * programs, however it precludes verification of programs that conjure an
8569  * imprecise value on the first loop iteration and use it as precise on the second.
8570  * For example, the following safe program would fail to verify:
8571  *
8572  *     struct bpf_num_iter it;
8573  *     int arr[10];
8574  *     int i = 0, a = 0;
8575  *     bpf_iter_num_new(&it, 0, 10);
8576  *     while (bpf_iter_num_next(&it)) {
8577  *       if (a == 0) {
8578  *         a = 1;
8579  *         i = 7; // Because i changed verifier would forget
8580  *                // it's range on second loop entry.
8581  *       } else {
8582  *         arr[i] = 42; // This would fail to verify.
8583  *       }
8584  *     }
8585  *     bpf_iter_num_destroy(&it);
8586  */
8587 static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
8588 				  struct bpf_kfunc_call_arg_meta *meta)
8589 {
8590 	struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
8591 	struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
8592 	struct bpf_reg_state *cur_iter, *queued_iter;
8593 
8594 	BTF_TYPE_EMIT(struct bpf_iter);
8595 
8596 	cur_iter = get_iter_from_state(cur_st, meta);
8597 
8598 	if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
8599 	    cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
8600 		verbose(env, "verifier internal error: unexpected iterator state %d (%s)\n",
8601 			cur_iter->iter.state, iter_state_str(cur_iter->iter.state));
8602 		return -EFAULT;
8603 	}
8604 
8605 	if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
8606 		/* Because an iter_next() call is a checkpoint, is_state_visited()
8607 		 * should guarantee a parent state with the same call sites and insn_idx.
8608 		 */
8609 		if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx ||
8610 		    !same_callsites(cur_st->parent, cur_st)) {
8611 			verbose(env, "bug: bad parent state for iter next call");
8612 			return -EFAULT;
8613 		}
8614 		/* Note cur_st->parent in the call below, it is necessary to skip
8615 		 * checkpoint created for cur_st by is_state_visited()
8616 		 * right at this instruction.
8617 		 */
8618 		prev_st = find_prev_entry(env, cur_st->parent, insn_idx);
8619 		/* branch out active iter state */
8620 		queued_st = push_stack(env, insn_idx + 1, insn_idx, false);
8621 		if (!queued_st)
8622 			return -ENOMEM;
8623 
8624 		queued_iter = get_iter_from_state(queued_st, meta);
8625 		queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
8626 		queued_iter->iter.depth++;
8627 		if (prev_st)
8628 			widen_imprecise_scalars(env, prev_st, queued_st);
8629 
8630 		queued_fr = queued_st->frame[queued_st->curframe];
8631 		mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]);
8632 	}
8633 
8634 	/* switch the current iter state to DRAINED, i.e. assume iter_next()
8635 	 * returned NULL, but keep the iteration depth unchanged */
8636 	cur_iter->iter.state = BPF_ITER_STATE_DRAINED;
8637 	__mark_reg_const_zero(env, &cur_fr->regs[BPF_REG_0]);
8638 
8639 	return 0;
8640 }
8641 
8642 static bool arg_type_is_mem_size(enum bpf_arg_type type)
8643 {
8644 	return type == ARG_CONST_SIZE ||
8645 	       type == ARG_CONST_SIZE_OR_ZERO;
8646 }
8647 
8648 static bool arg_type_is_raw_mem(enum bpf_arg_type type)
8649 {
8650 	return base_type(type) == ARG_PTR_TO_MEM &&
8651 	       type & MEM_UNINIT;
8652 }
8653 
8654 static bool arg_type_is_release(enum bpf_arg_type type)
8655 {
8656 	return type & OBJ_RELEASE;
8657 }
8658 
8659 static bool arg_type_is_dynptr(enum bpf_arg_type type)
8660 {
8661 	return base_type(type) == ARG_PTR_TO_DYNPTR;
8662 }
8663 
8664 static int resolve_map_arg_type(struct bpf_verifier_env *env,
8665 				 const struct bpf_call_arg_meta *meta,
8666 				 enum bpf_arg_type *arg_type)
8667 {
8668 	if (!meta->map_ptr) {
8669 		/* kernel subsystem misconfigured verifier */
8670 		verbose(env, "invalid map_ptr to access map->type\n");
8671 		return -EACCES;
8672 	}
8673 
8674 	switch (meta->map_ptr->map_type) {
8675 	case BPF_MAP_TYPE_SOCKMAP:
8676 	case BPF_MAP_TYPE_SOCKHASH:
8677 		if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
8678 			*arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
8679 		} else {
8680 			verbose(env, "invalid arg_type for sockmap/sockhash\n");
8681 			return -EINVAL;
8682 		}
8683 		break;
8684 	case BPF_MAP_TYPE_BLOOM_FILTER:
8685 		if (meta->func_id == BPF_FUNC_map_peek_elem)
8686 			*arg_type = ARG_PTR_TO_MAP_VALUE;
8687 		break;
8688 	default:
8689 		break;
8690 	}
8691 	return 0;
8692 }
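
/* Illustrative sketch, not part of the original source: why the remap in
 * resolve_map_arg_type() above is needed. For sockmap/sockhash the "value"
 * passed to map update helpers is a socket rather than ordinary memory,
 * e.g. (map and sk are hypothetical, and the call is only valid from
 * program types that may hold such a socket pointer):
 *
 *	bpf_map_update_elem(&sock_map, &key, sk, BPF_ANY);
 *		// sk is checked as ARG_PTR_TO_BTF_ID_SOCK_COMMON instead of
 *		// ARG_PTR_TO_MAP_VALUE
 */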
8693 
8694 struct bpf_reg_types {
8695 	const enum bpf_reg_type types[10];
8696 	u32 *btf_id;
8697 };
8698 
8699 static const struct bpf_reg_types sock_types = {
8700 	.types = {
8701 		PTR_TO_SOCK_COMMON,
8702 		PTR_TO_SOCKET,
8703 		PTR_TO_TCP_SOCK,
8704 		PTR_TO_XDP_SOCK,
8705 	},
8706 };
8707 
8708 #ifdef CONFIG_NET
8709 static const struct bpf_reg_types btf_id_sock_common_types = {
8710 	.types = {
8711 		PTR_TO_SOCK_COMMON,
8712 		PTR_TO_SOCKET,
8713 		PTR_TO_TCP_SOCK,
8714 		PTR_TO_XDP_SOCK,
8715 		PTR_TO_BTF_ID,
8716 		PTR_TO_BTF_ID | PTR_TRUSTED,
8717 	},
8718 	.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
8719 };
8720 #endif
8721 
8722 static const struct bpf_reg_types mem_types = {
8723 	.types = {
8724 		PTR_TO_STACK,
8725 		PTR_TO_PACKET,
8726 		PTR_TO_PACKET_META,
8727 		PTR_TO_MAP_KEY,
8728 		PTR_TO_MAP_VALUE,
8729 		PTR_TO_MEM,
8730 		PTR_TO_MEM | MEM_RINGBUF,
8731 		PTR_TO_BUF,
8732 		PTR_TO_BTF_ID | PTR_TRUSTED,
8733 	},
8734 };
8735 
8736 static const struct bpf_reg_types spin_lock_types = {
8737 	.types = {
8738 		PTR_TO_MAP_VALUE,
8739 		PTR_TO_BTF_ID | MEM_ALLOC,
8740 	}
8741 };
8742 
8743 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
8744 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
8745 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
8746 static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
8747 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
8748 static const struct bpf_reg_types btf_ptr_types = {
8749 	.types = {
8750 		PTR_TO_BTF_ID,
8751 		PTR_TO_BTF_ID | PTR_TRUSTED,
8752 		PTR_TO_BTF_ID | MEM_RCU,
8753 	},
8754 };
8755 static const struct bpf_reg_types percpu_btf_ptr_types = {
8756 	.types = {
8757 		PTR_TO_BTF_ID | MEM_PERCPU,
8758 		PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU,
8759 		PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
8760 	}
8761 };
8762 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
8763 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
8764 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
8765 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
8766 static const struct bpf_reg_types kptr_xchg_dest_types = {
8767 	.types = {
8768 		PTR_TO_MAP_VALUE,
8769 		PTR_TO_BTF_ID | MEM_ALLOC
8770 	}
8771 };
8772 static const struct bpf_reg_types dynptr_types = {
8773 	.types = {
8774 		PTR_TO_STACK,
8775 		CONST_PTR_TO_DYNPTR,
8776 	}
8777 };
8778 
8779 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
8780 	[ARG_PTR_TO_MAP_KEY]		= &mem_types,
8781 	[ARG_PTR_TO_MAP_VALUE]		= &mem_types,
8782 	[ARG_CONST_SIZE]		= &scalar_types,
8783 	[ARG_CONST_SIZE_OR_ZERO]	= &scalar_types,
8784 	[ARG_CONST_ALLOC_SIZE_OR_ZERO]	= &scalar_types,
8785 	[ARG_CONST_MAP_PTR]		= &const_map_ptr_types,
8786 	[ARG_PTR_TO_CTX]		= &context_types,
8787 	[ARG_PTR_TO_SOCK_COMMON]	= &sock_types,
8788 #ifdef CONFIG_NET
8789 	[ARG_PTR_TO_BTF_ID_SOCK_COMMON]	= &btf_id_sock_common_types,
8790 #endif
8791 	[ARG_PTR_TO_SOCKET]		= &fullsock_types,
8792 	[ARG_PTR_TO_BTF_ID]		= &btf_ptr_types,
8793 	[ARG_PTR_TO_SPIN_LOCK]		= &spin_lock_types,
8794 	[ARG_PTR_TO_MEM]		= &mem_types,
8795 	[ARG_PTR_TO_RINGBUF_MEM]	= &ringbuf_mem_types,
8796 	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
8797 	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
8798 	[ARG_PTR_TO_STACK]		= &stack_ptr_types,
8799 	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
8800 	[ARG_PTR_TO_TIMER]		= &timer_types,
8801 	[ARG_KPTR_XCHG_DEST]		= &kptr_xchg_dest_types,
8802 	[ARG_PTR_TO_DYNPTR]		= &dynptr_types,
8803 };
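
/* Illustrative sketch, not part of the original source: reading the table
 * above. bpf_ringbuf_submit() declares its first argument as
 * ARG_PTR_TO_RINGBUF_MEM, so check_reg_type() below only accepts a register
 * of type PTR_TO_MEM | MEM_RINGBUF, i.e. the pointer returned by
 * bpf_ringbuf_reserve() ('rb' is hypothetical):
 *
 *	void *rec = bpf_ringbuf_reserve(&rb, 16, 0);
 *	if (!rec)
 *		return 0;
 *	bpf_ringbuf_submit(rec, 0);	// ok: PTR_TO_MEM | MEM_RINGBUF
 */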
8804 
8805 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
8806 			  enum bpf_arg_type arg_type,
8807 			  const u32 *arg_btf_id,
8808 			  struct bpf_call_arg_meta *meta)
8809 {
8810 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8811 	enum bpf_reg_type expected, type = reg->type;
8812 	const struct bpf_reg_types *compatible;
8813 	int i, j;
8814 
8815 	compatible = compatible_reg_types[base_type(arg_type)];
8816 	if (!compatible) {
8817 		verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
8818 		return -EFAULT;
8819 	}
8820 
8821 	/* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
8822 	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
8823 	 *
8824 	 * Same for MAYBE_NULL:
8825 	 *
8826 	 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
8827 	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
8828 	 *
8829 	 * ARG_PTR_TO_MEM is compatible with PTR_TO_MEM that is tagged with a dynptr type.
8830 	 *
8831 	 * Therefore we fold these flags depending on the arg_type before comparison.
8832 	 */
8833 	if (arg_type & MEM_RDONLY)
8834 		type &= ~MEM_RDONLY;
8835 	if (arg_type & PTR_MAYBE_NULL)
8836 		type &= ~PTR_MAYBE_NULL;
8837 	if (base_type(arg_type) == ARG_PTR_TO_MEM)
8838 		type &= ~DYNPTR_TYPE_FLAG_MASK;
8839 
8840 	/* Local kptr types are allowed as the source argument of bpf_kptr_xchg */
8841 	if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && regno == BPF_REG_2) {
8842 		type &= ~MEM_ALLOC;
8843 		type &= ~MEM_PERCPU;
8844 	}
8845 
8846 	for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
8847 		expected = compatible->types[i];
8848 		if (expected == NOT_INIT)
8849 			break;
8850 
8851 		if (type == expected)
8852 			goto found;
8853 	}
8854 
8855 	verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
8856 	for (j = 0; j + 1 < i; j++)
8857 		verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
8858 	verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
8859 	return -EACCES;
8860 
8861 found:
8862 	if (base_type(reg->type) != PTR_TO_BTF_ID)
8863 		return 0;
8864 
8865 	if (compatible == &mem_types) {
8866 		if (!(arg_type & MEM_RDONLY)) {
8867 			verbose(env,
8868 				"%s() may write into memory pointed by R%d type=%s\n",
8869 				func_id_name(meta->func_id),
8870 				regno, reg_type_str(env, reg->type));
8871 			return -EACCES;
8872 		}
8873 		return 0;
8874 	}
8875 
8876 	switch ((int)reg->type) {
8877 	case PTR_TO_BTF_ID:
8878 	case PTR_TO_BTF_ID | PTR_TRUSTED:
8879 	case PTR_TO_BTF_ID | PTR_TRUSTED | PTR_MAYBE_NULL:
8880 	case PTR_TO_BTF_ID | MEM_RCU:
8881 	case PTR_TO_BTF_ID | PTR_MAYBE_NULL:
8882 	case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU:
8883 	{
8884 		/* For bpf_sk_release, it needs to match against first member
8885 		 * 'struct sock_common', hence make an exception for it. This
8886 		 * allows bpf_sk_release to work for multiple socket types.
8887 		 */
8888 		bool strict_type_match = arg_type_is_release(arg_type) &&
8889 					 meta->func_id != BPF_FUNC_sk_release;
8890 
8891 		if (type_may_be_null(reg->type) &&
8892 		    (!type_may_be_null(arg_type) || arg_type_is_release(arg_type))) {
8893 			verbose(env, "Possibly NULL pointer passed to helper arg%d\n", regno);
8894 			return -EACCES;
8895 		}
8896 
8897 		if (!arg_btf_id) {
8898 			if (!compatible->btf_id) {
8899 				verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
8900 				return -EFAULT;
8901 			}
8902 			arg_btf_id = compatible->btf_id;
8903 		}
8904 
8905 		if (meta->func_id == BPF_FUNC_kptr_xchg) {
8906 			if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
8907 				return -EACCES;
8908 		} else {
8909 			if (arg_btf_id == BPF_PTR_POISON) {
8910 				verbose(env, "verifier internal error:");
8911 				verbose(env, "R%d has non-overwritten BPF_PTR_POISON type\n",
8912 					regno);
8913 				return -EACCES;
8914 			}
8915 
8916 			if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
8917 						  btf_vmlinux, *arg_btf_id,
8918 						  strict_type_match)) {
8919 				verbose(env, "R%d is of type %s but %s is expected\n",
8920 					regno, btf_type_name(reg->btf, reg->btf_id),
8921 					btf_type_name(btf_vmlinux, *arg_btf_id));
8922 				return -EACCES;
8923 			}
8924 		}
8925 		break;
8926 	}
8927 	case PTR_TO_BTF_ID | MEM_ALLOC:
8928 	case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC:
8929 		if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
8930 		    meta->func_id != BPF_FUNC_kptr_xchg) {
8931 			verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
8932 			return -EFAULT;
8933 		}
8934 		/* Check if local kptr in src arg matches kptr in dst arg */
8935 		if (meta->func_id == BPF_FUNC_kptr_xchg && regno == BPF_REG_2) {
8936 			if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
8937 				return -EACCES;
8938 		}
8939 		break;
8940 	case PTR_TO_BTF_ID | MEM_PERCPU:
8941 	case PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU:
8942 	case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
8943 		/* Handled by helper specific checks */
8944 		break;
8945 	default:
8946 		verbose(env, "verifier internal error: invalid PTR_TO_BTF_ID register for type match\n");
8947 		return -EFAULT;
8948 	}
8949 	return 0;
8950 }
8951 
8952 static struct btf_field *
8953 reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
8954 {
8955 	struct btf_field *field;
8956 	struct btf_record *rec;
8957 
8958 	rec = reg_btf_record(reg);
8959 	if (!rec)
8960 		return NULL;
8961 
8962 	field = btf_record_find(rec, off, fields);
8963 	if (!field)
8964 		return NULL;
8965 
8966 	return field;
8967 }
8968 
8969 static int check_func_arg_reg_off(struct bpf_verifier_env *env,
8970 				  const struct bpf_reg_state *reg, int regno,
8971 				  enum bpf_arg_type arg_type)
8972 {
8973 	u32 type = reg->type;
8974 
8975 	/* When referenced register is passed to release function, its fixed
8976 	 * offset must be 0.
8977 	 *
8978 	 * We will check that an arg_type_is_release reg has a ref_obj_id when storing
8979 	 * meta->release_regno.
8980 	 */
8981 	if (arg_type_is_release(arg_type)) {
8982 		/* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
8983 		 * may not directly point to the object being released, but to
8984 		 * dynptr pointing to such object, which might be at some offset
8985 		 * on the stack. In that case, we simply fall back to the
8986 		 * default handling.
8987 		 */
8988 		if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
8989 			return 0;
8990 
8991 		/* Doing check_ptr_off_reg check for the offset will catch this
8992 		 * because fixed_off_ok is false, but checking here allows us
8993 		 * to give the user a better error message.
8994 		 */
8995 		if (reg->off) {
8996 			verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
8997 				regno);
8998 			return -EINVAL;
8999 		}
9000 		return __check_ptr_off_reg(env, reg, regno, false);
9001 	}
9002 
9003 	switch (type) {
9004 	/* Pointer types where both fixed and variable offset is explicitly allowed: */
9005 	case PTR_TO_STACK:
9006 	case PTR_TO_PACKET:
9007 	case PTR_TO_PACKET_META:
9008 	case PTR_TO_MAP_KEY:
9009 	case PTR_TO_MAP_VALUE:
9010 	case PTR_TO_MEM:
9011 	case PTR_TO_MEM | MEM_RDONLY:
9012 	case PTR_TO_MEM | MEM_RINGBUF:
9013 	case PTR_TO_BUF:
9014 	case PTR_TO_BUF | MEM_RDONLY:
9015 	case PTR_TO_ARENA:
9016 	case SCALAR_VALUE:
9017 		return 0;
9018 	/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
9019 	 * fixed offset.
9020 	 */
9021 	case PTR_TO_BTF_ID:
9022 	case PTR_TO_BTF_ID | MEM_ALLOC:
9023 	case PTR_TO_BTF_ID | PTR_TRUSTED:
9024 	case PTR_TO_BTF_ID | MEM_RCU:
9025 	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
9026 	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
9027 		/* When referenced PTR_TO_BTF_ID is passed to release function,
9028 		 * its fixed offset must be 0. In the other cases, fixed offset
9029 		 * can be non-zero. This was already checked above. So pass
9030 		 * fixed_off_ok as true to allow fixed offset for all other
9031 		 * cases. var_off must always be 0 for PTR_TO_BTF_ID, hence we
9032 		 * still need to do checks instead of returning.
9033 		 */
9034 		return __check_ptr_off_reg(env, reg, regno, true);
9035 	default:
9036 		return __check_ptr_off_reg(env, reg, regno, false);
9037 	}
9038 }
9039 
9040 static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env,
9041 						const struct bpf_func_proto *fn,
9042 						struct bpf_reg_state *regs)
9043 {
9044 	struct bpf_reg_state *state = NULL;
9045 	int i;
9046 
9047 	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
9048 		if (arg_type_is_dynptr(fn->arg_type[i])) {
9049 			if (state) {
9050 				verbose(env, "verifier internal error: multiple dynptr args\n");
9051 				return NULL;
9052 			}
9053 			state = &regs[BPF_REG_1 + i];
9054 		}
9055 
9056 	if (!state)
9057 		verbose(env, "verifier internal error: no dynptr arg found\n");
9058 
9059 	return state;
9060 }
9061 
9062 static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
9063 {
9064 	struct bpf_func_state *state = func(env, reg);
9065 	int spi;
9066 
9067 	if (reg->type == CONST_PTR_TO_DYNPTR)
9068 		return reg->id;
9069 	spi = dynptr_get_spi(env, reg);
9070 	if (spi < 0)
9071 		return spi;
9072 	return state->stack[spi].spilled_ptr.id;
9073 }
9074 
9075 static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
9076 {
9077 	struct bpf_func_state *state = func(env, reg);
9078 	int spi;
9079 
9080 	if (reg->type == CONST_PTR_TO_DYNPTR)
9081 		return reg->ref_obj_id;
9082 	spi = dynptr_get_spi(env, reg);
9083 	if (spi < 0)
9084 		return spi;
9085 	return state->stack[spi].spilled_ptr.ref_obj_id;
9086 }
9087 
9088 static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
9089 					    struct bpf_reg_state *reg)
9090 {
9091 	struct bpf_func_state *state = func(env, reg);
9092 	int spi;
9093 
9094 	if (reg->type == CONST_PTR_TO_DYNPTR)
9095 		return reg->dynptr.type;
9096 
9097 	spi = __get_spi(reg->off);
9098 	if (spi < 0) {
9099 		verbose(env, "verifier internal error: invalid spi when querying dynptr type\n");
9100 		return BPF_DYNPTR_TYPE_INVALID;
9101 	}
9102 
9103 	return state->stack[spi].spilled_ptr.dynptr.type;
9104 }
9105 
9106 static int check_reg_const_str(struct bpf_verifier_env *env,
9107 			       struct bpf_reg_state *reg, u32 regno)
9108 {
9109 	struct bpf_map *map = reg->map_ptr;
9110 	int err;
9111 	int map_off;
9112 	u64 map_addr;
9113 	char *str_ptr;
9114 
9115 	if (reg->type != PTR_TO_MAP_VALUE)
9116 		return -EINVAL;
9117 
9118 	if (!bpf_map_is_rdonly(map)) {
9119 		verbose(env, "R%d does not point to a readonly map'\n", regno);
9120 		return -EACCES;
9121 	}
9122 
9123 	if (!tnum_is_const(reg->var_off)) {
9124 		verbose(env, "R%d is not a constant address'\n", regno);
9125 		return -EACCES;
9126 	}
9127 
9128 	if (!map->ops->map_direct_value_addr) {
9129 		verbose(env, "no direct value access support for this map type\n");
9130 		return -EACCES;
9131 	}
9132 
9133 	err = check_map_access(env, regno, reg->off,
9134 			       map->value_size - reg->off, false,
9135 			       ACCESS_HELPER);
9136 	if (err)
9137 		return err;
9138 
9139 	map_off = reg->off + reg->var_off.value;
9140 	err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
9141 	if (err) {
9142 		verbose(env, "direct value access on string failed\n");
9143 		return err;
9144 	}
9145 
9146 	str_ptr = (char *)(long)(map_addr);
9147 	if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
9148 		verbose(env, "string is not zero-terminated\n");
9149 		return -EINVAL;
9150 	}
9151 	return 0;
9152 }
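
/* Illustrative example (a sketch, not taken from this file; 'buf' and 'args'
 * are hypothetical locals) of a program satisfying the checks above: the
 * format string for an ARG_PTR_TO_CONST_STR argument lives in a read-only
 * map (e.g. the program's .rodata section) and is NUL-terminated, so its
 * address can be resolved at verification time via map_direct_value_addr():
 *
 *	static const char fmt[] = "pid=%d\n";	// placed in a read-only map
 *	...
 *	bpf_snprintf(buf, sizeof(buf), fmt, args, sizeof(args));
 */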
9153 
9154 /* Returns constant key value in `value` if possible, else negative error */
9155 static int get_constant_map_key(struct bpf_verifier_env *env,
9156 				struct bpf_reg_state *key,
9157 				u32 key_size,
9158 				s64 *value)
9159 {
9160 	struct bpf_func_state *state = func(env, key);
9161 	struct bpf_reg_state *reg;
9162 	int slot, spi, off;
9163 	int spill_size = 0;
9164 	int zero_size = 0;
9165 	int stack_off;
9166 	int i, err;
9167 	u8 *stype;
9168 
9169 	if (!env->bpf_capable)
9170 		return -EOPNOTSUPP;
9171 	if (key->type != PTR_TO_STACK)
9172 		return -EOPNOTSUPP;
9173 	if (!tnum_is_const(key->var_off))
9174 		return -EOPNOTSUPP;
9175 
9176 	stack_off = key->off + key->var_off.value;
9177 	slot = -stack_off - 1;
9178 	spi = slot / BPF_REG_SIZE;
9179 	off = slot % BPF_REG_SIZE;
9180 	stype = state->stack[spi].slot_type;
9181 
9182 	/* First handle precisely tracked STACK_ZERO */
9183 	for (i = off; i >= 0 && stype[i] == STACK_ZERO; i--)
9184 		zero_size++;
9185 	if (zero_size >= key_size) {
9186 		*value = 0;
9187 		return 0;
9188 	}
9189 
9190 	/* Check that stack contains a scalar spill of expected size */
9191 	if (!is_spilled_scalar_reg(&state->stack[spi]))
9192 		return -EOPNOTSUPP;
9193 	for (i = off; i >= 0 && stype[i] == STACK_SPILL; i--)
9194 		spill_size++;
9195 	if (spill_size != key_size)
9196 		return -EOPNOTSUPP;
9197 
9198 	reg = &state->stack[spi].spilled_ptr;
9199 	if (!tnum_is_const(reg->var_off))
9200 		/* Stack value not statically known */
9201 		return -EOPNOTSUPP;
9202 
9203 	/* We are relying on a constant value. So mark as precise
9204 	 * to prevent pruning on it.
9205 	 */
9206 	bt_set_frame_slot(&env->bt, key->frameno, spi);
9207 	err = mark_chain_precision_batch(env);
9208 	if (err < 0)
9209 		return err;
9210 
9211 	*value = reg->var_off.value;
9212 	return 0;
9213 }
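
/* Illustrative pattern (hypothetical program) that the constant-key tracking
 * above recognizes:
 *
 *	u32 key = 0;			// spilled to the stack as a constant
 *	val = bpf_map_lookup_elem(&array_map, &key);
 *
 * Since the key is provably 0 and within bounds for an array map, later
 * logic (see can_elide_value_nullness()) may elide the NULL check on 'val'.
 */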
9214 
9215 static bool can_elide_value_nullness(enum bpf_map_type type);
9216 
9217 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
9218 			  struct bpf_call_arg_meta *meta,
9219 			  const struct bpf_func_proto *fn,
9220 			  int insn_idx)
9221 {
9222 	u32 regno = BPF_REG_1 + arg;
9223 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
9224 	enum bpf_arg_type arg_type = fn->arg_type[arg];
9225 	enum bpf_reg_type type = reg->type;
9226 	u32 *arg_btf_id = NULL;
9227 	u32 key_size;
9228 	int err = 0;
9229 
9230 	if (arg_type == ARG_DONTCARE)
9231 		return 0;
9232 
9233 	err = check_reg_arg(env, regno, SRC_OP);
9234 	if (err)
9235 		return err;
9236 
9237 	if (arg_type == ARG_ANYTHING) {
9238 		if (is_pointer_value(env, regno)) {
9239 			verbose(env, "R%d leaks addr into helper function\n",
9240 				regno);
9241 			return -EACCES;
9242 		}
9243 		return 0;
9244 	}
9245 
9246 	if (type_is_pkt_pointer(type) &&
9247 	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
9248 		verbose(env, "helper access to the packet is not allowed\n");
9249 		return -EACCES;
9250 	}
9251 
9252 	if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
9253 		err = resolve_map_arg_type(env, meta, &arg_type);
9254 		if (err)
9255 			return err;
9256 	}
9257 
9258 	if (register_is_null(reg) && type_may_be_null(arg_type))
9259 		/* A NULL register has a SCALAR_VALUE type, so skip
9260 		 * type checking.
9261 		 */
9262 		goto skip_type_check;
9263 
9264 	/* arg_btf_id and arg_size are in a union. */
9265 	if (base_type(arg_type) == ARG_PTR_TO_BTF_ID ||
9266 	    base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK)
9267 		arg_btf_id = fn->arg_btf_id[arg];
9268 
9269 	err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
9270 	if (err)
9271 		return err;
9272 
9273 	err = check_func_arg_reg_off(env, reg, regno, arg_type);
9274 	if (err)
9275 		return err;
9276 
9277 skip_type_check:
9278 	if (arg_type_is_release(arg_type)) {
9279 		if (arg_type_is_dynptr(arg_type)) {
9280 			struct bpf_func_state *state = func(env, reg);
9281 			int spi;
9282 
9283 			/* Only dynptr created on stack can be released, thus
9284 			 * the get_spi and stack state checks for spilled_ptr
9285 			 * should only be done before process_dynptr_func for
9286 			 * PTR_TO_STACK.
9287 			 */
9288 			if (reg->type == PTR_TO_STACK) {
9289 				spi = dynptr_get_spi(env, reg);
9290 				if (spi < 0 || !state->stack[spi].spilled_ptr.ref_obj_id) {
9291 					verbose(env, "arg %d is an unacquired reference\n", regno);
9292 					return -EINVAL;
9293 				}
9294 			} else {
9295 				verbose(env, "cannot release unowned const bpf_dynptr\n");
9296 				return -EINVAL;
9297 			}
9298 		} else if (!reg->ref_obj_id && !register_is_null(reg)) {
9299 			verbose(env, "R%d must be referenced when passed to release function\n",
9300 				regno);
9301 			return -EINVAL;
9302 		}
9303 		if (meta->release_regno) {
9304 			verbose(env, "verifier internal error: more than one release argument\n");
9305 			return -EFAULT;
9306 		}
9307 		meta->release_regno = regno;
9308 	}
9309 
9310 	if (reg->ref_obj_id && base_type(arg_type) != ARG_KPTR_XCHG_DEST) {
9311 		if (meta->ref_obj_id) {
9312 			verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
9313 				regno, reg->ref_obj_id,
9314 				meta->ref_obj_id);
9315 			return -EFAULT;
9316 		}
9317 		meta->ref_obj_id = reg->ref_obj_id;
9318 	}
9319 
9320 	switch (base_type(arg_type)) {
9321 	case ARG_CONST_MAP_PTR:
9322 		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
9323 		if (meta->map_ptr) {
9324 			/* Use map_uid (which is unique id of inner map) to reject:
9325 			 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
9326 			 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
9327 			 * if (inner_map1 && inner_map2) {
9328 			 *     timer = bpf_map_lookup_elem(inner_map1);
9329 			 *     if (timer)
9330 			 *         // mismatch would have been allowed
9331 			 *         bpf_timer_init(timer, inner_map2);
9332 			 * }
9333 			 *
9334 			 * Comparing map_ptr is enough to distinguish normal and outer maps.
9335 			 */
9336 			if (meta->map_ptr != reg->map_ptr ||
9337 			    meta->map_uid != reg->map_uid) {
9338 				verbose(env,
9339 					"timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
9340 					meta->map_uid, reg->map_uid);
9341 				return -EINVAL;
9342 			}
9343 		}
9344 		meta->map_ptr = reg->map_ptr;
9345 		meta->map_uid = reg->map_uid;
9346 		break;
9347 	case ARG_PTR_TO_MAP_KEY:
9348 		/* bpf_map_xxx(..., map_ptr, ..., key) call:
9349 		 * check that [key, key + map->key_size) are within
9350 		 * stack limits and initialized
9351 		 */
9352 		if (!meta->map_ptr) {
9353 			/* in function declaration map_ptr must come before
9354 			 * map_key, so that it's verified and known before
9355 			 * we have to check map_key here. Otherwise it means
9356 			 * that the kernel subsystem misconfigured the verifier
9357 			 */
9358 			verbose(env, "invalid map_ptr to access map->key\n");
9359 			return -EACCES;
9360 		}
9361 		key_size = meta->map_ptr->key_size;
9362 		err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL);
9363 		if (err)
9364 			return err;
9365 		if (can_elide_value_nullness(meta->map_ptr->map_type)) {
9366 			err = get_constant_map_key(env, reg, key_size, &meta->const_map_key);
9367 			if (err < 0) {
9368 				meta->const_map_key = -1;
9369 				if (err == -EOPNOTSUPP)
9370 					err = 0;
9371 				else
9372 					return err;
9373 			}
9374 		}
9375 		break;
9376 	case ARG_PTR_TO_MAP_VALUE:
9377 		if (type_may_be_null(arg_type) && register_is_null(reg))
9378 			return 0;
9379 
9380 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
9381 		 * check [value, value + map->value_size) validity
9382 		 */
9383 		if (!meta->map_ptr) {
9384 			/* kernel subsystem misconfigured verifier */
9385 			verbose(env, "invalid map_ptr to access map->value\n");
9386 			return -EACCES;
9387 		}
9388 		meta->raw_mode = arg_type & MEM_UNINIT;
9389 		err = check_helper_mem_access(env, regno, meta->map_ptr->value_size,
9390 					      arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
9391 					      false, meta);
9392 		break;
9393 	case ARG_PTR_TO_PERCPU_BTF_ID:
9394 		if (!reg->btf_id) {
9395 			verbose(env, "Helper has invalid btf_id in R%d\n", regno);
9396 			return -EACCES;
9397 		}
9398 		meta->ret_btf = reg->btf;
9399 		meta->ret_btf_id = reg->btf_id;
9400 		break;
9401 	case ARG_PTR_TO_SPIN_LOCK:
9402 		if (in_rbtree_lock_required_cb(env)) {
9403 			verbose(env, "can't spin_{lock,unlock} in rbtree cb\n");
9404 			return -EACCES;
9405 		}
9406 		if (meta->func_id == BPF_FUNC_spin_lock) {
9407 			err = process_spin_lock(env, regno, true);
9408 			if (err)
9409 				return err;
9410 		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
9411 			err = process_spin_lock(env, regno, false);
9412 			if (err)
9413 				return err;
9414 		} else {
9415 			verbose(env, "verifier internal error\n");
9416 			return -EFAULT;
9417 		}
9418 		break;
9419 	case ARG_PTR_TO_TIMER:
9420 		err = process_timer_func(env, regno, meta);
9421 		if (err)
9422 			return err;
9423 		break;
9424 	case ARG_PTR_TO_FUNC:
9425 		meta->subprogno = reg->subprogno;
9426 		break;
9427 	case ARG_PTR_TO_MEM:
9428 		/* The access to this pointer is only checked when we hit the
9429 		 * next is_mem_size argument below.
9430 		 */
9431 		meta->raw_mode = arg_type & MEM_UNINIT;
9432 		if (arg_type & MEM_FIXED_SIZE) {
9433 			err = check_helper_mem_access(env, regno, fn->arg_size[arg],
9434 						      arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
9435 						      false, meta);
9436 			if (err)
9437 				return err;
9438 			if (arg_type & MEM_ALIGNED)
9439 				err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true);
9440 		}
9441 		break;
9442 	case ARG_CONST_SIZE:
9443 		err = check_mem_size_reg(env, reg, regno,
9444 					 fn->arg_type[arg - 1] & MEM_WRITE ?
9445 					 BPF_WRITE : BPF_READ,
9446 					 false, meta);
9447 		break;
9448 	case ARG_CONST_SIZE_OR_ZERO:
9449 		err = check_mem_size_reg(env, reg, regno,
9450 					 fn->arg_type[arg - 1] & MEM_WRITE ?
9451 					 BPF_WRITE : BPF_READ,
9452 					 true, meta);
9453 		break;
9454 	case ARG_PTR_TO_DYNPTR:
9455 		err = process_dynptr_func(env, regno, insn_idx, arg_type, 0);
9456 		if (err)
9457 			return err;
9458 		break;
9459 	case ARG_CONST_ALLOC_SIZE_OR_ZERO:
9460 		if (!tnum_is_const(reg->var_off)) {
9461 			verbose(env, "R%d is not a known constant'\n",
9462 				regno);
9463 			return -EACCES;
9464 		}
9465 		meta->mem_size = reg->var_off.value;
9466 		err = mark_chain_precision(env, regno);
9467 		if (err)
9468 			return err;
9469 		break;
9470 	case ARG_PTR_TO_CONST_STR:
9471 	{
9472 		err = check_reg_const_str(env, reg, regno);
9473 		if (err)
9474 			return err;
9475 		break;
9476 	}
9477 	case ARG_KPTR_XCHG_DEST:
9478 		err = process_kptr_func(env, regno, meta);
9479 		if (err)
9480 			return err;
9481 		break;
9482 	}
9483 
9484 	return err;
9485 }
9486 
9487 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
9488 {
9489 	enum bpf_attach_type eatype = env->prog->expected_attach_type;
9490 	enum bpf_prog_type type = resolve_prog_type(env->prog);
9491 
9492 	if (func_id != BPF_FUNC_map_update_elem &&
9493 	    func_id != BPF_FUNC_map_delete_elem)
9494 		return false;
9495 
9496 	/* It's not possible to get access to a locked struct sock in these
9497 	 * contexts, so updating is safe.
9498 	 */
9499 	switch (type) {
9500 	case BPF_PROG_TYPE_TRACING:
9501 		if (eatype == BPF_TRACE_ITER)
9502 			return true;
9503 		break;
9504 	case BPF_PROG_TYPE_SOCK_OPS:
9505 		/* map_update allowed only via dedicated helpers with event type checks */
9506 		if (func_id == BPF_FUNC_map_delete_elem)
9507 			return true;
9508 		break;
9509 	case BPF_PROG_TYPE_SOCKET_FILTER:
9510 	case BPF_PROG_TYPE_SCHED_CLS:
9511 	case BPF_PROG_TYPE_SCHED_ACT:
9512 	case BPF_PROG_TYPE_XDP:
9513 	case BPF_PROG_TYPE_SK_REUSEPORT:
9514 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
9515 	case BPF_PROG_TYPE_SK_LOOKUP:
9516 		return true;
9517 	default:
9518 		break;
9519 	}
9520 
9521 	verbose(env, "cannot update sockmap in this context\n");
9522 	return false;
9523 }
9524 
9525 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
9526 {
9527 	return env->prog->jit_requested &&
9528 	       bpf_jit_supports_subprog_tailcalls();
9529 }
9530 
9531 static int check_map_func_compatibility(struct bpf_verifier_env *env,
9532 					struct bpf_map *map, int func_id)
9533 {
9534 	if (!map)
9535 		return 0;
9536 
9537 	/* We need a two way check, first is from map perspective ... */
9538 	switch (map->map_type) {
9539 	case BPF_MAP_TYPE_PROG_ARRAY:
9540 		if (func_id != BPF_FUNC_tail_call)
9541 			goto error;
9542 		break;
9543 	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
9544 		if (func_id != BPF_FUNC_perf_event_read &&
9545 		    func_id != BPF_FUNC_perf_event_output &&
9546 		    func_id != BPF_FUNC_skb_output &&
9547 		    func_id != BPF_FUNC_perf_event_read_value &&
9548 		    func_id != BPF_FUNC_xdp_output)
9549 			goto error;
9550 		break;
9551 	case BPF_MAP_TYPE_RINGBUF:
9552 		if (func_id != BPF_FUNC_ringbuf_output &&
9553 		    func_id != BPF_FUNC_ringbuf_reserve &&
9554 		    func_id != BPF_FUNC_ringbuf_query &&
9555 		    func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
9556 		    func_id != BPF_FUNC_ringbuf_submit_dynptr &&
9557 		    func_id != BPF_FUNC_ringbuf_discard_dynptr)
9558 			goto error;
9559 		break;
9560 	case BPF_MAP_TYPE_USER_RINGBUF:
9561 		if (func_id != BPF_FUNC_user_ringbuf_drain)
9562 			goto error;
9563 		break;
9564 	case BPF_MAP_TYPE_STACK_TRACE:
9565 		if (func_id != BPF_FUNC_get_stackid)
9566 			goto error;
9567 		break;
9568 	case BPF_MAP_TYPE_CGROUP_ARRAY:
9569 		if (func_id != BPF_FUNC_skb_under_cgroup &&
9570 		    func_id != BPF_FUNC_current_task_under_cgroup)
9571 			goto error;
9572 		break;
9573 	case BPF_MAP_TYPE_CGROUP_STORAGE:
9574 	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
9575 		if (func_id != BPF_FUNC_get_local_storage)
9576 			goto error;
9577 		break;
9578 	case BPF_MAP_TYPE_DEVMAP:
9579 	case BPF_MAP_TYPE_DEVMAP_HASH:
9580 		if (func_id != BPF_FUNC_redirect_map &&
9581 		    func_id != BPF_FUNC_map_lookup_elem)
9582 			goto error;
9583 		break;
9584 	/* Restrict bpf side of cpumap and xskmap, open when use-cases
9585 	 * appear.
9586 	 */
9587 	case BPF_MAP_TYPE_CPUMAP:
9588 		if (func_id != BPF_FUNC_redirect_map)
9589 			goto error;
9590 		break;
9591 	case BPF_MAP_TYPE_XSKMAP:
9592 		if (func_id != BPF_FUNC_redirect_map &&
9593 		    func_id != BPF_FUNC_map_lookup_elem)
9594 			goto error;
9595 		break;
9596 	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
9597 	case BPF_MAP_TYPE_HASH_OF_MAPS:
9598 		if (func_id != BPF_FUNC_map_lookup_elem)
9599 			goto error;
9600 		break;
9601 	case BPF_MAP_TYPE_SOCKMAP:
9602 		if (func_id != BPF_FUNC_sk_redirect_map &&
9603 		    func_id != BPF_FUNC_sock_map_update &&
9604 		    func_id != BPF_FUNC_msg_redirect_map &&
9605 		    func_id != BPF_FUNC_sk_select_reuseport &&
9606 		    func_id != BPF_FUNC_map_lookup_elem &&
9607 		    !may_update_sockmap(env, func_id))
9608 			goto error;
9609 		break;
9610 	case BPF_MAP_TYPE_SOCKHASH:
9611 		if (func_id != BPF_FUNC_sk_redirect_hash &&
9612 		    func_id != BPF_FUNC_sock_hash_update &&
9613 		    func_id != BPF_FUNC_msg_redirect_hash &&
9614 		    func_id != BPF_FUNC_sk_select_reuseport &&
9615 		    func_id != BPF_FUNC_map_lookup_elem &&
9616 		    !may_update_sockmap(env, func_id))
9617 			goto error;
9618 		break;
9619 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
9620 		if (func_id != BPF_FUNC_sk_select_reuseport)
9621 			goto error;
9622 		break;
9623 	case BPF_MAP_TYPE_QUEUE:
9624 	case BPF_MAP_TYPE_STACK:
9625 		if (func_id != BPF_FUNC_map_peek_elem &&
9626 		    func_id != BPF_FUNC_map_pop_elem &&
9627 		    func_id != BPF_FUNC_map_push_elem)
9628 			goto error;
9629 		break;
9630 	case BPF_MAP_TYPE_SK_STORAGE:
9631 		if (func_id != BPF_FUNC_sk_storage_get &&
9632 		    func_id != BPF_FUNC_sk_storage_delete &&
9633 		    func_id != BPF_FUNC_kptr_xchg)
9634 			goto error;
9635 		break;
9636 	case BPF_MAP_TYPE_INODE_STORAGE:
9637 		if (func_id != BPF_FUNC_inode_storage_get &&
9638 		    func_id != BPF_FUNC_inode_storage_delete &&
9639 		    func_id != BPF_FUNC_kptr_xchg)
9640 			goto error;
9641 		break;
9642 	case BPF_MAP_TYPE_TASK_STORAGE:
9643 		if (func_id != BPF_FUNC_task_storage_get &&
9644 		    func_id != BPF_FUNC_task_storage_delete &&
9645 		    func_id != BPF_FUNC_kptr_xchg)
9646 			goto error;
9647 		break;
9648 	case BPF_MAP_TYPE_CGRP_STORAGE:
9649 		if (func_id != BPF_FUNC_cgrp_storage_get &&
9650 		    func_id != BPF_FUNC_cgrp_storage_delete &&
9651 		    func_id != BPF_FUNC_kptr_xchg)
9652 			goto error;
9653 		break;
9654 	case BPF_MAP_TYPE_BLOOM_FILTER:
9655 		if (func_id != BPF_FUNC_map_peek_elem &&
9656 		    func_id != BPF_FUNC_map_push_elem)
9657 			goto error;
9658 		break;
9659 	default:
9660 		break;
9661 	}
9662 
9663 	/* ... and second from the function itself. */
9664 	switch (func_id) {
9665 	case BPF_FUNC_tail_call:
9666 		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
9667 			goto error;
9668 		if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
9669 			verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
9670 			return -EINVAL;
9671 		}
9672 		break;
9673 	case BPF_FUNC_perf_event_read:
9674 	case BPF_FUNC_perf_event_output:
9675 	case BPF_FUNC_perf_event_read_value:
9676 	case BPF_FUNC_skb_output:
9677 	case BPF_FUNC_xdp_output:
9678 		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
9679 			goto error;
9680 		break;
9681 	case BPF_FUNC_ringbuf_output:
9682 	case BPF_FUNC_ringbuf_reserve:
9683 	case BPF_FUNC_ringbuf_query:
9684 	case BPF_FUNC_ringbuf_reserve_dynptr:
9685 	case BPF_FUNC_ringbuf_submit_dynptr:
9686 	case BPF_FUNC_ringbuf_discard_dynptr:
9687 		if (map->map_type != BPF_MAP_TYPE_RINGBUF)
9688 			goto error;
9689 		break;
9690 	case BPF_FUNC_user_ringbuf_drain:
9691 		if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
9692 			goto error;
9693 		break;
9694 	case BPF_FUNC_get_stackid:
9695 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
9696 			goto error;
9697 		break;
9698 	case BPF_FUNC_current_task_under_cgroup:
9699 	case BPF_FUNC_skb_under_cgroup:
9700 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
9701 			goto error;
9702 		break;
9703 	case BPF_FUNC_redirect_map:
9704 		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
9705 		    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
9706 		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
9707 		    map->map_type != BPF_MAP_TYPE_XSKMAP)
9708 			goto error;
9709 		break;
9710 	case BPF_FUNC_sk_redirect_map:
9711 	case BPF_FUNC_msg_redirect_map:
9712 	case BPF_FUNC_sock_map_update:
9713 		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
9714 			goto error;
9715 		break;
9716 	case BPF_FUNC_sk_redirect_hash:
9717 	case BPF_FUNC_msg_redirect_hash:
9718 	case BPF_FUNC_sock_hash_update:
9719 		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
9720 			goto error;
9721 		break;
9722 	case BPF_FUNC_get_local_storage:
9723 		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
9724 		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
9725 			goto error;
9726 		break;
9727 	case BPF_FUNC_sk_select_reuseport:
9728 		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
9729 		    map->map_type != BPF_MAP_TYPE_SOCKMAP &&
9730 		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
9731 			goto error;
9732 		break;
9733 	case BPF_FUNC_map_pop_elem:
9734 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
9735 		    map->map_type != BPF_MAP_TYPE_STACK)
9736 			goto error;
9737 		break;
9738 	case BPF_FUNC_map_peek_elem:
9739 	case BPF_FUNC_map_push_elem:
9740 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
9741 		    map->map_type != BPF_MAP_TYPE_STACK &&
9742 		    map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
9743 			goto error;
9744 		break;
9745 	case BPF_FUNC_map_lookup_percpu_elem:
9746 		if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
9747 		    map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
9748 		    map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
9749 			goto error;
9750 		break;
9751 	case BPF_FUNC_sk_storage_get:
9752 	case BPF_FUNC_sk_storage_delete:
9753 		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
9754 			goto error;
9755 		break;
9756 	case BPF_FUNC_inode_storage_get:
9757 	case BPF_FUNC_inode_storage_delete:
9758 		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
9759 			goto error;
9760 		break;
9761 	case BPF_FUNC_task_storage_get:
9762 	case BPF_FUNC_task_storage_delete:
9763 		if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
9764 			goto error;
9765 		break;
9766 	case BPF_FUNC_cgrp_storage_get:
9767 	case BPF_FUNC_cgrp_storage_delete:
9768 		if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
9769 			goto error;
9770 		break;
9771 	default:
9772 		break;
9773 	}
9774 
9775 	return 0;
9776 error:
9777 	verbose(env, "cannot pass map_type %d into func %s#%d\n",
9778 		map->map_type, func_id_name(func_id), func_id);
9779 	return -EINVAL;
9780 }
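
/* For illustration, the two-way check above rejects mismatches such as
 * (hypothetical program):
 *
 *	bpf_tail_call(ctx, &my_hash_map, 0);	// not a PROG_ARRAY
 *
 * which fails with the "cannot pass map_type ... into func" error above.
 */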
9781 
9782 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
9783 {
9784 	int count = 0;
9785 
9786 	if (arg_type_is_raw_mem(fn->arg1_type))
9787 		count++;
9788 	if (arg_type_is_raw_mem(fn->arg2_type))
9789 		count++;
9790 	if (arg_type_is_raw_mem(fn->arg3_type))
9791 		count++;
9792 	if (arg_type_is_raw_mem(fn->arg4_type))
9793 		count++;
9794 	if (arg_type_is_raw_mem(fn->arg5_type))
9795 		count++;
9796 
9797 	/* We only support one arg being in raw mode at the moment,
9798 	 * which is sufficient for the helper functions we have
9799 	 * right now.
9800 	 */
9801 	return count <= 1;
9802 }
9803 
9804 static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
9805 {
9806 	bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
9807 	bool has_size = fn->arg_size[arg] != 0;
9808 	bool is_next_size = false;
9809 
9810 	if (arg + 1 < ARRAY_SIZE(fn->arg_type))
9811 		is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
9812 
9813 	if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
9814 		return is_next_size;
9815 
9816 	return has_size == is_next_size || is_next_size == is_fixed;
9817 }
9818 
9819 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
9820 {
9821 	/* bpf_xxx(..., buf, len) call will access 'len'
9822 	 * bytes from memory 'buf'. Both arg types need
9823 	 * to be paired, so make sure there's no buggy
9824 	 * helper function specification.
9825 	 */
9826 	if (arg_type_is_mem_size(fn->arg1_type) ||
9827 	    check_args_pair_invalid(fn, 0) ||
9828 	    check_args_pair_invalid(fn, 1) ||
9829 	    check_args_pair_invalid(fn, 2) ||
9830 	    check_args_pair_invalid(fn, 3) ||
9831 	    check_args_pair_invalid(fn, 4))
9832 		return false;
9833 
9834 	return true;
9835 }
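
/* For illustration (a sketch, not a proto defined here): helper prototypes
 * express the buf/len pairing validated above as adjacent argument types,
 * roughly:
 *
 *	.arg1_type = ARG_PTR_TO_MEM | MEM_UNINIT | MEM_WRITE,
 *	.arg2_type = ARG_CONST_SIZE,
 *
 * modelling calls such as bpf_get_current_comm(buf, sizeof(buf)).
 */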
9836 
9837 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
9838 {
9839 	int i;
9840 
9841 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
9842 		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
9843 			return !!fn->arg_btf_id[i];
9844 		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
9845 			return fn->arg_btf_id[i] == BPF_PTR_POISON;
9846 		if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
9847 		    /* arg_btf_id and arg_size are in a union. */
9848 		    (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
9849 		     !(fn->arg_type[i] & MEM_FIXED_SIZE)))
9850 			return false;
9851 	}
9852 
9853 	return true;
9854 }
9855 
9856 static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
9857 {
9858 	return check_raw_mode_ok(fn) &&
9859 	       check_arg_pair_ok(fn) &&
9860 	       check_btf_id_ok(fn) ? 0 : -EINVAL;
9861 }
9862 
9863 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
9864  * are now invalid, so turn them into unknown SCALAR_VALUE.
9865  *
9866  * This also applies to dynptr slices belonging to skb and xdp dynptrs,
9867  * since these slices point to packet data.
9868  */
9869 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
9870 {
9871 	struct bpf_func_state *state;
9872 	struct bpf_reg_state *reg;
9873 
9874 	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
9875 		if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
9876 			mark_reg_invalid(env, reg);
9877 	}));
9878 }
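
/* Illustrative reason for the invalidation above (hypothetical TC program):
 *
 *	void *data = (void *)(long)skb->data;
 *	void *data_end = (void *)(long)skb->data_end;
 *	if (data + 4 > data_end)
 *		return TC_ACT_SHOT;
 *	bpf_skb_pull_data(skb, 64);	// may reallocate packet data
 *
 * After the helper call the saved 'data'/'data_end' are unknown scalars and
 * must be re-read from the ctx and re-checked before further packet access.
 */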
9879 
9880 enum {
9881 	AT_PKT_END = -1,
9882 	BEYOND_PKT_END = -2,
9883 };
9884 
9885 static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
9886 {
9887 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
9888 	struct bpf_reg_state *reg = &state->regs[regn];
9889 
9890 	if (reg->type != PTR_TO_PACKET)
9891 		/* PTR_TO_PACKET_META is not supported yet */
9892 		return;
9893 
9894 	/* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
9895 	 * How far beyond pkt_end it goes is unknown.
9896 	 * if (!range_open) it's the case of pkt >= pkt_end
9897 	 * if (range_open) it's the case of pkt > pkt_end
9898 	 * hence this pointer is at least 1 byte bigger than pkt_end
9899 	 */
9900 	if (range_open)
9901 		reg->range = BEYOND_PKT_END;
9902 	else
9903 		reg->range = AT_PKT_END;
9904 }
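
/* Illustrative XDP snippet (hypothetical) that reaches the marking above:
 *
 *	void *data = (void *)(long)ctx->data;
 *	void *data_end = (void *)(long)ctx->data_end;
 *	if (data + 8 > data_end)	// in the taken branch 'data + 8' is the
 *		return XDP_DROP;	// reg whose range becomes BEYOND_PKT_END
 *
 * With '>=' instead of '>' that branch would get AT_PKT_END instead.
 */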
9905 
9906 static int release_reference_nomark(struct bpf_verifier_state *state, int ref_obj_id)
9907 {
9908 	int i;
9909 
9910 	for (i = 0; i < state->acquired_refs; i++) {
9911 		if (state->refs[i].type != REF_TYPE_PTR)
9912 			continue;
9913 		if (state->refs[i].id == ref_obj_id) {
9914 			release_reference_state(state, i);
9915 			return 0;
9916 		}
9917 	}
9918 	return -EINVAL;
9919 }
9920 
9921 /* The pointer with the specified id has released its reference to kernel
9922  * resources. Identify all copies of the same pointer and clear the reference.
9923  *
9924  * This is the release function corresponding to acquire_reference(). Idempotent.
9925  */
9926 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id)
9927 {
9928 	struct bpf_verifier_state *vstate = env->cur_state;
9929 	struct bpf_func_state *state;
9930 	struct bpf_reg_state *reg;
9931 	int err;
9932 
9933 	err = release_reference_nomark(vstate, ref_obj_id);
9934 	if (err)
9935 		return err;
9936 
9937 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
9938 		if (reg->ref_obj_id == ref_obj_id)
9939 			mark_reg_invalid(env, reg);
9940 	}));
9941 
9942 	return 0;
9943 }
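
/* Illustrative pattern (hypothetical program) relying on the release above:
 *
 *	struct bpf_sock *sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple),
 *						BPF_F_CURRENT_NETNS, 0);
 *	if (sk)
 *		bpf_sk_release(sk);	// every copy of 'sk' is invalidated here
 */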
9944 
9945 static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
9946 {
9947 	struct bpf_func_state *unused;
9948 	struct bpf_reg_state *reg;
9949 
9950 	bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
9951 		if (type_is_non_owning_ref(reg->type))
9952 			mark_reg_invalid(env, reg);
9953 	}));
9954 }
9955 
9956 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
9957 				    struct bpf_reg_state *regs)
9958 {
9959 	int i;
9960 
9961 	/* after the call registers r0 - r5 were scratched */
9962 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
9963 		mark_reg_not_init(env, regs, caller_saved[i]);
9964 		__check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK);
9965 	}
9966 }
9967 
9968 typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
9969 				   struct bpf_func_state *caller,
9970 				   struct bpf_func_state *callee,
9971 				   int insn_idx);
9972 
9973 static int set_callee_state(struct bpf_verifier_env *env,
9974 			    struct bpf_func_state *caller,
9975 			    struct bpf_func_state *callee, int insn_idx);
9976 
9977 static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite,
9978 			    set_callee_state_fn set_callee_state_cb,
9979 			    struct bpf_verifier_state *state)
9980 {
9981 	struct bpf_func_state *caller, *callee;
9982 	int err;
9983 
9984 	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
9985 		verbose(env, "the call stack of %d frames is too deep\n",
9986 			state->curframe + 2);
9987 		return -E2BIG;
9988 	}
9989 
9990 	if (state->frame[state->curframe + 1]) {
9991 		verbose(env, "verifier bug. Frame %d already allocated\n",
9992 			state->curframe + 1);
9993 		return -EFAULT;
9994 	}
9995 
9996 	caller = state->frame[state->curframe];
9997 	callee = kzalloc(sizeof(*callee), GFP_KERNEL);
9998 	if (!callee)
9999 		return -ENOMEM;
10000 	state->frame[state->curframe + 1] = callee;
10001 
10002 	/* callee cannot access r0, r6 - r9 for reading and has to write
10003 	 * into its own stack before reading from it.
10004 	 * callee can read/write into caller's stack
10005 	 */
10006 	init_func_state(env, callee,
10007 			/* remember the callsite, it will be used by bpf_exit */
10008 			callsite,
10009 			state->curframe + 1 /* frameno within this callchain */,
10010 			subprog /* subprog number within this prog */);
10011 	err = set_callee_state_cb(env, caller, callee, callsite);
10012 	if (err)
10013 		goto err_out;
10014 
10015 	/* only increment it after check_reg_arg() finished */
10016 	state->curframe++;
10017 
10018 	return 0;
10019 
10020 err_out:
10021 	free_func_state(callee);
10022 	state->frame[state->curframe + 1] = NULL;
10023 	return err;
10024 }
10025 
10026 static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
10027 				    const struct btf *btf,
10028 				    struct bpf_reg_state *regs)
10029 {
10030 	struct bpf_subprog_info *sub = subprog_info(env, subprog);
10031 	struct bpf_verifier_log *log = &env->log;
10032 	u32 i;
10033 	int ret;
10034 
10035 	ret = btf_prepare_func_args(env, subprog);
10036 	if (ret)
10037 		return ret;
10038 
10039 	/* check that BTF function arguments match actual types that the
10040 	 * verifier sees.
10041 	 */
10042 	for (i = 0; i < sub->arg_cnt; i++) {
10043 		u32 regno = i + 1;
10044 		struct bpf_reg_state *reg = &regs[regno];
10045 		struct bpf_subprog_arg_info *arg = &sub->args[i];
10046 
10047 		if (arg->arg_type == ARG_ANYTHING) {
10048 			if (reg->type != SCALAR_VALUE) {
10049 				bpf_log(log, "R%d is not a scalar\n", regno);
10050 				return -EINVAL;
10051 			}
10052 		} else if (arg->arg_type == ARG_PTR_TO_CTX) {
10053 			ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE);
10054 			if (ret < 0)
10055 				return ret;
10056 			/* If function expects ctx type in BTF check that caller
10057 			 * is passing PTR_TO_CTX.
10058 			 */
10059 			if (reg->type != PTR_TO_CTX) {
10060 				bpf_log(log, "arg#%d expects pointer to ctx\n", i);
10061 				return -EINVAL;
10062 			}
10063 		} else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
10064 			ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE);
10065 			if (ret < 0)
10066 				return ret;
10067 			if (check_mem_reg(env, reg, regno, arg->mem_size))
10068 				return -EINVAL;
10069 			if (!(arg->arg_type & PTR_MAYBE_NULL) && (reg->type & PTR_MAYBE_NULL)) {
10070 				bpf_log(log, "arg#%d is expected to be non-NULL\n", i);
10071 				return -EINVAL;
10072 			}
10073 		} else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
10074 			/*
10075 			 * Can pass any value and the kernel won't crash, but
10076 			 * only PTR_TO_ARENA or SCALAR make sense. Everything
10077 			 * else is a bug in the bpf program. Point it out to
10078 			 * the user at verification time instead of leaving
10079 			 * a run-time debugging nightmare.
10080 			 */
10081 			if (reg->type != PTR_TO_ARENA && reg->type != SCALAR_VALUE) {
10082 				bpf_log(log, "R%d is not a pointer to arena or scalar.\n", regno);
10083 				return -EINVAL;
10084 			}
10085 		} else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) {
10086 			ret = check_func_arg_reg_off(env, reg, regno, ARG_PTR_TO_DYNPTR);
10087 			if (ret)
10088 				return ret;
10089 
10090 			ret = process_dynptr_func(env, regno, -1, arg->arg_type, 0);
10091 			if (ret)
10092 				return ret;
10093 		} else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
10094 			struct bpf_call_arg_meta meta;
10095 			int err;
10096 
10097 			if (register_is_null(reg) && type_may_be_null(arg->arg_type))
10098 				continue;
10099 
10100 			memset(&meta, 0, sizeof(meta)); /* leave func_id as zero */
10101 			err = check_reg_type(env, regno, arg->arg_type, &arg->btf_id, &meta);
10102 			err = err ?: check_func_arg_reg_off(env, reg, regno, arg->arg_type);
10103 			if (err)
10104 				return err;
10105 		} else {
10106 			bpf_log(log, "verifier bug: unrecognized arg#%d type %d\n",
10107 				i, arg->arg_type);
10108 			return -EFAULT;
10109 		}
10110 	}
10111 
10112 	return 0;
10113 }
10114 
10115 /* Compare BTF of a function call with given bpf_reg_state.
10116  * Returns:
10117  * EFAULT - there is a verifier bug. Abort verification.
10118  * EINVAL - there is a type mismatch or BTF is not available.
10119  * 0 - BTF matches with what bpf_reg_state expects.
10120  * Only PTR_TO_CTX and SCALAR_VALUE states are recognized.
10121  */
10122 static int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
10123 				  struct bpf_reg_state *regs)
10124 {
10125 	struct bpf_prog *prog = env->prog;
10126 	struct btf *btf = prog->aux->btf;
10127 	u32 btf_id;
10128 	int err;
10129 
10130 	if (!prog->aux->func_info)
10131 		return -EINVAL;
10132 
10133 	btf_id = prog->aux->func_info[subprog].type_id;
10134 	if (!btf_id)
10135 		return -EFAULT;
10136 
10137 	if (prog->aux->func_info_aux[subprog].unreliable)
10138 		return -EINVAL;
10139 
10140 	err = btf_check_func_arg_match(env, subprog, btf, regs);
10141 	/* Compiler optimizations can remove arguments from static functions
10142 	 * or a mismatched type can be passed into a global function.
10143 	 * In such cases mark the function as unreliable from BTF point of view.
10144 	 */
10145 	if (err)
10146 		prog->aux->func_info_aux[subprog].unreliable = true;
10147 	return err;
10148 }
10149 
10150 static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
10151 			      int insn_idx, int subprog,
10152 			      set_callee_state_fn set_callee_state_cb)
10153 {
10154 	struct bpf_verifier_state *state = env->cur_state, *callback_state;
10155 	struct bpf_func_state *caller, *callee;
10156 	int err;
10157 
10158 	caller = state->frame[state->curframe];
10159 	err = btf_check_subprog_call(env, subprog, caller->regs);
10160 	if (err == -EFAULT)
10161 		return err;
10162 
10163 	/* set_callee_state is used for direct subprog calls, but we are
10164 	 * interested in validating only BPF helpers that can call subprogs as
10165 	 * callbacks
10166 	 */
10167 	env->subprog_info[subprog].is_cb = true;
10168 	if (bpf_pseudo_kfunc_call(insn) &&
10169 	    !is_callback_calling_kfunc(insn->imm)) {
10170 		verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
10171 			func_id_name(insn->imm), insn->imm);
10172 		return -EFAULT;
10173 	} else if (!bpf_pseudo_kfunc_call(insn) &&
10174 		   !is_callback_calling_function(insn->imm)) { /* helper */
10175 		verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n",
10176 			func_id_name(insn->imm), insn->imm);
10177 		return -EFAULT;
10178 	}
10179 
10180 	if (is_async_callback_calling_insn(insn)) {
10181 		struct bpf_verifier_state *async_cb;
10182 
10183 		/* there is no real recursion here. timer and workqueue callbacks are async */
10184 		env->subprog_info[subprog].is_async_cb = true;
10185 		async_cb = push_async_cb(env, env->subprog_info[subprog].start,
10186 					 insn_idx, subprog,
10187 					 is_bpf_wq_set_callback_impl_kfunc(insn->imm));
10188 		if (!async_cb)
10189 			return -EFAULT;
10190 		callee = async_cb->frame[0];
10191 		callee->async_entry_cnt = caller->async_entry_cnt + 1;
10192 
10193 		/* Convert bpf_timer_set_callback() args into timer callback args */
10194 		err = set_callee_state_cb(env, caller, callee, insn_idx);
10195 		if (err)
10196 			return err;
10197 
10198 		return 0;
10199 	}
10200 
10201 	/* for callback functions enqueue entry to callback and
10202 	 * proceed with next instruction within current frame.
10203 	 */
10204 	callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false);
10205 	if (!callback_state)
10206 		return -ENOMEM;
10207 
10208 	err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb,
10209 			       callback_state);
10210 	if (err)
10211 		return err;
10212 
10213 	callback_state->callback_unroll_depth++;
10214 	callback_state->frame[callback_state->curframe - 1]->callback_depth++;
10215 	caller->callback_depth = 0;
10216 	return 0;
10217 }
10218 
10219 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
10220 			   int *insn_idx)
10221 {
10222 	struct bpf_verifier_state *state = env->cur_state;
10223 	struct bpf_func_state *caller;
10224 	int err, subprog, target_insn;
10225 
10226 	target_insn = *insn_idx + insn->imm + 1;
10227 	subprog = find_subprog(env, target_insn);
10228 	if (subprog < 0) {
10229 		verbose(env, "verifier bug. No program starts at insn %d\n", target_insn);
10230 		return -EFAULT;
10231 	}
10232 
10233 	caller = state->frame[state->curframe];
10234 	err = btf_check_subprog_call(env, subprog, caller->regs);
10235 	if (err == -EFAULT)
10236 		return err;
10237 	if (subprog_is_global(env, subprog)) {
10238 		const char *sub_name = subprog_name(env, subprog);
10239 
10240 		/* Only global subprogs cannot be called with a lock held. */
10241 		if (env->cur_state->active_locks) {
10242 			verbose(env, "global function calls are not allowed while holding a lock,\n"
10243 				     "use static function instead\n");
10244 			return -EINVAL;
10245 		}
10246 
10247 		/* Only global subprogs cannot be called with preemption disabled. */
10248 		if (env->cur_state->active_preempt_locks) {
10249 			verbose(env, "global function calls are not allowed with preemption disabled,\n"
10250 				     "use static function instead\n");
10251 			return -EINVAL;
10252 		}
10253 
10254 		if (env->cur_state->active_irq_id) {
10255 			verbose(env, "global function calls are not allowed with IRQs disabled,\n"
10256 				     "use static function instead\n");
10257 			return -EINVAL;
10258 		}
10259 
10260 		if (err) {
10261 			verbose(env, "Caller passes invalid args into func#%d ('%s')\n",
10262 				subprog, sub_name);
10263 			return err;
10264 		}
10265 
10266 		verbose(env, "Func#%d ('%s') is global and assumed valid.\n",
10267 			subprog, sub_name);
10268 		if (env->subprog_info[subprog].changes_pkt_data)
10269 			clear_all_pkt_pointers(env);
10270 		/* mark global subprog for verifying after main prog */
10271 		subprog_aux(env, subprog)->called = true;
10272 		clear_caller_saved_regs(env, caller->regs);
10273 
10274 		/* All global functions return a 64-bit SCALAR_VALUE */
10275 		mark_reg_unknown(env, caller->regs, BPF_REG_0);
10276 		caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
10277 
10278 		/* continue with next insn after call */
10279 		return 0;
10280 	}
10281 
10282 	/* for regular function entry setup new frame and continue
10283 	 * from that frame.
10284 	 */
10285 	err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state);
10286 	if (err)
10287 		return err;
10288 
10289 	clear_caller_saved_regs(env, caller->regs);
10290 
10291 	/* and go analyze first insn of the callee */
10292 	*insn_idx = env->subprog_info[subprog].start - 1;
10293 
10294 	if (env->log.level & BPF_LOG_LEVEL) {
10295 		verbose(env, "caller:\n");
10296 		print_verifier_state(env, state, caller->frameno, true);
10297 		verbose(env, "callee:\n");
10298 		print_verifier_state(env, state, state->curframe, true);
10299 	}
10300 
10301 	return 0;
10302 }
10303 
10304 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
10305 				   struct bpf_func_state *caller,
10306 				   struct bpf_func_state *callee)
10307 {
10308 	/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
10309 	 *      void *callback_ctx, u64 flags);
10310 	 * callback_fn(struct bpf_map *map, void *key, void *value,
10311 	 *      void *callback_ctx);
10312 	 */
10313 	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
10314 
10315 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
10316 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
10317 	callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
10318 
10319 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
10320 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
10321 	callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
10322 
10323 	/* pointer to stack or null */
10324 	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
10325 
10326 	/* unused */
10327 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
10328 	return 0;
10329 }
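
/* Illustrative bpf_for_each_map_elem() usage (hypothetical) matching the
 * callback register setup above:
 *
 *	static long count_cb(struct bpf_map *map, u32 *key, u64 *val, void *ctx)
 *	{
 *		return 0;	// 0 = continue iterating, 1 = stop
 *	}
 *	...
 *	bpf_for_each_map_elem(&my_map, count_cb, &my_ctx, 0);
 */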
10330 
10331 static int set_callee_state(struct bpf_verifier_env *env,
10332 			    struct bpf_func_state *caller,
10333 			    struct bpf_func_state *callee, int insn_idx)
10334 {
10335 	int i;
10336 
10337 	/* copy r1 - r5 args that callee can access.  The copy includes parent
10338 	 * pointers, which connects us up to the liveness chain
10339 	 */
10340 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
10341 		callee->regs[i] = caller->regs[i];
10342 	return 0;
10343 }
10344 
10345 static int set_map_elem_callback_state(struct bpf_verifier_env *env,
10346 				       struct bpf_func_state *caller,
10347 				       struct bpf_func_state *callee,
10348 				       int insn_idx)
10349 {
10350 	struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
10351 	struct bpf_map *map;
10352 	int err;
10353 
10354 	/* valid map_ptr and poison value does not matter */
10355 	map = insn_aux->map_ptr_state.map_ptr;
10356 	if (!map->ops->map_set_for_each_callback_args ||
10357 	    !map->ops->map_for_each_callback) {
10358 		verbose(env, "callback function not allowed for map\n");
10359 		return -ENOTSUPP;
10360 	}
10361 
10362 	err = map->ops->map_set_for_each_callback_args(env, caller, callee);
10363 	if (err)
10364 		return err;
10365 
10366 	callee->in_callback_fn = true;
10367 	callee->callback_ret_range = retval_range(0, 1);
10368 	return 0;
10369 }
10370 
10371 static int set_loop_callback_state(struct bpf_verifier_env *env,
10372 				   struct bpf_func_state *caller,
10373 				   struct bpf_func_state *callee,
10374 				   int insn_idx)
10375 {
10376 	/* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
10377 	 *	    u64 flags);
10378 	 * callback_fn(u64 index, void *callback_ctx);
10379 	 */
10380 	callee->regs[BPF_REG_1].type = SCALAR_VALUE;
10381 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
10382 
10383 	/* unused */
10384 	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
10385 	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
10386 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
10387 
10388 	callee->in_callback_fn = true;
10389 	callee->callback_ret_range = retval_range(0, 1);
10390 	return 0;
10391 }
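
/* Illustrative bpf_loop() usage (hypothetical) matching the callback ABI
 * set up above:
 *
 *	static long loop_cb(u64 index, void *ctx)
 *	{
 *		return 0;	// 0 = continue, 1 = break out of the loop
 *	}
 *	...
 *	bpf_loop(100, loop_cb, &my_ctx, 0);
 */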
10392 
10393 static int set_timer_callback_state(struct bpf_verifier_env *env,
10394 				    struct bpf_func_state *caller,
10395 				    struct bpf_func_state *callee,
10396 				    int insn_idx)
10397 {
10398 	struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
10399 
10400 	/* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
10401 	 * callback_fn(struct bpf_map *map, void *key, void *value);
10402 	 */
10403 	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
10404 	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
10405 	callee->regs[BPF_REG_1].map_ptr = map_ptr;
10406 
10407 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
10408 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
10409 	callee->regs[BPF_REG_2].map_ptr = map_ptr;
10410 
10411 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
10412 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
10413 	callee->regs[BPF_REG_3].map_ptr = map_ptr;
10414 
10415 	/* unused */
10416 	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
10417 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
10418 	callee->in_async_callback_fn = true;
10419 	callee->callback_ret_range = retval_range(0, 1);
10420 	return 0;
10421 }
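
/* Example (illustrative sketch, not part of the verifier): a timer callback
 * matching the register setup above - R1 = map, R2 = key, R3 = map value
 * that embeds the struct bpf_timer. The element type and names below are
 * hypothetical.
 *
 *   struct elem { struct bpf_timer timer; u64 deadline; };
 *
 *   static int timer_cb(void *map, int *key, struct elem *val)
 *   {
 *           ... runs asynchronously, hence in_async_callback_fn above ...
 *           return 0;
 *   }
 *
 *   bpf_timer_set_callback(&val->timer, timer_cb);
 */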
10422 
10423 static int set_find_vma_callback_state(struct bpf_verifier_env *env,
10424 				       struct bpf_func_state *caller,
10425 				       struct bpf_func_state *callee,
10426 				       int insn_idx)
10427 {
10428 	/* bpf_find_vma(struct task_struct *task, u64 addr,
10429 	 *               void *callback_fn, void *callback_ctx, u64 flags)
10430 	 * (callback_fn)(struct task_struct *task,
10431 	 *               struct vm_area_struct *vma, void *callback_ctx);
10432 	 */
10433 	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
10434 
10435 	callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
10436 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
10437 	callee->regs[BPF_REG_2].btf = btf_vmlinux;
10438 	callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
10439 
10440 	/* pointer to stack or null */
10441 	callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
10442 
10443 	/* unused */
10444 	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
10445 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
10446 	callee->in_callback_fn = true;
10447 	callee->callback_ret_range = retval_range(0, 1);
10448 	return 0;
10449 }
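
/* Example (illustrative sketch, not part of the verifier): a bpf_find_vma()
 * callback matching the setup above - R1 = task, R2 = vma (PTR_TO_BTF_ID of
 * vm_area_struct), R3 = callback_ctx. Names and the ctx struct are
 * hypothetical.
 *
 *   struct cb_data { u64 vm_start; };
 *
 *   static long vma_cb(struct task_struct *task, struct vm_area_struct *vma,
 *                      void *ctx)
 *   {
 *           ((struct cb_data *)ctx)->vm_start = vma->vm_start;
 *           return 0;
 *   }
 *
 *   bpf_find_vma(task, addr, vma_cb, &data, 0);
 */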
10450 
10451 static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
10452 					   struct bpf_func_state *caller,
10453 					   struct bpf_func_state *callee,
10454 					   int insn_idx)
10455 {
10456 	/* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn,
10457 	 *			  void *callback_ctx, u64 flags);
10458 	 * callback_fn(const struct bpf_dynptr *dynptr, void *callback_ctx);
10459 	 */
10460 	__mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
10461 	mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
10462 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
10463 
10464 	/* unused */
10465 	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
10466 	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
10467 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
10468 
10469 	callee->in_callback_fn = true;
10470 	callee->callback_ret_range = retval_range(0, 1);
10471 	return 0;
10472 }
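
/* Example (illustrative sketch, not part of the verifier): a
 * bpf_user_ringbuf_drain() callback matching the setup above - R1 is a local
 * dynptr describing one user-submitted sample, R2 is callback_ctx. The
 * sample struct and names are hypothetical.
 *
 *   static long drain_cb(struct bpf_dynptr *dynptr, void *ctx)
 *   {
 *           struct sample s;
 *
 *           if (bpf_dynptr_read(&s, sizeof(s), dynptr, 0, 0))
 *                   return 1;         (stop draining on short/invalid sample)
 *           ... consume s ...
 *           return 0;                 (keep draining)
 *   }
 *
 *   bpf_user_ringbuf_drain(&user_rb, drain_cb, NULL, 0);
 */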
10473 
10474 static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
10475 					 struct bpf_func_state *caller,
10476 					 struct bpf_func_state *callee,
10477 					 int insn_idx)
10478 {
10479 	/* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
10480 	 *                     bool (*less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
10481 	 *
10482 	 * 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset
10483 	 * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
10484 	 * by this point, so look at 'root'
10485 	 */
10486 	struct btf_field *field;
10487 
10488 	field = reg_find_field_offset(&caller->regs[BPF_REG_1], caller->regs[BPF_REG_1].off,
10489 				      BPF_RB_ROOT);
10490 	if (!field || !field->graph_root.value_btf_id)
10491 		return -EFAULT;
10492 
10493 	mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root);
10494 	ref_set_non_owning(env, &callee->regs[BPF_REG_1]);
10495 	mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
10496 	ref_set_non_owning(env, &callee->regs[BPF_REG_2]);
10497 
10498 	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
10499 	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
10500 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
10501 	callee->in_callback_fn = true;
10502 	callee->callback_ret_range = retval_range(0, 1);
10503 	return 0;
10504 }
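
/* Example (illustrative sketch, not part of the verifier): a 'less' callback
 * matching the setup above - both arguments are non-owning references to the
 * rb_node embedded in the program's node type. The node type and names are
 * hypothetical; bpf_rbtree_add() must be called under the matching
 * bpf_spin_lock.
 *
 *   struct node_data { struct bpf_rb_node node; u64 key; };
 *
 *   static bool node_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
 *   {
 *           struct node_data *na = container_of(a, struct node_data, node);
 *           struct node_data *nb = container_of(b, struct node_data, node);
 *
 *           return na->key < nb->key;
 *   }
 *
 *   bpf_spin_lock(&glock);
 *   bpf_rbtree_add(&groot, &n->node, node_less);
 *   bpf_spin_unlock(&glock);
 */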
10505 
10506 static bool is_rbtree_lock_required_kfunc(u32 btf_id);
10507 
10508 /* Are we currently verifying the callback for an rbtree helper that must
10509  * be called with the lock held? If so, there is no need to complain about
10510  * an unreleased lock.
10511  */
10512 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
10513 {
10514 	struct bpf_verifier_state *state = env->cur_state;
10515 	struct bpf_insn *insn = env->prog->insnsi;
10516 	struct bpf_func_state *callee;
10517 	int kfunc_btf_id;
10518 
10519 	if (!state->curframe)
10520 		return false;
10521 
10522 	callee = state->frame[state->curframe];
10523 
10524 	if (!callee->in_callback_fn)
10525 		return false;
10526 
10527 	kfunc_btf_id = insn[callee->callsite].imm;
10528 	return is_rbtree_lock_required_kfunc(kfunc_btf_id);
10529 }
10530 
10531 static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg,
10532 				bool return_32bit)
10533 {
10534 	if (return_32bit)
10535 		return range.minval <= reg->s32_min_value && reg->s32_max_value <= range.maxval;
10536 	else
10537 		return range.minval <= reg->smin_value && reg->smax_value <= range.maxval;
10538 }
10539 
10540 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
10541 {
10542 	struct bpf_verifier_state *state = env->cur_state, *prev_st;
10543 	struct bpf_func_state *caller, *callee;
10544 	struct bpf_reg_state *r0;
10545 	bool in_callback_fn;
10546 	int err;
10547 
10548 	callee = state->frame[state->curframe];
10549 	r0 = &callee->regs[BPF_REG_0];
10550 	if (r0->type == PTR_TO_STACK) {
10551 		/* technically it's ok to return caller's stack pointer
10552 		 * (or caller's caller's pointer) back to the caller,
10553 		 * since these pointers are valid. Only current stack
10554 		 * pointer will be invalid as soon as function exits,
10555 		 * but let's be conservative
10556 		 */
10557 		verbose(env, "cannot return stack pointer to the caller\n");
10558 		return -EINVAL;
10559 	}
10560 
10561 	caller = state->frame[state->curframe - 1];
10562 	if (callee->in_callback_fn) {
10563 		if (r0->type != SCALAR_VALUE) {
10564 			verbose(env, "R0 not a scalar value\n");
10565 			return -EACCES;
10566 		}
10567 
10568 		/* we are going to rely on register's precise value */
10569 		err = mark_reg_read(env, r0, r0->parent, REG_LIVE_READ64);
10570 		err = err ?: mark_chain_precision(env, BPF_REG_0);
10571 		if (err)
10572 			return err;
10573 
10574 		/* enforce R0 return value range, and bpf_callback_t returns 64bit */
10575 		if (!retval_range_within(callee->callback_ret_range, r0, false)) {
10576 			verbose_invalid_scalar(env, r0, callee->callback_ret_range,
10577 					       "At callback return", "R0");
10578 			return -EINVAL;
10579 		}
10580 		if (!calls_callback(env, callee->callsite)) {
10581 			verbose(env, "BUG: in callback at %d, callsite %d !calls_callback\n",
10582 				*insn_idx, callee->callsite);
10583 			return -EFAULT;
10584 		}
10585 	} else {
10586 		/* return to the caller whatever r0 had in the callee */
10587 		caller->regs[BPF_REG_0] = *r0;
10588 	}
10589 
10590 	/* for callbacks like bpf_loop or bpf_for_each_map_elem, go back to the callsite,
10591 	 * where the function call logic will reschedule the callback visit. If the
10592 	 * iteration converges, is_state_visited() will eventually prune that visit.
10593 	 */
10594 	in_callback_fn = callee->in_callback_fn;
10595 	if (in_callback_fn)
10596 		*insn_idx = callee->callsite;
10597 	else
10598 		*insn_idx = callee->callsite + 1;
10599 
10600 	if (env->log.level & BPF_LOG_LEVEL) {
10601 		verbose(env, "returning from callee:\n");
10602 		print_verifier_state(env, state, callee->frameno, true);
10603 		verbose(env, "to caller at %d:\n", *insn_idx);
10604 		print_verifier_state(env, state, caller->frameno, true);
10605 	}
10606 	/* clear everything in the callee. In case of exceptional exits using
10607 	 * bpf_throw, this will be done by copy_verifier_state for extra frames. */
10608 	free_func_state(callee);
10609 	state->frame[state->curframe--] = NULL;
10610 
10611 	/* for callbacks widen imprecise scalars to make programs like below verify:
10612 	 *
10613 	 *   struct ctx { int i; };
10614 	 *   void cb(int idx, struct ctx *ctx) { ctx->i++; ... }
10615 	 *   ...
10616 	 *   struct ctx ctx = { .i = 0 };
10617 	 *   bpf_loop(100, cb, &ctx, 0);
10618 	 *
10619 	 * This is similar to what is done in process_iter_next_call() for open
10620 	 * coded iterators.
10621 	 */
10622 	prev_st = in_callback_fn ? find_prev_entry(env, state, *insn_idx) : NULL;
10623 	if (prev_st) {
10624 		err = widen_imprecise_scalars(env, prev_st, state);
10625 		if (err)
10626 			return err;
10627 	}
10628 	return 0;
10629 }
10630 
10631 static int do_refine_retval_range(struct bpf_verifier_env *env,
10632 				  struct bpf_reg_state *regs, int ret_type,
10633 				  int func_id,
10634 				  struct bpf_call_arg_meta *meta)
10635 {
10636 	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
10637 
10638 	if (ret_type != RET_INTEGER)
10639 		return 0;
10640 
10641 	switch (func_id) {
10642 	case BPF_FUNC_get_stack:
10643 	case BPF_FUNC_get_task_stack:
10644 	case BPF_FUNC_probe_read_str:
10645 	case BPF_FUNC_probe_read_kernel_str:
10646 	case BPF_FUNC_probe_read_user_str:
10647 		ret_reg->smax_value = meta->msize_max_value;
10648 		ret_reg->s32_max_value = meta->msize_max_value;
10649 		ret_reg->smin_value = -MAX_ERRNO;
10650 		ret_reg->s32_min_value = -MAX_ERRNO;
10651 		reg_bounds_sync(ret_reg);
10652 		break;
10653 	case BPF_FUNC_get_smp_processor_id:
10654 		ret_reg->umax_value = nr_cpu_ids - 1;
10655 		ret_reg->u32_max_value = nr_cpu_ids - 1;
10656 		ret_reg->smax_value = nr_cpu_ids - 1;
10657 		ret_reg->s32_max_value = nr_cpu_ids - 1;
10658 		ret_reg->umin_value = 0;
10659 		ret_reg->u32_min_value = 0;
10660 		ret_reg->smin_value = 0;
10661 		ret_reg->s32_min_value = 0;
10662 		reg_bounds_sync(ret_reg);
10663 		break;
10664 	}
10665 
10666 	return reg_bounds_sanity_check(env, ret_reg, "retval");
10667 }
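
/* Example (illustrative sketch): the effect of the refinement above from the
 * BPF program side. After
 *
 *   char buf[64];
 *   long n = bpf_probe_read_kernel_str(buf, sizeof(buf), src);
 *
 * R0 is known to lie in [-MAX_ERRNO, 64], so a subsequent
 * "if (n > 0) ... buf[n - 1] ..." access pattern can be proven in-bounds.
 * Similarly, bpf_get_smp_processor_id() is known to return a value below
 * nr_cpu_ids, which tightens later bounds checks on values derived from it.
 */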
10668 
10669 static int
10670 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
10671 		int func_id, int insn_idx)
10672 {
10673 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
10674 	struct bpf_map *map = meta->map_ptr;
10675 
10676 	if (func_id != BPF_FUNC_tail_call &&
10677 	    func_id != BPF_FUNC_map_lookup_elem &&
10678 	    func_id != BPF_FUNC_map_update_elem &&
10679 	    func_id != BPF_FUNC_map_delete_elem &&
10680 	    func_id != BPF_FUNC_map_push_elem &&
10681 	    func_id != BPF_FUNC_map_pop_elem &&
10682 	    func_id != BPF_FUNC_map_peek_elem &&
10683 	    func_id != BPF_FUNC_for_each_map_elem &&
10684 	    func_id != BPF_FUNC_redirect_map &&
10685 	    func_id != BPF_FUNC_map_lookup_percpu_elem)
10686 		return 0;
10687 
10688 	if (map == NULL) {
10689 		verbose(env, "kernel subsystem misconfigured verifier\n");
10690 		return -EINVAL;
10691 	}
10692 
10693 	/* In case of read-only, some additional restrictions
10694 	 * need to be applied in order to prevent altering the
10695 	 * state of the map from program side.
10696 	 */
10697 	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
10698 	    (func_id == BPF_FUNC_map_delete_elem ||
10699 	     func_id == BPF_FUNC_map_update_elem ||
10700 	     func_id == BPF_FUNC_map_push_elem ||
10701 	     func_id == BPF_FUNC_map_pop_elem)) {
10702 		verbose(env, "write into map forbidden\n");
10703 		return -EACCES;
10704 	}
10705 
10706 	if (!aux->map_ptr_state.map_ptr)
10707 		bpf_map_ptr_store(aux, meta->map_ptr,
10708 				  !meta->map_ptr->bypass_spec_v1, false);
10709 	else if (aux->map_ptr_state.map_ptr != meta->map_ptr)
10710 		bpf_map_ptr_store(aux, meta->map_ptr,
10711 				  !meta->map_ptr->bypass_spec_v1, true);
10712 	return 0;
10713 }
10714 
10715 static int
10716 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
10717 		int func_id, int insn_idx)
10718 {
10719 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
10720 	struct bpf_reg_state *regs = cur_regs(env), *reg;
10721 	struct bpf_map *map = meta->map_ptr;
10722 	u64 val, max;
10723 	int err;
10724 
10725 	if (func_id != BPF_FUNC_tail_call)
10726 		return 0;
10727 	if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
10728 		verbose(env, "kernel subsystem misconfigured verifier\n");
10729 		return -EINVAL;
10730 	}
10731 
10732 	reg = &regs[BPF_REG_3];
10733 	val = reg->var_off.value;
10734 	max = map->max_entries;
10735 
10736 	if (!(is_reg_const(reg, false) && val < max)) {
10737 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
10738 		return 0;
10739 	}
10740 
10741 	err = mark_chain_precision(env, BPF_REG_3);
10742 	if (err)
10743 		return err;
10744 	if (bpf_map_key_unseen(aux))
10745 		bpf_map_key_store(aux, val);
10746 	else if (!bpf_map_key_poisoned(aux) &&
10747 		  bpf_map_key_immediate(aux) != val)
10748 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
10749 	return 0;
10750 }
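
/* Example (illustrative sketch): what the constant-key tracking above enables.
 * The map and index here are hypothetical.
 *
 *   bpf_tail_call(ctx, &prog_array, 3);
 *
 * If R3 is a known constant below max_entries, the key (3) is recorded in the
 * insn aux data so later passes may specialize this tail call for that slot.
 * A variable or out-of-range key stores BPF_MAP_KEY_POISON instead, keeping
 * the generic tail-call path.
 */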
10751 
10752 static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
10753 {
10754 	struct bpf_verifier_state *state = env->cur_state;
10755 	bool refs_lingering = false;
10756 	int i;
10757 
10758 	if (!exception_exit && cur_func(env)->frameno)
10759 		return 0;
10760 
10761 	for (i = 0; i < state->acquired_refs; i++) {
10762 		if (state->refs[i].type != REF_TYPE_PTR)
10763 			continue;
10764 		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
10765 			state->refs[i].id, state->refs[i].insn_idx);
10766 		refs_lingering = true;
10767 	}
10768 	return refs_lingering ? -EINVAL : 0;
10769 }
10770 
10771 static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit, bool check_lock, const char *prefix)
10772 {
10773 	int err;
10774 
10775 	if (check_lock && env->cur_state->active_locks) {
10776 		verbose(env, "%s cannot be used inside bpf_spin_lock-ed region\n", prefix);
10777 		return -EINVAL;
10778 	}
10779 
10780 	err = check_reference_leak(env, exception_exit);
10781 	if (err) {
10782 		verbose(env, "%s would lead to reference leak\n", prefix);
10783 		return err;
10784 	}
10785 
10786 	if (check_lock && env->cur_state->active_irq_id) {
10787 		verbose(env, "%s cannot be used inside bpf_local_irq_save-ed region\n", prefix);
10788 		return -EINVAL;
10789 	}
10790 
10791 	if (check_lock && env->cur_state->active_rcu_lock) {
10792 		verbose(env, "%s cannot be used inside bpf_rcu_read_lock-ed region\n", prefix);
10793 		return -EINVAL;
10794 	}
10795 
10796 	if (check_lock && env->cur_state->active_preempt_locks) {
10797 		verbose(env, "%s cannot be used inside bpf_preempt_disable-ed region\n", prefix);
10798 		return -EINVAL;
10799 	}
10800 
10801 	return 0;
10802 }
10803 
10804 static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
10805 				   struct bpf_reg_state *regs)
10806 {
10807 	struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
10808 	struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
10809 	struct bpf_map *fmt_map = fmt_reg->map_ptr;
10810 	struct bpf_bprintf_data data = {};
10811 	int err, fmt_map_off, num_args;
10812 	u64 fmt_addr;
10813 	char *fmt;
10814 
10815 	/* data must be an array of u64 */
10816 	if (data_len_reg->var_off.value % 8)
10817 		return -EINVAL;
10818 	num_args = data_len_reg->var_off.value / 8;
10819 
10820 	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
10821 	 * and map_direct_value_addr is set.
10822 	 */
10823 	fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
10824 	err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
10825 						  fmt_map_off);
10826 	if (err) {
10827 		verbose(env, "verifier bug\n");
10828 		return -EFAULT;
10829 	}
10830 	fmt = (char *)(long)fmt_addr + fmt_map_off;
10831 
10832 	/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, so we
10833 	 * can focus on validating the format specifiers.
10834 	 */
10835 	err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
10836 	if (err < 0)
10837 		verbose(env, "Invalid format string\n");
10838 
10839 	return err;
10840 }
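
/* Example (illustrative sketch): a bpf_snprintf() call that satisfies the
 * checks above. The format string must be a constant in a read-only map
 * (ARG_PTR_TO_CONST_STR) and data_len must be a multiple of 8, providing one
 * u64 per conversion specifier. Buffer names and sizes are hypothetical.
 *
 *   static const char fmt[] = "pid=%d comm=%s";
 *   char out[64];
 *   u64 data[2] = { pid, (u64)(long)comm };
 *
 *   bpf_snprintf(out, sizeof(out), fmt, data, sizeof(data));
 */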
10841 
10842 static int check_get_func_ip(struct bpf_verifier_env *env)
10843 {
10844 	enum bpf_prog_type type = resolve_prog_type(env->prog);
10845 	int func_id = BPF_FUNC_get_func_ip;
10846 
10847 	if (type == BPF_PROG_TYPE_TRACING) {
10848 		if (!bpf_prog_has_trampoline(env->prog)) {
10849 			verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
10850 				func_id_name(func_id), func_id);
10851 			return -ENOTSUPP;
10852 		}
10853 		return 0;
10854 	} else if (type == BPF_PROG_TYPE_KPROBE) {
10855 		return 0;
10856 	}
10857 
10858 	verbose(env, "func %s#%d not supported for program type %d\n",
10859 		func_id_name(func_id), func_id, type);
10860 	return -ENOTSUPP;
10861 }
10862 
10863 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
10864 {
10865 	return &env->insn_aux_data[env->insn_idx];
10866 }
10867 
10868 static bool loop_flag_is_zero(struct bpf_verifier_env *env)
10869 {
10870 	struct bpf_reg_state *regs = cur_regs(env);
10871 	struct bpf_reg_state *reg = &regs[BPF_REG_4];
10872 	bool reg_is_null = register_is_null(reg);
10873 
10874 	if (reg_is_null)
10875 		mark_chain_precision(env, BPF_REG_4);
10876 
10877 	return reg_is_null;
10878 }
10879 
10880 static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
10881 {
10882 	struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
10883 
10884 	if (!state->initialized) {
10885 		state->initialized = 1;
10886 		state->fit_for_inline = loop_flag_is_zero(env);
10887 		state->callback_subprogno = subprogno;
10888 		return;
10889 	}
10890 
10891 	if (!state->fit_for_inline)
10892 		return;
10893 
10894 	state->fit_for_inline = (loop_flag_is_zero(env) &&
10895 				 state->callback_subprogno == subprogno);
10896 }
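
/* Example (illustrative sketch): how the tracking above decides inlining
 * eligibility. 'cb' and 'ctx' are hypothetical.
 *
 *   bpf_loop(nr, cb, &ctx, 0);        flags provably zero at every call site
 *                                     with the same callback subprog
 *                                     -> fit_for_inline stays true
 *   bpf_loop(nr, cb, &ctx, flags);    flags not provably zero
 *                                     -> fit_for_inline is cleared and the
 *                                        call keeps using the helper
 */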
10897 
10898 /* Returns whether or not the given map type can potentially elide
10899  * lookup return value nullness check. This is possible if the key
10900  * is statically known.
10901  */
10902 static bool can_elide_value_nullness(enum bpf_map_type type)
10903 {
10904 	switch (type) {
10905 	case BPF_MAP_TYPE_ARRAY:
10906 	case BPF_MAP_TYPE_PERCPU_ARRAY:
10907 		return true;
10908 	default:
10909 		return false;
10910 	}
10911 }
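
/* Example (illustrative sketch): the lookup pattern whose NULL check can be
 * elided thanks to the check above, assuming 'array_map' is an ARRAY map with
 * max_entries > 3:
 *
 *   u32 key = 3;                                  constant key, in bounds
 *   int *v = bpf_map_lookup_elem(&array_map, &key);
 *
 *   *v = 1;       no "if (!v)" needed - PTR_MAYBE_NULL is dropped from R0
 *
 * A variable or out-of-range key still requires the usual NULL check.
 */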
10912 
10913 static int get_helper_proto(struct bpf_verifier_env *env, int func_id,
10914 			    const struct bpf_func_proto **ptr)
10915 {
10916 	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID)
10917 		return -ERANGE;
10918 
10919 	if (!env->ops->get_func_proto)
10920 		return -EINVAL;
10921 
10922 	*ptr = env->ops->get_func_proto(func_id, env->prog);
10923 	return *ptr ? 0 : -EINVAL;
10924 }
10925 
10926 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
10927 			     int *insn_idx_p)
10928 {
10929 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
10930 	bool returns_cpu_specific_alloc_ptr = false;
10931 	const struct bpf_func_proto *fn = NULL;
10932 	enum bpf_return_type ret_type;
10933 	enum bpf_type_flag ret_flag;
10934 	struct bpf_reg_state *regs;
10935 	struct bpf_call_arg_meta meta;
10936 	int insn_idx = *insn_idx_p;
10937 	bool changes_data;
10938 	int i, err, func_id;
10939 
10940 	/* find function prototype */
10941 	func_id = insn->imm;
10942 	err = get_helper_proto(env, insn->imm, &fn);
10943 	if (err == -ERANGE) {
10944 		verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id);
10945 		return -EINVAL;
10946 	}
10947 
10948 	if (err) {
10949 		verbose(env, "program of this type cannot use helper %s#%d\n",
10950 			func_id_name(func_id), func_id);
10951 		return err;
10952 	}
10953 
10954 	/* eBPF programs must be GPL compatible to use GPL-ed functions */
10955 	if (!env->prog->gpl_compatible && fn->gpl_only) {
10956 		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
10957 		return -EINVAL;
10958 	}
10959 
10960 	if (fn->allowed && !fn->allowed(env->prog)) {
10961 		verbose(env, "helper call is not allowed in probe\n");
10962 		return -EINVAL;
10963 	}
10964 
10965 	if (!in_sleepable(env) && fn->might_sleep) {
10966 		verbose(env, "helper call might sleep in a non-sleepable prog\n");
10967 		return -EINVAL;
10968 	}
10969 
10970 	/* With LD_ABS/IND some JITs save/restore skb from r1. */
10971 	changes_data = bpf_helper_changes_pkt_data(func_id);
10972 	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
10973 		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
10974 			func_id_name(func_id), func_id);
10975 		return -EINVAL;
10976 	}
10977 
10978 	memset(&meta, 0, sizeof(meta));
10979 	meta.pkt_access = fn->pkt_access;
10980 
10981 	err = check_func_proto(fn, func_id);
10982 	if (err) {
10983 		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
10984 			func_id_name(func_id), func_id);
10985 		return err;
10986 	}
10987 
10988 	if (env->cur_state->active_rcu_lock) {
10989 		if (fn->might_sleep) {
10990 			verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
10991 				func_id_name(func_id), func_id);
10992 			return -EINVAL;
10993 		}
10994 
10995 		if (in_sleepable(env) && is_storage_get_function(func_id))
10996 			env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
10997 	}
10998 
10999 	if (env->cur_state->active_preempt_locks) {
11000 		if (fn->might_sleep) {
11001 			verbose(env, "sleepable helper %s#%d in non-preemptible region\n",
11002 				func_id_name(func_id), func_id);
11003 			return -EINVAL;
11004 		}
11005 
11006 		if (in_sleepable(env) && is_storage_get_function(func_id))
11007 			env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
11008 	}
11009 
11010 	if (env->cur_state->active_irq_id) {
11011 		if (fn->might_sleep) {
11012 			verbose(env, "sleepable helper %s#%d in IRQ-disabled region\n",
11013 				func_id_name(func_id), func_id);
11014 			return -EINVAL;
11015 		}
11016 
11017 		if (in_sleepable(env) && is_storage_get_function(func_id))
11018 			env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
11019 	}
11020 
11021 	meta.func_id = func_id;
11022 	/* check args */
11023 	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
11024 		err = check_func_arg(env, i, &meta, fn, insn_idx);
11025 		if (err)
11026 			return err;
11027 	}
11028 
11029 	err = record_func_map(env, &meta, func_id, insn_idx);
11030 	if (err)
11031 		return err;
11032 
11033 	err = record_func_key(env, &meta, func_id, insn_idx);
11034 	if (err)
11035 		return err;
11036 
11037 	/* Mark slots with STACK_MISC in case of raw mode, stack offset
11038 	 * is inferred from register state.
11039 	 */
11040 	for (i = 0; i < meta.access_size; i++) {
11041 		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
11042 				       BPF_WRITE, -1, false, false);
11043 		if (err)
11044 			return err;
11045 	}
11046 
11047 	regs = cur_regs(env);
11048 
11049 	if (meta.release_regno) {
11050 		err = -EINVAL;
11051 		/* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
11052 		 * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
11053 		 * is safe to do directly.
11054 		 */
11055 		if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
11056 			if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) {
11057 				verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be released\n");
11058 				return -EFAULT;
11059 			}
11060 			err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
11061 		} else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj_id) {
11062 			u32 ref_obj_id = meta.ref_obj_id;
11063 			bool in_rcu = in_rcu_cs(env);
11064 			struct bpf_func_state *state;
11065 			struct bpf_reg_state *reg;
11066 
11067 			err = release_reference_nomark(env->cur_state, ref_obj_id);
11068 			if (!err) {
11069 				bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
11070 					if (reg->ref_obj_id == ref_obj_id) {
11071 						if (in_rcu && (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) {
11072 							reg->ref_obj_id = 0;
11073 							reg->type &= ~MEM_ALLOC;
11074 							reg->type |= MEM_RCU;
11075 						} else {
11076 							mark_reg_invalid(env, reg);
11077 						}
11078 					}
11079 				}));
11080 			}
11081 		} else if (meta.ref_obj_id) {
11082 			err = release_reference(env, meta.ref_obj_id);
11083 		} else if (register_is_null(&regs[meta.release_regno])) {
11084 			/* meta.ref_obj_id can only be 0 if the register that is meant to be
11085 			 * released is NULL, which must be a register greater than R0.
11086 			 */
11087 			err = 0;
11088 		}
11089 		if (err) {
11090 			verbose(env, "func %s#%d reference has not been acquired before\n",
11091 				func_id_name(func_id), func_id);
11092 			return err;
11093 		}
11094 	}
11095 
11096 	switch (func_id) {
11097 	case BPF_FUNC_tail_call:
11098 		err = check_resource_leak(env, false, true, "tail_call");
11099 		if (err)
11100 			return err;
11101 		break;
11102 	case BPF_FUNC_get_local_storage:
11103 		/* check that flags argument in get_local_storage(map, flags) is 0,
11104 		 * this is required because get_local_storage() can't return an error.
11105 		 */
11106 		if (!register_is_null(&regs[BPF_REG_2])) {
11107 			verbose(env, "get_local_storage() doesn't support non-zero flags\n");
11108 			return -EINVAL;
11109 		}
11110 		break;
11111 	case BPF_FUNC_for_each_map_elem:
11112 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
11113 					 set_map_elem_callback_state);
11114 		break;
11115 	case BPF_FUNC_timer_set_callback:
11116 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
11117 					 set_timer_callback_state);
11118 		break;
11119 	case BPF_FUNC_find_vma:
11120 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
11121 					 set_find_vma_callback_state);
11122 		break;
11123 	case BPF_FUNC_snprintf:
11124 		err = check_bpf_snprintf_call(env, regs);
11125 		break;
11126 	case BPF_FUNC_loop:
11127 		update_loop_inline_state(env, meta.subprogno);
11128 		/* Verifier relies on R1 value to determine if bpf_loop() iteration
11129 		 * is finished, thus mark it precise.
11130 		 */
11131 		err = mark_chain_precision(env, BPF_REG_1);
11132 		if (err)
11133 			return err;
11134 		if (cur_func(env)->callback_depth < regs[BPF_REG_1].umax_value) {
11135 			err = push_callback_call(env, insn, insn_idx, meta.subprogno,
11136 						 set_loop_callback_state);
11137 		} else {
11138 			cur_func(env)->callback_depth = 0;
11139 			if (env->log.level & BPF_LOG_LEVEL2)
11140 				verbose(env, "frame%d bpf_loop iteration limit reached\n",
11141 					env->cur_state->curframe);
11142 		}
11143 		break;
11144 	case BPF_FUNC_dynptr_from_mem:
11145 		if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
11146 			verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
11147 				reg_type_str(env, regs[BPF_REG_1].type));
11148 			return -EACCES;
11149 		}
11150 		break;
11151 	case BPF_FUNC_set_retval:
11152 		if (prog_type == BPF_PROG_TYPE_LSM &&
11153 		    env->prog->expected_attach_type == BPF_LSM_CGROUP) {
11154 			if (!env->prog->aux->attach_func_proto->type) {
11155 				/* Make sure programs that attach to void
11156 				 * hooks don't try to modify return value.
11157 				 */
11158 				verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
11159 				return -EINVAL;
11160 			}
11161 		}
11162 		break;
11163 	case BPF_FUNC_dynptr_data:
11164 	{
11165 		struct bpf_reg_state *reg;
11166 		int id, ref_obj_id;
11167 
11168 		reg = get_dynptr_arg_reg(env, fn, regs);
11169 		if (!reg)
11170 			return -EFAULT;
11171 
11172 
11173 		if (meta.dynptr_id) {
11174 			verbose(env, "verifier internal error: meta.dynptr_id already set\n");
11175 			return -EFAULT;
11176 		}
11177 		if (meta.ref_obj_id) {
11178 			verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
11179 			return -EFAULT;
11180 		}
11181 
11182 		id = dynptr_id(env, reg);
11183 		if (id < 0) {
11184 			verbose(env, "verifier internal error: failed to obtain dynptr id\n");
11185 			return id;
11186 		}
11187 
11188 		ref_obj_id = dynptr_ref_obj_id(env, reg);
11189 		if (ref_obj_id < 0) {
11190 			verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
11191 			return ref_obj_id;
11192 		}
11193 
11194 		meta.dynptr_id = id;
11195 		meta.ref_obj_id = ref_obj_id;
11196 
11197 		break;
11198 	}
11199 	case BPF_FUNC_dynptr_write:
11200 	{
11201 		enum bpf_dynptr_type dynptr_type;
11202 		struct bpf_reg_state *reg;
11203 
11204 		reg = get_dynptr_arg_reg(env, fn, regs);
11205 		if (!reg)
11206 			return -EFAULT;
11207 
11208 		dynptr_type = dynptr_get_type(env, reg);
11209 		if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
11210 			return -EFAULT;
11211 
11212 		if (dynptr_type == BPF_DYNPTR_TYPE_SKB)
11213 			/* this will trigger clear_all_pkt_pointers(), which will
11214 			 * invalidate all dynptr slices associated with the skb
11215 			 */
11216 			changes_data = true;
11217 
11218 		break;
11219 	}
11220 	case BPF_FUNC_per_cpu_ptr:
11221 	case BPF_FUNC_this_cpu_ptr:
11222 	{
11223 		struct bpf_reg_state *reg = &regs[BPF_REG_1];
11224 		const struct btf_type *type;
11225 
11226 		if (reg->type & MEM_RCU) {
11227 			type = btf_type_by_id(reg->btf, reg->btf_id);
11228 			if (!type || !btf_type_is_struct(type)) {
11229 				verbose(env, "Helper has invalid btf/btf_id in R1\n");
11230 				return -EFAULT;
11231 			}
11232 			returns_cpu_specific_alloc_ptr = true;
11233 			env->insn_aux_data[insn_idx].call_with_percpu_alloc_ptr = true;
11234 		}
11235 		break;
11236 	}
11237 	case BPF_FUNC_user_ringbuf_drain:
11238 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
11239 					 set_user_ringbuf_callback_state);
11240 		break;
11241 	}
11242 
11243 	if (err)
11244 		return err;
11245 
11246 	/* reset caller saved regs */
11247 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
11248 		mark_reg_not_init(env, regs, caller_saved[i]);
11249 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
11250 	}
11251 
11252 	/* helper call returns 64-bit value. */
11253 	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
11254 
11255 	/* update return register (already marked as written above) */
11256 	ret_type = fn->ret_type;
11257 	ret_flag = type_flag(ret_type);
11258 
11259 	switch (base_type(ret_type)) {
11260 	case RET_INTEGER:
11261 		/* sets type to SCALAR_VALUE */
11262 		mark_reg_unknown(env, regs, BPF_REG_0);
11263 		break;
11264 	case RET_VOID:
11265 		regs[BPF_REG_0].type = NOT_INIT;
11266 		break;
11267 	case RET_PTR_TO_MAP_VALUE:
11268 		/* There is no offset yet applied, variable or fixed */
11269 		mark_reg_known_zero(env, regs, BPF_REG_0);
11270 		/* remember map_ptr, so that check_map_access()
11271 		 * can check 'value_size' boundary of memory access
11272 		 * to map element returned from bpf_map_lookup_elem()
11273 		 */
11274 		if (meta.map_ptr == NULL) {
11275 			verbose(env,
11276 				"kernel subsystem misconfigured verifier\n");
11277 			return -EINVAL;
11278 		}
11279 
11280 		if (func_id == BPF_FUNC_map_lookup_elem &&
11281 		    can_elide_value_nullness(meta.map_ptr->map_type) &&
11282 		    meta.const_map_key >= 0 &&
11283 		    meta.const_map_key < meta.map_ptr->max_entries)
11284 			ret_flag &= ~PTR_MAYBE_NULL;
11285 
11286 		regs[BPF_REG_0].map_ptr = meta.map_ptr;
11287 		regs[BPF_REG_0].map_uid = meta.map_uid;
11288 		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
11289 		if (!type_may_be_null(ret_flag) &&
11290 		    btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK)) {
11291 			regs[BPF_REG_0].id = ++env->id_gen;
11292 		}
11293 		break;
11294 	case RET_PTR_TO_SOCKET:
11295 		mark_reg_known_zero(env, regs, BPF_REG_0);
11296 		regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
11297 		break;
11298 	case RET_PTR_TO_SOCK_COMMON:
11299 		mark_reg_known_zero(env, regs, BPF_REG_0);
11300 		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
11301 		break;
11302 	case RET_PTR_TO_TCP_SOCK:
11303 		mark_reg_known_zero(env, regs, BPF_REG_0);
11304 		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
11305 		break;
11306 	case RET_PTR_TO_MEM:
11307 		mark_reg_known_zero(env, regs, BPF_REG_0);
11308 		regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
11309 		regs[BPF_REG_0].mem_size = meta.mem_size;
11310 		break;
11311 	case RET_PTR_TO_MEM_OR_BTF_ID:
11312 	{
11313 		const struct btf_type *t;
11314 
11315 		mark_reg_known_zero(env, regs, BPF_REG_0);
11316 		t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
11317 		if (!btf_type_is_struct(t)) {
11318 			u32 tsize;
11319 			const struct btf_type *ret;
11320 			const char *tname;
11321 
11322 			/* resolve the type size of ksym. */
11323 			ret = btf_resolve_size(meta.ret_btf, t, &tsize);
11324 			if (IS_ERR(ret)) {
11325 				tname = btf_name_by_offset(meta.ret_btf, t->name_off);
11326 				verbose(env, "unable to resolve the size of type '%s': %ld\n",
11327 					tname, PTR_ERR(ret));
11328 				return -EINVAL;
11329 			}
11330 			regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
11331 			regs[BPF_REG_0].mem_size = tsize;
11332 		} else {
11333 			if (returns_cpu_specific_alloc_ptr) {
11334 				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC | MEM_RCU;
11335 			} else {
11336 				/* MEM_RDONLY may be carried from ret_flag, but it
11337 				 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
11338 				 * it will confuse the check of PTR_TO_BTF_ID in
11339 				 * check_mem_access().
11340 				 */
11341 				ret_flag &= ~MEM_RDONLY;
11342 				regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
11343 			}
11344 
11345 			regs[BPF_REG_0].btf = meta.ret_btf;
11346 			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
11347 		}
11348 		break;
11349 	}
11350 	case RET_PTR_TO_BTF_ID:
11351 	{
11352 		struct btf *ret_btf;
11353 		int ret_btf_id;
11354 
11355 		mark_reg_known_zero(env, regs, BPF_REG_0);
11356 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
11357 		if (func_id == BPF_FUNC_kptr_xchg) {
11358 			ret_btf = meta.kptr_field->kptr.btf;
11359 			ret_btf_id = meta.kptr_field->kptr.btf_id;
11360 			if (!btf_is_kernel(ret_btf)) {
11361 				regs[BPF_REG_0].type |= MEM_ALLOC;
11362 				if (meta.kptr_field->type == BPF_KPTR_PERCPU)
11363 					regs[BPF_REG_0].type |= MEM_PERCPU;
11364 			}
11365 		} else {
11366 			if (fn->ret_btf_id == BPF_PTR_POISON) {
11367 				verbose(env, "verifier internal error:");
11368 				verbose(env, "func %s has non-overwritten BPF_PTR_POISON return type\n",
11369 					func_id_name(func_id));
11370 				return -EINVAL;
11371 			}
11372 			ret_btf = btf_vmlinux;
11373 			ret_btf_id = *fn->ret_btf_id;
11374 		}
11375 		if (ret_btf_id == 0) {
11376 			verbose(env, "invalid return type %u of func %s#%d\n",
11377 				base_type(ret_type), func_id_name(func_id),
11378 				func_id);
11379 			return -EINVAL;
11380 		}
11381 		regs[BPF_REG_0].btf = ret_btf;
11382 		regs[BPF_REG_0].btf_id = ret_btf_id;
11383 		break;
11384 	}
11385 	default:
11386 		verbose(env, "unknown return type %u of func %s#%d\n",
11387 			base_type(ret_type), func_id_name(func_id), func_id);
11388 		return -EINVAL;
11389 	}
11390 
11391 	if (type_may_be_null(regs[BPF_REG_0].type))
11392 		regs[BPF_REG_0].id = ++env->id_gen;
11393 
11394 	if (helper_multiple_ref_obj_use(func_id, meta.map_ptr)) {
11395 		verbose(env, "verifier internal error: func %s#%d sets ref_obj_id more than once\n",
11396 			func_id_name(func_id), func_id);
11397 		return -EFAULT;
11398 	}
11399 
11400 	if (is_dynptr_ref_function(func_id))
11401 		regs[BPF_REG_0].dynptr_id = meta.dynptr_id;
11402 
11403 	if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
11404 		/* For release_reference() */
11405 		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
11406 	} else if (is_acquire_function(func_id, meta.map_ptr)) {
11407 		int id = acquire_reference(env, insn_idx);
11408 
11409 		if (id < 0)
11410 			return id;
11411 		/* For mark_ptr_or_null_reg() */
11412 		regs[BPF_REG_0].id = id;
11413 		/* For release_reference() */
11414 		regs[BPF_REG_0].ref_obj_id = id;
11415 	}
11416 
11417 	err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta);
11418 	if (err)
11419 		return err;
11420 
11421 	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
11422 	if (err)
11423 		return err;
11424 
11425 	if ((func_id == BPF_FUNC_get_stack ||
11426 	     func_id == BPF_FUNC_get_task_stack) &&
11427 	    !env->prog->has_callchain_buf) {
11428 		const char *err_str;
11429 
11430 #ifdef CONFIG_PERF_EVENTS
11431 		err = get_callchain_buffers(sysctl_perf_event_max_stack);
11432 		err_str = "cannot get callchain buffer for func %s#%d\n";
11433 #else
11434 		err = -ENOTSUPP;
11435 		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
11436 #endif
11437 		if (err) {
11438 			verbose(env, err_str, func_id_name(func_id), func_id);
11439 			return err;
11440 		}
11441 
11442 		env->prog->has_callchain_buf = true;
11443 	}
11444 
11445 	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
11446 		env->prog->call_get_stack = true;
11447 
11448 	if (func_id == BPF_FUNC_get_func_ip) {
11449 		if (check_get_func_ip(env))
11450 			return -ENOTSUPP;
11451 		env->prog->call_get_func_ip = true;
11452 	}
11453 
11454 	if (changes_data)
11455 		clear_all_pkt_pointers(env);
11456 	return 0;
11457 }
11458 
11459 /* mark_btf_func_reg_size() is used when the reg size is determined by
11460  * the BTF func_proto's return value size and argument.
11461  */
11462 static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
11463 				   size_t reg_size)
11464 {
11465 	struct bpf_reg_state *reg = &cur_regs(env)[regno];
11466 
11467 	if (regno == BPF_REG_0) {
11468 		/* Function return value */
11469 		reg->live |= REG_LIVE_WRITTEN;
11470 		reg->subreg_def = reg_size == sizeof(u64) ?
11471 			DEF_NOT_SUBREG : env->insn_idx + 1;
11472 	} else {
11473 		/* Function argument */
11474 		if (reg_size == sizeof(u64)) {
11475 			mark_insn_zext(env, reg);
11476 			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
11477 		} else {
11478 			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
11479 		}
11480 	}
11481 }
11482 
11483 static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
11484 {
11485 	return meta->kfunc_flags & KF_ACQUIRE;
11486 }
11487 
11488 static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
11489 {
11490 	return meta->kfunc_flags & KF_RELEASE;
11491 }
11492 
11493 static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta)
11494 {
11495 	return (meta->kfunc_flags & KF_TRUSTED_ARGS) || is_kfunc_release(meta);
11496 }
11497 
11498 static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
11499 {
11500 	return meta->kfunc_flags & KF_SLEEPABLE;
11501 }
11502 
11503 static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
11504 {
11505 	return meta->kfunc_flags & KF_DESTRUCTIVE;
11506 }
11507 
11508 static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
11509 {
11510 	return meta->kfunc_flags & KF_RCU;
11511 }
11512 
11513 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta)
11514 {
11515 	return meta->kfunc_flags & KF_RCU_PROTECTED;
11516 }
11517 
11518 static bool is_kfunc_arg_mem_size(const struct btf *btf,
11519 				  const struct btf_param *arg,
11520 				  const struct bpf_reg_state *reg)
11521 {
11522 	const struct btf_type *t;
11523 
11524 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
11525 	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
11526 		return false;
11527 
11528 	return btf_param_match_suffix(btf, arg, "__sz");
11529 }
11530 
11531 static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
11532 					const struct btf_param *arg,
11533 					const struct bpf_reg_state *reg)
11534 {
11535 	const struct btf_type *t;
11536 
11537 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
11538 	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
11539 		return false;
11540 
11541 	return btf_param_match_suffix(btf, arg, "__szk");
11542 }
11543 
11544 static bool is_kfunc_arg_optional(const struct btf *btf, const struct btf_param *arg)
11545 {
11546 	return btf_param_match_suffix(btf, arg, "__opt");
11547 }
11548 
11549 static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
11550 {
11551 	return btf_param_match_suffix(btf, arg, "__k");
11552 }
11553 
11554 static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
11555 {
11556 	return btf_param_match_suffix(btf, arg, "__ign");
11557 }
11558 
11559 static bool is_kfunc_arg_map(const struct btf *btf, const struct btf_param *arg)
11560 {
11561 	return btf_param_match_suffix(btf, arg, "__map");
11562 }
11563 
11564 static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
11565 {
11566 	return btf_param_match_suffix(btf, arg, "__alloc");
11567 }
11568 
11569 static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
11570 {
11571 	return btf_param_match_suffix(btf, arg, "__uninit");
11572 }
11573 
11574 static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg)
11575 {
11576 	return btf_param_match_suffix(btf, arg, "__refcounted_kptr");
11577 }
11578 
11579 static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param *arg)
11580 {
11581 	return btf_param_match_suffix(btf, arg, "__nullable");
11582 }
11583 
11584 static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg)
11585 {
11586 	return btf_param_match_suffix(btf, arg, "__str");
11587 }
11588 
11589 static bool is_kfunc_arg_irq_flag(const struct btf *btf, const struct btf_param *arg)
11590 {
11591 	return btf_param_match_suffix(btf, arg, "__irq_flag");
11592 }
11593 
11594 static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
11595 					  const struct btf_param *arg,
11596 					  const char *name)
11597 {
11598 	int len, target_len = strlen(name);
11599 	const char *param_name;
11600 
11601 	param_name = btf_name_by_offset(btf, arg->name_off);
11602 	if (str_is_empty(param_name))
11603 		return false;
11604 	len = strlen(param_name);
11605 	if (len != target_len)
11606 		return false;
11607 	if (strcmp(param_name, name))
11608 		return false;
11609 
11610 	return true;
11611 }
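
/* Example (illustrative sketch): how the argument-name suffixes matched above
 * appear in a kfunc declaration. The kfunc itself is hypothetical; only the
 * suffix conventions (__sz, __szk, __k, __opt, __uninit, __nullable, __str,
 * __ign, ...) are real.
 *
 *   __bpf_kfunc int bpf_example_fill(void *dst, u32 dst__sz,
 *                                    const char *name__str,
 *                                    struct task_struct *task__nullable);
 *
 * Here dst__sz gives the byte size of the preceding memory argument,
 * name__str must be a constant string, and task__nullable may be NULL.
 */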
11612 
11613 enum {
11614 	KF_ARG_DYNPTR_ID,
11615 	KF_ARG_LIST_HEAD_ID,
11616 	KF_ARG_LIST_NODE_ID,
11617 	KF_ARG_RB_ROOT_ID,
11618 	KF_ARG_RB_NODE_ID,
11619 	KF_ARG_WORKQUEUE_ID,
11620 };
11621 
11622 BTF_ID_LIST(kf_arg_btf_ids)
11623 BTF_ID(struct, bpf_dynptr)
11624 BTF_ID(struct, bpf_list_head)
11625 BTF_ID(struct, bpf_list_node)
11626 BTF_ID(struct, bpf_rb_root)
11627 BTF_ID(struct, bpf_rb_node)
11628 BTF_ID(struct, bpf_wq)
11629 
11630 static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
11631 				    const struct btf_param *arg, int type)
11632 {
11633 	const struct btf_type *t;
11634 	u32 res_id;
11635 
11636 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
11637 	if (!t)
11638 		return false;
11639 	if (!btf_type_is_ptr(t))
11640 		return false;
11641 	t = btf_type_skip_modifiers(btf, t->type, &res_id);
11642 	if (!t)
11643 		return false;
11644 	return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]);
11645 }
11646 
11647 static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
11648 {
11649 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID);
11650 }
11651 
11652 static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
11653 {
11654 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID);
11655 }
11656 
11657 static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
11658 {
11659 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
11660 }
11661 
11662 static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
11663 {
11664 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID);
11665 }
11666 
11667 static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
11668 {
11669 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
11670 }
11671 
11672 static bool is_kfunc_arg_wq(const struct btf *btf, const struct btf_param *arg)
11673 {
11674 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_WORKQUEUE_ID);
11675 }
11676 
11677 static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
11678 				  const struct btf_param *arg)
11679 {
11680 	const struct btf_type *t;
11681 
11682 	t = btf_type_resolve_func_ptr(btf, arg->type, NULL);
11683 	if (!t)
11684 		return false;
11685 
11686 	return true;
11687 }
11688 
11689 /* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
11690 static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
11691 					const struct btf *btf,
11692 					const struct btf_type *t, int rec)
11693 {
11694 	const struct btf_type *member_type;
11695 	const struct btf_member *member;
11696 	u32 i;
11697 
11698 	if (!btf_type_is_struct(t))
11699 		return false;
11700 
11701 	for_each_member(i, t, member) {
11702 		const struct btf_array *array;
11703 
11704 		member_type = btf_type_skip_modifiers(btf, member->type, NULL);
11705 		if (btf_type_is_struct(member_type)) {
11706 			if (rec >= 3) {
11707 				verbose(env, "max struct nesting depth exceeded\n");
11708 				return false;
11709 			}
11710 			if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1))
11711 				return false;
11712 			continue;
11713 		}
11714 		if (btf_type_is_array(member_type)) {
11715 			array = btf_array(member_type);
11716 			if (!array->nelems)
11717 				return false;
11718 			member_type = btf_type_skip_modifiers(btf, array->type, NULL);
11719 			if (!btf_type_is_scalar(member_type))
11720 				return false;
11721 			continue;
11722 		}
11723 		if (!btf_type_is_scalar(member_type))
11724 			return false;
11725 	}
11726 	return true;
11727 }
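
/* Example (illustrative sketch): struct shapes accepted and rejected by the
 * check above. Type names are hypothetical.
 *
 *   struct inner { int a; char buf[8]; };
 *   struct ok    { struct inner i; u64 b; };     accepted (scalars only,
 *                                                 nesting depth within limit)
 *   struct bad   { void *p; };                   rejected (pointer member)
 */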
11728 
11729 enum kfunc_ptr_arg_type {
11730 	KF_ARG_PTR_TO_CTX,
11731 	KF_ARG_PTR_TO_ALLOC_BTF_ID,    /* Allocated object */
11732 	KF_ARG_PTR_TO_REFCOUNTED_KPTR, /* Refcounted local kptr */
11733 	KF_ARG_PTR_TO_DYNPTR,
11734 	KF_ARG_PTR_TO_ITER,
11735 	KF_ARG_PTR_TO_LIST_HEAD,
11736 	KF_ARG_PTR_TO_LIST_NODE,
11737 	KF_ARG_PTR_TO_BTF_ID,	       /* Also covers reg2btf_ids conversions */
11738 	KF_ARG_PTR_TO_MEM,
11739 	KF_ARG_PTR_TO_MEM_SIZE,	       /* Size derived from next argument, skip it */
11740 	KF_ARG_PTR_TO_CALLBACK,
11741 	KF_ARG_PTR_TO_RB_ROOT,
11742 	KF_ARG_PTR_TO_RB_NODE,
11743 	KF_ARG_PTR_TO_NULL,
11744 	KF_ARG_PTR_TO_CONST_STR,
11745 	KF_ARG_PTR_TO_MAP,
11746 	KF_ARG_PTR_TO_WORKQUEUE,
11747 	KF_ARG_PTR_TO_IRQ_FLAG,
11748 };
11749 
11750 enum special_kfunc_type {
11751 	KF_bpf_obj_new_impl,
11752 	KF_bpf_obj_drop_impl,
11753 	KF_bpf_refcount_acquire_impl,
11754 	KF_bpf_list_push_front_impl,
11755 	KF_bpf_list_push_back_impl,
11756 	KF_bpf_list_pop_front,
11757 	KF_bpf_list_pop_back,
11758 	KF_bpf_cast_to_kern_ctx,
11759 	KF_bpf_rdonly_cast,
11760 	KF_bpf_rcu_read_lock,
11761 	KF_bpf_rcu_read_unlock,
11762 	KF_bpf_rbtree_remove,
11763 	KF_bpf_rbtree_add_impl,
11764 	KF_bpf_rbtree_first,
11765 	KF_bpf_dynptr_from_skb,
11766 	KF_bpf_dynptr_from_xdp,
11767 	KF_bpf_dynptr_slice,
11768 	KF_bpf_dynptr_slice_rdwr,
11769 	KF_bpf_dynptr_clone,
11770 	KF_bpf_percpu_obj_new_impl,
11771 	KF_bpf_percpu_obj_drop_impl,
11772 	KF_bpf_throw,
11773 	KF_bpf_wq_set_callback_impl,
11774 	KF_bpf_preempt_disable,
11775 	KF_bpf_preempt_enable,
11776 	KF_bpf_iter_css_task_new,
11777 	KF_bpf_session_cookie,
11778 	KF_bpf_get_kmem_cache,
11779 	KF_bpf_local_irq_save,
11780 	KF_bpf_local_irq_restore,
11781 	KF_bpf_iter_num_new,
11782 	KF_bpf_iter_num_next,
11783 	KF_bpf_iter_num_destroy,
11784 };
11785 
11786 BTF_SET_START(special_kfunc_set)
11787 BTF_ID(func, bpf_obj_new_impl)
11788 BTF_ID(func, bpf_obj_drop_impl)
11789 BTF_ID(func, bpf_refcount_acquire_impl)
11790 BTF_ID(func, bpf_list_push_front_impl)
11791 BTF_ID(func, bpf_list_push_back_impl)
11792 BTF_ID(func, bpf_list_pop_front)
11793 BTF_ID(func, bpf_list_pop_back)
11794 BTF_ID(func, bpf_cast_to_kern_ctx)
11795 BTF_ID(func, bpf_rdonly_cast)
11796 BTF_ID(func, bpf_rbtree_remove)
11797 BTF_ID(func, bpf_rbtree_add_impl)
11798 BTF_ID(func, bpf_rbtree_first)
11799 #ifdef CONFIG_NET
11800 BTF_ID(func, bpf_dynptr_from_skb)
11801 BTF_ID(func, bpf_dynptr_from_xdp)
11802 #endif
11803 BTF_ID(func, bpf_dynptr_slice)
11804 BTF_ID(func, bpf_dynptr_slice_rdwr)
11805 BTF_ID(func, bpf_dynptr_clone)
11806 BTF_ID(func, bpf_percpu_obj_new_impl)
11807 BTF_ID(func, bpf_percpu_obj_drop_impl)
11808 BTF_ID(func, bpf_throw)
11809 BTF_ID(func, bpf_wq_set_callback_impl)
11810 #ifdef CONFIG_CGROUPS
11811 BTF_ID(func, bpf_iter_css_task_new)
11812 #endif
11813 BTF_SET_END(special_kfunc_set)
11814 
11815 BTF_ID_LIST(special_kfunc_list)
11816 BTF_ID(func, bpf_obj_new_impl)
11817 BTF_ID(func, bpf_obj_drop_impl)
11818 BTF_ID(func, bpf_refcount_acquire_impl)
11819 BTF_ID(func, bpf_list_push_front_impl)
11820 BTF_ID(func, bpf_list_push_back_impl)
11821 BTF_ID(func, bpf_list_pop_front)
11822 BTF_ID(func, bpf_list_pop_back)
11823 BTF_ID(func, bpf_cast_to_kern_ctx)
11824 BTF_ID(func, bpf_rdonly_cast)
11825 BTF_ID(func, bpf_rcu_read_lock)
11826 BTF_ID(func, bpf_rcu_read_unlock)
11827 BTF_ID(func, bpf_rbtree_remove)
11828 BTF_ID(func, bpf_rbtree_add_impl)
11829 BTF_ID(func, bpf_rbtree_first)
11830 #ifdef CONFIG_NET
11831 BTF_ID(func, bpf_dynptr_from_skb)
11832 BTF_ID(func, bpf_dynptr_from_xdp)
11833 #else
11834 BTF_ID_UNUSED
11835 BTF_ID_UNUSED
11836 #endif
11837 BTF_ID(func, bpf_dynptr_slice)
11838 BTF_ID(func, bpf_dynptr_slice_rdwr)
11839 BTF_ID(func, bpf_dynptr_clone)
11840 BTF_ID(func, bpf_percpu_obj_new_impl)
11841 BTF_ID(func, bpf_percpu_obj_drop_impl)
11842 BTF_ID(func, bpf_throw)
11843 BTF_ID(func, bpf_wq_set_callback_impl)
11844 BTF_ID(func, bpf_preempt_disable)
11845 BTF_ID(func, bpf_preempt_enable)
11846 #ifdef CONFIG_CGROUPS
11847 BTF_ID(func, bpf_iter_css_task_new)
11848 #else
11849 BTF_ID_UNUSED
11850 #endif
11851 #ifdef CONFIG_BPF_EVENTS
11852 BTF_ID(func, bpf_session_cookie)
11853 #else
11854 BTF_ID_UNUSED
11855 #endif
11856 BTF_ID(func, bpf_get_kmem_cache)
11857 BTF_ID(func, bpf_local_irq_save)
11858 BTF_ID(func, bpf_local_irq_restore)
11859 BTF_ID(func, bpf_iter_num_new)
11860 BTF_ID(func, bpf_iter_num_next)
11861 BTF_ID(func, bpf_iter_num_destroy)
11862 
11863 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
11864 {
11865 	if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
11866 	    meta->arg_owning_ref) {
11867 		return false;
11868 	}
11869 
11870 	return meta->kfunc_flags & KF_RET_NULL;
11871 }
11872 
11873 static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
11874 {
11875 	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
11876 }
11877 
11878 static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
11879 {
11880 	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
11881 }
11882 
11883 static bool is_kfunc_bpf_preempt_disable(struct bpf_kfunc_call_arg_meta *meta)
11884 {
11885 	return meta->func_id == special_kfunc_list[KF_bpf_preempt_disable];
11886 }
11887 
11888 static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
11889 {
11890 	return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable];
11891 }
11892 
11893 static enum kfunc_ptr_arg_type
11894 get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
11895 		       struct bpf_kfunc_call_arg_meta *meta,
11896 		       const struct btf_type *t, const struct btf_type *ref_t,
11897 		       const char *ref_tname, const struct btf_param *args,
11898 		       int argno, int nargs)
11899 {
11900 	u32 regno = argno + 1;
11901 	struct bpf_reg_state *regs = cur_regs(env);
11902 	struct bpf_reg_state *reg = &regs[regno];
11903 	bool arg_mem_size = false;
11904 
11905 	if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
11906 		return KF_ARG_PTR_TO_CTX;
11907 
11908 	/* In this function, we verify the kfunc's BTF as per the argument type,
11909 	 * leaving the rest of the verification with respect to the register
11910 	 * type to our caller. When a set of conditions hold in the BTF type of
11911 	 * arguments, we resolve it to a known kfunc_ptr_arg_type.
11912 	 */
11913 	if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
11914 		return KF_ARG_PTR_TO_CTX;
11915 
11916 	if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && register_is_null(reg))
11917 		return KF_ARG_PTR_TO_NULL;
11918 
11919 	if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
11920 		return KF_ARG_PTR_TO_ALLOC_BTF_ID;
11921 
11922 	if (is_kfunc_arg_refcounted_kptr(meta->btf, &args[argno]))
11923 		return KF_ARG_PTR_TO_REFCOUNTED_KPTR;
11924 
11925 	if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
11926 		return KF_ARG_PTR_TO_DYNPTR;
11927 
11928 	if (is_kfunc_arg_iter(meta, argno, &args[argno]))
11929 		return KF_ARG_PTR_TO_ITER;
11930 
11931 	if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
11932 		return KF_ARG_PTR_TO_LIST_HEAD;
11933 
11934 	if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
11935 		return KF_ARG_PTR_TO_LIST_NODE;
11936 
11937 	if (is_kfunc_arg_rbtree_root(meta->btf, &args[argno]))
11938 		return KF_ARG_PTR_TO_RB_ROOT;
11939 
11940 	if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno]))
11941 		return KF_ARG_PTR_TO_RB_NODE;
11942 
11943 	if (is_kfunc_arg_const_str(meta->btf, &args[argno]))
11944 		return KF_ARG_PTR_TO_CONST_STR;
11945 
11946 	if (is_kfunc_arg_map(meta->btf, &args[argno]))
11947 		return KF_ARG_PTR_TO_MAP;
11948 
11949 	if (is_kfunc_arg_wq(meta->btf, &args[argno]))
11950 		return KF_ARG_PTR_TO_WORKQUEUE;
11951 
11952 	if (is_kfunc_arg_irq_flag(meta->btf, &args[argno]))
11953 		return KF_ARG_PTR_TO_IRQ_FLAG;
11954 
11955 	if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
11956 		if (!btf_type_is_struct(ref_t)) {
11957 			verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
11958 				meta->func_name, argno, btf_type_str(ref_t), ref_tname);
11959 			return -EINVAL;
11960 		}
11961 		return KF_ARG_PTR_TO_BTF_ID;
11962 	}
11963 
11964 	if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
11965 		return KF_ARG_PTR_TO_CALLBACK;
11966 
11967 	if (argno + 1 < nargs &&
11968 	    (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]) ||
11969 	     is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1])))
11970 		arg_mem_size = true;
11971 
11972 	/* This is the catch all argument type of register types supported by
11973 	 * check_helper_mem_access. However, we only allow when argument type is
11974 	 * pointer to scalar, or struct composed (recursively) of scalars. When
11975 	 * arg_mem_size is true, the pointer can be void *.
11976 	 */
11977 	if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
11978 	    (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
11979 		verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
11980 			argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
11981 		return -EINVAL;
11982 	}
11983 	return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
11984 }
11985 
11986 static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
11987 					struct bpf_reg_state *reg,
11988 					const struct btf_type *ref_t,
11989 					const char *ref_tname, u32 ref_id,
11990 					struct bpf_kfunc_call_arg_meta *meta,
11991 					int argno)
11992 {
11993 	const struct btf_type *reg_ref_t;
11994 	bool strict_type_match = false;
11995 	const struct btf *reg_btf;
11996 	const char *reg_ref_tname;
11997 	bool taking_projection;
11998 	bool struct_same;
11999 	u32 reg_ref_id;
12000 
12001 	if (base_type(reg->type) == PTR_TO_BTF_ID) {
12002 		reg_btf = reg->btf;
12003 		reg_ref_id = reg->btf_id;
12004 	} else {
12005 		reg_btf = btf_vmlinux;
12006 		reg_ref_id = *reg2btf_ids[base_type(reg->type)];
12007 	}
12008 
12009 	/* Enforce strict type matching for calls to kfuncs that are acquiring
12010 	 * or releasing a reference, or are no-cast aliases. We do _not_
12011 	 * enforce strict matching for plain KF_TRUSTED_ARGS kfuncs by default,
12012 	 * as we want to enable BPF programs to pass types that are bitwise
12013 	 * equivalent without forcing them to explicitly cast with something
12014 	 * like bpf_cast_to_kern_ctx().
12015 	 *
12016 	 * For example, say we had a type like the following:
12017 	 *
12018 	 * struct bpf_cpumask {
12019 	 *	cpumask_t cpumask;
12020 	 *	refcount_t usage;
12021 	 * };
12022 	 *
12023 	 * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
12024 	 * to a struct cpumask, so it would be safe to pass a struct
12025 	 * bpf_cpumask * to a kfunc expecting a struct cpumask *.
12026 	 *
12027 	 * The philosophy here is similar to how we allow scalars of different
12028 	 * types to be passed to kfuncs as long as the size is the same. The
12029 	 * only difference here is that we're simply allowing
12030 	 * btf_struct_ids_match() to walk the struct at the 0th offset, and
12031 	 * resolve types.
12032 	 */
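	/* A minimal BPF-side sketch of what this relaxed matching permits
	 * (illustrative only; it uses the existing bpf_cpumask kfuncs):
	 *
	 *   struct bpf_cpumask *mask = bpf_cpumask_create();
	 *
	 *   if (mask) {
	 *           // struct bpf_cpumask embeds cpumask_t at offset 0, so it is
	 *           // accepted for a parameter declared 'const struct cpumask *'
	 *           bpf_cpumask_test_cpu(0, (const struct cpumask *)mask);
	 *           bpf_cpumask_release(mask);
	 *   }
	 */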
12033 	if ((is_kfunc_release(meta) && reg->ref_obj_id) ||
12034 	    btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
12035 		strict_type_match = true;
12036 
12037 	WARN_ON_ONCE(is_kfunc_release(meta) &&
12038 		     (reg->off || !tnum_is_const(reg->var_off) ||
12039 		      reg->var_off.value));
12040 
12041 	reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
12042 	reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
12043 	struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match);
12044 	/* If a kfunc accepts a projection type (i.e. __sk_buff), it cannot
12045 	 * actually use it -- it must cast to the underlying type. So we allow
12046 	 * the caller to pass in the underlying type.
12047 	 */
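	/* For example (sketch): a kfunc declared as taking 'struct __sk_buff *'
	 * may be passed a register whose BTF type is the underlying
	 * 'struct sk_buff', since __sk_buff is only a projection of it.
	 */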
12048 	taking_projection = btf_is_projection_of(ref_tname, reg_ref_tname);
12049 	if (!taking_projection && !struct_same) {
12050 		verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
12051 			meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1,
12052 			btf_type_str(reg_ref_t), reg_ref_tname);
12053 		return -EINVAL;
12054 	}
12055 	return 0;
12056 }
12057 
12058 static int process_irq_flag(struct bpf_verifier_env *env, int regno,
12059 			     struct bpf_kfunc_call_arg_meta *meta)
12060 {
12061 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
12062 	bool irq_save;
12063 	int err;
12064 
12065 	if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_save]) {
12066 		irq_save = true;
12067 	} else if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_restore]) {
12068 		irq_save = false;
12069 	} else {
12070 		verbose(env, "verifier internal error: unknown irq flags kfunc\n");
12071 		return -EFAULT;
12072 	}
12073 
12074 	if (irq_save) {
12075 		if (!is_irq_flag_reg_valid_uninit(env, reg)) {
12076 			verbose(env, "expected uninitialized irq flag as arg#%d\n", regno - 1);
12077 			return -EINVAL;
12078 		}
12079 
12080 		err = check_mem_access(env, env->insn_idx, regno, 0, BPF_DW, BPF_WRITE, -1, false, false);
12081 		if (err)
12082 			return err;
12083 
12084 		err = mark_stack_slot_irq_flag(env, meta, reg, env->insn_idx);
12085 		if (err)
12086 			return err;
12087 	} else {
12088 		err = is_irq_flag_reg_valid_init(env, reg);
12089 		if (err) {
12090 			verbose(env, "expected an initialized irq flag as arg#%d\n", regno - 1);
12091 			return err;
12092 		}
12093 
12094 		err = mark_irq_flag_read(env, reg);
12095 		if (err)
12096 			return err;
12097 
12098 		err = unmark_stack_slot_irq_flag(env, reg);
12099 		if (err)
12100 			return err;
12101 	}
12102 	return 0;
12103 }
12104 
12105 
12106 static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
12107 {
12108 	struct btf_record *rec = reg_btf_record(reg);
12109 
12110 	if (!env->cur_state->active_locks) {
12111 		verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n");
12112 		return -EFAULT;
12113 	}
12114 
12115 	if (type_flag(reg->type) & NON_OWN_REF) {
12116 		verbose(env, "verifier internal error: NON_OWN_REF already set\n");
12117 		return -EFAULT;
12118 	}
12119 
12120 	reg->type |= NON_OWN_REF;
12121 	if (rec->refcount_off >= 0)
12122 		reg->type |= MEM_RCU;
12123 
12124 	return 0;
12125 }
12126 
12127 static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id)
12128 {
12129 	struct bpf_verifier_state *state = env->cur_state;
12130 	struct bpf_func_state *unused;
12131 	struct bpf_reg_state *reg;
12132 	int i;
12133 
12134 	if (!ref_obj_id) {
12135 		verbose(env, "verifier internal error: ref_obj_id is zero for "
12136 			     "owning -> non-owning conversion\n");
12137 		return -EFAULT;
12138 	}
12139 
12140 	for (i = 0; i < state->acquired_refs; i++) {
12141 		if (state->refs[i].id != ref_obj_id)
12142 			continue;
12143 
12144 		/* Clear ref_obj_id here so release_reference doesn't clobber
12145 		 * the whole reg
12146 		 */
12147 		bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
12148 			if (reg->ref_obj_id == ref_obj_id) {
12149 				reg->ref_obj_id = 0;
12150 				ref_set_non_owning(env, reg);
12151 			}
12152 		}));
12153 		return 0;
12154 	}
12155 
12156 	verbose(env, "verifier internal error: ref state missing for ref_obj_id\n");
12157 	return -EFAULT;
12158 }
12159 
12160 /* Implementation details:
12161  *
12162  * Each register points to some region of memory, which we define as an
12163  * allocation. Each allocation may embed a bpf_spin_lock which protects any
12164  * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same
12165  * allocation. The lock and the data it protects are colocated in the same
12166  * memory region.
12167  *
12168  * Hence, every time a register holds a pointer value pointing to such
12169  * allocation, the verifier preserves a unique reg->id for it.
12170  *
12171  * The verifier remembers the lock 'ptr' and the lock 'id' whenever
12172  * bpf_spin_lock is called.
12173  *
12174  * To enable this, lock state in the verifier captures two values:
12175  *	active_lock.ptr = Register's type specific pointer
12176  *	active_lock.id  = A unique ID for each register pointer value
12177  *
12178  * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
12179  * supported register types.
12180  *
12181  * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
12182  * allocated objects is the reg->btf pointer.
12183  *
12184  * The active_lock.id is non-unique for maps supporting direct_value_addr, as we
12185  * can establish the provenance of the map value statically for each distinct
12186  * lookup into such maps. They always contain a single map value, hence assigning
12187  * a unique ID to each pseudo load would pessimize the algorithm and reject valid programs.
12188  *
12189  * So, global variables use array maps with max_entries = 1, hence their
12190  * active_lock.ptr becomes the map_ptr and id = 0 (since they all point
12191  * into the same map value, as max_entries is 1, as described above).
12192  *
12193  * In case of inner map lookups, the inner map pointer has same map_ptr as the
12194  * outer map pointer (in verifier context), but each lookup into an inner map
12195  * assigns a fresh reg->id to the lookup, so while lookups into distinct inner
12196  * maps from the same outer map share the same map_ptr as active_lock.ptr, they
12197  * will get different reg->id assigned to each lookup, hence different
12198  * active_lock.id.
12199  *
12200  * In case of allocated objects, active_lock.ptr is the reg->btf, and the
12201  * reg->id is a unique ID preserved after the NULL pointer check on the pointer
12202  * returned from bpf_obj_new. Each allocation receives a new reg->id.
12203  */
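/* Illustrative sketch of the pattern the above tracking enables (BPF program
 * side; 'struct elem'/'struct foo' are made up and the __contains annotation
 * follows the selftests' bpf_experimental.h convention):
 *
 *	struct elem {
 *		struct bpf_spin_lock lock;
 *		struct bpf_list_head head __contains(foo, node);
 *	};
 *
 *	val = bpf_map_lookup_elem(&array_map, &key);	// reg gets an id
 *	if (!val)
 *		return 0;
 *	bpf_spin_lock(&val->lock);			// active_lock.{ptr,id} recorded
 *	bpf_list_push_front(&val->head, &f->node);	// must be the same allocation
 *	bpf_spin_unlock(&val->lock);
 *
 * check_reg_allocation_locked() below rejects the push if the held lock's
 * (ptr, id) pair does not belong to the allocation that 'head' lives in.
 */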
12204 static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
12205 {
12206 	struct bpf_reference_state *s;
12207 	void *ptr;
12208 	u32 id;
12209 
12210 	switch ((int)reg->type) {
12211 	case PTR_TO_MAP_VALUE:
12212 		ptr = reg->map_ptr;
12213 		break;
12214 	case PTR_TO_BTF_ID | MEM_ALLOC:
12215 		ptr = reg->btf;
12216 		break;
12217 	default:
12218 		verbose(env, "verifier internal error: unknown reg type for lock check\n");
12219 		return -EFAULT;
12220 	}
12221 	id = reg->id;
12222 
12223 	if (!env->cur_state->active_locks)
12224 		return -EINVAL;
12225 	s = find_lock_state(env->cur_state, REF_TYPE_LOCK, id, ptr);
12226 	if (!s) {
12227 		verbose(env, "held lock and object are not in the same allocation\n");
12228 		return -EINVAL;
12229 	}
12230 	return 0;
12231 }
12232 
12233 static bool is_bpf_list_api_kfunc(u32 btf_id)
12234 {
12235 	return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
12236 	       btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
12237 	       btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
12238 	       btf_id == special_kfunc_list[KF_bpf_list_pop_back];
12239 }
12240 
12241 static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
12242 {
12243 	return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
12244 	       btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
12245 	       btf_id == special_kfunc_list[KF_bpf_rbtree_first];
12246 }
12247 
12248 static bool is_bpf_iter_num_api_kfunc(u32 btf_id)
12249 {
12250 	return btf_id == special_kfunc_list[KF_bpf_iter_num_new] ||
12251 	       btf_id == special_kfunc_list[KF_bpf_iter_num_next] ||
12252 	       btf_id == special_kfunc_list[KF_bpf_iter_num_destroy];
12253 }
12254 
12255 static bool is_bpf_graph_api_kfunc(u32 btf_id)
12256 {
12257 	return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id) ||
12258 	       btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
12259 }
12260 
12261 static bool kfunc_spin_allowed(u32 btf_id)
12262 {
12263 	return is_bpf_graph_api_kfunc(btf_id) || is_bpf_iter_num_api_kfunc(btf_id);
12264 }
12265 
12266 static bool is_sync_callback_calling_kfunc(u32 btf_id)
12267 {
12268 	return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
12269 }
12270 
12271 static bool is_async_callback_calling_kfunc(u32 btf_id)
12272 {
12273 	return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl];
12274 }
12275 
12276 static bool is_bpf_throw_kfunc(struct bpf_insn *insn)
12277 {
12278 	return bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
12279 	       insn->imm == special_kfunc_list[KF_bpf_throw];
12280 }
12281 
12282 static bool is_bpf_wq_set_callback_impl_kfunc(u32 btf_id)
12283 {
12284 	return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl];
12285 }
12286 
12287 static bool is_callback_calling_kfunc(u32 btf_id)
12288 {
12289 	return is_sync_callback_calling_kfunc(btf_id) ||
12290 	       is_async_callback_calling_kfunc(btf_id);
12291 }
12292 
12293 static bool is_rbtree_lock_required_kfunc(u32 btf_id)
12294 {
12295 	return is_bpf_rbtree_api_kfunc(btf_id);
12296 }
12297 
12298 static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
12299 					  enum btf_field_type head_field_type,
12300 					  u32 kfunc_btf_id)
12301 {
12302 	bool ret;
12303 
12304 	switch (head_field_type) {
12305 	case BPF_LIST_HEAD:
12306 		ret = is_bpf_list_api_kfunc(kfunc_btf_id);
12307 		break;
12308 	case BPF_RB_ROOT:
12309 		ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id);
12310 		break;
12311 	default:
12312 		verbose(env, "verifier internal error: unexpected graph root argument type %s\n",
12313 			btf_field_type_name(head_field_type));
12314 		return false;
12315 	}
12316 
12317 	if (!ret)
12318 		verbose(env, "verifier internal error: %s head arg for unknown kfunc\n",
12319 			btf_field_type_name(head_field_type));
12320 	return ret;
12321 }
12322 
12323 static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
12324 					  enum btf_field_type node_field_type,
12325 					  u32 kfunc_btf_id)
12326 {
12327 	bool ret;
12328 
12329 	switch (node_field_type) {
12330 	case BPF_LIST_NODE:
12331 		ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
12332 		       kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl]);
12333 		break;
12334 	case BPF_RB_NODE:
12335 		ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
12336 		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]);
12337 		break;
12338 	default:
12339 		verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
12340 			btf_field_type_name(node_field_type));
12341 		return false;
12342 	}
12343 
12344 	if (!ret)
12345 		verbose(env, "verifier internal error: %s node arg for unknown kfunc\n",
12346 			btf_field_type_name(node_field_type));
12347 	return ret;
12348 }
12349 
12350 static int
12351 __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
12352 				   struct bpf_reg_state *reg, u32 regno,
12353 				   struct bpf_kfunc_call_arg_meta *meta,
12354 				   enum btf_field_type head_field_type,
12355 				   struct btf_field **head_field)
12356 {
12357 	const char *head_type_name;
12358 	struct btf_field *field;
12359 	struct btf_record *rec;
12360 	u32 head_off;
12361 
12362 	if (meta->btf != btf_vmlinux) {
12363 		verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
12364 		return -EFAULT;
12365 	}
12366 
12367 	if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id))
12368 		return -EFAULT;
12369 
12370 	head_type_name = btf_field_type_name(head_field_type);
12371 	if (!tnum_is_const(reg->var_off)) {
12372 		verbose(env,
12373 			"R%d doesn't have constant offset. %s has to be at the constant offset\n",
12374 			regno, head_type_name);
12375 		return -EINVAL;
12376 	}
12377 
12378 	rec = reg_btf_record(reg);
12379 	head_off = reg->off + reg->var_off.value;
12380 	field = btf_record_find(rec, head_off, head_field_type);
12381 	if (!field) {
12382 		verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
12383 		return -EINVAL;
12384 	}
12385 
12386 	/* All functions require the bpf_list_head/bpf_rb_root to be protected by a bpf_spin_lock */
12387 	if (check_reg_allocation_locked(env, reg)) {
12388 		verbose(env, "bpf_spin_lock at off=%d must be held for %s\n",
12389 			rec->spin_lock_off, head_type_name);
12390 		return -EINVAL;
12391 	}
12392 
12393 	if (*head_field) {
12394 		verbose(env, "verifier internal error: repeating %s arg\n", head_type_name);
12395 		return -EFAULT;
12396 	}
12397 	*head_field = field;
12398 	return 0;
12399 }
12400 
12401 static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
12402 					   struct bpf_reg_state *reg, u32 regno,
12403 					   struct bpf_kfunc_call_arg_meta *meta)
12404 {
12405 	return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_LIST_HEAD,
12406 							  &meta->arg_list_head.field);
12407 }
12408 
12409 static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
12410 					     struct bpf_reg_state *reg, u32 regno,
12411 					     struct bpf_kfunc_call_arg_meta *meta)
12412 {
12413 	return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_RB_ROOT,
12414 							  &meta->arg_rbtree_root.field);
12415 }
12416 
12417 static int
12418 __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
12419 				   struct bpf_reg_state *reg, u32 regno,
12420 				   struct bpf_kfunc_call_arg_meta *meta,
12421 				   enum btf_field_type head_field_type,
12422 				   enum btf_field_type node_field_type,
12423 				   struct btf_field **node_field)
12424 {
12425 	const char *node_type_name;
12426 	const struct btf_type *et, *t;
12427 	struct btf_field *field;
12428 	u32 node_off;
12429 
12430 	if (meta->btf != btf_vmlinux) {
12431 		verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
12432 		return -EFAULT;
12433 	}
12434 
12435 	if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id))
12436 		return -EFAULT;
12437 
12438 	node_type_name = btf_field_type_name(node_field_type);
12439 	if (!tnum_is_const(reg->var_off)) {
12440 		verbose(env,
12441 			"R%d doesn't have constant offset. %s has to be at the constant offset\n",
12442 			regno, node_type_name);
12443 		return -EINVAL;
12444 	}
12445 
12446 	node_off = reg->off + reg->var_off.value;
12447 	field = reg_find_field_offset(reg, node_off, node_field_type);
12448 	if (!field) {
12449 		verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
12450 		return -EINVAL;
12451 	}
12452 
12453 	field = *node_field;
12454 
12455 	et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
12456 	t = btf_type_by_id(reg->btf, reg->btf_id);
12457 	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
12458 				  field->graph_root.value_btf_id, true)) {
12459 		verbose(env, "operation on %s expects arg#1 %s at offset=%d "
12460 			"in struct %s, but arg is at offset=%d in struct %s\n",
12461 			btf_field_type_name(head_field_type),
12462 			btf_field_type_name(node_field_type),
12463 			field->graph_root.node_offset,
12464 			btf_name_by_offset(field->graph_root.btf, et->name_off),
12465 			node_off, btf_name_by_offset(reg->btf, t->name_off));
12466 		return -EINVAL;
12467 	}
12468 	meta->arg_btf = reg->btf;
12469 	meta->arg_btf_id = reg->btf_id;
12470 
12471 	if (node_off != field->graph_root.node_offset) {
12472 		verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
12473 			node_off, btf_field_type_name(node_field_type),
12474 			field->graph_root.node_offset,
12475 			btf_name_by_offset(field->graph_root.btf, et->name_off));
12476 		return -EINVAL;
12477 	}
12478 
12479 	return 0;
12480 }
12481 
12482 static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
12483 					   struct bpf_reg_state *reg, u32 regno,
12484 					   struct bpf_kfunc_call_arg_meta *meta)
12485 {
12486 	return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
12487 						  BPF_LIST_HEAD, BPF_LIST_NODE,
12488 						  &meta->arg_list_head.field);
12489 }
12490 
12491 static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
12492 					     struct bpf_reg_state *reg, u32 regno,
12493 					     struct bpf_kfunc_call_arg_meta *meta)
12494 {
12495 	return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
12496 						  BPF_RB_ROOT, BPF_RB_NODE,
12497 						  &meta->arg_rbtree_root.field);
12498 }
12499 
12500 /*
12501  * css_task iter allowlist is needed to avoid deadlocking on css_set_lock.
12502  * LSM hooks and iters (both sleepable and non-sleepable) are safe.
12503  * Any sleepable progs are also safe since bpf_check_attach_target() enforces
12504  * that they can only be attached to certain specific hook points.
12505  */
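/* e.g. (sketch, assuming the css_task open-coded iterator kfuncs from
 * kernel/bpf/task_iter.c): an allowed BPF_TRACE_ITER program may walk the
 * tasks of a css like so:
 *
 *	struct bpf_iter_css_task it;
 *	struct task_struct *task;
 *
 *	bpf_iter_css_task_new(&it, css, CSS_TASK_ITER_PROCS);
 *	while ((task = bpf_iter_css_task_next(&it)))
 *		...;
 *	bpf_iter_css_task_destroy(&it);
 *
 * The same sequence in, say, a non-sleepable fentry program is rejected by
 * the allowlist check below.
 */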
12506 static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env)
12507 {
12508 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
12509 
12510 	switch (prog_type) {
12511 	case BPF_PROG_TYPE_LSM:
12512 		return true;
12513 	case BPF_PROG_TYPE_TRACING:
12514 		if (env->prog->expected_attach_type == BPF_TRACE_ITER)
12515 			return true;
12516 		fallthrough;
12517 	default:
12518 		return in_sleepable(env);
12519 	}
12520 }
12521 
12522 static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
12523 			    int insn_idx)
12524 {
12525 	const char *func_name = meta->func_name, *ref_tname;
12526 	const struct btf *btf = meta->btf;
12527 	const struct btf_param *args;
12528 	struct btf_record *rec;
12529 	u32 i, nargs;
12530 	int ret;
12531 
12532 	args = (const struct btf_param *)(meta->func_proto + 1);
12533 	nargs = btf_type_vlen(meta->func_proto);
12534 	if (nargs > MAX_BPF_FUNC_REG_ARGS) {
12535 		verbose(env, "Function %s has %d > %d args\n", func_name, nargs,
12536 			MAX_BPF_FUNC_REG_ARGS);
12537 		return -EINVAL;
12538 	}
12539 
12540 	/* Check that BTF function arguments match actual types that the
12541 	 * verifier sees.
12542 	 */
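	/* e.g. (sketch): for a hypothetical kfunc 'void kf(int cnt, struct foo *p)',
	 * R1 must hold a SCALAR_VALUE and R2 must hold a pointer compatible with
	 * 'struct foo' as resolved by get_kfunc_ptr_arg_type() below.
	 */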
12543 	for (i = 0; i < nargs; i++) {
12544 		struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[i + 1];
12545 		const struct btf_type *t, *ref_t, *resolve_ret;
12546 		enum bpf_arg_type arg_type = ARG_DONTCARE;
12547 		u32 regno = i + 1, ref_id, type_size;
12548 		bool is_ret_buf_sz = false;
12549 		int kf_arg_type;
12550 
12551 		t = btf_type_skip_modifiers(btf, args[i].type, NULL);
12552 
12553 		if (is_kfunc_arg_ignore(btf, &args[i]))
12554 			continue;
12555 
12556 		if (btf_type_is_scalar(t)) {
12557 			if (reg->type != SCALAR_VALUE) {
12558 				verbose(env, "R%d is not a scalar\n", regno);
12559 				return -EINVAL;
12560 			}
12561 
12562 			if (is_kfunc_arg_constant(meta->btf, &args[i])) {
12563 				if (meta->arg_constant.found) {
12564 					verbose(env, "verifier internal error: only one constant argument permitted\n");
12565 					return -EFAULT;
12566 				}
12567 				if (!tnum_is_const(reg->var_off)) {
12568 					verbose(env, "R%d must be a known constant\n", regno);
12569 					return -EINVAL;
12570 				}
12571 				ret = mark_chain_precision(env, regno);
12572 				if (ret < 0)
12573 					return ret;
12574 				meta->arg_constant.found = true;
12575 				meta->arg_constant.value = reg->var_off.value;
12576 			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) {
12577 				meta->r0_rdonly = true;
12578 				is_ret_buf_sz = true;
12579 			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) {
12580 				is_ret_buf_sz = true;
12581 			}
12582 
12583 			if (is_ret_buf_sz) {
12584 				if (meta->r0_size) {
12585 					verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
12586 					return -EINVAL;
12587 				}
12588 
12589 				if (!tnum_is_const(reg->var_off)) {
12590 					verbose(env, "R%d is not a const\n", regno);
12591 					return -EINVAL;
12592 				}
12593 
12594 				meta->r0_size = reg->var_off.value;
12595 				ret = mark_chain_precision(env, regno);
12596 				if (ret)
12597 					return ret;
12598 			}
12599 			continue;
12600 		}
12601 
12602 		if (!btf_type_is_ptr(t)) {
12603 			verbose(env, "Unrecognized arg#%d type %s\n", i, btf_type_str(t));
12604 			return -EINVAL;
12605 		}
12606 
12607 		if ((is_kfunc_trusted_args(meta) || is_kfunc_rcu(meta)) &&
12608 		    (register_is_null(reg) || type_may_be_null(reg->type)) &&
12609 			!is_kfunc_arg_nullable(meta->btf, &args[i])) {
12610 			verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
12611 			return -EACCES;
12612 		}
12613 
12614 		if (reg->ref_obj_id) {
12615 			if (is_kfunc_release(meta) && meta->ref_obj_id) {
12616 				verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
12617 					regno, reg->ref_obj_id,
12618 					meta->ref_obj_id);
12619 				return -EFAULT;
12620 			}
12621 			meta->ref_obj_id = reg->ref_obj_id;
12622 			if (is_kfunc_release(meta))
12623 				meta->release_regno = regno;
12624 		}
12625 
12626 		ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
12627 		ref_tname = btf_name_by_offset(btf, ref_t->name_off);
12628 
12629 		kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs);
12630 		if (kf_arg_type < 0)
12631 			return kf_arg_type;
12632 
12633 		switch (kf_arg_type) {
12634 		case KF_ARG_PTR_TO_NULL:
12635 			continue;
12636 		case KF_ARG_PTR_TO_MAP:
12637 			if (!reg->map_ptr) {
12638 				verbose(env, "pointer in R%d isn't map pointer\n", regno);
12639 				return -EINVAL;
12640 			}
12641 			if (meta->map.ptr && reg->map_ptr->record->wq_off >= 0) {
12642 				/* Use map_uid (which is unique id of inner map) to reject:
12643 				 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
12644 				 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
12645 				 * if (inner_map1 && inner_map2) {
12646 				 *     wq = bpf_map_lookup_elem(inner_map1);
12647 				 *     if (wq)
12648 				 *         // mismatch would have been allowed
12649 				 *         bpf_wq_init(wq, inner_map2);
12650 				 * }
12651 				 *
12652 				 * Comparing map_ptr is enough to distinguish normal and outer maps.
12653 				 */
12654 				if (meta->map.ptr != reg->map_ptr ||
12655 				    meta->map.uid != reg->map_uid) {
12656 					verbose(env,
12657 						"workqueue pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
12658 						meta->map.uid, reg->map_uid);
12659 					return -EINVAL;
12660 				}
12661 			}
12662 			meta->map.ptr = reg->map_ptr;
12663 			meta->map.uid = reg->map_uid;
12664 			fallthrough;
12665 		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
12666 		case KF_ARG_PTR_TO_BTF_ID:
12667 			if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta))
12668 				break;
12669 
12670 			if (!is_trusted_reg(reg)) {
12671 				if (!is_kfunc_rcu(meta)) {
12672 					verbose(env, "R%d must be referenced or trusted\n", regno);
12673 					return -EINVAL;
12674 				}
12675 				if (!is_rcu_reg(reg)) {
12676 					verbose(env, "R%d must be a rcu pointer\n", regno);
12677 					return -EINVAL;
12678 				}
12679 			}
12680 			fallthrough;
12681 		case KF_ARG_PTR_TO_CTX:
12682 		case KF_ARG_PTR_TO_DYNPTR:
12683 		case KF_ARG_PTR_TO_ITER:
12684 		case KF_ARG_PTR_TO_LIST_HEAD:
12685 		case KF_ARG_PTR_TO_LIST_NODE:
12686 		case KF_ARG_PTR_TO_RB_ROOT:
12687 		case KF_ARG_PTR_TO_RB_NODE:
12688 		case KF_ARG_PTR_TO_MEM:
12689 		case KF_ARG_PTR_TO_MEM_SIZE:
12690 		case KF_ARG_PTR_TO_CALLBACK:
12691 		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
12692 		case KF_ARG_PTR_TO_CONST_STR:
12693 		case KF_ARG_PTR_TO_WORKQUEUE:
12694 		case KF_ARG_PTR_TO_IRQ_FLAG:
12695 			break;
12696 		default:
12697 			WARN_ON_ONCE(1);
12698 			return -EFAULT;
12699 		}
12700 
12701 		if (is_kfunc_release(meta) && reg->ref_obj_id)
12702 			arg_type |= OBJ_RELEASE;
12703 		ret = check_func_arg_reg_off(env, reg, regno, arg_type);
12704 		if (ret < 0)
12705 			return ret;
12706 
12707 		switch (kf_arg_type) {
12708 		case KF_ARG_PTR_TO_CTX:
12709 			if (reg->type != PTR_TO_CTX) {
12710 				verbose(env, "arg#%d expected pointer to ctx, but got %s\n",
12711 					i, reg_type_str(env, reg->type));
12712 				return -EINVAL;
12713 			}
12714 
12715 			if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
12716 				ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog));
12717 				if (ret < 0)
12718 					return -EINVAL;
12719 				meta->ret_btf_id  = ret;
12720 			}
12721 			break;
12722 		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
12723 			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
12724 				if (meta->func_id != special_kfunc_list[KF_bpf_obj_drop_impl]) {
12725 					verbose(env, "arg#%d expected for bpf_obj_drop_impl()\n", i);
12726 					return -EINVAL;
12727 				}
12728 			} else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) {
12729 				if (meta->func_id != special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
12730 					verbose(env, "arg#%d expected for bpf_percpu_obj_drop_impl()\n", i);
12731 					return -EINVAL;
12732 				}
12733 			} else {
12734 				verbose(env, "arg#%d expected pointer to allocated object\n", i);
12735 				return -EINVAL;
12736 			}
12737 			if (!reg->ref_obj_id) {
12738 				verbose(env, "allocated object must be referenced\n");
12739 				return -EINVAL;
12740 			}
12741 			if (meta->btf == btf_vmlinux) {
12742 				meta->arg_btf = reg->btf;
12743 				meta->arg_btf_id = reg->btf_id;
12744 			}
12745 			break;
12746 		case KF_ARG_PTR_TO_DYNPTR:
12747 		{
12748 			enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
12749 			int clone_ref_obj_id = 0;
12750 
12751 			if (reg->type == CONST_PTR_TO_DYNPTR)
12752 				dynptr_arg_type |= MEM_RDONLY;
12753 
12754 			if (is_kfunc_arg_uninit(btf, &args[i]))
12755 				dynptr_arg_type |= MEM_UNINIT;
12756 
12757 			if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
12758 				dynptr_arg_type |= DYNPTR_TYPE_SKB;
12759 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) {
12760 				dynptr_arg_type |= DYNPTR_TYPE_XDP;
12761 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] &&
12762 				   (dynptr_arg_type & MEM_UNINIT)) {
12763 				enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type;
12764 
12765 				if (parent_type == BPF_DYNPTR_TYPE_INVALID) {
12766 					verbose(env, "verifier internal error: no dynptr type for parent of clone\n");
12767 					return -EFAULT;
12768 				}
12769 
12770 				dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(parent_type);
12771 				clone_ref_obj_id = meta->initialized_dynptr.ref_obj_id;
12772 				if (dynptr_type_refcounted(parent_type) && !clone_ref_obj_id) {
12773 					verbose(env, "verifier internal error: missing ref obj id for parent of clone\n");
12774 					return -EFAULT;
12775 				}
12776 			}
12777 
12778 			ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type, clone_ref_obj_id);
12779 			if (ret < 0)
12780 				return ret;
12781 
12782 			if (!(dynptr_arg_type & MEM_UNINIT)) {
12783 				int id = dynptr_id(env, reg);
12784 
12785 				if (id < 0) {
12786 					verbose(env, "verifier internal error: failed to obtain dynptr id\n");
12787 					return id;
12788 				}
12789 				meta->initialized_dynptr.id = id;
12790 				meta->initialized_dynptr.type = dynptr_get_type(env, reg);
12791 				meta->initialized_dynptr.ref_obj_id = dynptr_ref_obj_id(env, reg);
12792 			}
12793 
12794 			break;
12795 		}
12796 		case KF_ARG_PTR_TO_ITER:
12797 			if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) {
12798 				if (!check_css_task_iter_allowlist(env)) {
12799 					verbose(env, "css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs\n");
12800 					return -EINVAL;
12801 				}
12802 			}
12803 			ret = process_iter_arg(env, regno, insn_idx, meta);
12804 			if (ret < 0)
12805 				return ret;
12806 			break;
12807 		case KF_ARG_PTR_TO_LIST_HEAD:
12808 			if (reg->type != PTR_TO_MAP_VALUE &&
12809 			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12810 				verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
12811 				return -EINVAL;
12812 			}
12813 			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
12814 				verbose(env, "allocated object must be referenced\n");
12815 				return -EINVAL;
12816 			}
12817 			ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta);
12818 			if (ret < 0)
12819 				return ret;
12820 			break;
12821 		case KF_ARG_PTR_TO_RB_ROOT:
12822 			if (reg->type != PTR_TO_MAP_VALUE &&
12823 			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12824 				verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
12825 				return -EINVAL;
12826 			}
12827 			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
12828 				verbose(env, "allocated object must be referenced\n");
12829 				return -EINVAL;
12830 			}
12831 			ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta);
12832 			if (ret < 0)
12833 				return ret;
12834 			break;
12835 		case KF_ARG_PTR_TO_LIST_NODE:
12836 			if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12837 				verbose(env, "arg#%d expected pointer to allocated object\n", i);
12838 				return -EINVAL;
12839 			}
12840 			if (!reg->ref_obj_id) {
12841 				verbose(env, "allocated object must be referenced\n");
12842 				return -EINVAL;
12843 			}
12844 			ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta);
12845 			if (ret < 0)
12846 				return ret;
12847 			break;
12848 		case KF_ARG_PTR_TO_RB_NODE:
12849 			if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
12850 				if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
12851 					verbose(env, "rbtree_remove node input must be non-owning ref\n");
12852 					return -EINVAL;
12853 				}
12854 				if (in_rbtree_lock_required_cb(env)) {
12855 					verbose(env, "rbtree_remove not allowed in rbtree cb\n");
12856 					return -EINVAL;
12857 				}
12858 			} else {
12859 				if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12860 					verbose(env, "arg#%d expected pointer to allocated object\n", i);
12861 					return -EINVAL;
12862 				}
12863 				if (!reg->ref_obj_id) {
12864 					verbose(env, "allocated object must be referenced\n");
12865 					return -EINVAL;
12866 				}
12867 			}
12868 
12869 			ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
12870 			if (ret < 0)
12871 				return ret;
12872 			break;
12873 		case KF_ARG_PTR_TO_MAP:
12874 			/* If argument has '__map' suffix expect 'struct bpf_map *' */
12875 			ref_id = *reg2btf_ids[CONST_PTR_TO_MAP];
12876 			ref_t = btf_type_by_id(btf_vmlinux, ref_id);
12877 			ref_tname = btf_name_by_offset(btf, ref_t->name_off);
12878 			fallthrough;
12879 		case KF_ARG_PTR_TO_BTF_ID:
12880 			/* Only base_type is checked, further checks are done here */
12881 			if ((base_type(reg->type) != PTR_TO_BTF_ID ||
12882 			     (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
12883 			    !reg2btf_ids[base_type(reg->type)]) {
12884 				verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type));
12885 				verbose(env, "expected %s or socket\n",
12886 					reg_type_str(env, base_type(reg->type) |
12887 							  (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
12888 				return -EINVAL;
12889 			}
12890 			ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i);
12891 			if (ret < 0)
12892 				return ret;
12893 			break;
12894 		case KF_ARG_PTR_TO_MEM:
12895 			resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
12896 			if (IS_ERR(resolve_ret)) {
12897 				verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
12898 					i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret));
12899 				return -EINVAL;
12900 			}
12901 			ret = check_mem_reg(env, reg, regno, type_size);
12902 			if (ret < 0)
12903 				return ret;
12904 			break;
12905 		case KF_ARG_PTR_TO_MEM_SIZE:
12906 		{
12907 			struct bpf_reg_state *buff_reg = &regs[regno];
12908 			const struct btf_param *buff_arg = &args[i];
12909 			struct bpf_reg_state *size_reg = &regs[regno + 1];
12910 			const struct btf_param *size_arg = &args[i + 1];
12911 
12912 			if (!register_is_null(buff_reg) || !is_kfunc_arg_optional(meta->btf, buff_arg)) {
12913 				ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
12914 				if (ret < 0) {
12915 					verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
12916 					return ret;
12917 				}
12918 			}
12919 
12920 			if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
12921 				if (meta->arg_constant.found) {
12922 					verbose(env, "verifier internal error: only one constant argument permitted\n");
12923 					return -EFAULT;
12924 				}
12925 				if (!tnum_is_const(size_reg->var_off)) {
12926 					verbose(env, "R%d must be a known constant\n", regno + 1);
12927 					return -EINVAL;
12928 				}
12929 				meta->arg_constant.found = true;
12930 				meta->arg_constant.value = size_reg->var_off.value;
12931 			}
12932 
12933 			/* Skip next '__sz' or '__szk' argument */
12934 			i++;
12935 			break;
12936 		}
12937 		case KF_ARG_PTR_TO_CALLBACK:
12938 			if (reg->type != PTR_TO_FUNC) {
12939 				verbose(env, "arg%d expected pointer to func\n", i);
12940 				return -EINVAL;
12941 			}
12942 			meta->subprogno = reg->subprogno;
12943 			break;
12944 		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
12945 			if (!type_is_ptr_alloc_obj(reg->type)) {
12946 				verbose(env, "arg#%d is neither owning or non-owning ref\n", i);
12947 				return -EINVAL;
12948 			}
12949 			if (!type_is_non_owning_ref(reg->type))
12950 				meta->arg_owning_ref = true;
12951 
12952 			rec = reg_btf_record(reg);
12953 			if (!rec) {
12954 				verbose(env, "verifier internal error: Couldn't find btf_record\n");
12955 				return -EFAULT;
12956 			}
12957 
12958 			if (rec->refcount_off < 0) {
12959 				verbose(env, "arg#%d doesn't point to a type with bpf_refcount field\n", i);
12960 				return -EINVAL;
12961 			}
12962 
12963 			meta->arg_btf = reg->btf;
12964 			meta->arg_btf_id = reg->btf_id;
12965 			break;
12966 		case KF_ARG_PTR_TO_CONST_STR:
12967 			if (reg->type != PTR_TO_MAP_VALUE) {
12968 				verbose(env, "arg#%d doesn't point to a const string\n", i);
12969 				return -EINVAL;
12970 			}
12971 			ret = check_reg_const_str(env, reg, regno);
12972 			if (ret)
12973 				return ret;
12974 			break;
12975 		case KF_ARG_PTR_TO_WORKQUEUE:
12976 			if (reg->type != PTR_TO_MAP_VALUE) {
12977 				verbose(env, "arg#%d doesn't point to a map value\n", i);
12978 				return -EINVAL;
12979 			}
12980 			ret = process_wq_func(env, regno, meta);
12981 			if (ret < 0)
12982 				return ret;
12983 			break;
12984 		case KF_ARG_PTR_TO_IRQ_FLAG:
12985 			if (reg->type != PTR_TO_STACK) {
12986 				verbose(env, "arg#%d doesn't point to an irq flag on stack\n", i);
12987 				return -EINVAL;
12988 			}
12989 			ret = process_irq_flag(env, regno, meta);
12990 			if (ret < 0)
12991 				return ret;
12992 			break;
12993 		}
12994 	}
12995 
12996 	if (is_kfunc_release(meta) && !meta->release_regno) {
12997 		verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
12998 			func_name);
12999 		return -EINVAL;
13000 	}
13001 
13002 	return 0;
13003 }
13004 
13005 static int fetch_kfunc_meta(struct bpf_verifier_env *env,
13006 			    struct bpf_insn *insn,
13007 			    struct bpf_kfunc_call_arg_meta *meta,
13008 			    const char **kfunc_name)
13009 {
13010 	const struct btf_type *func, *func_proto;
13011 	u32 func_id, *kfunc_flags;
13012 	const char *func_name;
13013 	struct btf *desc_btf;
13014 
13015 	if (kfunc_name)
13016 		*kfunc_name = NULL;
13017 
13018 	if (!insn->imm)
13019 		return -EINVAL;
13020 
13021 	desc_btf = find_kfunc_desc_btf(env, insn->off);
13022 	if (IS_ERR(desc_btf))
13023 		return PTR_ERR(desc_btf);
13024 
13025 	func_id = insn->imm;
13026 	func = btf_type_by_id(desc_btf, func_id);
13027 	func_name = btf_name_by_offset(desc_btf, func->name_off);
13028 	if (kfunc_name)
13029 		*kfunc_name = func_name;
13030 	func_proto = btf_type_by_id(desc_btf, func->type);
13031 
13032 	kfunc_flags = btf_kfunc_id_set_contains(desc_btf, func_id, env->prog);
13033 	if (!kfunc_flags) {
13034 		return -EACCES;
13035 	}
13036 
13037 	memset(meta, 0, sizeof(*meta));
13038 	meta->btf = desc_btf;
13039 	meta->func_id = func_id;
13040 	meta->kfunc_flags = *kfunc_flags;
13041 	meta->func_proto = func_proto;
13042 	meta->func_name = func_name;
13043 
13044 	return 0;
13045 }
13046 
13047 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name);
13048 
13049 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
13050 			    int *insn_idx_p)
13051 {
13052 	bool sleepable, rcu_lock, rcu_unlock, preempt_disable, preempt_enable;
13053 	u32 i, nargs, ptr_type_id, release_ref_obj_id;
13054 	struct bpf_reg_state *regs = cur_regs(env);
13055 	const char *func_name, *ptr_type_name;
13056 	const struct btf_type *t, *ptr_type;
13057 	struct bpf_kfunc_call_arg_meta meta;
13058 	struct bpf_insn_aux_data *insn_aux;
13059 	int err, insn_idx = *insn_idx_p;
13060 	const struct btf_param *args;
13061 	const struct btf_type *ret_t;
13062 	struct btf *desc_btf;
13063 
13064 	/* skip for now, but return error when we find this in fixup_kfunc_call */
13065 	if (!insn->imm)
13066 		return 0;
13067 
13068 	err = fetch_kfunc_meta(env, insn, &meta, &func_name);
13069 	if (err == -EACCES && func_name)
13070 		verbose(env, "calling kernel function %s is not allowed\n", func_name);
13071 	if (err)
13072 		return err;
13073 	desc_btf = meta.btf;
13074 	insn_aux = &env->insn_aux_data[insn_idx];
13075 
13076 	insn_aux->is_iter_next = is_iter_next_kfunc(&meta);
13077 
13078 	if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
13079 		verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
13080 		return -EACCES;
13081 	}
13082 
13083 	sleepable = is_kfunc_sleepable(&meta);
13084 	if (sleepable && !in_sleepable(env)) {
13085 		verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
13086 		return -EACCES;
13087 	}
13088 
13089 	/* Check the arguments */
13090 	err = check_kfunc_args(env, &meta, insn_idx);
13091 	if (err < 0)
13092 		return err;
13093 
13094 	if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
13095 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
13096 					 set_rbtree_add_callback_state);
13097 		if (err) {
13098 			verbose(env, "kfunc %s#%d failed callback verification\n",
13099 				func_name, meta.func_id);
13100 			return err;
13101 		}
13102 	}
13103 
13104 	if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie]) {
13105 		meta.r0_size = sizeof(u64);
13106 		meta.r0_rdonly = false;
13107 	}
13108 
13109 	if (is_bpf_wq_set_callback_impl_kfunc(meta.func_id)) {
13110 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
13111 					 set_timer_callback_state);
13112 		if (err) {
13113 			verbose(env, "kfunc %s#%d failed callback verification\n",
13114 				func_name, meta.func_id);
13115 			return err;
13116 		}
13117 	}
13118 
13119 	rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
13120 	rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
13121 
13122 	preempt_disable = is_kfunc_bpf_preempt_disable(&meta);
13123 	preempt_enable = is_kfunc_bpf_preempt_enable(&meta);
13124 
13125 	if (env->cur_state->active_rcu_lock) {
13126 		struct bpf_func_state *state;
13127 		struct bpf_reg_state *reg;
13128 		u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER);
13129 
13130 		if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
13131 			verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
13132 			return -EACCES;
13133 		}
13134 
13135 		if (rcu_lock) {
13136 			verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
13137 			return -EINVAL;
13138 		} else if (rcu_unlock) {
13139 			bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, clear_mask, ({
13140 				if (reg->type & MEM_RCU) {
13141 					reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
13142 					reg->type |= PTR_UNTRUSTED;
13143 				}
13144 			}));
13145 			env->cur_state->active_rcu_lock = false;
13146 		} else if (sleepable) {
13147 			verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
13148 			return -EACCES;
13149 		}
13150 	} else if (rcu_lock) {
13151 		env->cur_state->active_rcu_lock = true;
13152 	} else if (rcu_unlock) {
13153 		verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
13154 		return -EINVAL;
13155 	}
13156 
13157 	if (env->cur_state->active_preempt_locks) {
13158 		if (preempt_disable) {
13159 			env->cur_state->active_preempt_locks++;
13160 		} else if (preempt_enable) {
13161 			env->cur_state->active_preempt_locks--;
13162 		} else if (sleepable) {
13163 			verbose(env, "kernel func %s is sleepable within non-preemptible region\n", func_name);
13164 			return -EACCES;
13165 		}
13166 	} else if (preempt_disable) {
13167 		env->cur_state->active_preempt_locks++;
13168 	} else if (preempt_enable) {
13169 		verbose(env, "unmatched attempt to enable preemption (kernel function %s)\n", func_name);
13170 		return -EINVAL;
13171 	}
13172 
13173 	if (env->cur_state->active_irq_id && sleepable) {
13174 		verbose(env, "kernel func %s is sleepable within IRQ-disabled region\n", func_name);
13175 		return -EACCES;
13176 	}
13177 
13178 	/* In case of a release kfunc, we get the register number of the refcounted
13179 	 * PTR_TO_BTF_ID in bpf_kfunc_call_arg_meta, so do the release now.
13180 	 */
13181 	if (meta.release_regno) {
13182 		err = release_reference(env, regs[meta.release_regno].ref_obj_id);
13183 		if (err) {
13184 			verbose(env, "kfunc %s#%d reference has not been acquired before\n",
13185 				func_name, meta.func_id);
13186 			return err;
13187 		}
13188 	}
13189 
13190 	if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
13191 	    meta.func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
13192 	    meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
13193 		release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
13194 		insn_aux->insert_off = regs[BPF_REG_2].off;
13195 		insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
13196 		err = ref_convert_owning_non_owning(env, release_ref_obj_id);
13197 		if (err) {
13198 			verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
13199 				func_name, meta.func_id);
13200 			return err;
13201 		}
13202 
13203 		err = release_reference(env, release_ref_obj_id);
13204 		if (err) {
13205 			verbose(env, "kfunc %s#%d reference has not been acquired before\n",
13206 				func_name, meta.func_id);
13207 			return err;
13208 		}
13209 	}
13210 
13211 	if (meta.func_id == special_kfunc_list[KF_bpf_throw]) {
13212 		if (!bpf_jit_supports_exceptions()) {
13213 			verbose(env, "JIT does not support calling kfunc %s#%d\n",
13214 				func_name, meta.func_id);
13215 			return -ENOTSUPP;
13216 		}
13217 		env->seen_exception = true;
13218 
13219 		/* In the case of the default exception callback, the cookie value passed
13220 		 * to bpf_throw becomes the return value of the program.
13221 		 */
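		/* e.g. (sketch): with no custom exception callback registered,
		 * 'bpf_throw(1);' makes the program terminate with a return
		 * value of 1, so the cookie in R1 must satisfy the program
		 * type's allowed return codes, checked just below.
		 */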
13222 		if (!env->exception_callback_subprog) {
13223 			err = check_return_code(env, BPF_REG_1, "R1");
13224 			if (err < 0)
13225 				return err;
13226 		}
13227 	}
13228 
13229 	for (i = 0; i < CALLER_SAVED_REGS; i++)
13230 		mark_reg_not_init(env, regs, caller_saved[i]);
13231 
13232 	/* Check return type */
13233 	t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL);
13234 
13235 	if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
13236 		/* Only exception is bpf_obj_new_impl */
13237 		if (meta.btf != btf_vmlinux ||
13238 		    (meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] &&
13239 		     meta.func_id != special_kfunc_list[KF_bpf_percpu_obj_new_impl] &&
13240 		     meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) {
13241 			verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
13242 			return -EINVAL;
13243 		}
13244 	}
13245 
13246 	if (btf_type_is_scalar(t)) {
13247 		mark_reg_unknown(env, regs, BPF_REG_0);
13248 		mark_btf_func_reg_size(env, BPF_REG_0, t->size);
13249 	} else if (btf_type_is_ptr(t)) {
13250 		ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
13251 
13252 		if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
13253 			if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
13254 			    meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
13255 				struct btf_struct_meta *struct_meta;
13256 				struct btf *ret_btf;
13257 				u32 ret_btf_id;
13258 
13259 				if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
13260 					return -ENOMEM;
13261 
13262 				if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
13263 					verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
13264 					return -EINVAL;
13265 				}
13266 
13267 				ret_btf = env->prog->aux->btf;
13268 				ret_btf_id = meta.arg_constant.value;
13269 
13270 				/* This may be NULL due to user not supplying a BTF */
13271 				if (!ret_btf) {
13272 					verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
13273 					return -EINVAL;
13274 				}
13275 
13276 				ret_t = btf_type_by_id(ret_btf, ret_btf_id);
13277 				if (!ret_t || !__btf_type_is_struct(ret_t)) {
13278 					verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
13279 					return -EINVAL;
13280 				}
13281 
13282 				if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
13283 					if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
13284 						verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %d\n",
13285 							ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
13286 						return -EINVAL;
13287 					}
13288 
13289 					if (!bpf_global_percpu_ma_set) {
13290 						mutex_lock(&bpf_percpu_ma_lock);
13291 						if (!bpf_global_percpu_ma_set) {
13292 							/* Charge memory allocated with bpf_global_percpu_ma to
13293 							 * root memcg. The obj_cgroup for root memcg is NULL.
13294 							 */
13295 							err = bpf_mem_alloc_percpu_init(&bpf_global_percpu_ma, NULL);
13296 							if (!err)
13297 								bpf_global_percpu_ma_set = true;
13298 						}
13299 						mutex_unlock(&bpf_percpu_ma_lock);
13300 						if (err)
13301 							return err;
13302 					}
13303 
13304 					mutex_lock(&bpf_percpu_ma_lock);
13305 					err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size);
13306 					mutex_unlock(&bpf_percpu_ma_lock);
13307 					if (err)
13308 						return err;
13309 				}
13310 
13311 				struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
13312 				if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
13313 					if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
13314 						verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
13315 						return -EINVAL;
13316 					}
13317 
13318 					if (struct_meta) {
13319 						verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
13320 						return -EINVAL;
13321 					}
13322 				}
13323 
13324 				mark_reg_known_zero(env, regs, BPF_REG_0);
13325 				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
13326 				regs[BPF_REG_0].btf = ret_btf;
13327 				regs[BPF_REG_0].btf_id = ret_btf_id;
13328 				if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl])
13329 					regs[BPF_REG_0].type |= MEM_PERCPU;
13330 
13331 				insn_aux->obj_new_size = ret_t->size;
13332 				insn_aux->kptr_struct_meta = struct_meta;
13333 			} else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
13334 				mark_reg_known_zero(env, regs, BPF_REG_0);
13335 				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
13336 				regs[BPF_REG_0].btf = meta.arg_btf;
13337 				regs[BPF_REG_0].btf_id = meta.arg_btf_id;
13338 
13339 				insn_aux->kptr_struct_meta =
13340 					btf_find_struct_meta(meta.arg_btf,
13341 							     meta.arg_btf_id);
13342 			} else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
13343 				   meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
13344 				struct btf_field *field = meta.arg_list_head.field;
13345 
13346 				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
13347 			} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
13348 				   meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
13349 				struct btf_field *field = meta.arg_rbtree_root.field;
13350 
13351 				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
13352 			} else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
13353 				mark_reg_known_zero(env, regs, BPF_REG_0);
13354 				regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
13355 				regs[BPF_REG_0].btf = desc_btf;
13356 				regs[BPF_REG_0].btf_id = meta.ret_btf_id;
13357 			} else if (meta.func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
13358 				ret_t = btf_type_by_id(desc_btf, meta.arg_constant.value);
13359 				if (!ret_t || !btf_type_is_struct(ret_t)) {
13360 					verbose(env,
13361 						"kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
13362 					return -EINVAL;
13363 				}
13364 
13365 				mark_reg_known_zero(env, regs, BPF_REG_0);
13366 				regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
13367 				regs[BPF_REG_0].btf = desc_btf;
13368 				regs[BPF_REG_0].btf_id = meta.arg_constant.value;
13369 			} else if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
13370 				   meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
13371 				enum bpf_type_flag type_flag = get_dynptr_type_flag(meta.initialized_dynptr.type);
13372 
13373 				mark_reg_known_zero(env, regs, BPF_REG_0);
13374 
13375 				if (!meta.arg_constant.found) {
13376 					verbose(env, "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n");
13377 					return -EFAULT;
13378 				}
13379 
13380 				regs[BPF_REG_0].mem_size = meta.arg_constant.value;
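				/* E.g. a call like bpf_dynptr_slice(&ptr, off, buf, 8)
				 * has a constant size of 8, which becomes R0's mem_size
				 * so that later accesses through the slice stay within
				 * those 8 bytes.
				 */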
13381 
13382 				/* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
13383 				regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
13384 
13385 				if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
13386 					regs[BPF_REG_0].type |= MEM_RDONLY;
13387 				} else {
13388 					/* this will set env->seen_direct_write to true */
13389 					if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
13390 						verbose(env, "the prog does not allow writes to packet data\n");
13391 						return -EINVAL;
13392 					}
13393 				}
13394 
13395 				if (!meta.initialized_dynptr.id) {
13396 					verbose(env, "verifier internal error: no dynptr id\n");
13397 					return -EFAULT;
13398 				}
13399 				regs[BPF_REG_0].dynptr_id = meta.initialized_dynptr.id;
13400 
13401 				/* we don't need to set BPF_REG_0's ref obj id
13402 				 * because packet slices are not refcounted (see
13403 				 * dynptr_type_refcounted)
13404 				 */
13405 			} else {
13406 				verbose(env, "kernel function %s unhandled dynamic return type\n",
13407 					meta.func_name);
13408 				return -EFAULT;
13409 			}
13410 		} else if (btf_type_is_void(ptr_type)) {
13411 			/* kfunc returning 'void *' is equivalent to returning scalar */
13412 			mark_reg_unknown(env, regs, BPF_REG_0);
13413 		} else if (!__btf_type_is_struct(ptr_type)) {
13414 			if (!meta.r0_size) {
13415 				__u32 sz;
13416 
13417 				if (!IS_ERR(btf_resolve_size(desc_btf, ptr_type, &sz))) {
13418 					meta.r0_size = sz;
13419 					meta.r0_rdonly = true;
13420 				}
13421 			}
13422 			if (!meta.r0_size) {
13423 				ptr_type_name = btf_name_by_offset(desc_btf,
13424 								   ptr_type->name_off);
13425 				verbose(env,
13426 					"kernel function %s returns pointer type %s %s is not supported\n",
13427 					func_name,
13428 					btf_type_str(ptr_type),
13429 					ptr_type_name);
13430 				return -EINVAL;
13431 			}
13432 
13433 			mark_reg_known_zero(env, regs, BPF_REG_0);
13434 			regs[BPF_REG_0].type = PTR_TO_MEM;
13435 			regs[BPF_REG_0].mem_size = meta.r0_size;
13436 
13437 			if (meta.r0_rdonly)
13438 				regs[BPF_REG_0].type |= MEM_RDONLY;
13439 
13440 			/* Ensures we don't access the memory after a release_reference() */
13441 			if (meta.ref_obj_id)
13442 				regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
13443 		} else {
13444 			mark_reg_known_zero(env, regs, BPF_REG_0);
13445 			regs[BPF_REG_0].btf = desc_btf;
13446 			regs[BPF_REG_0].type = PTR_TO_BTF_ID;
13447 			regs[BPF_REG_0].btf_id = ptr_type_id;
13448 
13449 			if (meta.func_id == special_kfunc_list[KF_bpf_get_kmem_cache])
13450 				regs[BPF_REG_0].type |= PTR_UNTRUSTED;
13451 
13452 			if (is_iter_next_kfunc(&meta)) {
13453 				struct bpf_reg_state *cur_iter;
13454 
13455 				cur_iter = get_iter_from_state(env->cur_state, &meta);
13456 
13457 				if (cur_iter->type & MEM_RCU) /* KF_RCU_PROTECTED */
13458 					regs[BPF_REG_0].type |= MEM_RCU;
13459 				else
13460 					regs[BPF_REG_0].type |= PTR_TRUSTED;
13461 			}
13462 		}
13463 
13464 		if (is_kfunc_ret_null(&meta)) {
13465 			regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
13466 			/* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
13467 			regs[BPF_REG_0].id = ++env->id_gen;
13468 		}
13469 		mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
13470 		if (is_kfunc_acquire(&meta)) {
13471 			int id = acquire_reference(env, insn_idx);
13472 
13473 			if (id < 0)
13474 				return id;
13475 			if (is_kfunc_ret_null(&meta))
13476 				regs[BPF_REG_0].id = id;
13477 			regs[BPF_REG_0].ref_obj_id = id;
13478 		} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
13479 			ref_set_non_owning(env, &regs[BPF_REG_0]);
13480 		}
13481 
13482 		if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
13483 			regs[BPF_REG_0].id = ++env->id_gen;
13484 	} else if (btf_type_is_void(t)) {
13485 		if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
13486 			if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
13487 			    meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
13488 				insn_aux->kptr_struct_meta =
13489 					btf_find_struct_meta(meta.arg_btf,
13490 							     meta.arg_btf_id);
13491 			}
13492 		}
13493 	}
13494 
13495 	nargs = btf_type_vlen(meta.func_proto);
13496 	args = (const struct btf_param *)(meta.func_proto + 1);
13497 	for (i = 0; i < nargs; i++) {
13498 		u32 regno = i + 1;
13499 
13500 		t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
13501 		if (btf_type_is_ptr(t))
13502 			mark_btf_func_reg_size(env, regno, sizeof(void *));
13503 		else
13504 			/* scalar. ensured by btf_check_kfunc_arg_match() */
13505 			mark_btf_func_reg_size(env, regno, t->size);
13506 	}
13507 
13508 	if (is_iter_next_kfunc(&meta)) {
13509 		err = process_iter_next_call(env, insn_idx, &meta);
13510 		if (err)
13511 			return err;
13512 	}
13513 
13514 	return 0;
13515 }
13516 
13517 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
13518 				  const struct bpf_reg_state *reg,
13519 				  enum bpf_reg_type type)
13520 {
13521 	bool known = tnum_is_const(reg->var_off);
13522 	s64 val = reg->var_off.value;
13523 	s64 smin = reg->smin_value;
13524 
13525 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
13526 		verbose(env, "math between %s pointer and %lld is not allowed\n",
13527 			reg_type_str(env, type), val);
13528 		return false;
13529 	}
13530 
13531 	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
13532 		verbose(env, "%s pointer offset %d is not allowed\n",
13533 			reg_type_str(env, type), reg->off);
13534 		return false;
13535 	}
13536 
13537 	if (smin == S64_MIN) {
13538 		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
13539 			reg_type_str(env, type));
13540 		return false;
13541 	}
13542 
13543 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
13544 		verbose(env, "value %lld makes %s pointer be out of bounds\n",
13545 			smin, reg_type_str(env, type));
13546 		return false;
13547 	}
13548 
13549 	return true;
13550 }
13551 
13552 enum {
13553 	REASON_BOUNDS	= -1,
13554 	REASON_TYPE	= -2,
13555 	REASON_PATHS	= -3,
13556 	REASON_LIMIT	= -4,
13557 	REASON_STACK	= -5,
13558 };
13559 
13560 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
13561 			      u32 *alu_limit, bool mask_to_left)
13562 {
13563 	u32 max = 0, ptr_limit = 0;
13564 
13565 	switch (ptr_reg->type) {
13566 	case PTR_TO_STACK:
13567 		/* Offset 0 is out-of-bounds, but acceptable start for the
13568 		 * left direction, see BPF_REG_FP. Also, unknown scalar
13569 		 * offset where we would need to deal with min/max bounds is
13570 		 * currently prohibited for unprivileged.
13571 		 */
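		/* E.g. a stack pointer at fp-16 with a known var_off of 0
		 * yields a ptr_limit of 16, i.e. (roughly) the masked ALU op
		 * may not move the pointer by more than 16 bytes.
		 */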
13572 		max = MAX_BPF_STACK + mask_to_left;
13573 		ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
13574 		break;
13575 	case PTR_TO_MAP_VALUE:
13576 		max = ptr_reg->map_ptr->value_size;
13577 		ptr_limit = (mask_to_left ?
13578 			     ptr_reg->smin_value :
13579 			     ptr_reg->umax_value) + ptr_reg->off;
13580 		break;
13581 	default:
13582 		return REASON_TYPE;
13583 	}
13584 
13585 	if (ptr_limit >= max)
13586 		return REASON_LIMIT;
13587 	*alu_limit = ptr_limit;
13588 	return 0;
13589 }
13590 
13591 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
13592 				    const struct bpf_insn *insn)
13593 {
13594 	return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
13595 }
13596 
13597 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
13598 				       u32 alu_state, u32 alu_limit)
13599 {
13600 	/* If we arrived here from different branches with different
13601 	 * state or limits to sanitize, then this won't work.
13602 	 */
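	/* E.g. if one path reaches this insn needing an alu_limit of 16 and
	 * another needs 32, a single rewrite cannot mask both, so we bail
	 * out with REASON_PATHS.
	 */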
13603 	if (aux->alu_state &&
13604 	    (aux->alu_state != alu_state ||
13605 	     aux->alu_limit != alu_limit))
13606 		return REASON_PATHS;
13607 
13608 	/* Corresponding fixup done in do_misc_fixups(). */
13609 	aux->alu_state = alu_state;
13610 	aux->alu_limit = alu_limit;
13611 	return 0;
13612 }
13613 
13614 static int sanitize_val_alu(struct bpf_verifier_env *env,
13615 			    struct bpf_insn *insn)
13616 {
13617 	struct bpf_insn_aux_data *aux = cur_aux(env);
13618 
13619 	if (can_skip_alu_sanitation(env, insn))
13620 		return 0;
13621 
13622 	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
13623 }
13624 
13625 static bool sanitize_needed(u8 opcode)
13626 {
13627 	return opcode == BPF_ADD || opcode == BPF_SUB;
13628 }
13629 
13630 struct bpf_sanitize_info {
13631 	struct bpf_insn_aux_data aux;
13632 	bool mask_to_left;
13633 };
13634 
13635 static struct bpf_verifier_state *
13636 sanitize_speculative_path(struct bpf_verifier_env *env,
13637 			  const struct bpf_insn *insn,
13638 			  u32 next_idx, u32 curr_idx)
13639 {
13640 	struct bpf_verifier_state *branch;
13641 	struct bpf_reg_state *regs;
13642 
13643 	branch = push_stack(env, next_idx, curr_idx, true);
13644 	if (branch && insn) {
13645 		regs = branch->frame[branch->curframe]->regs;
13646 		if (BPF_SRC(insn->code) == BPF_K) {
13647 			mark_reg_unknown(env, regs, insn->dst_reg);
13648 		} else if (BPF_SRC(insn->code) == BPF_X) {
13649 			mark_reg_unknown(env, regs, insn->dst_reg);
13650 			mark_reg_unknown(env, regs, insn->src_reg);
13651 		}
13652 	}
13653 	return branch;
13654 }
13655 
13656 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
13657 			    struct bpf_insn *insn,
13658 			    const struct bpf_reg_state *ptr_reg,
13659 			    const struct bpf_reg_state *off_reg,
13660 			    struct bpf_reg_state *dst_reg,
13661 			    struct bpf_sanitize_info *info,
13662 			    const bool commit_window)
13663 {
13664 	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
13665 	struct bpf_verifier_state *vstate = env->cur_state;
13666 	bool off_is_imm = tnum_is_const(off_reg->var_off);
13667 	bool off_is_neg = off_reg->smin_value < 0;
13668 	bool ptr_is_dst_reg = ptr_reg == dst_reg;
13669 	u8 opcode = BPF_OP(insn->code);
13670 	u32 alu_state, alu_limit;
13671 	struct bpf_reg_state tmp;
13672 	bool ret;
13673 	int err;
13674 
13675 	if (can_skip_alu_sanitation(env, insn))
13676 		return 0;
13677 
13678 	/* We already marked aux for masking from non-speculative
13679 	 * paths, thus we got here in the first place. We only care
13680 	 * to explore bad access from here.
13681 	 */
13682 	if (vstate->speculative)
13683 		goto do_sim;
13684 
13685 	if (!commit_window) {
13686 		if (!tnum_is_const(off_reg->var_off) &&
13687 		    (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
13688 			return REASON_BOUNDS;
13689 
13690 		info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
13691 				     (opcode == BPF_SUB && !off_is_neg);
13692 	}
13693 
13694 	err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
13695 	if (err < 0)
13696 		return err;
13697 
13698 	if (commit_window) {
13699 		/* In commit phase we narrow the masking window based on
13700 		 * the observed pointer move after the simulated operation.
13701 		 */
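		/* Rough example: if the pre-op pointer allowed a limit of 16
		 * and the post-op pointer allows 8, the remaining masking
		 * window is |16 - 8| = 8 bytes.
		 */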
13702 		alu_state = info->aux.alu_state;
13703 		alu_limit = abs(info->aux.alu_limit - alu_limit);
13704 	} else {
13705 		alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
13706 		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
13707 		alu_state |= ptr_is_dst_reg ?
13708 			     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
13709 
13710 		/* Limit pruning on unknown scalars to enable deep search for
13711 		 * potential masking differences from other program paths.
13712 		 */
13713 		if (!off_is_imm)
13714 			env->explore_alu_limits = true;
13715 	}
13716 
13717 	err = update_alu_sanitation_state(aux, alu_state, alu_limit);
13718 	if (err < 0)
13719 		return err;
13720 do_sim:
13721 	/* If we're in commit phase, we're done here given we already
13722 	 * pushed the truncated dst_reg into the speculative verification
13723 	 * stack.
13724 	 *
13725 	 * Also, when register is a known constant, we rewrite register-based
13726 	 * operation to immediate-based, and thus do not need masking (and as
13727 	 * a consequence, do not need to simulate the zero-truncation either).
13728 	 */
13729 	if (commit_window || off_is_imm)
13730 		return 0;
13731 
13732 	/* Simulate and find potential out-of-bounds access under
13733 	 * speculative execution from truncation as a result of
13734 	 * masking when off was not within expected range. If off
13735 	 * sits in dst, then we temporarily need to move ptr there
13736 	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
13737 	 * for cases where we use K-based arithmetic in one direction
13738 	 * and truncated reg-based in the other in order to explore
13739 	 * bad access.
13740 	 */
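	/* E.g. for "r3 += r1" where r1 is the pointer and r3 the unknown
	 * scalar, dst (r3) temporarily takes on r1's pointer state so the
	 * pushed speculative path sees the potentially out-of-bounds pointer;
	 * the scalar state is restored below once the path is queued.
	 */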
13741 	if (!ptr_is_dst_reg) {
13742 		tmp = *dst_reg;
13743 		copy_register_state(dst_reg, ptr_reg);
13744 	}
13745 	ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
13746 					env->insn_idx);
13747 	if (!ptr_is_dst_reg && ret)
13748 		*dst_reg = tmp;
13749 	return !ret ? REASON_STACK : 0;
13750 }
13751 
13752 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
13753 {
13754 	struct bpf_verifier_state *vstate = env->cur_state;
13755 
13756 	/* If we simulate paths under speculation, we don't update the
13757 	 * insn as 'seen' such that when we verify unreachable paths in
13758 	 * the non-speculative domain, sanitize_dead_code() can still
13759 	 * rewrite/sanitize them.
13760 	 */
13761 	if (!vstate->speculative)
13762 		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
13763 }
13764 
13765 static int sanitize_err(struct bpf_verifier_env *env,
13766 			const struct bpf_insn *insn, int reason,
13767 			const struct bpf_reg_state *off_reg,
13768 			const struct bpf_reg_state *dst_reg)
13769 {
13770 	static const char *err = "pointer arithmetic with it prohibited for !root";
13771 	const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
13772 	u32 dst = insn->dst_reg, src = insn->src_reg;
13773 
13774 	switch (reason) {
13775 	case REASON_BOUNDS:
13776 		verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
13777 			off_reg == dst_reg ? dst : src, err);
13778 		break;
13779 	case REASON_TYPE:
13780 		verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
13781 			off_reg == dst_reg ? src : dst, err);
13782 		break;
13783 	case REASON_PATHS:
13784 		verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
13785 			dst, op, err);
13786 		break;
13787 	case REASON_LIMIT:
13788 		verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
13789 			dst, op, err);
13790 		break;
13791 	case REASON_STACK:
13792 		verbose(env, "R%d could not be pushed for speculative verification, %s\n",
13793 			dst, err);
13794 		break;
13795 	default:
13796 		verbose(env, "verifier internal error: unknown reason (%d)\n",
13797 			reason);
13798 		break;
13799 	}
13800 
13801 	return -EACCES;
13802 }
13803 
13804 /* check that stack access falls within stack limits and that 'reg' doesn't
13805  * have a variable offset.
13806  *
13807  * Variable offset is prohibited for unprivileged mode for simplicity since it
13808  * requires corresponding support in Spectre masking for stack ALU.  See also
13809  * retrieve_ptr_limit().
13810  *
13812  * 'off' includes 'reg->off'.
13813  */
13814 static int check_stack_access_for_ptr_arithmetic(
13815 				struct bpf_verifier_env *env,
13816 				int regno,
13817 				const struct bpf_reg_state *reg,
13818 				int off)
13819 {
13820 	if (!tnum_is_const(reg->var_off)) {
13821 		char tn_buf[48];
13822 
13823 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
13824 		verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
13825 			regno, tn_buf, off);
13826 		return -EACCES;
13827 	}
13828 
13829 	if (off >= 0 || off < -MAX_BPF_STACK) {
13830 		verbose(env, "R%d stack pointer arithmetic goes out of range, "
13831 			"prohibited for !root; off=%d\n", regno, off);
13832 		return -EACCES;
13833 	}
13834 
13835 	return 0;
13836 }
13837 
13838 static int sanitize_check_bounds(struct bpf_verifier_env *env,
13839 				 const struct bpf_insn *insn,
13840 				 const struct bpf_reg_state *dst_reg)
13841 {
13842 	u32 dst = insn->dst_reg;
13843 
13844 	/* For unprivileged we require that resulting offset must be in bounds
13845 	 * in order to be able to sanitize access later on.
13846 	 */
13847 	if (env->bypass_spec_v1)
13848 		return 0;
13849 
13850 	switch (dst_reg->type) {
13851 	case PTR_TO_STACK:
13852 		if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
13853 					dst_reg->off + dst_reg->var_off.value))
13854 			return -EACCES;
13855 		break;
13856 	case PTR_TO_MAP_VALUE:
13857 		if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
13858 			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
13859 				"prohibited for !root\n", dst);
13860 			return -EACCES;
13861 		}
13862 		break;
13863 	default:
13864 		break;
13865 	}
13866 
13867 	return 0;
13868 }
13869 
13870 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
13871  * Caller should also handle BPF_MOV case separately.
13872  * If we return -EACCES, caller may want to try again treating pointer as a
13873  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
13874  */
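/* E.g. for BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), where R1 is
 * PTR_TO_MAP_VALUE and R3 a bounded scalar, R1 keeps the map value pointer
 * type while its var_off and min/max bounds absorb R3's range (a sketch of
 * the common "pointer += scalar" case handled below).
 */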
13875 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
13876 				   struct bpf_insn *insn,
13877 				   const struct bpf_reg_state *ptr_reg,
13878 				   const struct bpf_reg_state *off_reg)
13879 {
13880 	struct bpf_verifier_state *vstate = env->cur_state;
13881 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
13882 	struct bpf_reg_state *regs = state->regs, *dst_reg;
13883 	bool known = tnum_is_const(off_reg->var_off);
13884 	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
13885 	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
13886 	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
13887 	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
13888 	struct bpf_sanitize_info info = {};
13889 	u8 opcode = BPF_OP(insn->code);
13890 	u32 dst = insn->dst_reg;
13891 	int ret;
13892 
13893 	dst_reg = &regs[dst];
13894 
13895 	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
13896 	    smin_val > smax_val || umin_val > umax_val) {
13897 		/* Taint dst register if offset had invalid bounds derived from
13898 		 * e.g. dead branches.
13899 		 */
13900 		__mark_reg_unknown(env, dst_reg);
13901 		return 0;
13902 	}
13903 
13904 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
13905 		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
13906 		if (opcode == BPF_SUB && env->allow_ptr_leaks) {
13907 			__mark_reg_unknown(env, dst_reg);
13908 			return 0;
13909 		}
13910 
13911 		verbose(env,
13912 			"R%d 32-bit pointer arithmetic prohibited\n",
13913 			dst);
13914 		return -EACCES;
13915 	}
13916 
13917 	if (ptr_reg->type & PTR_MAYBE_NULL) {
13918 		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
13919 			dst, reg_type_str(env, ptr_reg->type));
13920 		return -EACCES;
13921 	}
13922 
13923 	switch (base_type(ptr_reg->type)) {
13924 	case PTR_TO_CTX:
13925 	case PTR_TO_MAP_VALUE:
13926 	case PTR_TO_MAP_KEY:
13927 	case PTR_TO_STACK:
13928 	case PTR_TO_PACKET_META:
13929 	case PTR_TO_PACKET:
13930 	case PTR_TO_TP_BUFFER:
13931 	case PTR_TO_BTF_ID:
13932 	case PTR_TO_MEM:
13933 	case PTR_TO_BUF:
13934 	case PTR_TO_FUNC:
13935 	case CONST_PTR_TO_DYNPTR:
13936 		break;
13937 	case PTR_TO_FLOW_KEYS:
13938 		if (known)
13939 			break;
13940 		fallthrough;
13941 	case CONST_PTR_TO_MAP:
13942 		/* smin_val represents the known value */
13943 		if (known && smin_val == 0 && opcode == BPF_ADD)
13944 			break;
13945 		fallthrough;
13946 	default:
13947 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
13948 			dst, reg_type_str(env, ptr_reg->type));
13949 		return -EACCES;
13950 	}
13951 
13952 	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
13953 	 * The id may be overwritten later if we create a new variable offset.
13954 	 */
13955 	dst_reg->type = ptr_reg->type;
13956 	dst_reg->id = ptr_reg->id;
13957 
13958 	if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
13959 	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
13960 		return -EINVAL;
13961 
13962 	/* pointer types do not carry 32-bit bounds at the moment. */
13963 	__mark_reg32_unbounded(dst_reg);
13964 
13965 	if (sanitize_needed(opcode)) {
13966 		ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
13967 				       &info, false);
13968 		if (ret < 0)
13969 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
13970 	}
13971 
13972 	switch (opcode) {
13973 	case BPF_ADD:
13974 		/* We can take a fixed offset as long as it doesn't overflow
13975 		 * the s32 'off' field
13976 		 */
13977 		if (known && (ptr_reg->off + smin_val ==
13978 			      (s64)(s32)(ptr_reg->off + smin_val))) {
13979 			/* pointer += K.  Accumulate it into fixed offset */
13980 			dst_reg->smin_value = smin_ptr;
13981 			dst_reg->smax_value = smax_ptr;
13982 			dst_reg->umin_value = umin_ptr;
13983 			dst_reg->umax_value = umax_ptr;
13984 			dst_reg->var_off = ptr_reg->var_off;
13985 			dst_reg->off = ptr_reg->off + smin_val;
13986 			dst_reg->raw = ptr_reg->raw;
13987 			break;
13988 		}
13989 		/* A new variable offset is created.  Note that off_reg->off
13990 		 * == 0, since it's a scalar.
13991 		 * dst_reg gets the pointer type and since a variable offset
13992 		 * was added to the pointer, give it a new 'id'
13993 		 * if it's a PTR_TO_PACKET.
13994 		 * this creates a new 'base' pointer, off_reg (variable) gets
13995 		 * added into the variable offset, and we copy the fixed offset
13996 		 * from ptr_reg.
13997 		 */
13998 		if (check_add_overflow(smin_ptr, smin_val, &dst_reg->smin_value) ||
13999 		    check_add_overflow(smax_ptr, smax_val, &dst_reg->smax_value)) {
14000 			dst_reg->smin_value = S64_MIN;
14001 			dst_reg->smax_value = S64_MAX;
14002 		}
14003 		if (check_add_overflow(umin_ptr, umin_val, &dst_reg->umin_value) ||
14004 		    check_add_overflow(umax_ptr, umax_val, &dst_reg->umax_value)) {
14005 			dst_reg->umin_value = 0;
14006 			dst_reg->umax_value = U64_MAX;
14007 		}
14008 		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
14009 		dst_reg->off = ptr_reg->off;
14010 		dst_reg->raw = ptr_reg->raw;
14011 		if (reg_is_pkt_pointer(ptr_reg)) {
14012 			dst_reg->id = ++env->id_gen;
14013 			/* something was added to pkt_ptr, set range to zero */
14014 			memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
14015 		}
14016 		break;
14017 	case BPF_SUB:
14018 		if (dst_reg == off_reg) {
14019 			/* scalar -= pointer.  Creates an unknown scalar */
14020 			verbose(env, "R%d tried to subtract pointer from scalar\n",
14021 				dst);
14022 			return -EACCES;
14023 		}
14024 		/* We don't allow subtraction from FP, because (according to
14025 		 * test_verifier.c test "invalid fp arithmetic", JITs might not
14026 		 * be able to deal with it.
14027 		 */
14028 		if (ptr_reg->type == PTR_TO_STACK) {
14029 			verbose(env, "R%d subtraction from stack pointer prohibited\n",
14030 				dst);
14031 			return -EACCES;
14032 		}
14033 		if (known && (ptr_reg->off - smin_val ==
14034 			      (s64)(s32)(ptr_reg->off - smin_val))) {
14035 			/* pointer -= K.  Subtract it from fixed offset */
14036 			dst_reg->smin_value = smin_ptr;
14037 			dst_reg->smax_value = smax_ptr;
14038 			dst_reg->umin_value = umin_ptr;
14039 			dst_reg->umax_value = umax_ptr;
14040 			dst_reg->var_off = ptr_reg->var_off;
14041 			dst_reg->id = ptr_reg->id;
14042 			dst_reg->off = ptr_reg->off - smin_val;
14043 			dst_reg->raw = ptr_reg->raw;
14044 			break;
14045 		}
14046 		/* A new variable offset is created.  If the subtrahend is known
14047 		 * nonnegative, then any reg->range we had before is still good.
14048 		 */
14049 		if (check_sub_overflow(smin_ptr, smax_val, &dst_reg->smin_value) ||
14050 		    check_sub_overflow(smax_ptr, smin_val, &dst_reg->smax_value)) {
14051 			/* Overflow possible, we know nothing */
14052 			dst_reg->smin_value = S64_MIN;
14053 			dst_reg->smax_value = S64_MAX;
14054 		}
14055 		if (umin_ptr < umax_val) {
14056 			/* Overflow possible, we know nothing */
14057 			dst_reg->umin_value = 0;
14058 			dst_reg->umax_value = U64_MAX;
14059 		} else {
14060 			/* Cannot overflow (as long as bounds are consistent) */
14061 			dst_reg->umin_value = umin_ptr - umax_val;
14062 			dst_reg->umax_value = umax_ptr - umin_val;
14063 		}
14064 		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
14065 		dst_reg->off = ptr_reg->off;
14066 		dst_reg->raw = ptr_reg->raw;
14067 		if (reg_is_pkt_pointer(ptr_reg)) {
14068 			dst_reg->id = ++env->id_gen;
14069 			/* the pkt pointer may have advanced, forget the old range */
14070 			if (smin_val < 0)
14071 				memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
14072 		}
14073 		break;
14074 	case BPF_AND:
14075 	case BPF_OR:
14076 	case BPF_XOR:
14077 		/* bitwise ops on pointers are troublesome, prohibit. */
14078 		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
14079 			dst, bpf_alu_string[opcode >> 4]);
14080 		return -EACCES;
14081 	default:
14082 		/* other operators (e.g. MUL,LSH) produce non-pointer results */
14083 		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
14084 			dst, bpf_alu_string[opcode >> 4]);
14085 		return -EACCES;
14086 	}
14087 
14088 	if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
14089 		return -EINVAL;
14090 	reg_bounds_sync(dst_reg);
14091 	if (sanitize_check_bounds(env, insn, dst_reg) < 0)
14092 		return -EACCES;
14093 	if (sanitize_needed(opcode)) {
14094 		ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
14095 				       &info, true);
14096 		if (ret < 0)
14097 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
14098 	}
14099 
14100 	return 0;
14101 }
14102 
14103 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
14104 				 struct bpf_reg_state *src_reg)
14105 {
14106 	s32 *dst_smin = &dst_reg->s32_min_value;
14107 	s32 *dst_smax = &dst_reg->s32_max_value;
14108 	u32 *dst_umin = &dst_reg->u32_min_value;
14109 	u32 *dst_umax = &dst_reg->u32_max_value;
14110 
14111 	if (check_add_overflow(*dst_smin, src_reg->s32_min_value, dst_smin) ||
14112 	    check_add_overflow(*dst_smax, src_reg->s32_max_value, dst_smax)) {
14113 		*dst_smin = S32_MIN;
14114 		*dst_smax = S32_MAX;
14115 	}
14116 	if (check_add_overflow(*dst_umin, src_reg->u32_min_value, dst_umin) ||
14117 	    check_add_overflow(*dst_umax, src_reg->u32_max_value, dst_umax)) {
14118 		*dst_umin = 0;
14119 		*dst_umax = U32_MAX;
14120 	}
14121 }
14122 
14123 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
14124 			       struct bpf_reg_state *src_reg)
14125 {
14126 	s64 *dst_smin = &dst_reg->smin_value;
14127 	s64 *dst_smax = &dst_reg->smax_value;
14128 	u64 *dst_umin = &dst_reg->umin_value;
14129 	u64 *dst_umax = &dst_reg->umax_value;
14130 
14131 	if (check_add_overflow(*dst_smin, src_reg->smin_value, dst_smin) ||
14132 	    check_add_overflow(*dst_smax, src_reg->smax_value, dst_smax)) {
14133 		*dst_smin = S64_MIN;
14134 		*dst_smax = S64_MAX;
14135 	}
14136 	if (check_add_overflow(*dst_umin, src_reg->umin_value, dst_umin) ||
14137 	    check_add_overflow(*dst_umax, src_reg->umax_value, dst_umax)) {
14138 		*dst_umin = 0;
14139 		*dst_umax = U64_MAX;
14140 	}
14141 }
14142 
14143 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
14144 				 struct bpf_reg_state *src_reg)
14145 {
14146 	s32 *dst_smin = &dst_reg->s32_min_value;
14147 	s32 *dst_smax = &dst_reg->s32_max_value;
14148 	u32 umin_val = src_reg->u32_min_value;
14149 	u32 umax_val = src_reg->u32_max_value;
14150 
14151 	if (check_sub_overflow(*dst_smin, src_reg->s32_max_value, dst_smin) ||
14152 	    check_sub_overflow(*dst_smax, src_reg->s32_min_value, dst_smax)) {
14153 		/* Overflow possible, we know nothing */
14154 		*dst_smin = S32_MIN;
14155 		*dst_smax = S32_MAX;
14156 	}
14157 	if (dst_reg->u32_min_value < umax_val) {
14158 		/* Overflow possible, we know nothing */
14159 		dst_reg->u32_min_value = 0;
14160 		dst_reg->u32_max_value = U32_MAX;
14161 	} else {
14162 		/* Cannot overflow (as long as bounds are consistent) */
14163 		dst_reg->u32_min_value -= umax_val;
14164 		dst_reg->u32_max_value -= umin_val;
14165 	}
14166 }
14167 
14168 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
14169 			       struct bpf_reg_state *src_reg)
14170 {
14171 	s64 *dst_smin = &dst_reg->smin_value;
14172 	s64 *dst_smax = &dst_reg->smax_value;
14173 	u64 umin_val = src_reg->umin_value;
14174 	u64 umax_val = src_reg->umax_value;
14175 
14176 	if (check_sub_overflow(*dst_smin, src_reg->smax_value, dst_smin) ||
14177 	    check_sub_overflow(*dst_smax, src_reg->smin_value, dst_smax)) {
14178 		/* Overflow possible, we know nothing */
14179 		*dst_smin = S64_MIN;
14180 		*dst_smax = S64_MAX;
14181 	}
14182 	if (dst_reg->umin_value < umax_val) {
14183 		/* Overflow possible, we know nothing */
14184 		dst_reg->umin_value = 0;
14185 		dst_reg->umax_value = U64_MAX;
14186 	} else {
14187 		/* Cannot overflow (as long as bounds are consistent) */
14188 		dst_reg->umin_value -= umax_val;
14189 		dst_reg->umax_value -= umin_val;
14190 	}
14191 }
14192 
14193 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
14194 				 struct bpf_reg_state *src_reg)
14195 {
14196 	s32 *dst_smin = &dst_reg->s32_min_value;
14197 	s32 *dst_smax = &dst_reg->s32_max_value;
14198 	u32 *dst_umin = &dst_reg->u32_min_value;
14199 	u32 *dst_umax = &dst_reg->u32_max_value;
14200 	s32 tmp_prod[4];
14201 
14202 	if (check_mul_overflow(*dst_umax, src_reg->u32_max_value, dst_umax) ||
14203 	    check_mul_overflow(*dst_umin, src_reg->u32_min_value, dst_umin)) {
14204 		/* Overflow possible, we know nothing */
14205 		*dst_umin = 0;
14206 		*dst_umax = U32_MAX;
14207 	}
14208 	if (check_mul_overflow(*dst_smin, src_reg->s32_min_value, &tmp_prod[0]) ||
14209 	    check_mul_overflow(*dst_smin, src_reg->s32_max_value, &tmp_prod[1]) ||
14210 	    check_mul_overflow(*dst_smax, src_reg->s32_min_value, &tmp_prod[2]) ||
14211 	    check_mul_overflow(*dst_smax, src_reg->s32_max_value, &tmp_prod[3])) {
14212 		/* Overflow possible, we know nothing */
14213 		*dst_smin = S32_MIN;
14214 		*dst_smax = S32_MAX;
14215 	} else {
14216 		*dst_smin = min_array(tmp_prod, 4);
14217 		*dst_smax = max_array(tmp_prod, 4);
14218 	}
14219 }
14220 
14221 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
14222 			       struct bpf_reg_state *src_reg)
14223 {
14224 	s64 *dst_smin = &dst_reg->smin_value;
14225 	s64 *dst_smax = &dst_reg->smax_value;
14226 	u64 *dst_umin = &dst_reg->umin_value;
14227 	u64 *dst_umax = &dst_reg->umax_value;
14228 	s64 tmp_prod[4];
14229 
14230 	if (check_mul_overflow(*dst_umax, src_reg->umax_value, dst_umax) ||
14231 	    check_mul_overflow(*dst_umin, src_reg->umin_value, dst_umin)) {
14232 		/* Overflow possible, we know nothing */
14233 		*dst_umin = 0;
14234 		*dst_umax = U64_MAX;
14235 	}
14236 	if (check_mul_overflow(*dst_smin, src_reg->smin_value, &tmp_prod[0]) ||
14237 	    check_mul_overflow(*dst_smin, src_reg->smax_value, &tmp_prod[1]) ||
14238 	    check_mul_overflow(*dst_smax, src_reg->smin_value, &tmp_prod[2]) ||
14239 	    check_mul_overflow(*dst_smax, src_reg->smax_value, &tmp_prod[3])) {
14240 		/* Overflow possible, we know nothing */
14241 		*dst_smin = S64_MIN;
14242 		*dst_smax = S64_MAX;
14243 	} else {
14244 		*dst_smin = min_array(tmp_prod, 4);
14245 		*dst_smax = max_array(tmp_prod, 4);
14246 	}
14247 }
14248 
14249 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
14250 				 struct bpf_reg_state *src_reg)
14251 {
14252 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
14253 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14254 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14255 	u32 umax_val = src_reg->u32_max_value;
14256 
14257 	if (src_known && dst_known) {
14258 		__mark_reg32_known(dst_reg, var32_off.value);
14259 		return;
14260 	}
14261 
14262 	/* We get our minimum from the var_off, since that's inherently
14263 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
14264 	 */
14265 	dst_reg->u32_min_value = var32_off.value;
14266 	dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
14267 
14268 	/* Safe to set s32 bounds by casting u32 result into s32 when u32
14269 	 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
14270 	 */
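	/* E.g. a u32 range [0x7ffffff0, 0x8000000f] crosses the sign
	 * boundary: as s32 the upper bound would be negative and below the
	 * lower bound, so the s32 bounds are left unbounded instead.
	 */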
14271 	if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
14272 		dst_reg->s32_min_value = dst_reg->u32_min_value;
14273 		dst_reg->s32_max_value = dst_reg->u32_max_value;
14274 	} else {
14275 		dst_reg->s32_min_value = S32_MIN;
14276 		dst_reg->s32_max_value = S32_MAX;
14277 	}
14278 }
14279 
14280 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
14281 			       struct bpf_reg_state *src_reg)
14282 {
14283 	bool src_known = tnum_is_const(src_reg->var_off);
14284 	bool dst_known = tnum_is_const(dst_reg->var_off);
14285 	u64 umax_val = src_reg->umax_value;
14286 
14287 	if (src_known && dst_known) {
14288 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
14289 		return;
14290 	}
14291 
14292 	/* We get our minimum from the var_off, since that's inherently
14293 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
14294 	 */
14295 	dst_reg->umin_value = dst_reg->var_off.value;
14296 	dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
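	/* E.g. ANDing a value whose umax_value is 255 with a constant 0xF0
	 * caps umax_value at 0xF0, while umin_value comes from the bits
	 * var_off already proves are set (none, for a fully unknown value).
	 */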
14297 
14298 	/* Safe to set s64 bounds by casting u64 result into s64 when u64
14299 	 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
14300 	 */
14301 	if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
14302 		dst_reg->smin_value = dst_reg->umin_value;
14303 		dst_reg->smax_value = dst_reg->umax_value;
14304 	} else {
14305 		dst_reg->smin_value = S64_MIN;
14306 		dst_reg->smax_value = S64_MAX;
14307 	}
14308 	/* We may learn something more from the var_off */
14309 	__update_reg_bounds(dst_reg);
14310 }
14311 
14312 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
14313 				struct bpf_reg_state *src_reg)
14314 {
14315 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
14316 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14317 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14318 	u32 umin_val = src_reg->u32_min_value;
14319 
14320 	if (src_known && dst_known) {
14321 		__mark_reg32_known(dst_reg, var32_off.value);
14322 		return;
14323 	}
14324 
14325 	/* We get our maximum from the var_off, and our minimum is the
14326 	 * maximum of the operands' minima
14327 	 */
14328 	dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
14329 	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
14330 
14331 	/* Safe to set s32 bounds by casting u32 result into s32 when u32
14332 	 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
14333 	 */
14334 	if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
14335 		dst_reg->s32_min_value = dst_reg->u32_min_value;
14336 		dst_reg->s32_max_value = dst_reg->u32_max_value;
14337 	} else {
14338 		dst_reg->s32_min_value = S32_MIN;
14339 		dst_reg->s32_max_value = S32_MAX;
14340 	}
14341 }
14342 
14343 static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
14344 			      struct bpf_reg_state *src_reg)
14345 {
14346 	bool src_known = tnum_is_const(src_reg->var_off);
14347 	bool dst_known = tnum_is_const(dst_reg->var_off);
14348 	u64 umin_val = src_reg->umin_value;
14349 
14350 	if (src_known && dst_known) {
14351 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
14352 		return;
14353 	}
14354 
14355 	/* We get our maximum from the var_off, and our minimum is the
14356 	 * maximum of the operands' minima
14357 	 */
14358 	dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
14359 	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
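	/* E.g. ORing with a source whose umin_value is 3 guarantees the
	 * result is at least 3, while the maximum is every bit that var_off
	 * still allows to be set.
	 */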
14360 
14361 	/* Safe to set s64 bounds by casting u64 result into s64 when u64
14362 	 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
14363 	 */
14364 	if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
14365 		dst_reg->smin_value = dst_reg->umin_value;
14366 		dst_reg->smax_value = dst_reg->umax_value;
14367 	} else {
14368 		dst_reg->smin_value = S64_MIN;
14369 		dst_reg->smax_value = S64_MAX;
14370 	}
14371 	/* We may learn something more from the var_off */
14372 	__update_reg_bounds(dst_reg);
14373 }
14374 
14375 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
14376 				 struct bpf_reg_state *src_reg)
14377 {
14378 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
14379 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14380 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14381 
14382 	if (src_known && dst_known) {
14383 		__mark_reg32_known(dst_reg, var32_off.value);
14384 		return;
14385 	}
14386 
14387 	/* We get both minimum and maximum from the var32_off. */
14388 	dst_reg->u32_min_value = var32_off.value;
14389 	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
14390 
14391 	/* Safe to set s32 bounds by casting u32 result into s32 when u32
14392 	 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
14393 	 */
14394 	if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
14395 		dst_reg->s32_min_value = dst_reg->u32_min_value;
14396 		dst_reg->s32_max_value = dst_reg->u32_max_value;
14397 	} else {
14398 		dst_reg->s32_min_value = S32_MIN;
14399 		dst_reg->s32_max_value = S32_MAX;
14400 	}
14401 }
14402 
14403 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
14404 			       struct bpf_reg_state *src_reg)
14405 {
14406 	bool src_known = tnum_is_const(src_reg->var_off);
14407 	bool dst_known = tnum_is_const(dst_reg->var_off);
14408 
14409 	if (src_known && dst_known) {
14410 		/* dst_reg->var_off.value has been updated earlier */
14411 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
14412 		return;
14413 	}
14414 
14415 	/* We get both minimum and maximum from the var_off. */
14416 	dst_reg->umin_value = dst_reg->var_off.value;
14417 	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
14418 
14419 	/* Safe to set s64 bounds by casting u64 result into s64 when u64
14420 	 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
14421 	 */
14422 	if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
14423 		dst_reg->smin_value = dst_reg->umin_value;
14424 		dst_reg->smax_value = dst_reg->umax_value;
14425 	} else {
14426 		dst_reg->smin_value = S64_MIN;
14427 		dst_reg->smax_value = S64_MAX;
14428 	}
14429 
14430 	__update_reg_bounds(dst_reg);
14431 }
14432 
14433 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
14434 				   u64 umin_val, u64 umax_val)
14435 {
14436 	/* We lose all sign bit information (except what we can pick
14437 	 * up from var_off)
14438 	 */
14439 	dst_reg->s32_min_value = S32_MIN;
14440 	dst_reg->s32_max_value = S32_MAX;
14441 	/* If we might shift our top bit out, then we know nothing */
14442 	if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
14443 		dst_reg->u32_min_value = 0;
14444 		dst_reg->u32_max_value = U32_MAX;
14445 	} else {
14446 		dst_reg->u32_min_value <<= umin_val;
14447 		dst_reg->u32_max_value <<= umax_val;
14448 	}
14449 }
14450 
14451 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
14452 				 struct bpf_reg_state *src_reg)
14453 {
14454 	u32 umax_val = src_reg->u32_max_value;
14455 	u32 umin_val = src_reg->u32_min_value;
14456 	/* u32 alu operation will zext upper bits */
14457 	struct tnum subreg = tnum_subreg(dst_reg->var_off);
14458 
14459 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
14460 	dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
14461 	/* Not strictly required, but to be careful mark the reg64 bounds as
14462 	 * unknown so that we are forced to pick them up from the tnum and
14463 	 * zext later; if some path skips this step we are still safe.
14464 	 */
14465 	__mark_reg64_unbounded(dst_reg);
14466 	__update_reg32_bounds(dst_reg);
14467 }
14468 
14469 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
14470 				   u64 umin_val, u64 umax_val)
14471 {
14472 	/* Special case <<32 because it is a common compiler pattern to sign
14473 	 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
14474 	 * positive we know this shift will also be positive so we can track
14475 	 * bounds correctly. Otherwise we lose all sign bit information except
14476 	 * what we can pick up from var_off. Perhaps we can generalize this
14477 	 * later to shifts of any length.
14478 	 */
14479 	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
14480 		dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
14481 	else
14482 		dst_reg->smax_value = S64_MAX;
14483 
14484 	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
14485 		dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
14486 	else
14487 		dst_reg->smin_value = S64_MIN;
14488 
14489 	/* If we might shift our top bit out, then we know nothing */
14490 	if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
14491 		dst_reg->umin_value = 0;
14492 		dst_reg->umax_value = U64_MAX;
14493 	} else {
14494 		dst_reg->umin_value <<= umin_val;
14495 		dst_reg->umax_value <<= umax_val;
14496 	}
14497 }
14498 
14499 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
14500 			       struct bpf_reg_state *src_reg)
14501 {
14502 	u64 umax_val = src_reg->umax_value;
14503 	u64 umin_val = src_reg->umin_value;
14504 
14505 	/* scalar64 calc uses 32bit unshifted bounds so must be called first */
14506 	__scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
14507 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
14508 
14509 	dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
14510 	/* We may learn something more from the var_off */
14511 	__update_reg_bounds(dst_reg);
14512 }
14513 
14514 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
14515 				 struct bpf_reg_state *src_reg)
14516 {
14517 	struct tnum subreg = tnum_subreg(dst_reg->var_off);
14518 	u32 umax_val = src_reg->u32_max_value;
14519 	u32 umin_val = src_reg->u32_min_value;
14520 
14521 	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
14522 	 * be negative, then either:
14523 	 * 1) src_reg might be zero, so the sign bit of the result is
14524 	 *    unknown, so we lose our signed bounds
14525 	 * 2) it's known negative, thus the unsigned bounds capture the
14526 	 *    signed bounds
14527 	 * 3) the signed bounds cross zero, so they tell us nothing
14528 	 *    about the result
14529 	 * If the value in dst_reg is known nonnegative, then again the
14530 	 * unsigned bounds capture the signed bounds.
14531 	 * Thus, in all cases it suffices to blow away our signed bounds
14532 	 * and rely on inferring new ones from the unsigned bounds and
14533 	 * var_off of the result.
14534 	 */
14535 	dst_reg->s32_min_value = S32_MIN;
14536 	dst_reg->s32_max_value = S32_MAX;
14537 
14538 	dst_reg->var_off = tnum_rshift(subreg, umin_val);
14539 	dst_reg->u32_min_value >>= umax_val;
14540 	dst_reg->u32_max_value >>= umin_val;
14541 
14542 	__mark_reg64_unbounded(dst_reg);
14543 	__update_reg32_bounds(dst_reg);
14544 }
14545 
14546 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
14547 			       struct bpf_reg_state *src_reg)
14548 {
14549 	u64 umax_val = src_reg->umax_value;
14550 	u64 umin_val = src_reg->umin_value;
14551 
14552 	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
14553 	 * be negative, then either:
14554 	 * 1) src_reg might be zero, so the sign bit of the result is
14555 	 *    unknown, so we lose our signed bounds
14556 	 * 2) it's known negative, thus the unsigned bounds capture the
14557 	 *    signed bounds
14558 	 * 3) the signed bounds cross zero, so they tell us nothing
14559 	 *    about the result
14560 	 * If the value in dst_reg is known nonnegative, then again the
14561 	 * unsigned bounds capture the signed bounds.
14562 	 * Thus, in all cases it suffices to blow away our signed bounds
14563 	 * and rely on inferring new ones from the unsigned bounds and
14564 	 * var_off of the result.
14565 	 */
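	/* E.g. if dst_reg may hold -8 and the shift count may be 0 or 1, the
	 * results include both -8 and 0x7ffffffffffffffc, so no meaningful
	 * signed bounds survive the shift.
	 */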
14566 	dst_reg->smin_value = S64_MIN;
14567 	dst_reg->smax_value = S64_MAX;
14568 	dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
14569 	dst_reg->umin_value >>= umax_val;
14570 	dst_reg->umax_value >>= umin_val;
14571 
14572 	/* It's not easy to operate on alu32 bounds here because it depends
14573 	 * on bits being shifted in. Take easy way out and mark unbounded
14574 	 * so we can recalculate later from tnum.
14575 	 */
14576 	__mark_reg32_unbounded(dst_reg);
14577 	__update_reg_bounds(dst_reg);
14578 }
14579 
14580 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
14581 				  struct bpf_reg_state *src_reg)
14582 {
14583 	u64 umin_val = src_reg->u32_min_value;
14584 
14585 	/* Upon reaching here, src_known is true and
14586 	 * umax_val is equal to umin_val.
14587 	 */
14588 	dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
14589 	dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
14590 
14591 	dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
14592 
14593 	/* blow away the dst_reg umin_value/umax_value and rely on
14594 	 * dst_reg var_off to refine the result.
14595 	 */
14596 	dst_reg->u32_min_value = 0;
14597 	dst_reg->u32_max_value = U32_MAX;
14598 
14599 	__mark_reg64_unbounded(dst_reg);
14600 	__update_reg32_bounds(dst_reg);
14601 }
14602 
14603 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
14604 				struct bpf_reg_state *src_reg)
14605 {
14606 	u64 umin_val = src_reg->umin_value;
14607 
14608 	/* Upon reaching here, src_known is true and umax_val is equal
14609 	 * to umin_val.
14610 	 */
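	/* E.g. a known shift of 2 turns signed bounds [-16, 12] into [-4, 3],
	 * since the sign bit is replicated by the arithmetic shift.
	 */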
14611 	dst_reg->smin_value >>= umin_val;
14612 	dst_reg->smax_value >>= umin_val;
14613 
14614 	dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
14615 
14616 	/* blow away the dst_reg umin_value/umax_value and rely on
14617 	 * dst_reg var_off to refine the result.
14618 	 */
14619 	dst_reg->umin_value = 0;
14620 	dst_reg->umax_value = U64_MAX;
14621 
14622 	/* It's not easy to operate on alu32 bounds here because it depends
14623 	 * on bits being shifted in from upper 32-bits. Take easy way out
14624 	 * and mark unbounded so we can recalculate later from tnum.
14625 	 */
14626 	__mark_reg32_unbounded(dst_reg);
14627 	__update_reg_bounds(dst_reg);
14628 }
14629 
14630 static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn,
14631 					     const struct bpf_reg_state *src_reg)
14632 {
14633 	bool src_is_const = false;
14634 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
14635 
14636 	if (insn_bitness == 32) {
14637 		if (tnum_subreg_is_const(src_reg->var_off)
14638 		    && src_reg->s32_min_value == src_reg->s32_max_value
14639 		    && src_reg->u32_min_value == src_reg->u32_max_value)
14640 			src_is_const = true;
14641 	} else {
14642 		if (tnum_is_const(src_reg->var_off)
14643 		    && src_reg->smin_value == src_reg->smax_value
14644 		    && src_reg->umin_value == src_reg->umax_value)
14645 			src_is_const = true;
14646 	}
14647 
14648 	switch (BPF_OP(insn->code)) {
14649 	case BPF_ADD:
14650 	case BPF_SUB:
14651 	case BPF_AND:
14652 	case BPF_XOR:
14653 	case BPF_OR:
14654 	case BPF_MUL:
14655 		return true;
14656 
14657 	/* Shift operators range is only computable if shift dimension operand
14658 	 * is a constant. Shifts greater than 31 or 63 are undefined. This
14659 	 * includes shifts by a negative number.
14660 	 */
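	/* E.g. for "r1 <<= r2" with r2 not known to be a constant smaller
	 * than the bit width, the caller simply marks the destination as an
	 * unknown scalar.
	 */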
14661 	case BPF_LSH:
14662 	case BPF_RSH:
14663 	case BPF_ARSH:
14664 		return (src_is_const && src_reg->umax_value < insn_bitness);
14665 	default:
14666 		return false;
14667 	}
14668 }
14669 
14670 /* WARNING: This function does calculations on 64-bit values, but the actual
14671  * execution may occur on 32-bit values. Therefore, things like bitshifts
14672  * need extra checks in the 32-bit case.
14673  */
14674 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
14675 				      struct bpf_insn *insn,
14676 				      struct bpf_reg_state *dst_reg,
14677 				      struct bpf_reg_state src_reg)
14678 {
14679 	u8 opcode = BPF_OP(insn->code);
14680 	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
14681 	int ret;
14682 
14683 	if (!is_safe_to_compute_dst_reg_range(insn, &src_reg)) {
14684 		__mark_reg_unknown(env, dst_reg);
14685 		return 0;
14686 	}
14687 
14688 	if (sanitize_needed(opcode)) {
14689 		ret = sanitize_val_alu(env, insn);
14690 		if (ret < 0)
14691 			return sanitize_err(env, insn, ret, NULL, NULL);
14692 	}
14693 
14694 	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
14695 	 * There are two classes of instructions: for the first class we track both
14696 	 * alu32 and alu64 sign/unsigned bounds independently; this provides the
14697 	 * greatest amount of precision when alu operations are mixed with jmp32
14698 	 * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_AND,
14699 	 * BPF_OR and BPF_XOR. This is possible because these ops have fairly easy to
14700 	 * understand and calculate behavior in both 32-bit and 64-bit alu ops.
14701 	 * See alu32 verifier tests for examples. The second class of
14702 	 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
14703 	 * with regards to tracking sign/unsigned bounds because the bits may
14704 	 * cross subreg boundaries in the alu64 case. When this happens we mark
14705 	 * the reg unbounded in the subreg bound space and use the resulting
14706 	 * tnum to calculate an approximation of the sign/unsigned bounds.
14707 	 */
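	/* E.g. a 64-bit BPF_RSH shifts bits from the upper half into the low
	 * 32 bits, so the subreg bounds cannot be derived directly; they are
	 * marked unbounded and later reconstructed from the resulting tnum.
	 */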
14708 	switch (opcode) {
14709 	case BPF_ADD:
14710 		scalar32_min_max_add(dst_reg, &src_reg);
14711 		scalar_min_max_add(dst_reg, &src_reg);
14712 		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
14713 		break;
14714 	case BPF_SUB:
14715 		scalar32_min_max_sub(dst_reg, &src_reg);
14716 		scalar_min_max_sub(dst_reg, &src_reg);
14717 		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
14718 		break;
14719 	case BPF_MUL:
14720 		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
14721 		scalar32_min_max_mul(dst_reg, &src_reg);
14722 		scalar_min_max_mul(dst_reg, &src_reg);
14723 		break;
14724 	case BPF_AND:
14725 		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
14726 		scalar32_min_max_and(dst_reg, &src_reg);
14727 		scalar_min_max_and(dst_reg, &src_reg);
14728 		break;
14729 	case BPF_OR:
14730 		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
14731 		scalar32_min_max_or(dst_reg, &src_reg);
14732 		scalar_min_max_or(dst_reg, &src_reg);
14733 		break;
14734 	case BPF_XOR:
14735 		dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
14736 		scalar32_min_max_xor(dst_reg, &src_reg);
14737 		scalar_min_max_xor(dst_reg, &src_reg);
14738 		break;
14739 	case BPF_LSH:
14740 		if (alu32)
14741 			scalar32_min_max_lsh(dst_reg, &src_reg);
14742 		else
14743 			scalar_min_max_lsh(dst_reg, &src_reg);
14744 		break;
14745 	case BPF_RSH:
14746 		if (alu32)
14747 			scalar32_min_max_rsh(dst_reg, &src_reg);
14748 		else
14749 			scalar_min_max_rsh(dst_reg, &src_reg);
14750 		break;
14751 	case BPF_ARSH:
14752 		if (alu32)
14753 			scalar32_min_max_arsh(dst_reg, &src_reg);
14754 		else
14755 			scalar_min_max_arsh(dst_reg, &src_reg);
14756 		break;
14757 	default:
14758 		break;
14759 	}
14760 
14761 	/* ALU32 ops are zero extended into 64bit register */
14762 	if (alu32)
14763 		zext_32_to_64(dst_reg);
14764 	reg_bounds_sync(dst_reg);
14765 	return 0;
14766 }
14767 
14768 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
14769  * and var_off.
14770  */
14771 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
14772 				   struct bpf_insn *insn)
14773 {
14774 	struct bpf_verifier_state *vstate = env->cur_state;
14775 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
14776 	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
14777 	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
14778 	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
14779 	u8 opcode = BPF_OP(insn->code);
14780 	int err;
14781 
14782 	dst_reg = &regs[insn->dst_reg];
14783 	src_reg = NULL;
14784 
14785 	if (dst_reg->type == PTR_TO_ARENA) {
14786 		struct bpf_insn_aux_data *aux = cur_aux(env);
14787 
14788 		if (BPF_CLASS(insn->code) == BPF_ALU64)
14789 			/*
14790 			 * 32-bit operations zero upper bits automatically.
14791 			 * 64-bit operations need to be converted to 32.
14792 			 */
14793 			aux->needs_zext = true;
14794 
14795 		/* Any arithmetic operation is allowed on arena pointers */
14796 		return 0;
14797 	}
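	/* E.g. (illustrative): with r1 of type PTR_TO_ARENA, "r1 += 4096"
	 * is accepted here without bounds tracking; a 64-bit add is merely
	 * flagged via needs_zext so it can later be rewritten as the 32-bit
	 * form that clears the upper bits.
	 */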
14798 
14799 	if (dst_reg->type != SCALAR_VALUE)
14800 		ptr_reg = dst_reg;
14801 
14802 	if (BPF_SRC(insn->code) == BPF_X) {
14803 		src_reg = &regs[insn->src_reg];
14804 		if (src_reg->type != SCALAR_VALUE) {
14805 			if (dst_reg->type != SCALAR_VALUE) {
14806 				/* Combining two pointers by any ALU op yields
14807 				 * an arbitrary scalar. Disallow all math except
14808 				 * pointer subtraction
14809 				 */
14810 				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
14811 					mark_reg_unknown(env, regs, insn->dst_reg);
14812 					return 0;
14813 				}
14814 				verbose(env, "R%d pointer %s pointer prohibited\n",
14815 					insn->dst_reg,
14816 					bpf_alu_string[opcode >> 4]);
14817 				return -EACCES;
14818 			} else {
14819 				/* scalar += pointer
14820 				 * This is legal, but we have to reverse our
14821 				 * src/dest handling in computing the range
14822 				 */
14823 				err = mark_chain_precision(env, insn->dst_reg);
14824 				if (err)
14825 					return err;
14826 				return adjust_ptr_min_max_vals(env, insn,
14827 							       src_reg, dst_reg);
14828 			}
14829 		} else if (ptr_reg) {
14830 			/* pointer += scalar */
14831 			err = mark_chain_precision(env, insn->src_reg);
14832 			if (err)
14833 				return err;
14834 			return adjust_ptr_min_max_vals(env, insn,
14835 						       dst_reg, src_reg);
14836 		} else if (dst_reg->precise) {
14837 			/* if dst_reg is precise, src_reg should be precise as well */
14838 			err = mark_chain_precision(env, insn->src_reg);
14839 			if (err)
14840 				return err;
14841 		}
14842 	} else {
14843 		/* Pretend the src is a reg with a known value, since we only
14844 		 * need to be able to read from this state.
14845 		 */
14846 		off_reg.type = SCALAR_VALUE;
14847 		__mark_reg_known(&off_reg, insn->imm);
14848 		src_reg = &off_reg;
14849 		if (ptr_reg) /* pointer += K */
14850 			return adjust_ptr_min_max_vals(env, insn,
14851 						       ptr_reg, src_reg);
14852 	}
14853 
14854 	/* Got here implies adding two SCALAR_VALUEs */
14855 	if (WARN_ON_ONCE(ptr_reg)) {
14856 		print_verifier_state(env, vstate, vstate->curframe, true);
14857 		verbose(env, "verifier internal error: unexpected ptr_reg\n");
14858 		return -EINVAL;
14859 	}
14860 	if (WARN_ON(!src_reg)) {
14861 		print_verifier_state(env, vstate, vstate->curframe, true);
14862 		verbose(env, "verifier internal error: no src_reg\n");
14863 		return -EINVAL;
14864 	}
14865 	err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
14866 	if (err)
14867 		return err;
14868 	/*
14869 	 * Compilers can generate the code
14870 	 * r1 = r2
14871 	 * r1 += 0x1
14872 	 * if r2 < 1000 goto ...
14873 	 * use r1 in memory access
14874 	 * So for 64-bit alu remember constant delta between r2 and r1 and
14875 	 * update r1 after 'if' condition.
14876 	 */
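	/* E.g. (illustrative): after "r1 = r2; r1 += 16", r1 keeps r2's id
	 * with BPF_ADD_CONST set and off == 16, so once the 'if' narrows r2,
	 * sync_linked_regs() can rebuild r1 as r2 + 16.
	 */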
14877 	if (env->bpf_capable &&
14878 	    BPF_OP(insn->code) == BPF_ADD && !alu32 &&
14879 	    dst_reg->id && is_reg_const(src_reg, false)) {
14880 		u64 val = reg_const_value(src_reg, false);
14881 
14882 		if ((dst_reg->id & BPF_ADD_CONST) ||
14883 		    /* prevent overflow in sync_linked_regs() later */
14884 		    val > (u32)S32_MAX) {
14885 			/*
14886 			 * If the register already went through rX += val
14887 			 * we cannot accumulate another val into rX->off.
14888 			 */
14889 			dst_reg->off = 0;
14890 			dst_reg->id = 0;
14891 		} else {
14892 			dst_reg->id |= BPF_ADD_CONST;
14893 			dst_reg->off = val;
14894 		}
14895 	} else {
14896 		/*
14897 		 * Make sure ID is cleared otherwise dst_reg min/max could be
14898 		 * incorrectly propagated into other registers by sync_linked_regs()
14899 		 */
14900 		dst_reg->id = 0;
14901 	}
14902 	return 0;
14903 }
14904 
14905 /* check validity of 32-bit and 64-bit arithmetic operations */
14906 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
14907 {
14908 	struct bpf_reg_state *regs = cur_regs(env);
14909 	u8 opcode = BPF_OP(insn->code);
14910 	int err;
14911 
14912 	if (opcode == BPF_END || opcode == BPF_NEG) {
14913 		if (opcode == BPF_NEG) {
14914 			if (BPF_SRC(insn->code) != BPF_K ||
14915 			    insn->src_reg != BPF_REG_0 ||
14916 			    insn->off != 0 || insn->imm != 0) {
14917 				verbose(env, "BPF_NEG uses reserved fields\n");
14918 				return -EINVAL;
14919 			}
14920 		} else {
14921 			if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
14922 			    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
14923 			    (BPF_CLASS(insn->code) == BPF_ALU64 &&
14924 			     BPF_SRC(insn->code) != BPF_TO_LE)) {
14925 				verbose(env, "BPF_END uses reserved fields\n");
14926 				return -EINVAL;
14927 			}
14928 		}
14929 
14930 		/* check src operand */
14931 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
14932 		if (err)
14933 			return err;
14934 
14935 		if (is_pointer_value(env, insn->dst_reg)) {
14936 			verbose(env, "R%d pointer arithmetic prohibited\n",
14937 				insn->dst_reg);
14938 			return -EACCES;
14939 		}
14940 
14941 		/* check dest operand */
14942 		err = check_reg_arg(env, insn->dst_reg, DST_OP);
14943 		if (err)
14944 			return err;
14945 
14946 	} else if (opcode == BPF_MOV) {
14947 
14948 		if (BPF_SRC(insn->code) == BPF_X) {
14949 			if (BPF_CLASS(insn->code) == BPF_ALU) {
14950 				if ((insn->off != 0 && insn->off != 8 && insn->off != 16) ||
14951 				    insn->imm) {
14952 					verbose(env, "BPF_MOV uses reserved fields\n");
14953 					return -EINVAL;
14954 				}
14955 			} else if (insn->off == BPF_ADDR_SPACE_CAST) {
14956 				if (insn->imm != 1 && insn->imm != 1u << 16) {
14957 					verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n");
14958 					return -EINVAL;
14959 				}
14960 				if (!env->prog->aux->arena) {
14961 					verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n");
14962 					return -EINVAL;
14963 				}
14964 			} else {
14965 				if ((insn->off != 0 && insn->off != 8 && insn->off != 16 &&
14966 				     insn->off != 32) || insn->imm) {
14967 					verbose(env, "BPF_MOV uses reserved fields\n");
14968 					return -EINVAL;
14969 				}
14970 			}
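			/* Accepted BPF_MOV | BPF_X forms at this point
			 * (illustrative mnemonics):
			 *   w1 = w2              alu32, off == 0
			 *   w1 = (s8|s16)w2      alu32, off == 8/16
			 *   r1 = r2              alu64, off == 0
			 *   r1 = (s8|s16|s32)r2  alu64, off == 8/16/32
			 *   r1 = addr_space_cast(r2, ...)
			 *                        alu64, off == BPF_ADDR_SPACE_CAST,
			 *                        imm == 1 or 1 << 16
			 */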
14971 
14972 			/* check src operand */
14973 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
14974 			if (err)
14975 				return err;
14976 		} else {
14977 			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
14978 				verbose(env, "BPF_MOV uses reserved fields\n");
14979 				return -EINVAL;
14980 			}
14981 		}
14982 
14983 		/* check dest operand, mark as required later */
14984 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
14985 		if (err)
14986 			return err;
14987 
14988 		if (BPF_SRC(insn->code) == BPF_X) {
14989 			struct bpf_reg_state *src_reg = regs + insn->src_reg;
14990 			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
14991 
14992 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
14993 				if (insn->imm) {
14994 					/* off == BPF_ADDR_SPACE_CAST */
14995 					mark_reg_unknown(env, regs, insn->dst_reg);
14996 					if (insn->imm == 1) { /* cast from as(1) to as(0) */
14997 						dst_reg->type = PTR_TO_ARENA;
14998 						/* PTR_TO_ARENA is 32-bit */
14999 						dst_reg->subreg_def = env->insn_idx + 1;
15000 					}
15001 				} else if (insn->off == 0) {
15002 					/* case: R1 = R2
15003 					 * copy register state to dest reg
15004 					 */
15005 					assign_scalar_id_before_mov(env, src_reg);
15006 					copy_register_state(dst_reg, src_reg);
15007 					dst_reg->live |= REG_LIVE_WRITTEN;
15008 					dst_reg->subreg_def = DEF_NOT_SUBREG;
15009 				} else {
15010 					/* case: R1 = (s8, s16, s32)R2 */
15011 					if (is_pointer_value(env, insn->src_reg)) {
15012 						verbose(env,
15013 							"R%d sign-extension part of pointer\n",
15014 							insn->src_reg);
15015 						return -EACCES;
15016 					} else if (src_reg->type == SCALAR_VALUE) {
15017 						bool no_sext;
15018 
15019 						no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
15020 						if (no_sext)
15021 							assign_scalar_id_before_mov(env, src_reg);
15022 						copy_register_state(dst_reg, src_reg);
15023 						if (!no_sext)
15024 							dst_reg->id = 0;
15025 						coerce_reg_to_size_sx(dst_reg, insn->off >> 3);
15026 						dst_reg->live |= REG_LIVE_WRITTEN;
15027 						dst_reg->subreg_def = DEF_NOT_SUBREG;
15028 					} else {
15029 						mark_reg_unknown(env, regs, insn->dst_reg);
15030 					}
15031 				}
15032 			} else {
15033 				/* R1 = (u32) R2 */
15034 				if (is_pointer_value(env, insn->src_reg)) {
15035 					verbose(env,
15036 						"R%d partial copy of pointer\n",
15037 						insn->src_reg);
15038 					return -EACCES;
15039 				} else if (src_reg->type == SCALAR_VALUE) {
15040 					if (insn->off == 0) {
15041 						bool is_src_reg_u32 = get_reg_width(src_reg) <= 32;
15042 
15043 						if (is_src_reg_u32)
15044 							assign_scalar_id_before_mov(env, src_reg);
15045 						copy_register_state(dst_reg, src_reg);
15046 						/* Make sure ID is cleared if src_reg is not in u32
15047 						 * range otherwise dst_reg min/max could be incorrectly
15048 						 * propagated into src_reg by sync_linked_regs()
15049 						 */
15050 						if (!is_src_reg_u32)
15051 							dst_reg->id = 0;
15052 						dst_reg->live |= REG_LIVE_WRITTEN;
15053 						dst_reg->subreg_def = env->insn_idx + 1;
15054 					} else {
15055 						/* case: W1 = (s8, s16)W2 */
15056 						bool no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
15057 
15058 						if (no_sext)
15059 							assign_scalar_id_before_mov(env, src_reg);
15060 						copy_register_state(dst_reg, src_reg);
15061 						if (!no_sext)
15062 							dst_reg->id = 0;
15063 						dst_reg->live |= REG_LIVE_WRITTEN;
15064 						dst_reg->subreg_def = env->insn_idx + 1;
15065 						coerce_subreg_to_size_sx(dst_reg, insn->off >> 3);
15066 					}
15067 				} else {
15068 					mark_reg_unknown(env, regs,
15069 							 insn->dst_reg);
15070 				}
15071 				zext_32_to_64(dst_reg);
15072 				reg_bounds_sync(dst_reg);
15073 			}
15074 		} else {
15075 			/* case: R = imm
15076 			 * remember the value we stored into this reg
15077 			 */
15078 			/* clear any state __mark_reg_known doesn't set */
15079 			mark_reg_unknown(env, regs, insn->dst_reg);
15080 			regs[insn->dst_reg].type = SCALAR_VALUE;
15081 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
15082 				__mark_reg_known(regs + insn->dst_reg,
15083 						 insn->imm);
15084 			} else {
15085 				__mark_reg_known(regs + insn->dst_reg,
15086 						 (u32)insn->imm);
15087 			}
15088 		}
15089 
15090 	} else if (opcode > BPF_END) {
15091 		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
15092 		return -EINVAL;
15093 
15094 	} else {	/* all other ALU ops: and, sub, xor, add, ... */
15095 
15096 		if (BPF_SRC(insn->code) == BPF_X) {
15097 			if (insn->imm != 0 || insn->off > 1 ||
15098 			    (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
15099 				verbose(env, "BPF_ALU uses reserved fields\n");
15100 				return -EINVAL;
15101 			}
15102 			/* check src1 operand */
15103 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
15104 			if (err)
15105 				return err;
15106 		} else {
15107 			if (insn->src_reg != BPF_REG_0 || insn->off > 1 ||
15108 			    (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
15109 				verbose(env, "BPF_ALU uses reserved fields\n");
15110 				return -EINVAL;
15111 			}
15112 		}
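		/* Note: off == 1 selects the signed division/modulo variants
		 * of BPF_DIV/BPF_MOD; every other ALU op must keep off == 0.
		 */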
15113 
15114 		/* check src2 operand */
15115 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
15116 		if (err)
15117 			return err;
15118 
15119 		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
15120 		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
15121 			verbose(env, "div by zero\n");
15122 			return -EINVAL;
15123 		}
15124 
15125 		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
15126 		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
15127 			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
15128 
15129 			if (insn->imm < 0 || insn->imm >= size) {
15130 				verbose(env, "invalid shift %d\n", insn->imm);
15131 				return -EINVAL;
15132 			}
15133 		}
15134 
15135 		/* check dest operand */
15136 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
15137 		err = err ?: adjust_reg_min_max_vals(env, insn);
15138 		if (err)
15139 			return err;
15140 	}
15141 
15142 	return reg_bounds_sanity_check(env, &regs[insn->dst_reg], "alu");
15143 }
15144 
15145 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
15146 				   struct bpf_reg_state *dst_reg,
15147 				   enum bpf_reg_type type,
15148 				   bool range_right_open)
15149 {
15150 	struct bpf_func_state *state;
15151 	struct bpf_reg_state *reg;
15152 	int new_range;
15153 
15154 	if (dst_reg->off < 0 ||
15155 	    (dst_reg->off == 0 && range_right_open))
15156 		/* This doesn't give us any range */
15157 		return;
15158 
15159 	if (dst_reg->umax_value > MAX_PACKET_OFF ||
15160 	    dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
15161 		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
15162 		 * than pkt_end, but that's because it's also less than pkt.
15163 		 */
15164 		return;
15165 
15166 	new_range = dst_reg->off;
15167 	if (range_right_open)
15168 		new_range++;
15169 
15170 	/* Examples for register markings:
15171 	 *
15172 	 * pkt_data in dst register:
15173 	 *
15174 	 *   r2 = r3;
15175 	 *   r2 += 8;
15176 	 *   if (r2 > pkt_end) goto <handle exception>
15177 	 *   <access okay>
15178 	 *
15179 	 *   r2 = r3;
15180 	 *   r2 += 8;
15181 	 *   if (r2 < pkt_end) goto <access okay>
15182 	 *   <handle exception>
15183 	 *
15184 	 *   Where:
15185 	 *     r2 == dst_reg, pkt_end == src_reg
15186 	 *     r2=pkt(id=n,off=8,r=0)
15187 	 *     r3=pkt(id=n,off=0,r=0)
15188 	 *
15189 	 * pkt_data in src register:
15190 	 *
15191 	 *   r2 = r3;
15192 	 *   r2 += 8;
15193 	 *   if (pkt_end >= r2) goto <access okay>
15194 	 *   <handle exception>
15195 	 *
15196 	 *   r2 = r3;
15197 	 *   r2 += 8;
15198 	 *   if (pkt_end <= r2) goto <handle exception>
15199 	 *   <access okay>
15200 	 *
15201 	 *   Where:
15202 	 *     pkt_end == dst_reg, r2 == src_reg
15203 	 *     r2=pkt(id=n,off=8,r=0)
15204 	 *     r3=pkt(id=n,off=0,r=0)
15205 	 *
15206 	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
15207 	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
15208 	 * and [r3, r3 + 8-1) respectively is safe to access depending on
15209 	 * the check.
15210 	 */
15211 
15212 	/* If our ids match, then we must have the same max_value.  And we
15213 	 * don't care about the other reg's fixed offset, since if it's too big
15214 	 * the range won't allow anything.
15215 	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
15216 	 */
15217 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
15218 		if (reg->type == type && reg->id == dst_reg->id)
15219 			/* keep the maximum range already checked */
15220 			reg->range = max(reg->range, new_range);
15221 	}));
15222 }
15223 
15224 /*
15225  * <reg1> <op> <reg2>, currently assuming reg2 is a constant
15226  */
15227 static int is_scalar_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
15228 				  u8 opcode, bool is_jmp32)
15229 {
15230 	struct tnum t1 = is_jmp32 ? tnum_subreg(reg1->var_off) : reg1->var_off;
15231 	struct tnum t2 = is_jmp32 ? tnum_subreg(reg2->var_off) : reg2->var_off;
15232 	u64 umin1 = is_jmp32 ? (u64)reg1->u32_min_value : reg1->umin_value;
15233 	u64 umax1 = is_jmp32 ? (u64)reg1->u32_max_value : reg1->umax_value;
15234 	s64 smin1 = is_jmp32 ? (s64)reg1->s32_min_value : reg1->smin_value;
15235 	s64 smax1 = is_jmp32 ? (s64)reg1->s32_max_value : reg1->smax_value;
15236 	u64 umin2 = is_jmp32 ? (u64)reg2->u32_min_value : reg2->umin_value;
15237 	u64 umax2 = is_jmp32 ? (u64)reg2->u32_max_value : reg2->umax_value;
15238 	s64 smin2 = is_jmp32 ? (s64)reg2->s32_min_value : reg2->smin_value;
15239 	s64 smax2 = is_jmp32 ? (s64)reg2->s32_max_value : reg2->smax_value;
15240 
15241 	switch (opcode) {
15242 	case BPF_JEQ:
15243 		/* constants, umin/umax and smin/smax checks would be
15244 		 * redundant in this case because they all should match
15245 		 */
15246 		if (tnum_is_const(t1) && tnum_is_const(t2))
15247 			return t1.value == t2.value;
15248 		/* non-overlapping ranges */
15249 		if (umin1 > umax2 || umax1 < umin2)
15250 			return 0;
15251 		if (smin1 > smax2 || smax1 < smin2)
15252 			return 0;
15253 		if (!is_jmp32) {
15254 			/* if 64-bit ranges are inconclusive, see if we can
15255 			 * utilize 32-bit subrange knowledge to eliminate
15256 			 * branches that can't be taken a priori
15257 			 */
15258 			if (reg1->u32_min_value > reg2->u32_max_value ||
15259 			    reg1->u32_max_value < reg2->u32_min_value)
15260 				return 0;
15261 			if (reg1->s32_min_value > reg2->s32_max_value ||
15262 			    reg1->s32_max_value < reg2->s32_min_value)
15263 				return 0;
15264 		}
15265 		break;
15266 	case BPF_JNE:
15267 		/* constants, umin/umax and smin/smax checks would be
15268 		 * redundant in this case because they all should match
15269 		 */
15270 		if (tnum_is_const(t1) && tnum_is_const(t2))
15271 			return t1.value != t2.value;
15272 		/* non-overlapping ranges */
15273 		if (umin1 > umax2 || umax1 < umin2)
15274 			return 1;
15275 		if (smin1 > smax2 || smax1 < smin2)
15276 			return 1;
15277 		if (!is_jmp32) {
15278 			/* if 64-bit ranges are inconclusive, see if we can
15279 			 * utilize 32-bit subrange knowledge to eliminate
15280 			 * branches that can't be taken a priori
15281 			 */
15282 			if (reg1->u32_min_value > reg2->u32_max_value ||
15283 			    reg1->u32_max_value < reg2->u32_min_value)
15284 				return 1;
15285 			if (reg1->s32_min_value > reg2->s32_max_value ||
15286 			    reg1->s32_max_value < reg2->s32_min_value)
15287 				return 1;
15288 		}
15289 		break;
15290 	case BPF_JSET:
15291 		if (!is_reg_const(reg2, is_jmp32)) {
15292 			swap(reg1, reg2);
15293 			swap(t1, t2);
15294 		}
15295 		if (!is_reg_const(reg2, is_jmp32))
15296 			return -1;
15297 		if ((~t1.mask & t1.value) & t2.value)
15298 			return 1;
15299 		if (!((t1.mask | t1.value) & t2.value))
15300 			return 0;
15301 		break;
15302 	case BPF_JGT:
15303 		if (umin1 > umax2)
15304 			return 1;
15305 		else if (umax1 <= umin2)
15306 			return 0;
15307 		break;
15308 	case BPF_JSGT:
15309 		if (smin1 > smax2)
15310 			return 1;
15311 		else if (smax1 <= smin2)
15312 			return 0;
15313 		break;
15314 	case BPF_JLT:
15315 		if (umax1 < umin2)
15316 			return 1;
15317 		else if (umin1 >= umax2)
15318 			return 0;
15319 		break;
15320 	case BPF_JSLT:
15321 		if (smax1 < smin2)
15322 			return 1;
15323 		else if (smin1 >= smax2)
15324 			return 0;
15325 		break;
15326 	case BPF_JGE:
15327 		if (umin1 >= umax2)
15328 			return 1;
15329 		else if (umax1 < umin2)
15330 			return 0;
15331 		break;
15332 	case BPF_JSGE:
15333 		if (smin1 >= smax2)
15334 			return 1;
15335 		else if (smax1 < smin2)
15336 			return 0;
15337 		break;
15338 	case BPF_JLE:
15339 		if (umax1 <= umin2)
15340 			return 1;
15341 		else if (umin1 > umax2)
15342 			return 0;
15343 		break;
15344 	case BPF_JSLE:
15345 		if (smax1 <= smin2)
15346 			return 1;
15347 		else if (smin1 > smax2)
15348 			return 0;
15349 		break;
15350 	}
15351 
15352 	return -1;
15353 }
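
/* Example for is_scalar_branch_taken() above (illustrative ranges): for
 * "if r1 > r2" with r1 in [100, 200] and r2 in [0, 50], umin1 > umax2
 * holds and the branch is known taken (1); with r1 in [0, 10] and
 * r2 in [5, 50] neither bound is conclusive, so the result is -1.
 */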
15354 
15355 static int flip_opcode(u32 opcode)
15356 {
15357 	/* How can we transform "a <op> b" into "b <op> a"? */
15358 	static const u8 opcode_flip[16] = {
15359 		/* these stay the same */
15360 		[BPF_JEQ  >> 4] = BPF_JEQ,
15361 		[BPF_JNE  >> 4] = BPF_JNE,
15362 		[BPF_JSET >> 4] = BPF_JSET,
15363 		/* these swap "lesser" and "greater" (L and G in the opcodes) */
15364 		[BPF_JGE  >> 4] = BPF_JLE,
15365 		[BPF_JGT  >> 4] = BPF_JLT,
15366 		[BPF_JLE  >> 4] = BPF_JGE,
15367 		[BPF_JLT  >> 4] = BPF_JGT,
15368 		[BPF_JSGE >> 4] = BPF_JSLE,
15369 		[BPF_JSGT >> 4] = BPF_JSLT,
15370 		[BPF_JSLE >> 4] = BPF_JSGE,
15371 		[BPF_JSLT >> 4] = BPF_JSGT
15372 	};
15373 	return opcode_flip[opcode >> 4];
15374 }
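
/* E.g. "if r1 < r2 goto L" is equivalent to "if r2 > r1 goto L", so
 * BPF_JLT maps to BPF_JGT; JEQ/JNE/JSET and signedness are preserved.
 */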
15375 
15376 static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
15377 				   struct bpf_reg_state *src_reg,
15378 				   u8 opcode)
15379 {
15380 	struct bpf_reg_state *pkt;
15381 
15382 	if (src_reg->type == PTR_TO_PACKET_END) {
15383 		pkt = dst_reg;
15384 	} else if (dst_reg->type == PTR_TO_PACKET_END) {
15385 		pkt = src_reg;
15386 		opcode = flip_opcode(opcode);
15387 	} else {
15388 		return -1;
15389 	}
15390 
15391 	if (pkt->range >= 0)
15392 		return -1;
15393 
15394 	switch (opcode) {
15395 	case BPF_JLE:
15396 		/* pkt <= pkt_end */
15397 		fallthrough;
15398 	case BPF_JGT:
15399 		/* pkt > pkt_end */
15400 		if (pkt->range == BEYOND_PKT_END)
15401 			/* pkt has at least one extra byte beyond pkt_end */
15402 			return opcode == BPF_JGT;
15403 		break;
15404 	case BPF_JLT:
15405 		/* pkt < pkt_end */
15406 		fallthrough;
15407 	case BPF_JGE:
15408 		/* pkt >= pkt_end */
15409 		if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
15410 			return opcode == BPF_JGE;
15411 		break;
15412 	}
15413 	return -1;
15414 }
15415 
15416 /* compute branch direction of the expression "if (<reg1> opcode <reg2>) goto target;"
15417  * and return:
15418  *  1 - branch will be taken and "goto target" will be executed
15419  *  0 - branch will not be taken and fall-through to next insn
15420  * -1 - unknown. Example: "if (reg1 < 5)" is unknown when the register's
15421  *      value range is [0,10]
15422  */
15423 static int is_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
15424 			   u8 opcode, bool is_jmp32)
15425 {
15426 	if (reg_is_pkt_pointer_any(reg1) && reg_is_pkt_pointer_any(reg2) && !is_jmp32)
15427 		return is_pkt_ptr_branch_taken(reg1, reg2, opcode);
15428 
15429 	if (__is_pointer_value(false, reg1) || __is_pointer_value(false, reg2)) {
15430 		u64 val;
15431 
15432 		/* arrange that reg2 is a scalar, and reg1 is a pointer */
15433 		if (!is_reg_const(reg2, is_jmp32)) {
15434 			opcode = flip_opcode(opcode);
15435 			swap(reg1, reg2);
15436 		}
15437 		/* and ensure that reg2 is a constant */
15438 		if (!is_reg_const(reg2, is_jmp32))
15439 			return -1;
15440 
15441 		if (!reg_not_null(reg1))
15442 			return -1;
15443 
15444 		/* A valid (non-NULL) pointer always compares unequal to zero,
15445 		 * so we can use this to decide the branch direction.
15446 		 */
15447 		val = reg_const_value(reg2, is_jmp32);
15448 		if (val != 0)
15449 			return -1;
15450 
15451 		switch (opcode) {
15452 		case BPF_JEQ:
15453 			return 0;
15454 		case BPF_JNE:
15455 			return 1;
15456 		default:
15457 			return -1;
15458 		}
15459 	}
15460 
15461 	/* now deal with two scalars, but not necessarily constants */
15462 	return is_scalar_branch_taken(reg1, reg2, opcode, is_jmp32);
15463 }
15464 
15465 /* Opcode that corresponds to a *false* branch condition.
15466  * E.g., if r1 < r2, then reverse (false) condition is r1 >= r2
15467  */
15468 static u8 rev_opcode(u8 opcode)
15469 {
15470 	switch (opcode) {
15471 	case BPF_JEQ:		return BPF_JNE;
15472 	case BPF_JNE:		return BPF_JEQ;
15473 	/* JSET doesn't have its reverse opcode in BPF, so add
15474 	 * BPF_X flag to denote the reverse of that operation
15475 	 */
15476 	case BPF_JSET:		return BPF_JSET | BPF_X;
15477 	case BPF_JSET | BPF_X:	return BPF_JSET;
15478 	case BPF_JGE:		return BPF_JLT;
15479 	case BPF_JGT:		return BPF_JLE;
15480 	case BPF_JLE:		return BPF_JGT;
15481 	case BPF_JLT:		return BPF_JGE;
15482 	case BPF_JSGE:		return BPF_JSLT;
15483 	case BPF_JSGT:		return BPF_JSLE;
15484 	case BPF_JSLE:		return BPF_JSGT;
15485 	case BPF_JSLT:		return BPF_JSGE;
15486 	default:		return 0;
15487 	}
15488 }
15489 
15490 /* Refine range knowledge for <reg1> <op> <reg2> conditional operation. */
15491 static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
15492 				u8 opcode, bool is_jmp32)
15493 {
15494 	struct tnum t;
15495 	u64 val;
15496 
15497 	/* In case of GE/GT/SGE/SGT, reuse LE/LT/SLE/SLT logic from below */
15498 	switch (opcode) {
15499 	case BPF_JGE:
15500 	case BPF_JGT:
15501 	case BPF_JSGE:
15502 	case BPF_JSGT:
15503 		opcode = flip_opcode(opcode);
15504 		swap(reg1, reg2);
15505 		break;
15506 	default:
15507 		break;
15508 	}
15509 
15510 	switch (opcode) {
15511 	case BPF_JEQ:
15512 		if (is_jmp32) {
15513 			reg1->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value);
15514 			reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value);
15515 			reg1->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value);
15516 			reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value);
15517 			reg2->u32_min_value = reg1->u32_min_value;
15518 			reg2->u32_max_value = reg1->u32_max_value;
15519 			reg2->s32_min_value = reg1->s32_min_value;
15520 			reg2->s32_max_value = reg1->s32_max_value;
15521 
15522 			t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off));
15523 			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
15524 			reg2->var_off = tnum_with_subreg(reg2->var_off, t);
15525 		} else {
15526 			reg1->umin_value = max(reg1->umin_value, reg2->umin_value);
15527 			reg1->umax_value = min(reg1->umax_value, reg2->umax_value);
15528 			reg1->smin_value = max(reg1->smin_value, reg2->smin_value);
15529 			reg1->smax_value = min(reg1->smax_value, reg2->smax_value);
15530 			reg2->umin_value = reg1->umin_value;
15531 			reg2->umax_value = reg1->umax_value;
15532 			reg2->smin_value = reg1->smin_value;
15533 			reg2->smax_value = reg1->smax_value;
15534 
15535 			reg1->var_off = tnum_intersect(reg1->var_off, reg2->var_off);
15536 			reg2->var_off = reg1->var_off;
15537 		}
15538 		break;
15539 	case BPF_JNE:
15540 		if (!is_reg_const(reg2, is_jmp32))
15541 			swap(reg1, reg2);
15542 		if (!is_reg_const(reg2, is_jmp32))
15543 			break;
15544 
15545 		/* try to recompute the bound of reg1 if reg2 is a const and
15546 		 * is exactly the edge of reg1.
15547 		 */
15548 		val = reg_const_value(reg2, is_jmp32);
15549 		if (is_jmp32) {
15550 			/* u32_min_value is not equal to 0xffffffff at this point,
15551 			 * because otherwise u32_max_value is 0xffffffff as well,
15552 			 * in such a case both reg1 and reg2 would be constants,
15553 			 * jump would be predicted and reg_set_min_max() won't
15554 			 * be called.
15555 			 *
15556 			 * Same reasoning works for all {u,s}{min,max}{32,64} cases
15557 			 * below.
15558 			 */
15559 			if (reg1->u32_min_value == (u32)val)
15560 				reg1->u32_min_value++;
15561 			if (reg1->u32_max_value == (u32)val)
15562 				reg1->u32_max_value--;
15563 			if (reg1->s32_min_value == (s32)val)
15564 				reg1->s32_min_value++;
15565 			if (reg1->s32_max_value == (s32)val)
15566 				reg1->s32_max_value--;
15567 		} else {
15568 			if (reg1->umin_value == (u64)val)
15569 				reg1->umin_value++;
15570 			if (reg1->umax_value == (u64)val)
15571 				reg1->umax_value--;
15572 			if (reg1->smin_value == (s64)val)
15573 				reg1->smin_value++;
15574 			if (reg1->smax_value == (s64)val)
15575 				reg1->smax_value--;
15576 		}
15577 		break;
15578 	case BPF_JSET:
15579 		if (!is_reg_const(reg2, is_jmp32))
15580 			swap(reg1, reg2);
15581 		if (!is_reg_const(reg2, is_jmp32))
15582 			break;
15583 		val = reg_const_value(reg2, is_jmp32);
15584 		/* BPF_JSET (i.e., TRUE branch, *not* BPF_JSET | BPF_X)
15585 		 * requires a single-bit constant to learn something useful. E.g., if we
15586 		 * know that `r1 & 0x3` is true, then which bits (0, 1, or both)
15587 		 * are actually set? We can learn something definite only if
15588 		 * it's a single-bit value to begin with.
15589 		 *
15590 		 * BPF_JSET | BPF_X (i.e., negation of BPF_JSET) doesn't have
15591 		 * this restriction. I.e., !(r1 & 0x3) means neither bit 0 nor
15592 		 * bit 1 is set, which we can readily use in adjustments.
15593 		 */
15594 		if (!is_power_of_2(val))
15595 			break;
15596 		if (is_jmp32) {
15597 			t = tnum_or(tnum_subreg(reg1->var_off), tnum_const(val));
15598 			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
15599 		} else {
15600 			reg1->var_off = tnum_or(reg1->var_off, tnum_const(val));
15601 		}
15602 		break;
15603 	case BPF_JSET | BPF_X: /* reverse of BPF_JSET, see rev_opcode() */
15604 		if (!is_reg_const(reg2, is_jmp32))
15605 			swap(reg1, reg2);
15606 		if (!is_reg_const(reg2, is_jmp32))
15607 			break;
15608 		val = reg_const_value(reg2, is_jmp32);
15609 		if (is_jmp32) {
15610 			t = tnum_and(tnum_subreg(reg1->var_off), tnum_const(~val));
15611 			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
15612 		} else {
15613 			reg1->var_off = tnum_and(reg1->var_off, tnum_const(~val));
15614 		}
15615 		break;
15616 	case BPF_JLE:
15617 		if (is_jmp32) {
15618 			reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value);
15619 			reg2->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value);
15620 		} else {
15621 			reg1->umax_value = min(reg1->umax_value, reg2->umax_value);
15622 			reg2->umin_value = max(reg1->umin_value, reg2->umin_value);
15623 		}
15624 		break;
15625 	case BPF_JLT:
15626 		if (is_jmp32) {
15627 			reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value - 1);
15628 			reg2->u32_min_value = max(reg1->u32_min_value + 1, reg2->u32_min_value);
15629 		} else {
15630 			reg1->umax_value = min(reg1->umax_value, reg2->umax_value - 1);
15631 			reg2->umin_value = max(reg1->umin_value + 1, reg2->umin_value);
15632 		}
15633 		break;
15634 	case BPF_JSLE:
15635 		if (is_jmp32) {
15636 			reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value);
15637 			reg2->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value);
15638 		} else {
15639 			reg1->smax_value = min(reg1->smax_value, reg2->smax_value);
15640 			reg2->smin_value = max(reg1->smin_value, reg2->smin_value);
15641 		}
15642 		break;
15643 	case BPF_JSLT:
15644 		if (is_jmp32) {
15645 			reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value - 1);
15646 			reg2->s32_min_value = max(reg1->s32_min_value + 1, reg2->s32_min_value);
15647 		} else {
15648 			reg1->smax_value = min(reg1->smax_value, reg2->smax_value - 1);
15649 			reg2->smin_value = max(reg1->smin_value + 1, reg2->smin_value);
15650 		}
15651 		break;
15652 	default:
15653 		return;
15654 	}
15655 }
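
/* Example for regs_refine_cond_op() above (illustrative ranges): when the
 * true branch of "if r1 == r2" is taken with r1 in [0, 100] and r2 in
 * [50, 200], both registers are narrowed to [50, 100] and their tnums are
 * intersected. For "!=", bounds only improve when the other side is a
 * constant sitting exactly on a bound, which is then shrunk by one.
 */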
15656 
15657 /* Adjusts the register min/max values in the case that the dst_reg and
15658  * src_reg are both SCALAR_VALUE registers (or we are simply doing a BPF_K
15659  * check, in which case we have a fake SCALAR_VALUE representing insn->imm).
15660  * Technically we can do similar adjustments for pointers to the same object,
15661  * but we don't support that right now.
15662  */
15663 static int reg_set_min_max(struct bpf_verifier_env *env,
15664 			   struct bpf_reg_state *true_reg1,
15665 			   struct bpf_reg_state *true_reg2,
15666 			   struct bpf_reg_state *false_reg1,
15667 			   struct bpf_reg_state *false_reg2,
15668 			   u8 opcode, bool is_jmp32)
15669 {
15670 	int err;
15671 
15672 	/* If either register is a pointer, we can't learn anything about its
15673 	 * variable offset from the compare (unless they were a pointer into
15674 	 * the same object, but we don't bother with that).
15675 	 */
15676 	if (false_reg1->type != SCALAR_VALUE || false_reg2->type != SCALAR_VALUE)
15677 		return 0;
15678 
15679 	/* fallthrough (FALSE) branch */
15680 	regs_refine_cond_op(false_reg1, false_reg2, rev_opcode(opcode), is_jmp32);
15681 	reg_bounds_sync(false_reg1);
15682 	reg_bounds_sync(false_reg2);
15683 
15684 	/* jump (TRUE) branch */
15685 	regs_refine_cond_op(true_reg1, true_reg2, opcode, is_jmp32);
15686 	reg_bounds_sync(true_reg1);
15687 	reg_bounds_sync(true_reg2);
15688 
15689 	err = reg_bounds_sanity_check(env, true_reg1, "true_reg1");
15690 	err = err ?: reg_bounds_sanity_check(env, true_reg2, "true_reg2");
15691 	err = err ?: reg_bounds_sanity_check(env, false_reg1, "false_reg1");
15692 	err = err ?: reg_bounds_sanity_check(env, false_reg2, "false_reg2");
15693 	return err;
15694 }
15695 
15696 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
15697 				 struct bpf_reg_state *reg, u32 id,
15698 				 bool is_null)
15699 {
15700 	if (type_may_be_null(reg->type) && reg->id == id &&
15701 	    (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
15702 		/* Old offset (both fixed and variable parts) should have been
15703 		 * known-zero, because we don't allow pointer arithmetic on
15704 		 * pointers that might be NULL. If we see this happening, don't
15705 		 * convert the register.
15706 		 *
15707 		 * But in some cases, some helpers that return local kptrs
15708 		 * advance offset for the returned pointer. In those cases, it
15709 		 * is fine to expect to see reg->off.
15710 		 */
15711 		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
15712 			return;
15713 		if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
15714 		    WARN_ON_ONCE(reg->off))
15715 			return;
15716 
15717 		if (is_null) {
15718 			reg->type = SCALAR_VALUE;
15719 			/* We don't need id and ref_obj_id from this point
15720 			 * onwards, so reset them to give state pruning a
15721 			 * better chance to take effect.
15722 			 */
15723 			reg->id = 0;
15724 			reg->ref_obj_id = 0;
15725 
15726 			return;
15727 		}
15728 
15729 		mark_ptr_not_null_reg(reg);
15730 
15731 		if (!reg_may_point_to_spin_lock(reg)) {
15732 			/* For not-NULL ptr, reg->ref_obj_id will be reset
15733 			 * in release_reference().
15734 			 *
15735 			 * reg->id is still used by spin_lock ptr. Other
15736 			 * than spin_lock ptr type, reg->id can be reset.
15737 			 */
15738 			reg->id = 0;
15739 		}
15740 	}
15741 }
15742 
15743 /* The logic is similar to find_good_pkt_pointers(), both could eventually
15744  * be folded together at some point.
15745  */
15746 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
15747 				  bool is_null)
15748 {
15749 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
15750 	struct bpf_reg_state *regs = state->regs, *reg;
15751 	u32 ref_obj_id = regs[regno].ref_obj_id;
15752 	u32 id = regs[regno].id;
15753 
15754 	if (ref_obj_id && ref_obj_id == id && is_null)
15755 		/* regs[regno] is in the " == NULL" branch.
15756 		 * No one could have freed the reference state before
15757 		 * doing the NULL check.
15758 		 */
15759 		WARN_ON_ONCE(release_reference_nomark(vstate, id));
15760 
15761 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
15762 		mark_ptr_or_null_reg(state, reg, id, is_null);
15763 	}));
15764 }
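
/* Typical pattern handled by mark_ptr_or_null_regs() (illustrative):
 *
 *   r0 = bpf_map_lookup_elem(...)   ; PTR_TO_MAP_VALUE_OR_NULL, id == N
 *   r6 = r0                         ; the copy shares id == N
 *   if r0 == 0 goto out             ; taken: r0 and r6 become scalars
 *   *(u32 *)(r0 + 0) = 1            ; not taken: both are non-NULL
 *                                   ; PTR_TO_MAP_VALUE
 */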
15765 
15766 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
15767 				   struct bpf_reg_state *dst_reg,
15768 				   struct bpf_reg_state *src_reg,
15769 				   struct bpf_verifier_state *this_branch,
15770 				   struct bpf_verifier_state *other_branch)
15771 {
15772 	if (BPF_SRC(insn->code) != BPF_X)
15773 		return false;
15774 
15775 	/* Pointers are always 64-bit. */
15776 	if (BPF_CLASS(insn->code) == BPF_JMP32)
15777 		return false;
15778 
15779 	switch (BPF_OP(insn->code)) {
15780 	case BPF_JGT:
15781 		if ((dst_reg->type == PTR_TO_PACKET &&
15782 		     src_reg->type == PTR_TO_PACKET_END) ||
15783 		    (dst_reg->type == PTR_TO_PACKET_META &&
15784 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15785 			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
15786 			find_good_pkt_pointers(this_branch, dst_reg,
15787 					       dst_reg->type, false);
15788 			mark_pkt_end(other_branch, insn->dst_reg, true);
15789 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15790 			    src_reg->type == PTR_TO_PACKET) ||
15791 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15792 			    src_reg->type == PTR_TO_PACKET_META)) {
15793 			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
15794 			find_good_pkt_pointers(other_branch, src_reg,
15795 					       src_reg->type, true);
15796 			mark_pkt_end(this_branch, insn->src_reg, false);
15797 		} else {
15798 			return false;
15799 		}
15800 		break;
15801 	case BPF_JLT:
15802 		if ((dst_reg->type == PTR_TO_PACKET &&
15803 		     src_reg->type == PTR_TO_PACKET_END) ||
15804 		    (dst_reg->type == PTR_TO_PACKET_META &&
15805 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15806 			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
15807 			find_good_pkt_pointers(other_branch, dst_reg,
15808 					       dst_reg->type, true);
15809 			mark_pkt_end(this_branch, insn->dst_reg, false);
15810 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15811 			    src_reg->type == PTR_TO_PACKET) ||
15812 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15813 			    src_reg->type == PTR_TO_PACKET_META)) {
15814 			/* pkt_end < pkt_data', pkt_data < pkt_meta' */
15815 			find_good_pkt_pointers(this_branch, src_reg,
15816 					       src_reg->type, false);
15817 			mark_pkt_end(other_branch, insn->src_reg, true);
15818 		} else {
15819 			return false;
15820 		}
15821 		break;
15822 	case BPF_JGE:
15823 		if ((dst_reg->type == PTR_TO_PACKET &&
15824 		     src_reg->type == PTR_TO_PACKET_END) ||
15825 		    (dst_reg->type == PTR_TO_PACKET_META &&
15826 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15827 			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
15828 			find_good_pkt_pointers(this_branch, dst_reg,
15829 					       dst_reg->type, true);
15830 			mark_pkt_end(other_branch, insn->dst_reg, false);
15831 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15832 			    src_reg->type == PTR_TO_PACKET) ||
15833 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15834 			    src_reg->type == PTR_TO_PACKET_META)) {
15835 			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
15836 			find_good_pkt_pointers(other_branch, src_reg,
15837 					       src_reg->type, false);
15838 			mark_pkt_end(this_branch, insn->src_reg, true);
15839 		} else {
15840 			return false;
15841 		}
15842 		break;
15843 	case BPF_JLE:
15844 		if ((dst_reg->type == PTR_TO_PACKET &&
15845 		     src_reg->type == PTR_TO_PACKET_END) ||
15846 		    (dst_reg->type == PTR_TO_PACKET_META &&
15847 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15848 			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
15849 			find_good_pkt_pointers(other_branch, dst_reg,
15850 					       dst_reg->type, false);
15851 			mark_pkt_end(this_branch, insn->dst_reg, true);
15852 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15853 			    src_reg->type == PTR_TO_PACKET) ||
15854 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15855 			    src_reg->type == PTR_TO_PACKET_META)) {
15856 			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
15857 			find_good_pkt_pointers(this_branch, src_reg,
15858 					       src_reg->type, true);
15859 			mark_pkt_end(other_branch, insn->src_reg, false);
15860 		} else {
15861 			return false;
15862 		}
15863 		break;
15864 	default:
15865 		return false;
15866 	}
15867 
15868 	return true;
15869 }
15870 
15871 static void __collect_linked_regs(struct linked_regs *reg_set, struct bpf_reg_state *reg,
15872 				  u32 id, u32 frameno, u32 spi_or_reg, bool is_reg)
15873 {
15874 	struct linked_reg *e;
15875 
15876 	if (reg->type != SCALAR_VALUE || (reg->id & ~BPF_ADD_CONST) != id)
15877 		return;
15878 
15879 	e = linked_regs_push(reg_set);
15880 	if (e) {
15881 		e->frameno = frameno;
15882 		e->is_reg = is_reg;
15883 		e->regno = spi_or_reg;
15884 	} else {
15885 		reg->id = 0;
15886 	}
15887 }
15888 
15889 /* For all R being scalar registers or spilled scalar registers
15890  * in verifier state, save R in linked_regs if R->id == id.
15891  * If there are too many Rs sharing same id, reset id for leftover Rs.
15892  */
15893 static void collect_linked_regs(struct bpf_verifier_state *vstate, u32 id,
15894 				struct linked_regs *linked_regs)
15895 {
15896 	struct bpf_func_state *func;
15897 	struct bpf_reg_state *reg;
15898 	int i, j;
15899 
15900 	id = id & ~BPF_ADD_CONST;
15901 	for (i = vstate->curframe; i >= 0; i--) {
15902 		func = vstate->frame[i];
15903 		for (j = 0; j < BPF_REG_FP; j++) {
15904 			reg = &func->regs[j];
15905 			__collect_linked_regs(linked_regs, reg, id, i, j, true);
15906 		}
15907 		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
15908 			if (!is_spilled_reg(&func->stack[j]))
15909 				continue;
15910 			reg = &func->stack[j].spilled_ptr;
15911 			__collect_linked_regs(linked_regs, reg, id, i, j, false);
15912 		}
15913 	}
15914 }
15915 
15916 /* For all R in linked_regs, copy known_reg range into R
15917  * if R->id == known_reg->id.
15918  */
15919 static void sync_linked_regs(struct bpf_verifier_state *vstate, struct bpf_reg_state *known_reg,
15920 			     struct linked_regs *linked_regs)
15921 {
15922 	struct bpf_reg_state fake_reg;
15923 	struct bpf_reg_state *reg;
15924 	struct linked_reg *e;
15925 	int i;
15926 
15927 	for (i = 0; i < linked_regs->cnt; ++i) {
15928 		e = &linked_regs->entries[i];
15929 		reg = e->is_reg ? &vstate->frame[e->frameno]->regs[e->regno]
15930 				: &vstate->frame[e->frameno]->stack[e->spi].spilled_ptr;
15931 		if (reg->type != SCALAR_VALUE || reg == known_reg)
15932 			continue;
15933 		if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST))
15934 			continue;
15935 		if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) ||
15936 		    reg->off == known_reg->off) {
15937 			s32 saved_subreg_def = reg->subreg_def;
15938 
15939 			copy_register_state(reg, known_reg);
15940 			reg->subreg_def = saved_subreg_def;
15941 		} else {
15942 			s32 saved_subreg_def = reg->subreg_def;
15943 			s32 saved_off = reg->off;
15944 
15945 			fake_reg.type = SCALAR_VALUE;
15946 			__mark_reg_known(&fake_reg, (s32)reg->off - (s32)known_reg->off);
15947 
15948 			/* reg = known_reg; reg += delta */
15949 			copy_register_state(reg, known_reg);
15950 			/*
15951 			 * Must preserve off, id and add_const flag,
15952 			 * otherwise another sync_linked_regs() will be incorrect.
15953 			 */
15954 			reg->off = saved_off;
15955 			reg->subreg_def = saved_subreg_def;
15956 
15957 			scalar32_min_max_add(reg, &fake_reg);
15958 			scalar_min_max_add(reg, &fake_reg);
15959 			reg->var_off = tnum_add(reg->var_off, fake_reg.var_off);
15960 		}
15961 	}
15962 }
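
/* E.g. (illustrative): if "r1 = r2; r1 += 16" left r1 linked to r2 with
 * BPF_ADD_CONST and off == 16, then once "if r2 < 64" narrows r2 to
 * [0, 63], sync_linked_regs() rebuilds r1 as r2 + 16, i.e. [16, 79],
 * rather than keeping r1's older, wider range.
 */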
15963 
15964 static int check_cond_jmp_op(struct bpf_verifier_env *env,
15965 			     struct bpf_insn *insn, int *insn_idx)
15966 {
15967 	struct bpf_verifier_state *this_branch = env->cur_state;
15968 	struct bpf_verifier_state *other_branch;
15969 	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
15970 	struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
15971 	struct bpf_reg_state *eq_branch_regs;
15972 	struct linked_regs linked_regs = {};
15973 	u8 opcode = BPF_OP(insn->code);
15974 	bool is_jmp32;
15975 	int pred = -1;
15976 	int err;
15977 
15978 	/* Only conditional jumps are expected to reach here. */
15979 	if (opcode == BPF_JA || opcode > BPF_JCOND) {
15980 		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
15981 		return -EINVAL;
15982 	}
15983 
15984 	if (opcode == BPF_JCOND) {
15985 		struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
15986 		int idx = *insn_idx;
15987 
15988 		if (insn->code != (BPF_JMP | BPF_JCOND) ||
15989 		    insn->src_reg != BPF_MAY_GOTO ||
15990 		    insn->dst_reg || insn->imm) {
15991 			verbose(env, "invalid may_goto imm %d\n", insn->imm);
15992 			return -EINVAL;
15993 		}
15994 		prev_st = find_prev_entry(env, cur_st->parent, idx);
15995 
15996 		/* branch out 'fallthrough' insn as a new state to explore */
15997 		queued_st = push_stack(env, idx + 1, idx, false);
15998 		if (!queued_st)
15999 			return -ENOMEM;
16000 
16001 		queued_st->may_goto_depth++;
16002 		if (prev_st)
16003 			widen_imprecise_scalars(env, prev_st, queued_st);
16004 		*insn_idx += insn->off;
16005 		return 0;
16006 	}
16007 
16008 	/* check src2 operand */
16009 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
16010 	if (err)
16011 		return err;
16012 
16013 	dst_reg = &regs[insn->dst_reg];
16014 	if (BPF_SRC(insn->code) == BPF_X) {
16015 		if (insn->imm != 0) {
16016 			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
16017 			return -EINVAL;
16018 		}
16019 
16020 		/* check src1 operand */
16021 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
16022 		if (err)
16023 			return err;
16024 
16025 		src_reg = &regs[insn->src_reg];
16026 		if (!(reg_is_pkt_pointer_any(dst_reg) && reg_is_pkt_pointer_any(src_reg)) &&
16027 		    is_pointer_value(env, insn->src_reg)) {
16028 			verbose(env, "R%d pointer comparison prohibited\n",
16029 				insn->src_reg);
16030 			return -EACCES;
16031 		}
16032 	} else {
16033 		if (insn->src_reg != BPF_REG_0) {
16034 			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
16035 			return -EINVAL;
16036 		}
16037 		src_reg = &env->fake_reg[0];
16038 		memset(src_reg, 0, sizeof(*src_reg));
16039 		src_reg->type = SCALAR_VALUE;
16040 		__mark_reg_known(src_reg, insn->imm);
16041 	}
16042 
16043 	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
16044 	pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32);
16045 	if (pred >= 0) {
16046 		/* If we get here with a dst_reg pointer type it is because
16047 		 * above is_branch_taken() special cased the 0 comparison.
16048 		 */
16049 		if (!__is_pointer_value(false, dst_reg))
16050 			err = mark_chain_precision(env, insn->dst_reg);
16051 		if (BPF_SRC(insn->code) == BPF_X && !err &&
16052 		    !__is_pointer_value(false, src_reg))
16053 			err = mark_chain_precision(env, insn->src_reg);
16054 		if (err)
16055 			return err;
16056 	}
16057 
16058 	if (pred == 1) {
16059 		/* Only follow the goto, ignore fall-through. If needed, push
16060 		 * the fall-through branch for simulation under speculative
16061 		 * execution.
16062 		 */
16063 		if (!env->bypass_spec_v1 &&
16064 		    !sanitize_speculative_path(env, insn, *insn_idx + 1,
16065 					       *insn_idx))
16066 			return -EFAULT;
16067 		if (env->log.level & BPF_LOG_LEVEL)
16068 			print_insn_state(env, this_branch, this_branch->curframe);
16069 		*insn_idx += insn->off;
16070 		return 0;
16071 	} else if (pred == 0) {
16072 		/* Only follow the fall-through branch, since that's where the
16073 		 * program will go. If needed, push the goto branch for
16074 		 * simulation under speculative execution.
16075 		 */
16076 		if (!env->bypass_spec_v1 &&
16077 		    !sanitize_speculative_path(env, insn,
16078 					       *insn_idx + insn->off + 1,
16079 					       *insn_idx))
16080 			return -EFAULT;
16081 		if (env->log.level & BPF_LOG_LEVEL)
16082 			print_insn_state(env, this_branch, this_branch->curframe);
16083 		return 0;
16084 	}
16085 
16086 	/* Push scalar registers sharing the same ID to the jump history,
16087 	 * do this before creating 'other_branch', so that both
16088 	 * 'this_branch' and 'other_branch' share this history
16089 	 * if parent state is created.
16090 	 */
16091 	if (BPF_SRC(insn->code) == BPF_X && src_reg->type == SCALAR_VALUE && src_reg->id)
16092 		collect_linked_regs(this_branch, src_reg->id, &linked_regs);
16093 	if (dst_reg->type == SCALAR_VALUE && dst_reg->id)
16094 		collect_linked_regs(this_branch, dst_reg->id, &linked_regs);
16095 	if (linked_regs.cnt > 1) {
16096 		err = push_insn_history(env, this_branch, 0, linked_regs_pack(&linked_regs));
16097 		if (err)
16098 			return err;
16099 	}
16100 
16101 	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
16102 				  false);
16103 	if (!other_branch)
16104 		return -EFAULT;
16105 	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
16106 
16107 	if (BPF_SRC(insn->code) == BPF_X) {
16108 		err = reg_set_min_max(env,
16109 				      &other_branch_regs[insn->dst_reg],
16110 				      &other_branch_regs[insn->src_reg],
16111 				      dst_reg, src_reg, opcode, is_jmp32);
16112 	} else /* BPF_SRC(insn->code) == BPF_K */ {
16113 		/* reg_set_min_max() can mangle the fake_reg. Make a copy
16114 		 * so that these are two different memory locations. The
16115 		 * src_reg is not used beyond here in context of K.
16116 		 */
16117 		memcpy(&env->fake_reg[1], &env->fake_reg[0],
16118 		       sizeof(env->fake_reg[0]));
16119 		err = reg_set_min_max(env,
16120 				      &other_branch_regs[insn->dst_reg],
16121 				      &env->fake_reg[0],
16122 				      dst_reg, &env->fake_reg[1],
16123 				      opcode, is_jmp32);
16124 	}
16125 	if (err)
16126 		return err;
16127 
16128 	if (BPF_SRC(insn->code) == BPF_X &&
16129 	    src_reg->type == SCALAR_VALUE && src_reg->id &&
16130 	    !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
16131 		sync_linked_regs(this_branch, src_reg, &linked_regs);
16132 		sync_linked_regs(other_branch, &other_branch_regs[insn->src_reg], &linked_regs);
16133 	}
16134 	if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
16135 	    !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
16136 		sync_linked_regs(this_branch, dst_reg, &linked_regs);
16137 		sync_linked_regs(other_branch, &other_branch_regs[insn->dst_reg], &linked_regs);
16138 	}
16139 
16140 	/* if one pointer register is compared to another pointer
16141 	 * register check if PTR_MAYBE_NULL could be lifted.
16142 	 * E.g. register A - maybe null
16143 	 *      register B - not null
16144 	 * for JNE A, B, ... - A is not null in the false branch;
16145 	 * for JEQ A, B, ... - A is not null in the true branch.
16146 	 *
16147 	 * Since PTR_TO_BTF_ID points to a kernel struct that does
16148 	 * not need to be null checked by the BPF program, i.e. it
16149 	 * could be null even without a PTR_MAYBE_NULL marking,
16150 	 * only propagate nullness when neither reg is of that type.
16151 	 */
16152 	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
16153 	    __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
16154 	    type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) &&
16155 	    base_type(src_reg->type) != PTR_TO_BTF_ID &&
16156 	    base_type(dst_reg->type) != PTR_TO_BTF_ID) {
16157 		eq_branch_regs = NULL;
16158 		switch (opcode) {
16159 		case BPF_JEQ:
16160 			eq_branch_regs = other_branch_regs;
16161 			break;
16162 		case BPF_JNE:
16163 			eq_branch_regs = regs;
16164 			break;
16165 		default:
16166 			/* do nothing */
16167 			break;
16168 		}
16169 		if (eq_branch_regs) {
16170 			if (type_may_be_null(src_reg->type))
16171 				mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]);
16172 			else
16173 				mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]);
16174 		}
16175 	}
16176 
16177 	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
16178 	 * NOTE: the optimizations below are related to pointer comparisons,
16179 	 *       which will never be JMP32.
16180 	 */
16181 	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
16182 	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
16183 	    type_may_be_null(dst_reg->type)) {
16184 		/* Mark all identical registers in each branch as either
16185 		 * safe or unknown depending on the R == 0 or R != 0 conditional.
16186 		 */
16187 		mark_ptr_or_null_regs(this_branch, insn->dst_reg,
16188 				      opcode == BPF_JNE);
16189 		mark_ptr_or_null_regs(other_branch, insn->dst_reg,
16190 				      opcode == BPF_JEQ);
16191 	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
16192 					   this_branch, other_branch) &&
16193 		   is_pointer_value(env, insn->dst_reg)) {
16194 		verbose(env, "R%d pointer comparison prohibited\n",
16195 			insn->dst_reg);
16196 		return -EACCES;
16197 	}
16198 	if (env->log.level & BPF_LOG_LEVEL)
16199 		print_insn_state(env, this_branch, this_branch->curframe);
16200 	return 0;
16201 }
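
/* Rough flow of check_cond_jmp_op() above (illustrative): for
 * "if r1 > 10 goto +5" with r1 in [0, 100], is_branch_taken() cannot
 * decide, so the jump target is pushed as 'other_branch' with r1
 * narrowed to [11, 100], while the fall-through path keeps r1 in
 * [0, 10]; both states are then explored independently.
 */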
16202 
16203 /* verify BPF_LD_IMM64 instruction */
16204 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
16205 {
16206 	struct bpf_insn_aux_data *aux = cur_aux(env);
16207 	struct bpf_reg_state *regs = cur_regs(env);
16208 	struct bpf_reg_state *dst_reg;
16209 	struct bpf_map *map;
16210 	int err;
16211 
16212 	if (BPF_SIZE(insn->code) != BPF_DW) {
16213 		verbose(env, "invalid BPF_LD_IMM insn\n");
16214 		return -EINVAL;
16215 	}
16216 	if (insn->off != 0) {
16217 		verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
16218 		return -EINVAL;
16219 	}
16220 
16221 	err = check_reg_arg(env, insn->dst_reg, DST_OP);
16222 	if (err)
16223 		return err;
16224 
16225 	dst_reg = &regs[insn->dst_reg];
16226 	if (insn->src_reg == 0) {
16227 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
16228 
16229 		dst_reg->type = SCALAR_VALUE;
16230 		__mark_reg_known(&regs[insn->dst_reg], imm);
16231 		return 0;
16232 	}
16233 
16234 	/* All special src_reg cases are listed below. From this point onwards
16235 	 * we either succeed and assign a corresponding dst_reg->type after
16236 	 * zeroing the offset, or fail and reject the program.
16237 	 */
16238 	mark_reg_known_zero(env, regs, insn->dst_reg);
16239 
16240 	if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
16241 		dst_reg->type = aux->btf_var.reg_type;
16242 		switch (base_type(dst_reg->type)) {
16243 		case PTR_TO_MEM:
16244 			dst_reg->mem_size = aux->btf_var.mem_size;
16245 			break;
16246 		case PTR_TO_BTF_ID:
16247 			dst_reg->btf = aux->btf_var.btf;
16248 			dst_reg->btf_id = aux->btf_var.btf_id;
16249 			break;
16250 		default:
16251 			verbose(env, "bpf verifier is misconfigured\n");
16252 			return -EFAULT;
16253 		}
16254 		return 0;
16255 	}
16256 
16257 	if (insn->src_reg == BPF_PSEUDO_FUNC) {
16258 		struct bpf_prog_aux *aux = env->prog->aux;
16259 		u32 subprogno = find_subprog(env,
16260 					     env->insn_idx + insn->imm + 1);
16261 
16262 		if (!aux->func_info) {
16263 			verbose(env, "missing btf func_info\n");
16264 			return -EINVAL;
16265 		}
16266 		if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
16267 			verbose(env, "callback function not static\n");
16268 			return -EINVAL;
16269 		}
16270 
16271 		dst_reg->type = PTR_TO_FUNC;
16272 		dst_reg->subprogno = subprogno;
16273 		return 0;
16274 	}
16275 
16276 	map = env->used_maps[aux->map_index];
16277 	dst_reg->map_ptr = map;
16278 
16279 	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
16280 	    insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
16281 		if (map->map_type == BPF_MAP_TYPE_ARENA) {
16282 			__mark_reg_unknown(env, dst_reg);
16283 			return 0;
16284 		}
16285 		dst_reg->type = PTR_TO_MAP_VALUE;
16286 		dst_reg->off = aux->map_off;
16287 		WARN_ON_ONCE(map->max_entries != 1);
16288 		/* We want reg->id to be the same (0) as map_value is not distinct */
16289 	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
16290 		   insn->src_reg == BPF_PSEUDO_MAP_IDX) {
16291 		dst_reg->type = CONST_PTR_TO_MAP;
16292 	} else {
16293 		verbose(env, "bpf verifier is misconfigured\n");
16294 		return -EINVAL;
16295 	}
16296 
16297 	return 0;
16298 }
16299 
16300 static bool may_access_skb(enum bpf_prog_type type)
16301 {
16302 	switch (type) {
16303 	case BPF_PROG_TYPE_SOCKET_FILTER:
16304 	case BPF_PROG_TYPE_SCHED_CLS:
16305 	case BPF_PROG_TYPE_SCHED_ACT:
16306 		return true;
16307 	default:
16308 		return false;
16309 	}
16310 }
16311 
16312 /* verify safety of LD_ABS|LD_IND instructions:
16313  * - they can only appear in the programs where ctx == skb
16314  * - since they are wrappers of function calls, they scratch R1-R5 registers,
16315  *   preserve R6-R9, and store return value into R0
16316  *
16317  * Implicit input:
16318  *   ctx == skb == R6 == CTX
16319  *
16320  * Explicit input:
16321  *   SRC == any register
16322  *   IMM == 32-bit immediate
16323  *
16324  * Output:
16325  *   R0 - 8/16/32-bit skb data converted to cpu endianness
16326  */
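/* For instance, a socket filter can read the EtherType of the packet in R6
 * with a single LD_ABS:
 *
 *   BPF_LD_ABS(BPF_H, 12)   // r0 = 16-bit load at skb offset 12
 *
 * i.e. a fixed-offset 16-bit load whose result is placed in R0 in CPU
 * endianness, as described above.
 */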
16327 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
16328 {
16329 	struct bpf_reg_state *regs = cur_regs(env);
16330 	static const int ctx_reg = BPF_REG_6;
16331 	u8 mode = BPF_MODE(insn->code);
16332 	int i, err;
16333 
16334 	if (!may_access_skb(resolve_prog_type(env->prog))) {
16335 		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
16336 		return -EINVAL;
16337 	}
16338 
16339 	if (!env->ops->gen_ld_abs) {
16340 		verbose(env, "bpf verifier is misconfigured\n");
16341 		return -EINVAL;
16342 	}
16343 
16344 	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
16345 	    BPF_SIZE(insn->code) == BPF_DW ||
16346 	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
16347 		verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
16348 		return -EINVAL;
16349 	}
16350 
16351 	/* check whether implicit source operand (register R6) is readable */
16352 	err = check_reg_arg(env, ctx_reg, SRC_OP);
16353 	if (err)
16354 		return err;
16355 
16356 	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
16357 	 * gen_ld_abs() may terminate the program at runtime, leading to
16358 	 * reference leak.
16359 	 */
16360 	err = check_resource_leak(env, false, true, "BPF_LD_[ABS|IND]");
16361 	if (err)
16362 		return err;
16363 
16364 	if (regs[ctx_reg].type != PTR_TO_CTX) {
16365 		verbose(env,
16366 			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
16367 		return -EINVAL;
16368 	}
16369 
16370 	if (mode == BPF_IND) {
16371 		/* check explicit source operand */
16372 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
16373 		if (err)
16374 			return err;
16375 	}
16376 
16377 	err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
16378 	if (err < 0)
16379 		return err;
16380 
16381 	/* reset caller saved regs to unreadable */
16382 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
16383 		mark_reg_not_init(env, regs, caller_saved[i]);
16384 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
16385 	}
16386 
16387 	/* mark destination R0 register as readable, since it contains
16388 	 * the value fetched from the packet.
16389 	 * Already marked as written above.
16390 	 */
16391 	mark_reg_unknown(env, regs, BPF_REG_0);
16392 	/* ld_abs loads up to 32 bits of skb data. */
16393 	regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
16394 	return 0;
16395 }
16396 
16397 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name)
16398 {
16399 	const char *exit_ctx = "At program exit";
16400 	struct tnum enforce_attach_type_range = tnum_unknown;
16401 	const struct bpf_prog *prog = env->prog;
16402 	struct bpf_reg_state *reg;
16403 	struct bpf_retval_range range = retval_range(0, 1);
16404 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
16405 	int err;
16406 	struct bpf_func_state *frame = env->cur_state->frame[0];
16407 	const bool is_subprog = frame->subprogno;
16408 	bool return_32bit = false;
16409 
16410 	/* LSM and struct_ops func-ptr's return type could be "void" */
16411 	if (!is_subprog || frame->in_exception_callback_fn) {
16412 		switch (prog_type) {
16413 		case BPF_PROG_TYPE_LSM:
16414 			if (prog->expected_attach_type == BPF_LSM_CGROUP)
16415 				/* See below, can be 0 or 0-1 depending on hook. */
16416 				break;
16417 			fallthrough;
16418 		case BPF_PROG_TYPE_STRUCT_OPS:
16419 			if (!prog->aux->attach_func_proto->type)
16420 				return 0;
16421 			break;
16422 		default:
16423 			break;
16424 		}
16425 	}
16426 
16427 	/* eBPF calling convention is such that R0 is used
16428 	 * to return the value from eBPF program.
16429 	 * Make sure that it's readable at this time
16430 	 * of bpf_exit, which means that program wrote
16431 	 * something into it earlier
16432 	 */
16433 	err = check_reg_arg(env, regno, SRC_OP);
16434 	if (err)
16435 		return err;
16436 
16437 	if (is_pointer_value(env, regno)) {
16438 		verbose(env, "R%d leaks addr as return value\n", regno);
16439 		return -EACCES;
16440 	}
16441 
16442 	reg = cur_regs(env) + regno;
16443 
16444 	if (frame->in_async_callback_fn) {
16445 		/* enforce return zero from async callbacks like timer */
16446 		exit_ctx = "At async callback return";
16447 		range = retval_range(0, 0);
16448 		goto enforce_retval;
16449 	}
16450 
16451 	if (is_subprog && !frame->in_exception_callback_fn) {
16452 		if (reg->type != SCALAR_VALUE) {
16453 			verbose(env, "At subprogram exit the register R%d is not a scalar value (%s)\n",
16454 				regno, reg_type_str(env, reg->type));
16455 			return -EINVAL;
16456 		}
16457 		return 0;
16458 	}
16459 
16460 	switch (prog_type) {
16461 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
16462 		if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
16463 		    env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
16464 		    env->prog->expected_attach_type == BPF_CGROUP_UNIX_RECVMSG ||
16465 		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
16466 		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
16467 		    env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETPEERNAME ||
16468 		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
16469 		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME ||
16470 		    env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME)
16471 			range = retval_range(1, 1);
16472 		if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
16473 		    env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
16474 			range = retval_range(0, 3);
16475 		break;
16476 	case BPF_PROG_TYPE_CGROUP_SKB:
16477 		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
16478 			range = retval_range(0, 3);
16479 			enforce_attach_type_range = tnum_range(2, 3);
16480 		}
16481 		break;
16482 	case BPF_PROG_TYPE_CGROUP_SOCK:
16483 	case BPF_PROG_TYPE_SOCK_OPS:
16484 	case BPF_PROG_TYPE_CGROUP_DEVICE:
16485 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
16486 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
16487 		break;
16488 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
16489 		if (!env->prog->aux->attach_btf_id)
16490 			return 0;
16491 		range = retval_range(0, 0);
16492 		break;
16493 	case BPF_PROG_TYPE_TRACING:
16494 		switch (env->prog->expected_attach_type) {
16495 		case BPF_TRACE_FENTRY:
16496 		case BPF_TRACE_FEXIT:
16497 			range = retval_range(0, 0);
16498 			break;
16499 		case BPF_TRACE_RAW_TP:
16500 		case BPF_MODIFY_RETURN:
16501 			return 0;
16502 		case BPF_TRACE_ITER:
16503 			break;
16504 		default:
16505 			return -ENOTSUPP;
16506 		}
16507 		break;
16508 	case BPF_PROG_TYPE_KPROBE:
16509 		switch (env->prog->expected_attach_type) {
16510 		case BPF_TRACE_KPROBE_SESSION:
16511 		case BPF_TRACE_UPROBE_SESSION:
16512 			range = retval_range(0, 1);
16513 			break;
16514 		default:
16515 			return 0;
16516 		}
16517 		break;
16518 	case BPF_PROG_TYPE_SK_LOOKUP:
16519 		range = retval_range(SK_DROP, SK_PASS);
16520 		break;
16521 
16522 	case BPF_PROG_TYPE_LSM:
16523 		if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
16524 			/* no range found, any return value is allowed */
16525 			if (!get_func_retval_range(env->prog, &range))
16526 				return 0;
16527 			/* no restricted range, any return value is allowed */
16528 			if (range.minval == S32_MIN && range.maxval == S32_MAX)
16529 				return 0;
16530 			return_32bit = true;
16531 		} else if (!env->prog->aux->attach_func_proto->type) {
16532 			/* Make sure programs that attach to void
16533 			 * hooks don't try to modify return value.
16534 			 */
16535 			range = retval_range(1, 1);
16536 		}
16537 		break;
16538 
16539 	case BPF_PROG_TYPE_NETFILTER:
16540 		range = retval_range(NF_DROP, NF_ACCEPT);
16541 		break;
16542 	case BPF_PROG_TYPE_EXT:
16543 		/* freplace program can return anything as its return value
16544 		 * depends on the to-be-replaced kernel func or bpf program.
16545 		 */
16546 	default:
16547 		return 0;
16548 	}
16549 
16550 enforce_retval:
16551 	if (reg->type != SCALAR_VALUE) {
16552 		verbose(env, "%s the register R%d is not a known value (%s)\n",
16553 			exit_ctx, regno, reg_type_str(env, reg->type));
16554 		return -EINVAL;
16555 	}
16556 
16557 	err = mark_chain_precision(env, regno);
16558 	if (err)
16559 		return err;
16560 
16561 	if (!retval_range_within(range, reg, return_32bit)) {
16562 		verbose_invalid_scalar(env, reg, range, exit_ctx, reg_name);
16563 		if (!is_subprog &&
16564 		    prog->expected_attach_type == BPF_LSM_CGROUP &&
16565 		    prog_type == BPF_PROG_TYPE_LSM &&
16566 		    !prog->aux->attach_func_proto->type)
16567 			verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
16568 		return -EINVAL;
16569 	}
16570 
16571 	if (!tnum_is_unknown(enforce_attach_type_range) &&
16572 	    tnum_in(enforce_attach_type_range, reg->var_off))
16573 		env->prog->enforce_expected_attach_type = 1;
16574 	return 0;
16575 }
16576 
16577 static void mark_subprog_changes_pkt_data(struct bpf_verifier_env *env, int off)
16578 {
16579 	struct bpf_subprog_info *subprog;
16580 
16581 	subprog = find_containing_subprog(env, off);
16582 	subprog->changes_pkt_data = true;
16583 }
16584 
16585 /* 't' is an index of a call-site.
16586  * 'w' is a callee entry point.
16587  * Eventually this function would be called when env->cfg.insn_state[w] == EXPLORED.
16588  * Rely on DFS traversal order and absence of recursive calls to guarantee that
16589  * callee's changes_pkt_data mark would be correct at that moment.
16590  */
16591 static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w)
16592 {
16593 	struct bpf_subprog_info *caller, *callee;
16594 
16595 	caller = find_containing_subprog(env, t);
16596 	callee = find_containing_subprog(env, w);
16597 	caller->changes_pkt_data |= callee->changes_pkt_data;
16598 }
16599 
16600 /* non-recursive DFS pseudo code
16601  * 1  procedure DFS-iterative(G,v):
16602  * 2      label v as discovered
16603  * 3      let S be a stack
16604  * 4      S.push(v)
16605  * 5      while S is not empty
16606  * 6            t <- S.peek()
16607  * 7            if t is what we're looking for:
16608  * 8                return t
16609  * 9            for all edges e in G.adjacentEdges(t) do
16610  * 10               if edge e is already labelled
16611  * 11                   continue with the next edge
16612  * 12               w <- G.adjacentVertex(t,e)
16613  * 13               if vertex w is not discovered and not explored
16614  * 14                   label e as tree-edge
16615  * 15                   label w as discovered
16616  * 16                   S.push(w)
16617  * 17                   continue at 5
16618  * 18               else if vertex w is discovered
16619  * 19                   label e as back-edge
16620  * 20               else
16621  * 21                   // vertex w is explored
16622  * 22                   label e as forward- or cross-edge
16623  * 23           label t as explored
16624  * 24           S.pop()
16625  *
16626  * convention:
16627  * 0x10 - discovered
16628  * 0x11 - discovered and fall-through edge labelled
16629  * 0x12 - discovered and fall-through and branch edges labelled
16630  * 0x20 - explored
16631  */
16632 
16633 enum {
16634 	DISCOVERED = 0x10,
16635 	EXPLORED = 0x20,
16636 	FALLTHROUGH = 1,
16637 	BRANCH = 2,
16638 };
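/* For example, insn_state[t] == (DISCOVERED | FALLTHROUGH) == 0x11 means insn
 * t was discovered and its fall-through edge has already been pushed, while
 * push_insn() below treats (insn_state[w] & 0xF0) == DISCOVERED as a
 * back-edge, i.e. w is still on the DFS stack.
 */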
16639 
16640 static void mark_prune_point(struct bpf_verifier_env *env, int idx)
16641 {
16642 	env->insn_aux_data[idx].prune_point = true;
16643 }
16644 
16645 static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx)
16646 {
16647 	return env->insn_aux_data[insn_idx].prune_point;
16648 }
16649 
16650 static void mark_force_checkpoint(struct bpf_verifier_env *env, int idx)
16651 {
16652 	env->insn_aux_data[idx].force_checkpoint = true;
16653 }
16654 
16655 static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx)
16656 {
16657 	return env->insn_aux_data[insn_idx].force_checkpoint;
16658 }
16659 
16660 static void mark_calls_callback(struct bpf_verifier_env *env, int idx)
16661 {
16662 	env->insn_aux_data[idx].calls_callback = true;
16663 }
16664 
16665 static bool calls_callback(struct bpf_verifier_env *env, int insn_idx)
16666 {
16667 	return env->insn_aux_data[insn_idx].calls_callback;
16668 }
16669 
16670 enum {
16671 	DONE_EXPLORING = 0,
16672 	KEEP_EXPLORING = 1,
16673 };
16674 
16675 /* t, w, e - match pseudo-code above:
16676  * t - index of current instruction
16677  * w - next instruction
16678  * e - edge
16679  */
16680 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
16681 {
16682 	int *insn_stack = env->cfg.insn_stack;
16683 	int *insn_state = env->cfg.insn_state;
16684 
16685 	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
16686 		return DONE_EXPLORING;
16687 
16688 	if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
16689 		return DONE_EXPLORING;
16690 
16691 	if (w < 0 || w >= env->prog->len) {
16692 		verbose_linfo(env, t, "%d: ", t);
16693 		verbose(env, "jump out of range from insn %d to %d\n", t, w);
16694 		return -EINVAL;
16695 	}
16696 
16697 	if (e == BRANCH) {
16698 		/* mark branch target for state pruning */
16699 		mark_prune_point(env, w);
16700 		mark_jmp_point(env, w);
16701 	}
16702 
16703 	if (insn_state[w] == 0) {
16704 		/* tree-edge */
16705 		insn_state[t] = DISCOVERED | e;
16706 		insn_state[w] = DISCOVERED;
16707 		if (env->cfg.cur_stack >= env->prog->len)
16708 			return -E2BIG;
16709 		insn_stack[env->cfg.cur_stack++] = w;
16710 		return KEEP_EXPLORING;
16711 	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
16712 		if (env->bpf_capable)
16713 			return DONE_EXPLORING;
16714 		verbose_linfo(env, t, "%d: ", t);
16715 		verbose_linfo(env, w, "%d: ", w);
16716 		verbose(env, "back-edge from insn %d to %d\n", t, w);
16717 		return -EINVAL;
16718 	} else if (insn_state[w] == EXPLORED) {
16719 		/* forward- or cross-edge */
16720 		insn_state[t] = DISCOVERED | e;
16721 	} else {
16722 		verbose(env, "insn state internal bug\n");
16723 		return -EFAULT;
16724 	}
16725 	return DONE_EXPLORING;
16726 }
16727 
16728 static int visit_func_call_insn(int t, struct bpf_insn *insns,
16729 				struct bpf_verifier_env *env,
16730 				bool visit_callee)
16731 {
16732 	int ret, insn_sz;
16733 	int w;
16734 
16735 	insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1;
16736 	ret = push_insn(t, t + insn_sz, FALLTHROUGH, env);
16737 	if (ret)
16738 		return ret;
16739 
16740 	mark_prune_point(env, t + insn_sz);
16741 	/* when we exit from subprog, we need to record non-linear history */
16742 	mark_jmp_point(env, t + insn_sz);
16743 
16744 	if (visit_callee) {
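		/* For a bpf-to-bpf call the callee entry point is encoded as a
		 * relative offset in insn->imm, i.e. instruction t + imm + 1.
		 */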
16745 		w = t + insns[t].imm + 1;
16746 		mark_prune_point(env, t);
16747 		merge_callee_effects(env, t, w);
16748 		ret = push_insn(t, w, BRANCH, env);
16749 	}
16750 	return ret;
16751 }
16752 
16753 /* Bitmask with 1s for all caller saved registers */
16754 #define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1)
16755 
16756 /* Return a bitmask specifying which caller saved registers are
16757  * clobbered by a call to a helper *as if* this helper follows
16758  * bpf_fastcall contract:
16759  * - includes R0 if function is non-void;
16760  * - includes R1-R5 if the corresponding parameter is described
16761  *   in the function prototype.
16762  */
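/* For instance, bpf_get_smp_processor_id() takes no arguments and returns an
 * integer, so its mask is just BIT(BPF_REG_0): under the bpf_fastcall
 * contract only R0 is clobbered and R1-R5 survive the call.
 */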
16763 static u32 helper_fastcall_clobber_mask(const struct bpf_func_proto *fn)
16764 {
16765 	u32 mask;
16766 	int i;
16767 
16768 	mask = 0;
16769 	if (fn->ret_type != RET_VOID)
16770 		mask |= BIT(BPF_REG_0);
16771 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i)
16772 		if (fn->arg_type[i] != ARG_DONTCARE)
16773 			mask |= BIT(BPF_REG_1 + i);
16774 	return mask;
16775 }
16776 
16777 /* True if do_misc_fixups() replaces calls to helper number 'imm',
16778  * replacement patch is presumed to follow bpf_fastcall contract
16779  * (see mark_fastcall_pattern_for_call() below).
16780  */
16781 static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
16782 {
16783 	switch (imm) {
16784 #ifdef CONFIG_X86_64
16785 	case BPF_FUNC_get_smp_processor_id:
16786 		return env->prog->jit_requested && bpf_jit_supports_percpu_insn();
16787 #endif
16788 	default:
16789 		return false;
16790 	}
16791 }
16792 
16793 /* Same as helper_fastcall_clobber_mask() but for kfuncs, see comment above */
16794 static u32 kfunc_fastcall_clobber_mask(struct bpf_kfunc_call_arg_meta *meta)
16795 {
16796 	u32 vlen, i, mask;
16797 
16798 	vlen = btf_type_vlen(meta->func_proto);
16799 	mask = 0;
16800 	if (!btf_type_is_void(btf_type_by_id(meta->btf, meta->func_proto->type)))
16801 		mask |= BIT(BPF_REG_0);
16802 	for (i = 0; i < vlen; ++i)
16803 		mask |= BIT(BPF_REG_1 + i);
16804 	return mask;
16805 }
16806 
16807 /* Same as verifier_inlines_helper_call() but for kfuncs, see comment above */
16808 static bool is_fastcall_kfunc_call(struct bpf_kfunc_call_arg_meta *meta)
16809 {
16810 	return meta->kfunc_flags & KF_FASTCALL;
16811 }
16812 
16813 /* LLVM defines a bpf_fastcall function attribute.
16814  * This attribute means that function scratches only some of
16815  * the caller saved registers defined by ABI.
16816  * For BPF the set of such registers could be defined as follows:
16817  * - R0 is scratched only if function is non-void;
16818  * - R1-R5 are scratched only if corresponding parameter type is defined
16819  *   in the function prototype.
16820  *
16821  * The contract between kernel and clang allows simultaneous use of
16822  * such functions while maintaining backwards compatibility with old
16823  * kernels that don't understand bpf_fastcall calls:
16824  *
16825  * - for bpf_fastcall calls clang allocates registers as-if relevant r0-r5
16826  *   registers are not scratched by the call;
16827  *
16828  * - as a post-processing step, clang visits each bpf_fastcall call and adds
16829  *   spill/fill for every live r0-r5;
16830  *
16831  * - stack offsets used for the spill/fill are allocated as lowest
16832  *   stack offsets in whole function and are not used for any other
16833  *   purposes;
16834  *
16835  * - when kernel loads a program, it looks for such patterns
16836  *   (bpf_fastcall function surrounded by spills/fills) and checks if
16837  *   spill/fill stack offsets are used exclusively in fastcall patterns;
16838  *
16839  * - if so, and if verifier or current JIT inlines the call to the
16840  *   bpf_fastcall function (e.g. a helper call), kernel removes unnecessary
16841  *   spill/fill pairs;
16842  *
16843  * - when old kernel loads a program, presence of spill/fill pairs
16844  *   keeps BPF program valid, albeit slightly less efficient.
16845  *
16846  * For example:
16847  *
16848  *   r1 = 1;
16849  *   r2 = 2;
16850  *   *(u64 *)(r10 - 8)  = r1;            r1 = 1;
16851  *   *(u64 *)(r10 - 16) = r2;            r2 = 2;
16852  *   call %[to_be_inlined]         -->   call %[to_be_inlined]
16853  *   r2 = *(u64 *)(r10 - 16);            r0 = r1;
16854  *   r1 = *(u64 *)(r10 - 8);             r0 += r2;
16855  *   r0 = r1;                            exit;
16856  *   r0 += r2;
16857  *   exit;
16858  *
16859  * The purpose of mark_fastcall_pattern_for_call is to:
16860  * - look for such patterns;
16861  * - mark spill and fill instructions in env->insn_aux_data[*].fastcall_pattern;
16862  * - set env->insn_aux_data[*].fastcall_spills_num for the call instruction;
16863  * - update env->subprog_info[*]->fastcall_stack_off to find an offset
16864  *   at which bpf_fastcall spill/fill stack slots start;
16865  * - update env->subprog_info[*]->keep_fastcall_stack.
16866  *
16867  * The .fastcall_pattern and .fastcall_stack_off are used by
16868  * check_fastcall_stack_contract() to check if every stack access to
16869  * fastcall spill/fill stack slot originates from spill/fill
16870  * instructions, members of fastcall patterns.
16871  *
16872  * If such condition holds true for a subprogram, fastcall patterns could
16873  * be rewritten by remove_fastcall_spills_fills().
16874  * Otherwise bpf_fastcall patterns are not changed in the subprogram
16875  * (code, presumably, generated by an older clang version).
16876  *
16877  * For example, it is *not* safe to remove spill/fill below:
16878  *
16879  *   r1 = 1;
16880  *   *(u64 *)(r10 - 8)  = r1;            r1 = 1;
16881  *   call %[to_be_inlined]         -->   call %[to_be_inlined]
16882  *   r1 = *(u64 *)(r10 - 8);             r0 = *(u64 *)(r10 - 8);  <---- wrong !!!
16883  *   r0 = *(u64 *)(r10 - 8);             r0 += r1;
16884  *   r0 += r1;                           exit;
16885  *   exit;
16886  */
16887 static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env,
16888 					   struct bpf_subprog_info *subprog,
16889 					   int insn_idx, s16 lowest_off)
16890 {
16891 	struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx;
16892 	struct bpf_insn *call = &env->prog->insnsi[insn_idx];
16893 	const struct bpf_func_proto *fn;
16894 	u32 clobbered_regs_mask = ALL_CALLER_SAVED_REGS;
16895 	u32 expected_regs_mask;
16896 	bool can_be_inlined = false;
16897 	s16 off;
16898 	int i;
16899 
16900 	if (bpf_helper_call(call)) {
16901 		if (get_helper_proto(env, call->imm, &fn) < 0)
16902 			/* error would be reported later */
16903 			return;
16904 		clobbered_regs_mask = helper_fastcall_clobber_mask(fn);
16905 		can_be_inlined = fn->allow_fastcall &&
16906 				 (verifier_inlines_helper_call(env, call->imm) ||
16907 				  bpf_jit_inlines_helper_call(call->imm));
16908 	}
16909 
16910 	if (bpf_pseudo_kfunc_call(call)) {
16911 		struct bpf_kfunc_call_arg_meta meta;
16912 		int err;
16913 
16914 		err = fetch_kfunc_meta(env, call, &meta, NULL);
16915 		if (err < 0)
16916 			/* error would be reported later */
16917 			return;
16918 
16919 		clobbered_regs_mask = kfunc_fastcall_clobber_mask(&meta);
16920 		can_be_inlined = is_fastcall_kfunc_call(&meta);
16921 	}
16922 
16923 	if (clobbered_regs_mask == ALL_CALLER_SAVED_REGS)
16924 		return;
16925 
16926 	/* e.g. if helper call clobbers r{0,1}, expect r{2,3,4,5} in the pattern */
16927 	expected_regs_mask = ~clobbered_regs_mask & ALL_CALLER_SAVED_REGS;
16928 
16929 	/* match pairs of form:
16930 	 *
16931 	 * *(u64 *)(r10 - Y) = rX   (where Y % 8 == 0)
16932 	 * ...
16933 	 * call %[to_be_inlined]
16934 	 * ...
16935 	 * rX = *(u64 *)(r10 - Y)
16936 	 */
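	/* The loop below walks outwards from the call: the i-th spill before
	 * the call has to pair with the i-th fill after it, both addressing
	 * stack offset lowest_off + (i - 1) * BPF_REG_SIZE.
	 */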
16937 	for (i = 1, off = lowest_off; i <= ARRAY_SIZE(caller_saved); ++i, off += BPF_REG_SIZE) {
16938 		if (insn_idx - i < 0 || insn_idx + i >= env->prog->len)
16939 			break;
16940 		stx = &insns[insn_idx - i];
16941 		ldx = &insns[insn_idx + i];
16942 		/* must be a stack spill/fill pair */
16943 		if (stx->code != (BPF_STX | BPF_MEM | BPF_DW) ||
16944 		    ldx->code != (BPF_LDX | BPF_MEM | BPF_DW) ||
16945 		    stx->dst_reg != BPF_REG_10 ||
16946 		    ldx->src_reg != BPF_REG_10)
16947 			break;
16948 		/* must be a spill/fill for the same reg */
16949 		if (stx->src_reg != ldx->dst_reg)
16950 			break;
16951 		/* must be one of the previously unseen registers */
16952 		if ((BIT(stx->src_reg) & expected_regs_mask) == 0)
16953 			break;
16954 		/* must be a spill/fill for the same expected offset,
16955 		 * no need to check offset alignment, BPF_DW stack access
16956 		 * is always 8-byte aligned.
16957 		 */
16958 		if (stx->off != off || ldx->off != off)
16959 			break;
16960 		expected_regs_mask &= ~BIT(stx->src_reg);
16961 		env->insn_aux_data[insn_idx - i].fastcall_pattern = 1;
16962 		env->insn_aux_data[insn_idx + i].fastcall_pattern = 1;
16963 	}
16964 	if (i == 1)
16965 		return;
16966 
16967 	/* Conditionally set 'fastcall_spills_num' to allow forward
16968 	 * compatibility when more helper functions are marked as
16969 	 * bpf_fastcall at compile time than current kernel supports, e.g:
16970 	 *
16971 	 *   1: *(u64 *)(r10 - 8) = r1
16972 	 *   2: call A                  ;; assume A is bpf_fastcall for current kernel
16973 	 *   3: r1 = *(u64 *)(r10 - 8)
16974 	 *   4: *(u64 *)(r10 - 8) = r1
16975 	 *   5: call B                  ;; assume B is not bpf_fastcall for current kernel
16976 	 *   6: r1 = *(u64 *)(r10 - 8)
16977 	 *
16978 	 * There is no need to block bpf_fastcall rewrite for such program.
16979 	 * Set 'fastcall_pattern' for both calls to keep check_fastcall_stack_contract() happy,
16980 	 * don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills()
16981 	 * does not remove spill/fill pair {4,6}.
16982 	 */
16983 	if (can_be_inlined)
16984 		env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1;
16985 	else
16986 		subprog->keep_fastcall_stack = 1;
16987 	subprog->fastcall_stack_off = min(subprog->fastcall_stack_off, off);
16988 }
16989 
16990 static int mark_fastcall_patterns(struct bpf_verifier_env *env)
16991 {
16992 	struct bpf_subprog_info *subprog = env->subprog_info;
16993 	struct bpf_insn *insn;
16994 	s16 lowest_off;
16995 	int s, i;
16996 
16997 	for (s = 0; s < env->subprog_cnt; ++s, ++subprog) {
16998 		/* find lowest stack spill offset used in this subprog */
16999 		lowest_off = 0;
17000 		for (i = subprog->start; i < (subprog + 1)->start; ++i) {
17001 			insn = env->prog->insnsi + i;
17002 			if (insn->code != (BPF_STX | BPF_MEM | BPF_DW) ||
17003 			    insn->dst_reg != BPF_REG_10)
17004 				continue;
17005 			lowest_off = min(lowest_off, insn->off);
17006 		}
17007 		/* use this offset to find fastcall patterns */
17008 		for (i = subprog->start; i < (subprog + 1)->start; ++i) {
17009 			insn = env->prog->insnsi + i;
17010 			if (insn->code != (BPF_JMP | BPF_CALL))
17011 				continue;
17012 			mark_fastcall_pattern_for_call(env, subprog, i, lowest_off);
17013 		}
17014 	}
17015 	return 0;
17016 }
17017 
17018 /* Visits the instruction at index t and returns one of the following:
17019  *  < 0 - an error occurred
17020  *  DONE_EXPLORING - the instruction was fully explored
17021  *  KEEP_EXPLORING - there is still work to be done before it is fully explored
17022  */
17023 static int visit_insn(int t, struct bpf_verifier_env *env)
17024 {
17025 	struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
17026 	int ret, off, insn_sz;
17027 
17028 	if (bpf_pseudo_func(insn))
17029 		return visit_func_call_insn(t, insns, env, true);
17030 
17031 	/* All non-branch instructions have a single fall-through edge. */
17032 	if (BPF_CLASS(insn->code) != BPF_JMP &&
17033 	    BPF_CLASS(insn->code) != BPF_JMP32) {
17034 		insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
17035 		return push_insn(t, t + insn_sz, FALLTHROUGH, env);
17036 	}
17037 
17038 	switch (BPF_OP(insn->code)) {
17039 	case BPF_EXIT:
17040 		return DONE_EXPLORING;
17041 
17042 	case BPF_CALL:
17043 		if (is_async_callback_calling_insn(insn))
17044 			/* Mark this call insn as a prune point to trigger
17045 			 * is_state_visited() check before call itself is
17046 			 * processed by __check_func_call(). Otherwise new
17047 			 * async state will be pushed for further exploration.
17048 			 */
17049 			mark_prune_point(env, t);
17050 		/* For functions that invoke callbacks it is not known how many times
17051 		 * the callback would be called. The verifier models callback-calling
17052 		 * functions by repeatedly visiting callback bodies and returning to
17053 		 * the original call instruction.
17054 		 * In order to stop such iteration the verifier needs to identify when
17055 		 * a state identical to some state from a previous iteration is reached.
17056 		 * The check below forces creation of a checkpoint before the callback
17057 		 * calling instruction to allow a search for such identical states.
17058 		 */
17059 		if (is_sync_callback_calling_insn(insn)) {
17060 			mark_calls_callback(env, t);
17061 			mark_force_checkpoint(env, t);
17062 			mark_prune_point(env, t);
17063 			mark_jmp_point(env, t);
17064 		}
17065 		if (bpf_helper_call(insn) && bpf_helper_changes_pkt_data(insn->imm))
17066 			mark_subprog_changes_pkt_data(env, t);
17067 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
17068 			struct bpf_kfunc_call_arg_meta meta;
17069 
17070 			ret = fetch_kfunc_meta(env, insn, &meta, NULL);
17071 			if (ret == 0 && is_iter_next_kfunc(&meta)) {
17072 				mark_prune_point(env, t);
17073 				/* Checking and saving state checkpoints at iter_next() call
17074 				 * is crucial for fast convergence of open-coded iterator loop
17075 				 * logic, so we need to force it. If we don't do that,
17076 				 * is_state_visited() might skip saving a checkpoint, causing
17077 				 * unnecessarily long sequence of not checkpointed
17078 				 * instructions and jumps, leading to exhaustion of jump
17079 				 * history buffer, and potentially other undesired outcomes.
17080 				 * It is expected that with correct open-coded iterators
17081 				 * convergence will happen quickly, so we don't run a risk of
17082 				 * exhausting memory.
17083 				 */
17084 				mark_force_checkpoint(env, t);
17085 			}
17086 		}
17087 		return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
17088 
17089 	case BPF_JA:
17090 		if (BPF_SRC(insn->code) != BPF_K)
17091 			return -EINVAL;
17092 
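		/* BPF_JMP32 | BPF_JA ("gotol") carries a 32-bit jump offset in
		 * insn->imm, while the 16-bit offset of a plain BPF_JMP | BPF_JA
		 * lives in insn->off.
		 */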
17093 		if (BPF_CLASS(insn->code) == BPF_JMP)
17094 			off = insn->off;
17095 		else
17096 			off = insn->imm;
17097 
17098 		/* unconditional jump with single edge */
17099 		ret = push_insn(t, t + off + 1, FALLTHROUGH, env);
17100 		if (ret)
17101 			return ret;
17102 
17103 		mark_prune_point(env, t + off + 1);
17104 		mark_jmp_point(env, t + off + 1);
17105 
17106 		return ret;
17107 
17108 	default:
17109 		/* conditional jump with two edges */
17110 		mark_prune_point(env, t);
17111 		if (is_may_goto_insn(insn))
17112 			mark_force_checkpoint(env, t);
17113 
17114 		ret = push_insn(t, t + 1, FALLTHROUGH, env);
17115 		if (ret)
17116 			return ret;
17117 
17118 		return push_insn(t, t + insn->off + 1, BRANCH, env);
17119 	}
17120 }
17121 
17122 /* non-recursive depth-first-search to detect loops in BPF program
17123  * loop == back-edge in directed graph
17124  */
17125 static int check_cfg(struct bpf_verifier_env *env)
17126 {
17127 	int insn_cnt = env->prog->len;
17128 	int *insn_stack, *insn_state;
17129 	int ex_insn_beg, i, ret = 0;
17130 	bool ex_done = false;
17131 
17132 	insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
17133 	if (!insn_state)
17134 		return -ENOMEM;
17135 
17136 	insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
17137 	if (!insn_stack) {
17138 		kvfree(insn_state);
17139 		return -ENOMEM;
17140 	}
17141 
17142 	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
17143 	insn_stack[0] = 0; /* 0 is the first instruction */
17144 	env->cfg.cur_stack = 1;
17145 
17146 walk_cfg:
17147 	while (env->cfg.cur_stack > 0) {
17148 		int t = insn_stack[env->cfg.cur_stack - 1];
17149 
17150 		ret = visit_insn(t, env);
17151 		switch (ret) {
17152 		case DONE_EXPLORING:
17153 			insn_state[t] = EXPLORED;
17154 			env->cfg.cur_stack--;
17155 			break;
17156 		case KEEP_EXPLORING:
17157 			break;
17158 		default:
17159 			if (ret > 0) {
17160 				verbose(env, "visit_insn internal bug\n");
17161 				ret = -EFAULT;
17162 			}
17163 			goto err_free;
17164 		}
17165 	}
17166 
17167 	if (env->cfg.cur_stack < 0) {
17168 		verbose(env, "pop stack internal bug\n");
17169 		ret = -EFAULT;
17170 		goto err_free;
17171 	}
17172 
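	/* The exception callback subprog is not reachable through ordinary
	 * edges from the main program, so walk its CFG in a second pass
	 * starting from its first instruction.
	 */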
17173 	if (env->exception_callback_subprog && !ex_done) {
17174 		ex_insn_beg = env->subprog_info[env->exception_callback_subprog].start;
17175 
17176 		insn_state[ex_insn_beg] = DISCOVERED;
17177 		insn_stack[0] = ex_insn_beg;
17178 		env->cfg.cur_stack = 1;
17179 		ex_done = true;
17180 		goto walk_cfg;
17181 	}
17182 
17183 	for (i = 0; i < insn_cnt; i++) {
17184 		struct bpf_insn *insn = &env->prog->insnsi[i];
17185 
17186 		if (insn_state[i] != EXPLORED) {
17187 			verbose(env, "unreachable insn %d\n", i);
17188 			ret = -EINVAL;
17189 			goto err_free;
17190 		}
17191 		if (bpf_is_ldimm64(insn)) {
17192 			if (insn_state[i + 1] != 0) {
17193 				verbose(env, "jump into the middle of ldimm64 insn %d\n", i);
17194 				ret = -EINVAL;
17195 				goto err_free;
17196 			}
17197 			i++; /* skip second half of ldimm64 */
17198 		}
17199 	}
17200 	ret = 0; /* cfg looks good */
17201 	env->prog->aux->changes_pkt_data = env->subprog_info[0].changes_pkt_data;
17202 
17203 err_free:
17204 	kvfree(insn_state);
17205 	kvfree(insn_stack);
17206 	env->cfg.insn_state = env->cfg.insn_stack = NULL;
17207 	return ret;
17208 }
17209 
17210 static int check_abnormal_return(struct bpf_verifier_env *env)
17211 {
17212 	int i;
17213 
17214 	for (i = 1; i < env->subprog_cnt; i++) {
17215 		if (env->subprog_info[i].has_ld_abs) {
17216 			verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
17217 			return -EINVAL;
17218 		}
17219 		if (env->subprog_info[i].has_tail_call) {
17220 			verbose(env, "tail_call is not allowed in subprogs without BTF\n");
17221 			return -EINVAL;
17222 		}
17223 	}
17224 	return 0;
17225 }
17226 
17227 /* The minimum supported BTF func info size */
17228 #define MIN_BPF_FUNCINFO_SIZE	8
17229 #define MAX_FUNCINFO_REC_SIZE	252
17230 
17231 static int check_btf_func_early(struct bpf_verifier_env *env,
17232 				const union bpf_attr *attr,
17233 				bpfptr_t uattr)
17234 {
17235 	u32 krec_size = sizeof(struct bpf_func_info);
17236 	const struct btf_type *type, *func_proto;
17237 	u32 i, nfuncs, urec_size, min_size;
17238 	struct bpf_func_info *krecord;
17239 	struct bpf_prog *prog;
17240 	const struct btf *btf;
17241 	u32 prev_offset = 0;
17242 	bpfptr_t urecord;
17243 	int ret = -ENOMEM;
17244 
17245 	nfuncs = attr->func_info_cnt;
17246 	if (!nfuncs) {
17247 		if (check_abnormal_return(env))
17248 			return -EINVAL;
17249 		return 0;
17250 	}
17251 
17252 	urec_size = attr->func_info_rec_size;
17253 	if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
17254 	    urec_size > MAX_FUNCINFO_REC_SIZE ||
17255 	    urec_size % sizeof(u32)) {
17256 		verbose(env, "invalid func info rec size %u\n", urec_size);
17257 		return -EINVAL;
17258 	}
17259 
17260 	prog = env->prog;
17261 	btf = prog->aux->btf;
17262 
17263 	urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
17264 	min_size = min_t(u32, krec_size, urec_size);
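	/* Copy only min(kernel, user) record size per entry: an older
	 * userspace may pass smaller records, while a larger user record is
	 * accepted as long as its trailing bytes are zero, which
	 * bpf_check_uarg_tail_zero() verifies below.
	 */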
17265 
17266 	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
17267 	if (!krecord)
17268 		return -ENOMEM;
17269 
17270 	for (i = 0; i < nfuncs; i++) {
17271 		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
17272 		if (ret) {
17273 			if (ret == -E2BIG) {
17274 				verbose(env, "nonzero tailing record in func info");
17275 				/* set the size kernel expects so loader can zero
17276 				 * out the rest of the record.
17277 				 */
17278 				if (copy_to_bpfptr_offset(uattr,
17279 							  offsetof(union bpf_attr, func_info_rec_size),
17280 							  &min_size, sizeof(min_size)))
17281 					ret = -EFAULT;
17282 			}
17283 			goto err_free;
17284 		}
17285 
17286 		if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
17287 			ret = -EFAULT;
17288 			goto err_free;
17289 		}
17290 
17291 		/* check insn_off */
17292 		ret = -EINVAL;
17293 		if (i == 0) {
17294 			if (krecord[i].insn_off) {
17295 				verbose(env,
17296 					"nonzero insn_off %u for the first func info record",
17297 					krecord[i].insn_off);
17298 				goto err_free;
17299 			}
17300 		} else if (krecord[i].insn_off <= prev_offset) {
17301 			verbose(env,
17302 				"same or smaller insn offset (%u) than previous func info record (%u)",
17303 				krecord[i].insn_off, prev_offset);
17304 			goto err_free;
17305 		}
17306 
17307 		/* check type_id */
17308 		type = btf_type_by_id(btf, krecord[i].type_id);
17309 		if (!type || !btf_type_is_func(type)) {
17310 			verbose(env, "invalid type id %d in func info",
17311 				krecord[i].type_id);
17312 			goto err_free;
17313 		}
17314 
17315 		func_proto = btf_type_by_id(btf, type->type);
17316 		if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
17317 			/* btf_func_check() already verified it during BTF load */
17318 			goto err_free;
17319 
17320 		prev_offset = krecord[i].insn_off;
17321 		bpfptr_add(&urecord, urec_size);
17322 	}
17323 
17324 	prog->aux->func_info = krecord;
17325 	prog->aux->func_info_cnt = nfuncs;
17326 	return 0;
17327 
17328 err_free:
17329 	kvfree(krecord);
17330 	return ret;
17331 }
17332 
17333 static int check_btf_func(struct bpf_verifier_env *env,
17334 			  const union bpf_attr *attr,
17335 			  bpfptr_t uattr)
17336 {
17337 	const struct btf_type *type, *func_proto, *ret_type;
17338 	u32 i, nfuncs, urec_size;
17339 	struct bpf_func_info *krecord;
17340 	struct bpf_func_info_aux *info_aux = NULL;
17341 	struct bpf_prog *prog;
17342 	const struct btf *btf;
17343 	bpfptr_t urecord;
17344 	bool scalar_return;
17345 	int ret = -ENOMEM;
17346 
17347 	nfuncs = attr->func_info_cnt;
17348 	if (!nfuncs) {
17349 		if (check_abnormal_return(env))
17350 			return -EINVAL;
17351 		return 0;
17352 	}
17353 	if (nfuncs != env->subprog_cnt) {
17354 		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
17355 		return -EINVAL;
17356 	}
17357 
17358 	urec_size = attr->func_info_rec_size;
17359 
17360 	prog = env->prog;
17361 	btf = prog->aux->btf;
17362 
17363 	urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
17364 
17365 	krecord = prog->aux->func_info;
17366 	info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
17367 	if (!info_aux)
17368 		return -ENOMEM;
17369 
17370 	for (i = 0; i < nfuncs; i++) {
17371 		/* check insn_off */
17372 		ret = -EINVAL;
17373 
17374 		if (env->subprog_info[i].start != krecord[i].insn_off) {
17375 			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
17376 			goto err_free;
17377 		}
17378 
17379 		/* Already checked type_id */
17380 		type = btf_type_by_id(btf, krecord[i].type_id);
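		/* For BTF_KIND_FUNC the vlen bits of btf_type.info encode the
		 * linkage (static/global/extern); e.g. BPF_PSEUDO_FUNC callbacks
		 * must be BTF_FUNC_STATIC, see check_ld_imm().
		 */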
17381 		info_aux[i].linkage = BTF_INFO_VLEN(type->info);
17382 		/* Already checked func_proto */
17383 		func_proto = btf_type_by_id(btf, type->type);
17384 
17385 		ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
17386 		scalar_return =
17387 			btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
17388 		if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
17389 			verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
17390 			goto err_free;
17391 		}
17392 		if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
17393 			verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
17394 			goto err_free;
17395 		}
17396 
17397 		bpfptr_add(&urecord, urec_size);
17398 	}
17399 
17400 	prog->aux->func_info_aux = info_aux;
17401 	return 0;
17402 
17403 err_free:
17404 	kfree(info_aux);
17405 	return ret;
17406 }
17407 
17408 static void adjust_btf_func(struct bpf_verifier_env *env)
17409 {
17410 	struct bpf_prog_aux *aux = env->prog->aux;
17411 	int i;
17412 
17413 	if (!aux->func_info)
17414 		return;
17415 
17416 	/* func_info is not available for hidden subprogs */
17417 	for (i = 0; i < env->subprog_cnt - env->hidden_subprog_cnt; i++)
17418 		aux->func_info[i].insn_off = env->subprog_info[i].start;
17419 }
17420 
17421 #define MIN_BPF_LINEINFO_SIZE	offsetofend(struct bpf_line_info, line_col)
17422 #define MAX_LINEINFO_REC_SIZE	MAX_FUNCINFO_REC_SIZE
17423 
17424 static int check_btf_line(struct bpf_verifier_env *env,
17425 			  const union bpf_attr *attr,
17426 			  bpfptr_t uattr)
17427 {
17428 	u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
17429 	struct bpf_subprog_info *sub;
17430 	struct bpf_line_info *linfo;
17431 	struct bpf_prog *prog;
17432 	const struct btf *btf;
17433 	bpfptr_t ulinfo;
17434 	int err;
17435 
17436 	nr_linfo = attr->line_info_cnt;
17437 	if (!nr_linfo)
17438 		return 0;
17439 	if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
17440 		return -EINVAL;
17441 
17442 	rec_size = attr->line_info_rec_size;
17443 	if (rec_size < MIN_BPF_LINEINFO_SIZE ||
17444 	    rec_size > MAX_LINEINFO_REC_SIZE ||
17445 	    rec_size & (sizeof(u32) - 1))
17446 		return -EINVAL;
17447 
17448 	/* Need to zero it in case userspace passes
17449 	 * in a smaller bpf_line_info object.
17450 	 */
17451 	linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
17452 			 GFP_KERNEL | __GFP_NOWARN);
17453 	if (!linfo)
17454 		return -ENOMEM;
17455 
17456 	prog = env->prog;
17457 	btf = prog->aux->btf;
17458 
17459 	s = 0;
17460 	sub = env->subprog_info;
17461 	ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
17462 	expected_size = sizeof(struct bpf_line_info);
17463 	ncopy = min_t(u32, expected_size, rec_size);
17464 	for (i = 0; i < nr_linfo; i++) {
17465 		err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
17466 		if (err) {
17467 			if (err == -E2BIG) {
17468 				verbose(env, "nonzero tailing record in line_info");
17469 				if (copy_to_bpfptr_offset(uattr,
17470 							  offsetof(union bpf_attr, line_info_rec_size),
17471 							  &expected_size, sizeof(expected_size)))
17472 					err = -EFAULT;
17473 			}
17474 			goto err_free;
17475 		}
17476 
17477 		if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
17478 			err = -EFAULT;
17479 			goto err_free;
17480 		}
17481 
17482 		/*
17483 		 * Check insn_off to ensure
17484 		 * 1) strictly increasing AND
17485 		 * 2) bounded by prog->len
17486 		 *
17487 		 * The linfo[0].insn_off == 0 check logically falls into
17488 		 * the later "missing bpf_line_info for func..." case
17489 		 * because the first linfo[0].insn_off must be the
17490 		 * first sub also and the first sub must have
17491 		 * subprog_info[0].start == 0.
17492 		 */
17493 		if ((i && linfo[i].insn_off <= prev_offset) ||
17494 		    linfo[i].insn_off >= prog->len) {
17495 			verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
17496 				i, linfo[i].insn_off, prev_offset,
17497 				prog->len);
17498 			err = -EINVAL;
17499 			goto err_free;
17500 		}
17501 
17502 		if (!prog->insnsi[linfo[i].insn_off].code) {
17503 			verbose(env,
17504 				"Invalid insn code at line_info[%u].insn_off\n",
17505 				i);
17506 			err = -EINVAL;
17507 			goto err_free;
17508 		}
17509 
17510 		if (!btf_name_by_offset(btf, linfo[i].line_off) ||
17511 		    !btf_name_by_offset(btf, linfo[i].file_name_off)) {
17512 			verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
17513 			err = -EINVAL;
17514 			goto err_free;
17515 		}
17516 
17517 		if (s != env->subprog_cnt) {
17518 			if (linfo[i].insn_off == sub[s].start) {
17519 				sub[s].linfo_idx = i;
17520 				s++;
17521 			} else if (sub[s].start < linfo[i].insn_off) {
17522 				verbose(env, "missing bpf_line_info for func#%u\n", s);
17523 				err = -EINVAL;
17524 				goto err_free;
17525 			}
17526 		}
17527 
17528 		prev_offset = linfo[i].insn_off;
17529 		bpfptr_add(&ulinfo, rec_size);
17530 	}
17531 
17532 	if (s != env->subprog_cnt) {
17533 		verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
17534 			env->subprog_cnt - s, s);
17535 		err = -EINVAL;
17536 		goto err_free;
17537 	}
17538 
17539 	prog->aux->linfo = linfo;
17540 	prog->aux->nr_linfo = nr_linfo;
17541 
17542 	return 0;
17543 
17544 err_free:
17545 	kvfree(linfo);
17546 	return err;
17547 }
17548 
17549 #define MIN_CORE_RELO_SIZE	sizeof(struct bpf_core_relo)
17550 #define MAX_CORE_RELO_SIZE	MAX_FUNCINFO_REC_SIZE
17551 
17552 static int check_core_relo(struct bpf_verifier_env *env,
17553 			   const union bpf_attr *attr,
17554 			   bpfptr_t uattr)
17555 {
17556 	u32 i, nr_core_relo, ncopy, expected_size, rec_size;
17557 	struct bpf_core_relo core_relo = {};
17558 	struct bpf_prog *prog = env->prog;
17559 	const struct btf *btf = prog->aux->btf;
17560 	struct bpf_core_ctx ctx = {
17561 		.log = &env->log,
17562 		.btf = btf,
17563 	};
17564 	bpfptr_t u_core_relo;
17565 	int err;
17566 
17567 	nr_core_relo = attr->core_relo_cnt;
17568 	if (!nr_core_relo)
17569 		return 0;
17570 	if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
17571 		return -EINVAL;
17572 
17573 	rec_size = attr->core_relo_rec_size;
17574 	if (rec_size < MIN_CORE_RELO_SIZE ||
17575 	    rec_size > MAX_CORE_RELO_SIZE ||
17576 	    rec_size % sizeof(u32))
17577 		return -EINVAL;
17578 
17579 	u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
17580 	expected_size = sizeof(struct bpf_core_relo);
17581 	ncopy = min_t(u32, expected_size, rec_size);
17582 
17583 	/* Unlike func_info and line_info, copy and apply each CO-RE
17584 	 * relocation record one at a time.
17585 	 */
17586 	for (i = 0; i < nr_core_relo; i++) {
17587 		/* future proofing when sizeof(bpf_core_relo) changes */
17588 		err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
17589 		if (err) {
17590 			if (err == -E2BIG) {
17591 				verbose(env, "nonzero tailing record in core_relo");
17592 				if (copy_to_bpfptr_offset(uattr,
17593 							  offsetof(union bpf_attr, core_relo_rec_size),
17594 							  &expected_size, sizeof(expected_size)))
17595 					err = -EFAULT;
17596 			}
17597 			break;
17598 		}
17599 
17600 		if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
17601 			err = -EFAULT;
17602 			break;
17603 		}
17604 
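		/* Unlike func_info/line_info, core_relo.insn_off is expressed in
		 * bytes, hence the check for 8-byte alignment and the division
		 * by 8 to index prog->insnsi.
		 */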
17605 		if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
17606 			verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
17607 				i, core_relo.insn_off, prog->len);
17608 			err = -EINVAL;
17609 			break;
17610 		}
17611 
17612 		err = bpf_core_apply(&ctx, &core_relo, i,
17613 				     &prog->insnsi[core_relo.insn_off / 8]);
17614 		if (err)
17615 			break;
17616 		bpfptr_add(&u_core_relo, rec_size);
17617 	}
17618 	return err;
17619 }
17620 
17621 static int check_btf_info_early(struct bpf_verifier_env *env,
17622 				const union bpf_attr *attr,
17623 				bpfptr_t uattr)
17624 {
17625 	struct btf *btf;
17626 	int err;
17627 
17628 	if (!attr->func_info_cnt && !attr->line_info_cnt) {
17629 		if (check_abnormal_return(env))
17630 			return -EINVAL;
17631 		return 0;
17632 	}
17633 
17634 	btf = btf_get_by_fd(attr->prog_btf_fd);
17635 	if (IS_ERR(btf))
17636 		return PTR_ERR(btf);
17637 	if (btf_is_kernel(btf)) {
17638 		btf_put(btf);
17639 		return -EACCES;
17640 	}
17641 	env->prog->aux->btf = btf;
17642 
17643 	err = check_btf_func_early(env, attr, uattr);
17644 	if (err)
17645 		return err;
17646 	return 0;
17647 }
17648 
17649 static int check_btf_info(struct bpf_verifier_env *env,
17650 			  const union bpf_attr *attr,
17651 			  bpfptr_t uattr)
17652 {
17653 	int err;
17654 
17655 	if (!attr->func_info_cnt && !attr->line_info_cnt) {
17656 		if (check_abnormal_return(env))
17657 			return -EINVAL;
17658 		return 0;
17659 	}
17660 
17661 	err = check_btf_func(env, attr, uattr);
17662 	if (err)
17663 		return err;
17664 
17665 	err = check_btf_line(env, attr, uattr);
17666 	if (err)
17667 		return err;
17668 
17669 	err = check_core_relo(env, attr, uattr);
17670 	if (err)
17671 		return err;
17672 
17673 	return 0;
17674 }
17675 
17676 /* check %cur's range satisfies %old's */
17677 static bool range_within(const struct bpf_reg_state *old,
17678 			 const struct bpf_reg_state *cur)
17679 {
17680 	return old->umin_value <= cur->umin_value &&
17681 	       old->umax_value >= cur->umax_value &&
17682 	       old->smin_value <= cur->smin_value &&
17683 	       old->smax_value >= cur->smax_value &&
17684 	       old->u32_min_value <= cur->u32_min_value &&
17685 	       old->u32_max_value >= cur->u32_max_value &&
17686 	       old->s32_min_value <= cur->s32_min_value &&
17687 	       old->s32_max_value >= cur->s32_max_value;
17688 }
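/* E.g. an old (already verified) state with a scalar known to be in [0, 10]
 * also covers a current state where the same register is in [2, 5]: the
 * current ranges must be subsets of the old ones in all four
 * (u64/s64/u32/s32) domains.
 */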
17689 
17690 /* If in the old state two registers had the same id, then they need to have
17691  * the same id in the new state as well.  But that id could be different from
17692  * the old state, so we need to track the mapping from old to new ids.
17693  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
17694  * regs with old id 5 must also have new id 9 for the new state to be safe.  But
17695  * regs with a different old id could still have new id 9, we don't care about
17696  * that.
17697  * So we look through our idmap to see if this old id has been seen before.  If
17698  * so, we require the new id to match; otherwise, we add the id pair to the map.
17699  */
17700 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
17701 {
17702 	struct bpf_id_pair *map = idmap->map;
17703 	unsigned int i;
17704 
17705 	/* either both IDs should be set or both should be zero */
17706 	if (!!old_id != !!cur_id)
17707 		return false;
17708 
17709 	if (old_id == 0) /* cur_id == 0 as well */
17710 		return true;
17711 
17712 	for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
17713 		if (!map[i].old) {
17714 			/* Reached an empty slot; haven't seen this id before */
17715 			map[i].old = old_id;
17716 			map[i].cur = cur_id;
17717 			return true;
17718 		}
17719 		if (map[i].old == old_id)
17720 			return map[i].cur == cur_id;
17721 		if (map[i].cur == cur_id)
17722 			return false;
17723 	}
17724 	/* We ran out of idmap slots, which should be impossible */
17725 	WARN_ON_ONCE(1);
17726 	return false;
17727 }
17728 
17729 /* Similar to check_ids(), but allocate a unique temporary ID
17730  * for 'old_id' or 'cur_id' of zero.
17731  * This makes pairs like '0 vs unique ID', 'unique ID vs 0' valid.
17732  */
17733 static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
17734 {
17735 	old_id = old_id ? old_id : ++idmap->tmp_id_gen;
17736 	cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
17737 
17738 	return check_ids(old_id, cur_id, idmap);
17739 }
17740 
17741 static void clean_func_state(struct bpf_verifier_env *env,
17742 			     struct bpf_func_state *st)
17743 {
17744 	enum bpf_reg_liveness live;
17745 	int i, j;
17746 
17747 	for (i = 0; i < BPF_REG_FP; i++) {
17748 		live = st->regs[i].live;
17749 		/* liveness must not touch this register anymore */
17750 		st->regs[i].live |= REG_LIVE_DONE;
17751 		if (!(live & REG_LIVE_READ))
17752 			/* since the register is unused, clear its state
17753 			 * to make further comparison simpler
17754 			 */
17755 			__mark_reg_not_init(env, &st->regs[i]);
17756 	}
17757 
17758 	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
17759 		live = st->stack[i].spilled_ptr.live;
17760 		/* liveness must not touch this stack slot anymore */
17761 		st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
17762 		if (!(live & REG_LIVE_READ)) {
17763 			__mark_reg_not_init(env, &st->stack[i].spilled_ptr);
17764 			for (j = 0; j < BPF_REG_SIZE; j++)
17765 				st->stack[i].slot_type[j] = STACK_INVALID;
17766 		}
17767 	}
17768 }
17769 
17770 static void clean_verifier_state(struct bpf_verifier_env *env,
17771 				 struct bpf_verifier_state *st)
17772 {
17773 	int i;
17774 
17775 	if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
17776 		/* all regs in this state in all frames were already marked */
17777 		return;
17778 
17779 	for (i = 0; i <= st->curframe; i++)
17780 		clean_func_state(env, st->frame[i]);
17781 }
17782 
17783 /* the parentage chains form a tree.
17784  * the verifier states are added to state lists at given insn and
17785  * pushed into state stack for future exploration.
17786  * when the verifier reaches bpf_exit insn some of the verifier states
17787  * stored in the state lists have their final liveness state already,
17788  * but a lot of states will get revised from liveness point of view when
17789  * the verifier explores other branches.
17790  * Example:
17791  * 1: r0 = 1
17792  * 2: if r1 == 100 goto pc+1
17793  * 3: r0 = 2
17794  * 4: exit
17795  * when the verifier reaches exit insn the register r0 in the state list of
17796  * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
17797  * of insn 2 and goes exploring further. At the insn 4 it will walk the
17798  * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
17799  *
17800  * Since the verifier pushes the branch states as it sees them while exploring
17801  * the program the condition of walking the branch instruction for the second
17802  * time means that all states below this branch were already explored and
17803  * their final liveness marks are already propagated.
17804  * Hence when the verifier completes the search of state list in is_state_visited()
17805  * we can call this clean_live_states() function to mark all liveness states
17806  * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
17807  * will not be used.
17808  * This function also clears the registers and stack for states that !READ
17809  * to simplify state merging.
17810  *
17811  * Important note here that walking the same branch instruction in the callee
17812  * doesn't mean that the states are DONE. The verifier has to compare
17813  * the callsites
17814  */
17815 static void clean_live_states(struct bpf_verifier_env *env, int insn,
17816 			      struct bpf_verifier_state *cur)
17817 {
17818 	struct bpf_verifier_state_list *sl;
17819 
17820 	sl = *explored_state(env, insn);
17821 	while (sl) {
17822 		if (sl->state.branches)
17823 			goto next;
17824 		if (sl->state.insn_idx != insn ||
17825 		    !same_callsites(&sl->state, cur))
17826 			goto next;
17827 		clean_verifier_state(env, &sl->state);
17828 next:
17829 		sl = sl->next;
17830 	}
17831 }
17832 
17833 static bool regs_exact(const struct bpf_reg_state *rold,
17834 		       const struct bpf_reg_state *rcur,
17835 		       struct bpf_idmap *idmap)
17836 {
17837 	return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
17838 	       check_ids(rold->id, rcur->id, idmap) &&
17839 	       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
17840 }
17841 
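/* Strictness levels used by regsafe() and stacksafe() below. A rough summary
 * of the checks in this file:
 * - NOT_EXACT: the default; registers/stack slots without a REG_LIVE_READ
 *   mark and imprecise scalars are skipped;
 * - RANGE_WITHIN: the liveness/precision shortcuts are not taken, but old
 *   scalar/pointer ranges may still merely contain the current ones (used
 *   at iterator, may_goto and callback convergence points and for states
 *   inside loops, see is_state_visited());
 * - EXACT: old and current registers must match field-for-field, with ids
 *   compared through the idmap (see regs_exact()).
 */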
17842 enum exact_level {
17843 	NOT_EXACT,
17844 	EXACT,
17845 	RANGE_WITHIN
17846 };
17847 
17848 /* Returns true if (rold safe implies rcur safe) */
17849 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
17850 		    struct bpf_reg_state *rcur, struct bpf_idmap *idmap,
17851 		    enum exact_level exact)
17852 {
17853 	if (exact == EXACT)
17854 		return regs_exact(rold, rcur, idmap);
17855 
17856 	if (!(rold->live & REG_LIVE_READ) && exact == NOT_EXACT)
17857 		/* explored state didn't use this */
17858 		return true;
17859 	if (rold->type == NOT_INIT) {
17860 		if (exact == NOT_EXACT || rcur->type == NOT_INIT)
17861 			/* explored state can't have used this */
17862 			return true;
17863 	}
17864 
17865 	/* Enforce that register types have to match exactly, including their
17866 	 * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
17867 	 * rule.
17868 	 *
17869 	 * One can make a point that using a pointer register as unbounded
17870 	 * SCALAR would be technically acceptable, but this could lead to
17871 	 * pointer leaks because scalars are allowed to leak while pointers
17872 	 * are not. We could make this safe in special cases if root is
17873 	 * calling us, but it's probably not worth the hassle.
17874 	 *
17875 	 * Also, register types that are *not* MAYBE_NULL could technically be
17876 	 * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
17877 	 * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
17878 	 * to the same map).
17879 	 * However, if the old MAYBE_NULL register then got NULL checked,
17880 	 * doing so could have affected others with the same id, and we can't
17881 	 * check for that because we lost the id when we converted to
17882 	 * a non-MAYBE_NULL variant.
17883 	 * So, as a general rule we don't allow mixing MAYBE_NULL and
17884 	 * non-MAYBE_NULL registers as well.
17885 	 */
17886 	if (rold->type != rcur->type)
17887 		return false;
17888 
17889 	switch (base_type(rold->type)) {
17890 	case SCALAR_VALUE:
17891 		if (env->explore_alu_limits) {
17892 			/* explore_alu_limits disables tnum_in() and range_within()
17893 			 * logic and requires everything to be strict
17894 			 */
17895 			return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
17896 			       check_scalar_ids(rold->id, rcur->id, idmap);
17897 		}
17898 		if (!rold->precise && exact == NOT_EXACT)
17899 			return true;
17900 		if ((rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST))
17901 			return false;
17902 		if ((rold->id & BPF_ADD_CONST) && (rold->off != rcur->off))
17903 			return false;
17904 		/* Why check_ids() for scalar registers?
17905 		 *
17906 		 * Consider the following BPF code:
17907 		 *   1: r6 = ... unbound scalar, ID=a ...
17908 		 *   2: r7 = ... unbound scalar, ID=b ...
17909 		 *   3: if (r6 > r7) goto +1
17910 		 *   4: r6 = r7
17911 		 *   5: if (r6 > X) goto ...
17912 		 *   6: ... memory operation using r7 ...
17913 		 *
17914 		 * First verification path is [1-6]:
17915 		 * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
17916 		 * - at (5) r6 would be marked <= X, sync_linked_regs() would also mark
17917 		 *   r7 <= X, because r6 and r7 share same id.
17918 		 * Next verification path is [1-4, 6].
17919 		 *
17920 		 * Instruction (6) would be reached in two states:
17921 		 *   I.  r6{.id=b}, r7{.id=b} via path 1-6;
17922 		 *   II. r6{.id=a}, r7{.id=b} via path 1-4, 6.
17923 		 *
17924 		 * Use check_ids() to distinguish these states.
17925 		 * ---
17926 		 * Also verify that new value satisfies old value range knowledge.
17927 		 */
17928 		return range_within(rold, rcur) &&
17929 		       tnum_in(rold->var_off, rcur->var_off) &&
17930 		       check_scalar_ids(rold->id, rcur->id, idmap);
17931 	case PTR_TO_MAP_KEY:
17932 	case PTR_TO_MAP_VALUE:
17933 	case PTR_TO_MEM:
17934 	case PTR_TO_BUF:
17935 	case PTR_TO_TP_BUFFER:
17936 		/* If the new min/max/var_off satisfy the old ones and
17937 		 * everything else matches, we are OK.
17938 		 */
17939 		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
17940 		       range_within(rold, rcur) &&
17941 		       tnum_in(rold->var_off, rcur->var_off) &&
17942 		       check_ids(rold->id, rcur->id, idmap) &&
17943 		       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
17944 	case PTR_TO_PACKET_META:
17945 	case PTR_TO_PACKET:
17946 		/* We must have at least as much range as the old ptr
17947 		 * did, so that any accesses which were safe before are
17948 		 * still safe.  This is true even if old range < old off,
17949 		 * since someone could have accessed through (ptr - k), or
17950 		 * even done ptr -= k in a register, to get a safe access.
17951 		 */
17952 		if (rold->range > rcur->range)
17953 			return false;
17954 		/* If the offsets don't match, we can't trust our alignment;
17955 		 * nor can we be sure that we won't fall out of range.
17956 		 */
17957 		if (rold->off != rcur->off)
17958 			return false;
17959 		/* id relations must be preserved */
17960 		if (!check_ids(rold->id, rcur->id, idmap))
17961 			return false;
17962 		/* new val must satisfy old val knowledge */
17963 		return range_within(rold, rcur) &&
17964 		       tnum_in(rold->var_off, rcur->var_off);
17965 	case PTR_TO_STACK:
17966 		/* two stack pointers are equal only if they're pointing to
17967 		 * the same stack frame, since fp-8 in foo != fp-8 in bar
17968 		 */
17969 		return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
17970 	case PTR_TO_ARENA:
17971 		return true;
17972 	default:
17973 		return regs_exact(rold, rcur, idmap);
17974 	}
17975 }
17976 
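/* A fake register representing the result of loading from a stack region
 * where every slot is STACK_MISC: an unknown, imprecise scalar. It is marked
 * REG_LIVE_READ in unbound_reg_init() so that regsafe() does not skip it,
 * and is handed out by scalar_reg_for_stack() below.
 */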
17977 static struct bpf_reg_state unbound_reg;
17978 
17979 static __init int unbound_reg_init(void)
17980 {
17981 	__mark_reg_unknown_imprecise(&unbound_reg);
17982 	unbound_reg.live |= REG_LIVE_READ;
17983 	return 0;
17984 }
17985 late_initcall(unbound_reg_init);
17986 
17987 static bool is_stack_all_misc(struct bpf_verifier_env *env,
17988 			      struct bpf_stack_state *stack)
17989 {
17990 	u32 i;
17991 
17992 	for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i) {
17993 		if ((stack->slot_type[i] == STACK_MISC) ||
17994 		    (stack->slot_type[i] == STACK_INVALID && env->allow_uninit_stack))
17995 			continue;
17996 		return false;
17997 	}
17998 
17999 	return true;
18000 }
18001 
18002 static struct bpf_reg_state *scalar_reg_for_stack(struct bpf_verifier_env *env,
18003 						  struct bpf_stack_state *stack)
18004 {
18005 	if (is_spilled_scalar_reg64(stack))
18006 		return &stack->spilled_ptr;
18007 
18008 	if (is_stack_all_misc(env, stack))
18009 		return &unbound_reg;
18010 
18011 	return NULL;
18012 }
18013 
18014 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
18015 		      struct bpf_func_state *cur, struct bpf_idmap *idmap,
18016 		      enum exact_level exact)
18017 {
18018 	int i, spi;
18019 
18020 	/* walk slots of the explored stack and ignore any additional
18021 	 * slots in the current stack, since explored(safe) state
18022 	 * didn't use them
18023 	 */
18024 	for (i = 0; i < old->allocated_stack; i++) {
18025 		struct bpf_reg_state *old_reg, *cur_reg;
18026 
18027 		spi = i / BPF_REG_SIZE;
18028 
18029 		if (exact != NOT_EXACT &&
18030 		    (i >= cur->allocated_stack ||
18031 		     old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
18032 		     cur->stack[spi].slot_type[i % BPF_REG_SIZE]))
18033 			return false;
18034 
18035 		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)
18036 		    && exact == NOT_EXACT) {
18037 			i += BPF_REG_SIZE - 1;
18038 			/* explored state didn't use this */
18039 			continue;
18040 		}
18041 
18042 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
18043 			continue;
18044 
18045 		if (env->allow_uninit_stack &&
18046 		    old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
18047 			continue;
18048 
18049 		/* explored stack has more populated slots than current stack
18050 		 * and these slots were used
18051 		 */
18052 		if (i >= cur->allocated_stack)
18053 			return false;
18054 
18055 		/* 64-bit scalar spill vs. all slots MISC, and vice versa.
18056 		 * A load from all-MISC slots produces an unbound scalar.
18057 		 * Construct a fake register for such a stack region and call
18058 		 * regsafe() to ensure scalar ids are compared.
18059 		 */
18060 		old_reg = scalar_reg_for_stack(env, &old->stack[spi]);
18061 		cur_reg = scalar_reg_for_stack(env, &cur->stack[spi]);
18062 		if (old_reg && cur_reg) {
18063 			if (!regsafe(env, old_reg, cur_reg, idmap, exact))
18064 				return false;
18065 			i += BPF_REG_SIZE - 1;
18066 			continue;
18067 		}
18068 
18069 		/* if old state was safe with misc data in the stack
18070 		 * it will be safe with zero-initialized stack.
18071 		 * The opposite is not true
18072 		 */
18073 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
18074 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
18075 			continue;
18076 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
18077 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
18078 			/* Ex: old explored (safe) state has STACK_SPILL in
18079 			 * this stack slot, but current has STACK_MISC ->
18080 			 * these verifier states are not equivalent,
18081 			 * return false to continue verification of this path
18082 			 */
18083 			return false;
18084 		if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
18085 			continue;
18086 		/* Both old and cur have the same slot_type */
18087 		switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
18088 		case STACK_SPILL:
18089 			/* when explored and current stack slot are both storing
18090 			 * spilled registers, check that stored pointers types
18091 			 * are the same as well.
18092 			 * Ex: explored safe path could have stored
18093 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
18094 			 * but current path has stored:
18095 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
18096 			 * such verifier states are not equivalent.
18097 			 * return false to continue verification of this path
18098 			 */
18099 			if (!regsafe(env, &old->stack[spi].spilled_ptr,
18100 				     &cur->stack[spi].spilled_ptr, idmap, exact))
18101 				return false;
18102 			break;
18103 		case STACK_DYNPTR:
18104 			old_reg = &old->stack[spi].spilled_ptr;
18105 			cur_reg = &cur->stack[spi].spilled_ptr;
18106 			if (old_reg->dynptr.type != cur_reg->dynptr.type ||
18107 			    old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
18108 			    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
18109 				return false;
18110 			break;
18111 		case STACK_ITER:
18112 			old_reg = &old->stack[spi].spilled_ptr;
18113 			cur_reg = &cur->stack[spi].spilled_ptr;
18114 			/* iter.depth is not compared between states as it
18115 			 * doesn't matter for correctness and would otherwise
18116 			 * prevent convergence; we maintain it only to keep the
18117 			 * infinite loop check from triggering, see
18118 			 * iter_active_depths_differ()
18119 			 */
18120 			if (old_reg->iter.btf != cur_reg->iter.btf ||
18121 			    old_reg->iter.btf_id != cur_reg->iter.btf_id ||
18122 			    old_reg->iter.state != cur_reg->iter.state ||
18123 			    /* ignore {old_reg,cur_reg}->iter.depth, see above */
18124 			    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
18125 				return false;
18126 			break;
18127 		case STACK_IRQ_FLAG:
18128 			old_reg = &old->stack[spi].spilled_ptr;
18129 			cur_reg = &cur->stack[spi].spilled_ptr;
18130 			if (!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
18131 				return false;
18132 			break;
18133 		case STACK_MISC:
18134 		case STACK_ZERO:
18135 		case STACK_INVALID:
18136 			continue;
18137 		/* Ensure that new unhandled slot types return false by default */
18138 		default:
18139 			return false;
18140 		}
18141 	}
18142 	return true;
18143 }
18144 
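/* Check that the reference state of the old (explored) verifier state is
 * equivalent to the current one: same number and types of acquired
 * references, same lock/RCU/preemption/IRQ state, with reference ids
 * compared through the idmap.
 */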
18145 static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *cur,
18146 		    struct bpf_idmap *idmap)
18147 {
18148 	int i;
18149 
18150 	if (old->acquired_refs != cur->acquired_refs)
18151 		return false;
18152 
18153 	if (old->active_locks != cur->active_locks)
18154 		return false;
18155 
18156 	if (old->active_preempt_locks != cur->active_preempt_locks)
18157 		return false;
18158 
18159 	if (old->active_rcu_lock != cur->active_rcu_lock)
18160 		return false;
18161 
18162 	if (!check_ids(old->active_irq_id, cur->active_irq_id, idmap))
18163 		return false;
18164 
18165 	for (i = 0; i < old->acquired_refs; i++) {
18166 		if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) ||
18167 		    old->refs[i].type != cur->refs[i].type)
18168 			return false;
18169 		switch (old->refs[i].type) {
18170 		case REF_TYPE_PTR:
18171 		case REF_TYPE_IRQ:
18172 			break;
18173 		case REF_TYPE_LOCK:
18174 			if (old->refs[i].ptr != cur->refs[i].ptr)
18175 				return false;
18176 			break;
18177 		default:
18178 			WARN_ONCE(1, "Unhandled enum type for reference state: %d\n", old->refs[i].type);
18179 			return false;
18180 		}
18181 	}
18182 
18183 	return true;
18184 }
18185 
18186 /* compare two verifier states
18187  *
18188  * all states stored in state_list are known to be valid, since
18189  * verifier reached 'bpf_exit' instruction through them
18190  *
18191  * this function is called when verifier exploring different branches of
18192  * execution popped from the state stack. If it sees an old state that has
18193  * more strict register state and more strict stack state then this execution
18194  * branch doesn't need to be explored further, since verifier already
18195  * concluded that more strict state leads to valid finish.
18196  *
18197  * Therefore two states are equivalent if register state is more conservative
18198  * and explored stack state is more conservative than the current one.
18199  * Example:
18200  *       explored                   current
18201  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
18202  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
18203  *
18204  * In other words, if the current stack state (the one being explored) has more
18205  * valid slots than the old one that already passed validation, it means
18206  * the verifier can stop exploring and conclude that the current state is valid too
18207  *
18208  * Similarly with registers. If explored state has register type as invalid
18209  * whereas register type in current state is meaningful, it means that
18210  * the current state will reach 'bpf_exit' instruction safely
18211  */
18212 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
18213 			      struct bpf_func_state *cur, enum exact_level exact)
18214 {
18215 	int i;
18216 
18217 	if (old->callback_depth > cur->callback_depth)
18218 		return false;
18219 
18220 	for (i = 0; i < MAX_BPF_REG; i++)
18221 		if (!regsafe(env, &old->regs[i], &cur->regs[i],
18222 			     &env->idmap_scratch, exact))
18223 			return false;
18224 
18225 	if (!stacksafe(env, old, cur, &env->idmap_scratch, exact))
18226 		return false;
18227 
18228 	return true;
18229 }
18230 
18231 static void reset_idmap_scratch(struct bpf_verifier_env *env)
18232 {
18233 	env->idmap_scratch.tmp_id_gen = env->id_gen;
18234 	memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map));
18235 }
18236 
18237 static bool states_equal(struct bpf_verifier_env *env,
18238 			 struct bpf_verifier_state *old,
18239 			 struct bpf_verifier_state *cur,
18240 			 enum exact_level exact)
18241 {
18242 	int i;
18243 
18244 	if (old->curframe != cur->curframe)
18245 		return false;
18246 
18247 	reset_idmap_scratch(env);
18248 
18249 	/* Verification state from speculative execution simulation
18250 	 * must never prune a non-speculative execution one.
18251 	 */
18252 	if (old->speculative && !cur->speculative)
18253 		return false;
18254 
18255 	if (old->in_sleepable != cur->in_sleepable)
18256 		return false;
18257 
18258 	if (!refsafe(old, cur, &env->idmap_scratch))
18259 		return false;
18260 
18261 	/* for states to be equal callsites have to be the same
18262 	 * and all frame states need to be equivalent
18263 	 */
18264 	for (i = 0; i <= old->curframe; i++) {
18265 		if (old->frame[i]->callsite != cur->frame[i]->callsite)
18266 			return false;
18267 		if (!func_states_equal(env, old->frame[i], cur->frame[i], exact))
18268 			return false;
18269 	}
18270 	return true;
18271 }
18272 
18273 /* Return 0 if no propagation happened. Return negative error code if error
18274  * happened. Otherwise, return the propagated bit.
18275  */
18276 static int propagate_liveness_reg(struct bpf_verifier_env *env,
18277 				  struct bpf_reg_state *reg,
18278 				  struct bpf_reg_state *parent_reg)
18279 {
18280 	u8 parent_flag = parent_reg->live & REG_LIVE_READ;
18281 	u8 flag = reg->live & REG_LIVE_READ;
18282 	int err;
18283 
18284 	/* When we get here, the read flags of PARENT_REG or REG could be any of
18285 	 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
18286 	 * for propagation if PARENT_REG already has the strongest REG_LIVE_READ64.
18287 	 */
18288 	if (parent_flag == REG_LIVE_READ64 ||
18289 	    /* Or if there is no read flag from REG. */
18290 	    !flag ||
18291 	    /* Or if the read flag from REG is the same as PARENT_REG. */
18292 	    parent_flag == flag)
18293 		return 0;
18294 
18295 	err = mark_reg_read(env, reg, parent_reg, flag);
18296 	if (err)
18297 		return err;
18298 
18299 	return flag;
18300 }
18301 
18302 /* A write screens off any subsequent reads; but write marks come from the
18303  * straight-line code between a state and its parent.  When we arrive at an
18304  * equivalent state (jump target or such) we didn't arrive by the straight-line
18305  * code, so read marks in the state must propagate to the parent regardless
18306  * of the state's write marks. That's what 'parent == state->parent' comparison
18307  * in mark_reg_read() is for.
18308  */
18309 static int propagate_liveness(struct bpf_verifier_env *env,
18310 			      const struct bpf_verifier_state *vstate,
18311 			      struct bpf_verifier_state *vparent)
18312 {
18313 	struct bpf_reg_state *state_reg, *parent_reg;
18314 	struct bpf_func_state *state, *parent;
18315 	int i, frame, err = 0;
18316 
18317 	if (vparent->curframe != vstate->curframe) {
18318 		WARN(1, "propagate_live: parent frame %d current frame %d\n",
18319 		     vparent->curframe, vstate->curframe);
18320 		return -EFAULT;
18321 	}
18322 	/* Propagate read liveness of registers... */
18323 	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
18324 	for (frame = 0; frame <= vstate->curframe; frame++) {
18325 		parent = vparent->frame[frame];
18326 		state = vstate->frame[frame];
18327 		parent_reg = parent->regs;
18328 		state_reg = state->regs;
18329 		/* We don't need to worry about FP liveness, it's read-only */
18330 		for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
18331 			err = propagate_liveness_reg(env, &state_reg[i],
18332 						     &parent_reg[i]);
18333 			if (err < 0)
18334 				return err;
18335 			if (err == REG_LIVE_READ64)
18336 				mark_insn_zext(env, &parent_reg[i]);
18337 		}
18338 
18339 		/* Propagate stack slots. */
18340 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
18341 			    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
18342 			parent_reg = &parent->stack[i].spilled_ptr;
18343 			state_reg = &state->stack[i].spilled_ptr;
18344 			err = propagate_liveness_reg(env, state_reg,
18345 						     parent_reg);
18346 			if (err < 0)
18347 				return err;
18348 		}
18349 	}
18350 	return 0;
18351 }
18352 
18353 /* find precise scalars in the previous equivalent state and
18354  * propagate them into the current state
18355  */
18356 static int propagate_precision(struct bpf_verifier_env *env,
18357 			       const struct bpf_verifier_state *old)
18358 {
18359 	struct bpf_reg_state *state_reg;
18360 	struct bpf_func_state *state;
18361 	int i, err = 0, fr;
18362 	bool first;
18363 
18364 	for (fr = old->curframe; fr >= 0; fr--) {
18365 		state = old->frame[fr];
18366 		state_reg = state->regs;
18367 		first = true;
18368 		for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
18369 			if (state_reg->type != SCALAR_VALUE ||
18370 			    !state_reg->precise ||
18371 			    !(state_reg->live & REG_LIVE_READ))
18372 				continue;
18373 			if (env->log.level & BPF_LOG_LEVEL2) {
18374 				if (first)
18375 					verbose(env, "frame %d: propagating r%d", fr, i);
18376 				else
18377 					verbose(env, ",r%d", i);
18378 			}
18379 			bt_set_frame_reg(&env->bt, fr, i);
18380 			first = false;
18381 		}
18382 
18383 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
18384 			if (!is_spilled_reg(&state->stack[i]))
18385 				continue;
18386 			state_reg = &state->stack[i].spilled_ptr;
18387 			if (state_reg->type != SCALAR_VALUE ||
18388 			    !state_reg->precise ||
18389 			    !(state_reg->live & REG_LIVE_READ))
18390 				continue;
18391 			if (env->log.level & BPF_LOG_LEVEL2) {
18392 				if (first)
18393 					verbose(env, "frame %d: propagating fp%d",
18394 						fr, (-i - 1) * BPF_REG_SIZE);
18395 				else
18396 					verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
18397 			}
18398 			bt_set_frame_slot(&env->bt, fr, i);
18399 			first = false;
18400 		}
18401 		if (!first)
18402 			verbose(env, "\n");
18403 	}
18404 
18405 	err = mark_chain_precision_batch(env);
18406 	if (err < 0)
18407 		return err;
18408 
18409 	return 0;
18410 }
18411 
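/* Cheap heuristic used by the infinite loop detector in is_state_visited():
 * two states might be looping if they are in the same frame and every
 * register of that frame compares byte-for-byte equal up to the 'parent'
 * field. Actual equivalence is then confirmed via states_equal(..., EXACT).
 */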
18412 static bool states_maybe_looping(struct bpf_verifier_state *old,
18413 				 struct bpf_verifier_state *cur)
18414 {
18415 	struct bpf_func_state *fold, *fcur;
18416 	int i, fr = cur->curframe;
18417 
18418 	if (old->curframe != fr)
18419 		return false;
18420 
18421 	fold = old->frame[fr];
18422 	fcur = cur->frame[fr];
18423 	for (i = 0; i < MAX_BPF_REG; i++)
18424 		if (memcmp(&fold->regs[i], &fcur->regs[i],
18425 			   offsetof(struct bpf_reg_state, parent)))
18426 			return false;
18427 	return true;
18428 }
18429 
18430 static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
18431 {
18432 	return env->insn_aux_data[insn_idx].is_iter_next;
18433 }
18434 
18435 /* is_state_visited() handles iter_next() (see process_iter_next_call() for
18436  * terminology) calls specially: as opposed to bounded BPF loops, it *expects*
18437  * states to match, which otherwise would look like an infinite loop. So while
18438  * iter_next() calls are taken care of, we still need to be careful and
18439  * prevent an erroneous and overly eager declaration of "infinite loop" when
18440  * iterators are involved.
18441  *
18442  * Here's a situation in pseudo-BPF assembly form:
18443  *
18444  *   0: again:                          ; set up iter_next() call args
18445  *   1:   r1 = &it                      ; <CHECKPOINT HERE>
18446  *   2:   call bpf_iter_num_next        ; this is iter_next() call
18447  *   3:   if r0 == 0 goto done
18448  *   4:   ... something useful here ...
18449  *   5:   goto again                    ; another iteration
18450  *   6: done:
18451  *   7:   r1 = &it
18452  *   8:   call bpf_iter_num_destroy     ; clean up iter state
18453  *   9:   exit
18454  *
18455  * This is a typical loop. Let's assume that we have a prune point at 1:,
18456  * before we get to `call bpf_iter_num_next` (e.g., because of that `goto
18457  * again`, assuming other heuristics don't get in a way).
18458  *
18459  * When we first come to 1:, let's say we have some state X. We proceed
18460  * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit.
18461  * Now we come back to validate that forked ACTIVE state. We proceed through
18462  * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we
18463  * are converging. But the problem is that we don't know that yet, as this
18464  * convergence has to happen at iter_next() call site only. So if nothing is
18465  * done, at 1: verifier will use bounded loop logic and declare infinite
18466  * looping (and would be *technically* correct, if not for iterator's
18467  * "eventual sticky NULL" contract, see process_iter_next_call()). But we
18468  * don't want that. So what we do in process_iter_next_call() when we go on
18469  * another ACTIVE iteration, we bump slot->iter.depth, to mark that it's
18470  * a different iteration. So when we suspect an infinite loop, we additionally
18471  * check if any of the *ACTIVE* iterator states depths differ. If yes, we
18472  * pretend we are not looping and wait for next iter_next() call.
18473  *
18474  * This only applies to ACTIVE state. In DRAINED state we don't expect to
18475  * loop, because that would actually mean infinite loop, as DRAINED state is
18476  * "sticky", and so we'll keep returning into the same instruction with the
18477  * same state (at least in one of possible code paths).
18478  *
18479  * This approach allows us to keep the infinite loop heuristic even in the face
18480  * of an active iterator. E.g., the C snippet below is and will be detected as
18481  * infinitely looping:
18482  *
18483  *   struct bpf_iter_num it;
18484  *   int *p, x;
18485  *
18486  *   bpf_iter_num_new(&it, 0, 10);
18487  *   while ((p = bpf_iter_num_next(&it))) {
18488  *       x = *p;
18489  *       while (x--) {} // <<-- infinite loop here
18490  *   }
18491  *
18492  */
18493 static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
18494 {
18495 	struct bpf_reg_state *slot, *cur_slot;
18496 	struct bpf_func_state *state;
18497 	int i, fr;
18498 
18499 	for (fr = old->curframe; fr >= 0; fr--) {
18500 		state = old->frame[fr];
18501 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
18502 			if (state->stack[i].slot_type[0] != STACK_ITER)
18503 				continue;
18504 
18505 			slot = &state->stack[i].spilled_ptr;
18506 			if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
18507 				continue;
18508 
18509 			cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
18510 			if (cur_slot->iter.depth != slot->iter.depth)
18511 				return true;
18512 		}
18513 	}
18514 	return false;
18515 }
18516 
18517 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
18518 {
18519 	struct bpf_verifier_state_list *new_sl;
18520 	struct bpf_verifier_state_list *sl, **pprev;
18521 	struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry;
18522 	int i, j, n, err, states_cnt = 0;
18523 	bool force_new_state, add_new_state, force_exact;
18524 
18525 	force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) ||
18526 			  /* Avoid accumulating infinitely long jmp history */
18527 			  cur->insn_hist_end - cur->insn_hist_start > 40;
18528 
18529 	/* bpf progs typically have a pruning point every 4 instructions
18530 	 * http://vger.kernel.org/bpfconf2019.html#session-1
18531 	 * Do not add new state for future pruning if the verifier hasn't seen
18532 	 * at least 2 jumps and at least 8 instructions.
18533 	 * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
18534 	 * In tests that amounts to up to a 50% reduction in total verifier
18535 	 * memory consumption and a 20% verifier time speedup.
18536 	 */
18537 	add_new_state = force_new_state;
18538 	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
18539 	    env->insn_processed - env->prev_insn_processed >= 8)
18540 		add_new_state = true;
18541 
18542 	pprev = explored_state(env, insn_idx);
18543 	sl = *pprev;
18544 
18545 	clean_live_states(env, insn_idx, cur);
18546 
18547 	while (sl) {
18548 		states_cnt++;
18549 		if (sl->state.insn_idx != insn_idx)
18550 			goto next;
18551 
18552 		if (sl->state.branches) {
18553 			struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
18554 
18555 			if (frame->in_async_callback_fn &&
18556 			    frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
18557 				/* Different async_entry_cnt means that the verifier is
18558 				 * processing another entry into async callback.
18559 				 * Seeing the same state is not an indication of infinite
18560 				 * loop or infinite recursion.
18561 				 * But finding the same state doesn't mean that it's safe
18562 				 * to stop processing the current state. The previous state
18563 				 * hasn't yet reached bpf_exit, since state.branches > 0.
18564 				 * Checking in_async_callback_fn alone is not enough either,
18565 				 * since the verifier still needs to catch infinite loops
18566 				 * inside async callbacks.
18567 				 */
18568 				goto skip_inf_loop_check;
18569 			}
18570 			/* BPF open-coded iterators loop detection is special.
18571 			 * states_maybe_looping() logic is too simplistic in detecting
18572 			 * states that *might* be equivalent, because it doesn't know
18573 			 * about ID remapping, so don't even perform it.
18574 			 * See process_iter_next_call() and iter_active_depths_differ()
18575 			 * for overview of the logic. When current and one of parent
18576 			 * states are detected as equivalent, it's a good thing: we prove
18577 			 * convergence and can stop simulating further iterations.
18578 			 * It's safe to assume that iterator loop will finish, taking into
18579 			 * account iter_next() contract of eventually returning
18580 			 * sticky NULL result.
18581 			 *
18582 			 * Note, that states have to be compared exactly in this case because
18583 			 * read and precision marks might not be finalized inside the loop.
18584 			 * E.g. as in the program below:
18585 			 *
18586 			 *     1. r7 = -16
18587 			 *     2. r6 = bpf_get_prandom_u32()
18588 			 *     3. while (bpf_iter_num_next(&fp[-8])) {
18589 			 *     4.   if (r6 != 42) {
18590 			 *     5.     r7 = -32
18591 			 *     6.     r6 = bpf_get_prandom_u32()
18592 			 *     7.     continue
18593 			 *     8.   }
18594 			 *     9.   r0 = r10
18595 			 *    10.   r0 += r7
18596 			 *    11.   r8 = *(u64 *)(r0 + 0)
18597 			 *    12.   r6 = bpf_get_prandom_u32()
18598 			 *    13. }
18599 			 *
18600 			 * Here verifier would first visit path 1-3, create a checkpoint at 3
18601 			 * with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does
18602 			 * not have read or precision mark for r7 yet, thus inexact states
18603 			 * comparison would discard current state with r7=-32
18604 			 * => unsafe memory access at 11 would not be caught.
18605 			 */
18606 			if (is_iter_next_insn(env, insn_idx)) {
18607 				if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
18608 					struct bpf_func_state *cur_frame;
18609 					struct bpf_reg_state *iter_state, *iter_reg;
18610 					int spi;
18611 
18612 					cur_frame = cur->frame[cur->curframe];
18613 					/* btf_check_iter_kfuncs() enforces that
18614 					 * iter state pointer is always the first arg
18615 					 */
18616 					iter_reg = &cur_frame->regs[BPF_REG_1];
18617 					/* current state is valid due to states_equal(),
18618 					 * so we can assume valid iter and reg state,
18619 					 * no need for extra (re-)validations
18620 					 */
18621 					spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
18622 					iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
18623 					if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
18624 						update_loop_entry(cur, &sl->state);
18625 						goto hit;
18626 					}
18627 				}
18628 				goto skip_inf_loop_check;
18629 			}
18630 			if (is_may_goto_insn_at(env, insn_idx)) {
18631 				if (sl->state.may_goto_depth != cur->may_goto_depth &&
18632 				    states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
18633 					update_loop_entry(cur, &sl->state);
18634 					goto hit;
18635 				}
18636 			}
18637 			if (calls_callback(env, insn_idx)) {
18638 				if (states_equal(env, &sl->state, cur, RANGE_WITHIN))
18639 					goto hit;
18640 				goto skip_inf_loop_check;
18641 			}
18642 			/* attempt to detect infinite loop to avoid unnecessary doomed work */
18643 			if (states_maybe_looping(&sl->state, cur) &&
18644 			    states_equal(env, &sl->state, cur, EXACT) &&
18645 			    !iter_active_depths_differ(&sl->state, cur) &&
18646 			    sl->state.may_goto_depth == cur->may_goto_depth &&
18647 			    sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
18648 				verbose_linfo(env, insn_idx, "; ");
18649 				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
18650 				verbose(env, "cur state:");
18651 				print_verifier_state(env, cur, cur->curframe, true);
18652 				verbose(env, "old state:");
18653 				print_verifier_state(env, &sl->state, cur->curframe, true);
18654 				return -EINVAL;
18655 			}
18656 			/* if the verifier is processing a loop, avoid adding new state
18657 			 * too often, since different loop iterations have distinct
18658 			 * states and may not help future pruning.
18659 			 * This threshold shouldn't be too low to make sure that
18660 			 * a loop with large bound will be rejected quickly.
18661 			 * The most abusive loop will be:
18662 			 * r1 += 1
18663 			 * if r1 < 1000000 goto pc-2
18664 			 * 1M insn_processed limit / 100 == 10k peak states.
18665 			 * This threshold shouldn't be too high either, since states
18666 			 * at the end of the loop are likely to be useful in pruning.
18667 			 */
18668 skip_inf_loop_check:
18669 			if (!force_new_state &&
18670 			    env->jmps_processed - env->prev_jmps_processed < 20 &&
18671 			    env->insn_processed - env->prev_insn_processed < 100)
18672 				add_new_state = false;
18673 			goto miss;
18674 		}
18675 		/* If sl->state is a part of a loop and this loop's entry is a part of
18676 		 * current verification path then states have to be compared exactly.
18677 		 * 'force_exact' is needed to catch the following case:
18678 		 *
18679 		 *                initial     Here state 'succ' was processed first,
18680 		 *                  |         it was eventually tracked to produce a
18681 		 *                  V         state identical to 'hdr'.
18682 		 *     .---------> hdr        All branches from 'succ' had been explored
18683 		 *     |            |         and thus 'succ' has its .branches == 0.
18684 		 *     |            V
18685 		 *     |    .------...        Suppose states 'cur' and 'succ' correspond
18686 		 *     |    |       |         to the same instruction + callsites.
18687 		 *     |    V       V         In such case it is necessary to check
18688 		 *     |   ...     ...        if 'succ' and 'cur' are states_equal().
18689 		 *     |    |       |         If 'succ' and 'cur' are a part of the
18690 		 *     |    V       V         same loop exact flag has to be set.
18691 		 *     |   succ <- cur        To check if that is the case, verify
18692 		 *     |    |                 if loop entry of 'succ' is in current
18693 		 *     |    V                 DFS path.
18694 		 *     |   ...
18695 		 *     |    |
18696 		 *     '----'
18697 		 *
18698 		 * Additional details are in the comment before get_loop_entry().
18699 		 */
18700 		loop_entry = get_loop_entry(&sl->state);
18701 		force_exact = loop_entry && loop_entry->branches > 0;
18702 		if (states_equal(env, &sl->state, cur, force_exact ? RANGE_WITHIN : NOT_EXACT)) {
18703 			if (force_exact)
18704 				update_loop_entry(cur, loop_entry);
18705 hit:
18706 			sl->hit_cnt++;
18707 			/* reached equivalent register/stack state,
18708 			 * prune the search.
18709 			 * Registers read by the continuation are read by us.
18710 			 * If we have any write marks in env->cur_state, they
18711 			 * will prevent corresponding reads in the continuation
18712 			 * from reaching our parent (an explored_state).  Our
18713 			 * own state will get the read marks recorded, but
18714 			 * they'll be immediately forgotten as we're pruning
18715 			 * this state and will pop a new one.
18716 			 */
18717 			err = propagate_liveness(env, &sl->state, cur);
18718 
18719 			/* if previous state reached the exit with precision and
18720 			 * current state is equivalent to it (except precision marks)
18721 			 * the precision needs to be propagated back in
18722 			 * the current state.
18723 			 */
18724 			if (is_jmp_point(env, env->insn_idx))
18725 				err = err ? : push_insn_history(env, cur, 0, 0);
18726 			err = err ? : propagate_precision(env, &sl->state);
18727 			if (err)
18728 				return err;
18729 			return 1;
18730 		}
18731 miss:
18732 		/* when a new state is not going to be added, do not increase the miss count.
18733 		 * Otherwise several loop iterations will remove the state
18734 		 * recorded earlier. The goal of these heuristics is to have
18735 		 * states from some iterations of the loop (some in the beginning
18736 		 * and some at the end) to help pruning.
18737 		 */
18738 		if (add_new_state)
18739 			sl->miss_cnt++;
18740 		/* heuristic to determine whether this state is beneficial
18741 		 * to keep checking from state equivalence point of view.
18742 		 * Higher numbers increase max_states_per_insn and verification time,
18743 		 * but do not meaningfully decrease insn_processed.
18744 		 * 'n' controls how many times state could miss before eviction.
18745 		 * Use bigger 'n' for checkpoints because evicting checkpoint states
18746 		 * too early would hinder iterator convergence.
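		 * For example, with n == 3 a state that was never hit is evicted
		 * on its 4th miss (miss_cnt reaches 4 > 0 * 3 + 3).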
18747 		 */
18748 		n = is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
18749 		if (sl->miss_cnt > sl->hit_cnt * n + n) {
18750 			/* the state is unlikely to be useful. Remove it to
18751 			 * speed up verification
18752 			 */
18753 			*pprev = sl->next;
18754 			if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE &&
18755 			    !sl->state.used_as_loop_entry) {
18756 				u32 br = sl->state.branches;
18757 
18758 				WARN_ONCE(br,
18759 					  "BUG live_done but branches_to_explore %d\n",
18760 					  br);
18761 				free_verifier_state(&sl->state, false);
18762 				kfree(sl);
18763 				env->peak_states--;
18764 			} else {
18765 				/* cannot free this state, since parentage chain may
18766 				 * walk it later. Add it to the free_list instead to
18767 				 * be freed at the end of verification
18768 				 */
18769 				sl->next = env->free_list;
18770 				env->free_list = sl;
18771 			}
18772 			sl = *pprev;
18773 			continue;
18774 		}
18775 next:
18776 		pprev = &sl->next;
18777 		sl = *pprev;
18778 	}
18779 
18780 	if (env->max_states_per_insn < states_cnt)
18781 		env->max_states_per_insn = states_cnt;
18782 
18783 	if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
18784 		return 0;
18785 
18786 	if (!add_new_state)
18787 		return 0;
18788 
18789 	/* There were no equivalent states, remember the current one.
18790 	 * Technically the current state is not proven to be safe yet,
18791 	 * but it will either reach outer most bpf_exit (which means it's safe)
18792 	 * or it will be rejected. When there are no loops the verifier won't be
18793 	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
18794 	 * again on the way to bpf_exit.
18795 	 * When looping the sl->state.branches will be > 0 and this state
18796 	 * will not be considered for equivalence until branches == 0.
18797 	 */
18798 	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
18799 	if (!new_sl)
18800 		return -ENOMEM;
18801 	env->total_states++;
18802 	env->peak_states++;
18803 	env->prev_jmps_processed = env->jmps_processed;
18804 	env->prev_insn_processed = env->insn_processed;
18805 
18806 	/* forget precise markings we inherited, see __mark_chain_precision */
18807 	if (env->bpf_capable)
18808 		mark_all_scalars_imprecise(env, cur);
18809 
18810 	/* add new state to the head of linked list */
18811 	new = &new_sl->state;
18812 	err = copy_verifier_state(new, cur);
18813 	if (err) {
18814 		free_verifier_state(new, false);
18815 		kfree(new_sl);
18816 		return err;
18817 	}
18818 	new->insn_idx = insn_idx;
18819 	WARN_ONCE(new->branches != 1,
18820 		  "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
18821 
18822 	cur->parent = new;
18823 	cur->first_insn_idx = insn_idx;
18824 	cur->insn_hist_start = cur->insn_hist_end;
18825 	cur->dfs_depth = new->dfs_depth + 1;
18826 	new_sl->next = *explored_state(env, insn_idx);
18827 	*explored_state(env, insn_idx) = new_sl;
18828 	/* connect new state to parentage chain. Current frame needs all
18829 	 * registers connected. Only r6 - r9 of the callers are alive (pushed
18830 	 * to the stack implicitly by JITs) so in callers' frames connect just
18831 	 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
18832 	 * the state of the call instruction (with WRITTEN set), and r0 comes
18833 	 * from callee with its full parentage chain, anyway.
18834 	 */
18835 	/* clear write marks in current state: the writes we did are not writes
18836 	 * our child did, so they don't screen off its reads from us.
18837 	 * (There are no read marks in current state, because reads always mark
18838 	 * their parent and current state never has children yet.  Only
18839 	 * explored_states can get read marks.)
18840 	 */
18841 	for (j = 0; j <= cur->curframe; j++) {
18842 		for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
18843 			cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
18844 		for (i = 0; i < BPF_REG_FP; i++)
18845 			cur->frame[j]->regs[i].live = REG_LIVE_NONE;
18846 	}
18847 
18848 	/* all stack frames are accessible from callee, clear them all */
18849 	for (j = 0; j <= cur->curframe; j++) {
18850 		struct bpf_func_state *frame = cur->frame[j];
18851 		struct bpf_func_state *newframe = new->frame[j];
18852 
18853 		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
18854 			frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
18855 			frame->stack[i].spilled_ptr.parent =
18856 						&newframe->stack[i].spilled_ptr;
18857 		}
18858 	}
18859 	return 0;
18860 }
18861 
18862 /* Return true if it's OK to have the same insn return a different type. */
18863 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
18864 {
18865 	switch (base_type(type)) {
18866 	case PTR_TO_CTX:
18867 	case PTR_TO_SOCKET:
18868 	case PTR_TO_SOCK_COMMON:
18869 	case PTR_TO_TCP_SOCK:
18870 	case PTR_TO_XDP_SOCK:
18871 	case PTR_TO_BTF_ID:
18872 	case PTR_TO_ARENA:
18873 		return false;
18874 	default:
18875 		return true;
18876 	}
18877 }
18878 
18879 /* If an instruction was previously used with particular pointer types, then we
18880  * need to be careful to avoid cases such as the below, where it may be ok
18881  * for one branch accessing the pointer, but not ok for the other branch:
18882  *
18883  * R1 = sock_ptr
18884  * goto X;
18885  * ...
18886  * R1 = some_other_valid_ptr;
18887  * goto X;
18888  * ...
18889  * R2 = *(u32 *)(R1 + 0);
18890  */
18891 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
18892 {
18893 	return src != prev && (!reg_type_mismatch_ok(src) ||
18894 			       !reg_type_mismatch_ok(prev));
18895 }
18896 
18897 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
18898 			     bool allow_trust_mismatch)
18899 {
18900 	enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
18901 
18902 	if (*prev_type == NOT_INIT) {
18903 		/* Saw a valid insn
18904 		 * dst_reg = *(u32 *)(src_reg + off)
18905 		 * save type to validate intersecting paths
18906 		 */
18907 		*prev_type = type;
18908 	} else if (reg_type_mismatch(type, *prev_type)) {
18909 		/* Abuser program is trying to use the same insn
18910 		 * dst_reg = *(u32*) (src_reg + off)
18911 		 * with different pointer types:
18912 		 * src_reg == ctx in one branch and
18913 		 * src_reg == stack|map in some other branch.
18914 		 * Reject it.
18915 		 */
18916 		if (allow_trust_mismatch &&
18917 		    base_type(type) == PTR_TO_BTF_ID &&
18918 		    base_type(*prev_type) == PTR_TO_BTF_ID) {
18919 			/*
18920 			 * Have to support a use case when one path through
18921 			 * the program yields a TRUSTED pointer while another
18922 			 * is UNTRUSTED. Fall back to UNTRUSTED to generate
18923 			 * BPF_PROBE_MEM/BPF_PROBE_MEMSX.
18924 			 */
18925 			*prev_type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
18926 		} else {
18927 			verbose(env, "same insn cannot be used with different pointers\n");
18928 			return -EINVAL;
18929 		}
18930 	}
18931 
18932 	return 0;
18933 }
18934 
18935 static int do_check(struct bpf_verifier_env *env)
18936 {
18937 	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
18938 	struct bpf_verifier_state *state = env->cur_state;
18939 	struct bpf_insn *insns = env->prog->insnsi;
18940 	struct bpf_reg_state *regs;
18941 	int insn_cnt = env->prog->len;
18942 	bool do_print_state = false;
18943 	int prev_insn_idx = -1;
18944 
18945 	for (;;) {
18946 		bool exception_exit = false;
18947 		struct bpf_insn *insn;
18948 		u8 class;
18949 		int err;
18950 
18951 		/* reset current history entry on each new instruction */
18952 		env->cur_hist_ent = NULL;
18953 
18954 		env->prev_insn_idx = prev_insn_idx;
18955 		if (env->insn_idx >= insn_cnt) {
18956 			verbose(env, "invalid insn idx %d insn_cnt %d\n",
18957 				env->insn_idx, insn_cnt);
18958 			return -EFAULT;
18959 		}
18960 
18961 		insn = &insns[env->insn_idx];
18962 		class = BPF_CLASS(insn->code);
18963 
18964 		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
18965 			verbose(env,
18966 				"BPF program is too large. Processed %d insn\n",
18967 				env->insn_processed);
18968 			return -E2BIG;
18969 		}
18970 
18971 		state->last_insn_idx = env->prev_insn_idx;
18972 
18973 		if (is_prune_point(env, env->insn_idx)) {
18974 			err = is_state_visited(env, env->insn_idx);
18975 			if (err < 0)
18976 				return err;
18977 			if (err == 1) {
18978 				/* found equivalent state, can prune the search */
18979 				if (env->log.level & BPF_LOG_LEVEL) {
18980 					if (do_print_state)
18981 						verbose(env, "\nfrom %d to %d%s: safe\n",
18982 							env->prev_insn_idx, env->insn_idx,
18983 							env->cur_state->speculative ?
18984 							" (speculative execution)" : "");
18985 					else
18986 						verbose(env, "%d: safe\n", env->insn_idx);
18987 				}
18988 				goto process_bpf_exit;
18989 			}
18990 		}
18991 
18992 		if (is_jmp_point(env, env->insn_idx)) {
18993 			err = push_insn_history(env, state, 0, 0);
18994 			if (err)
18995 				return err;
18996 		}
18997 
18998 		if (signal_pending(current))
18999 			return -EAGAIN;
19000 
19001 		if (need_resched())
19002 			cond_resched();
19003 
19004 		if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
19005 			verbose(env, "\nfrom %d to %d%s:",
19006 				env->prev_insn_idx, env->insn_idx,
19007 				env->cur_state->speculative ?
19008 				" (speculative execution)" : "");
19009 			print_verifier_state(env, state, state->curframe, true);
19010 			do_print_state = false;
19011 		}
19012 
19013 		if (env->log.level & BPF_LOG_LEVEL) {
19014 			const struct bpf_insn_cbs cbs = {
19015 				.cb_call	= disasm_kfunc_name,
19016 				.cb_print	= verbose,
19017 				.private_data	= env,
19018 			};
19019 
19020 			if (verifier_state_scratched(env))
19021 				print_insn_state(env, state, state->curframe);
19022 
19023 			verbose_linfo(env, env->insn_idx, "; ");
19024 			env->prev_log_pos = env->log.end_pos;
19025 			verbose(env, "%d: ", env->insn_idx);
19026 			print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
19027 			env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
19028 			env->prev_log_pos = env->log.end_pos;
19029 		}
19030 
19031 		if (bpf_prog_is_offloaded(env->prog->aux)) {
19032 			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
19033 							   env->prev_insn_idx);
19034 			if (err)
19035 				return err;
19036 		}
19037 
19038 		regs = cur_regs(env);
19039 		sanitize_mark_insn_seen(env);
19040 		prev_insn_idx = env->insn_idx;
19041 
19042 		if (class == BPF_ALU || class == BPF_ALU64) {
19043 			err = check_alu_op(env, insn);
19044 			if (err)
19045 				return err;
19046 
19047 		} else if (class == BPF_LDX) {
19048 			enum bpf_reg_type src_reg_type;
19049 
19050 			/* check for reserved fields is already done */
19051 
19052 			/* check src operand */
19053 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
19054 			if (err)
19055 				return err;
19056 
19057 			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
19058 			if (err)
19059 				return err;
19060 
19061 			src_reg_type = regs[insn->src_reg].type;
19062 
19063 			/* check that memory (src_reg + off) is readable,
19064 			 * the state of dst_reg will be updated by this func
19065 			 */
19066 			err = check_mem_access(env, env->insn_idx, insn->src_reg,
19067 					       insn->off, BPF_SIZE(insn->code),
19068 					       BPF_READ, insn->dst_reg, false,
19069 					       BPF_MODE(insn->code) == BPF_MEMSX);
19070 			err = err ?: save_aux_ptr_type(env, src_reg_type, true);
19071 			err = err ?: reg_bounds_sanity_check(env, &regs[insn->dst_reg], "ldx");
19072 			if (err)
19073 				return err;
19074 		} else if (class == BPF_STX) {
19075 			enum bpf_reg_type dst_reg_type;
19076 
19077 			if (BPF_MODE(insn->code) == BPF_ATOMIC) {
19078 				err = check_atomic(env, env->insn_idx, insn);
19079 				if (err)
19080 					return err;
19081 				env->insn_idx++;
19082 				continue;
19083 			}
19084 
19085 			if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
19086 				verbose(env, "BPF_STX uses reserved fields\n");
19087 				return -EINVAL;
19088 			}
19089 
19090 			/* check src1 operand */
19091 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
19092 			if (err)
19093 				return err;
19094 			/* check src2 operand */
19095 			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
19096 			if (err)
19097 				return err;
19098 
19099 			dst_reg_type = regs[insn->dst_reg].type;
19100 
19101 			/* check that memory (dst_reg + off) is writeable */
19102 			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
19103 					       insn->off, BPF_SIZE(insn->code),
19104 					       BPF_WRITE, insn->src_reg, false, false);
19105 			if (err)
19106 				return err;
19107 
19108 			err = save_aux_ptr_type(env, dst_reg_type, false);
19109 			if (err)
19110 				return err;
19111 		} else if (class == BPF_ST) {
19112 			enum bpf_reg_type dst_reg_type;
19113 
19114 			if (BPF_MODE(insn->code) != BPF_MEM ||
19115 			    insn->src_reg != BPF_REG_0) {
19116 				verbose(env, "BPF_ST uses reserved fields\n");
19117 				return -EINVAL;
19118 			}
19119 			/* check src operand */
19120 			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
19121 			if (err)
19122 				return err;
19123 
19124 			dst_reg_type = regs[insn->dst_reg].type;
19125 
19126 			/* check that memory (dst_reg + off) is writeable */
19127 			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
19128 					       insn->off, BPF_SIZE(insn->code),
19129 					       BPF_WRITE, -1, false, false);
19130 			if (err)
19131 				return err;
19132 
19133 			err = save_aux_ptr_type(env, dst_reg_type, false);
19134 			if (err)
19135 				return err;
19136 		} else if (class == BPF_JMP || class == BPF_JMP32) {
19137 			u8 opcode = BPF_OP(insn->code);
19138 
19139 			env->jmps_processed++;
19140 			if (opcode == BPF_CALL) {
19141 				if (BPF_SRC(insn->code) != BPF_K ||
19142 				    (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
19143 				     && insn->off != 0) ||
19144 				    (insn->src_reg != BPF_REG_0 &&
19145 				     insn->src_reg != BPF_PSEUDO_CALL &&
19146 				     insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
19147 				    insn->dst_reg != BPF_REG_0 ||
19148 				    class == BPF_JMP32) {
19149 					verbose(env, "BPF_CALL uses reserved fields\n");
19150 					return -EINVAL;
19151 				}
19152 
19153 				if (env->cur_state->active_locks) {
19154 					if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) ||
19155 					    (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
19156 					     (insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) {
19157 						verbose(env, "function calls are not allowed while holding a lock\n");
19158 						return -EINVAL;
19159 					}
19160 				}
19161 				if (insn->src_reg == BPF_PSEUDO_CALL) {
19162 					err = check_func_call(env, insn, &env->insn_idx);
19163 				} else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
19164 					err = check_kfunc_call(env, insn, &env->insn_idx);
19165 					if (!err && is_bpf_throw_kfunc(insn)) {
19166 						exception_exit = true;
19167 						goto process_bpf_exit_full;
19168 					}
19169 				} else {
19170 					err = check_helper_call(env, insn, &env->insn_idx);
19171 				}
19172 				if (err)
19173 					return err;
19174 
19175 				mark_reg_scratched(env, BPF_REG_0);
19176 			} else if (opcode == BPF_JA) {
19177 				if (BPF_SRC(insn->code) != BPF_K ||
19178 				    insn->src_reg != BPF_REG_0 ||
19179 				    insn->dst_reg != BPF_REG_0 ||
19180 				    (class == BPF_JMP && insn->imm != 0) ||
19181 				    (class == BPF_JMP32 && insn->off != 0)) {
19182 					verbose(env, "BPF_JA uses reserved fields\n");
19183 					return -EINVAL;
19184 				}
19185 
19186 				if (class == BPF_JMP)
19187 					env->insn_idx += insn->off + 1;
19188 				else
19189 					env->insn_idx += insn->imm + 1;
19190 				continue;
19191 
19192 			} else if (opcode == BPF_EXIT) {
19193 				if (BPF_SRC(insn->code) != BPF_K ||
19194 				    insn->imm != 0 ||
19195 				    insn->src_reg != BPF_REG_0 ||
19196 				    insn->dst_reg != BPF_REG_0 ||
19197 				    class == BPF_JMP32) {
19198 					verbose(env, "BPF_EXIT uses reserved fields\n");
19199 					return -EINVAL;
19200 				}
19201 process_bpf_exit_full:
19202 				/* We must do check_reference_leak here before
19203 				 * prepare_func_exit to handle the case when
19204 				 * state->curframe > 0: it may be a callback
19205 				 * function, for which reference_state must
19206 				 * match caller reference state when it exits.
19207 				 */
19208 				err = check_resource_leak(env, exception_exit, !env->cur_state->curframe,
19209 							  "BPF_EXIT instruction in main prog");
19210 				if (err)
19211 					return err;
19212 
19213 				/* The side effect of the prepare_func_exit
19214 				 * which is being skipped is that it frees
19215 				 * bpf_func_state. Typically, process_bpf_exit
19216 				 * will only be hit with outermost exit.
19217 				 * copy_verifier_state in pop_stack will handle
19218 				 * freeing of any extra bpf_func_state left over
19219 				 * from not processing all nested function
19220 				 * exits. We also skip return code checks as
19221 				 * they are not needed for exceptional exits.
19222 				 */
19223 				if (exception_exit)
19224 					goto process_bpf_exit;
19225 
19226 				if (state->curframe) {
19227 					/* exit from nested function */
19228 					err = prepare_func_exit(env, &env->insn_idx);
19229 					if (err)
19230 						return err;
19231 					do_print_state = true;
19232 					continue;
19233 				}
19234 
19235 				err = check_return_code(env, BPF_REG_0, "R0");
19236 				if (err)
19237 					return err;
19238 process_bpf_exit:
19239 				mark_verifier_state_scratched(env);
19240 				update_branch_counts(env, env->cur_state);
19241 				err = pop_stack(env, &prev_insn_idx,
19242 						&env->insn_idx, pop_log);
19243 				if (err < 0) {
19244 					if (err != -ENOENT)
19245 						return err;
19246 					break;
19247 				} else {
19248 					do_print_state = true;
19249 					continue;
19250 				}
19251 			} else {
19252 				err = check_cond_jmp_op(env, insn, &env->insn_idx);
19253 				if (err)
19254 					return err;
19255 			}
19256 		} else if (class == BPF_LD) {
19257 			u8 mode = BPF_MODE(insn->code);
19258 
19259 			if (mode == BPF_ABS || mode == BPF_IND) {
19260 				err = check_ld_abs(env, insn);
19261 				if (err)
19262 					return err;
19263 
19264 			} else if (mode == BPF_IMM) {
19265 				err = check_ld_imm(env, insn);
19266 				if (err)
19267 					return err;
19268 
19269 				env->insn_idx++;
19270 				sanitize_mark_insn_seen(env);
19271 			} else {
19272 				verbose(env, "invalid BPF_LD mode\n");
19273 				return -EINVAL;
19274 			}
19275 		} else {
19276 			verbose(env, "unknown insn class %d\n", class);
19277 			return -EINVAL;
19278 		}
19279 
19280 		env->insn_idx++;
19281 	}
19282 
19283 	return 0;
19284 }
19285 
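/* Scan the given BTF for the ".data..percpu" DATASEC and return its type id,
 * or -ENOENT if no such section exists.
 */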
19286 static int find_btf_percpu_datasec(struct btf *btf)
19287 {
19288 	const struct btf_type *t;
19289 	const char *tname;
19290 	int i, n;
19291 
19292 	/*
19293 	 * Both vmlinux and each module have their own ".data..percpu"
19294 	 * DATASEC in BTF. So for a module, we need to skip the vmlinux BTF
19295 	 * types and look only at the module's own BTF types.
19296 	 */
19297 	n = btf_nr_types(btf);
19298 	if (btf_is_module(btf))
19299 		i = btf_nr_types(btf_vmlinux);
19300 	else
19301 		i = 1;
19302 
19303 	for (; i < n; i++) {
19304 		t = btf_type_by_id(btf, i);
19305 		if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
19306 			continue;
19307 
19308 		tname = btf_name_by_offset(btf, t->name_off);
19309 		if (!strcmp(tname, ".data..percpu"))
19310 			return i;
19311 	}
19312 
19313 	return -ENOENT;
19314 }
19315 
19316 /*
19317  * Add btf to the used_btfs array and return the index. (If the btf was
19318  * already added, then just return the index.) Upon successful insertion
19319  * increase btf refcnt, and, if present, also refcount the corresponding
19320  * kernel module.
19321  */
19322 static int __add_used_btf(struct bpf_verifier_env *env, struct btf *btf)
19323 {
19324 	struct btf_mod_pair *btf_mod;
19325 	int i;
19326 
19327 	/* check whether we recorded this BTF (and maybe module) already */
19328 	for (i = 0; i < env->used_btf_cnt; i++)
19329 		if (env->used_btfs[i].btf == btf)
19330 			return i;
19331 
19332 	if (env->used_btf_cnt >= MAX_USED_BTFS)
19333 		return -E2BIG;
19334 
19335 	btf_get(btf);
19336 
19337 	btf_mod = &env->used_btfs[env->used_btf_cnt];
19338 	btf_mod->btf = btf;
19339 	btf_mod->module = NULL;
19340 
19341 	/* if we reference variables from a kernel module, bump its refcount */
19342 	if (btf_is_module(btf)) {
19343 		btf_mod->module = btf_try_get_module(btf);
19344 		if (!btf_mod->module) {
19345 			btf_put(btf);
19346 			return -ENXIO;
19347 		}
19348 	}
19349 
19350 	return env->used_btf_cnt++;
19351 }
19352 
19353 /* replace pseudo btf_id with kernel symbol address */
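/* Illustrative sketch (not from the original source, assuming the usual
 * include/linux/filter.h macros): the loader side typically emits
 *
 *     BPF_LD_IMM64_RAW(BPF_REG_2, BPF_PSEUDO_BTF_ID, ksym_btf_id)
 *
 * and this pass overwrites insn[0].imm/insn[1].imm with the low/high 32 bits
 * of the address returned by kallsyms_lookup_name() for that symbol.
 */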
19354 static int __check_pseudo_btf_id(struct bpf_verifier_env *env,
19355 				 struct bpf_insn *insn,
19356 				 struct bpf_insn_aux_data *aux,
19357 				 struct btf *btf)
19358 {
19359 	const struct btf_var_secinfo *vsi;
19360 	const struct btf_type *datasec;
19361 	const struct btf_type *t;
19362 	const char *sym_name;
19363 	bool percpu = false;
19364 	u32 type, id = insn->imm;
19365 	s32 datasec_id;
19366 	u64 addr;
19367 	int i;
19368 
19369 	t = btf_type_by_id(btf, id);
19370 	if (!t) {
19371 		verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
19372 		return -ENOENT;
19373 	}
19374 
19375 	if (!btf_type_is_var(t) && !btf_type_is_func(t)) {
19376 		verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id);
19377 		return -EINVAL;
19378 	}
19379 
19380 	sym_name = btf_name_by_offset(btf, t->name_off);
19381 	addr = kallsyms_lookup_name(sym_name);
19382 	if (!addr) {
19383 		verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
19384 			sym_name);
19385 		return -ENOENT;
19386 	}
19387 	insn[0].imm = (u32)addr;
19388 	insn[1].imm = addr >> 32;
19389 
19390 	if (btf_type_is_func(t)) {
19391 		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
19392 		aux->btf_var.mem_size = 0;
19393 		return 0;
19394 	}
19395 
19396 	datasec_id = find_btf_percpu_datasec(btf);
19397 	if (datasec_id > 0) {
19398 		datasec = btf_type_by_id(btf, datasec_id);
19399 		for_each_vsi(i, datasec, vsi) {
19400 			if (vsi->type == id) {
19401 				percpu = true;
19402 				break;
19403 			}
19404 		}
19405 	}
19406 
19407 	type = t->type;
19408 	t = btf_type_skip_modifiers(btf, type, NULL);
19409 	if (percpu) {
19410 		aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
19411 		aux->btf_var.btf = btf;
19412 		aux->btf_var.btf_id = type;
19413 	} else if (!btf_type_is_struct(t)) {
19414 		const struct btf_type *ret;
19415 		const char *tname;
19416 		u32 tsize;
19417 
19418 		/* resolve the type size of ksym. */
19419 		ret = btf_resolve_size(btf, t, &tsize);
19420 		if (IS_ERR(ret)) {
19421 			tname = btf_name_by_offset(btf, t->name_off);
19422 			verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
19423 				tname, PTR_ERR(ret));
19424 			return -EINVAL;
19425 		}
19426 		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
19427 		aux->btf_var.mem_size = tsize;
19428 	} else {
19429 		aux->btf_var.reg_type = PTR_TO_BTF_ID;
19430 		aux->btf_var.btf = btf;
19431 		aux->btf_var.btf_id = type;
19432 	}
19433 
19434 	return 0;
19435 }
19436 
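/* Handle a ldimm64 with src_reg == BPF_PSEUDO_BTF_ID: pick the BTF object to
 * resolve against (the module BTF referenced by the FD in insn[1].imm, or
 * vmlinux BTF when that FD is zero), resolve the ksym and record the BTF in
 * the used_btfs array.
 */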
19437 static int check_pseudo_btf_id(struct bpf_verifier_env *env,
19438 			       struct bpf_insn *insn,
19439 			       struct bpf_insn_aux_data *aux)
19440 {
19441 	struct btf *btf;
19442 	int btf_fd;
19443 	int err;
19444 
19445 	btf_fd = insn[1].imm;
19446 	if (btf_fd) {
19447 		CLASS(fd, f)(btf_fd);
19448 
19449 		btf = __btf_get_by_fd(f);
19450 		if (IS_ERR(btf)) {
19451 			verbose(env, "invalid module BTF object FD specified.\n");
19452 			return -EINVAL;
19453 		}
19454 	} else {
19455 		if (!btf_vmlinux) {
19456 			verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
19457 			return -EINVAL;
19458 		}
19459 		btf = btf_vmlinux;
19460 	}
19461 
19462 	err = __check_pseudo_btf_id(env, insn, aux, btf);
19463 	if (err)
19464 		return err;
19465 
19466 	err = __add_used_btf(env, btf);
19467 	if (err < 0)
19468 		return err;
19469 	return 0;
19470 }
19471 
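/* Program types attached to tracing hooks (kprobe, tracepoint, perf event,
 * raw tracepoint); several map features below are restricted for them.
 */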
19472 static bool is_tracing_prog_type(enum bpf_prog_type type)
19473 {
19474 	switch (type) {
19475 	case BPF_PROG_TYPE_KPROBE:
19476 	case BPF_PROG_TYPE_TRACEPOINT:
19477 	case BPF_PROG_TYPE_PERF_EVENT:
19478 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
19479 	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
19480 		return true;
19481 	default:
19482 		return false;
19483 	}
19484 }
19485 
19486 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
19487 {
19488 	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
19489 		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
19490 }
19491 
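/* Reject program/map combinations that are not supported: special btf_record
 * fields (bpf_list_head/bpf_rb_root, bpf_spin_lock, bpf_timer, bpf_wq) used
 * from tracing or socket filter progs, offload mismatches, struct_ops maps,
 * map types not allowed in sleepable progs, duplicate cgroup storage and
 * arena constraints.
 */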
19492 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
19493 					struct bpf_map *map,
19494 					struct bpf_prog *prog)
19495 
19496 {
19497 	enum bpf_prog_type prog_type = resolve_prog_type(prog);
19498 
19499 	if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
19500 	    btf_record_has_field(map->record, BPF_RB_ROOT)) {
19501 		if (is_tracing_prog_type(prog_type)) {
19502 			verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
19503 			return -EINVAL;
19504 		}
19505 	}
19506 
19507 	if (btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
19508 		if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
19509 			verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
19510 			return -EINVAL;
19511 		}
19512 
19513 		if (is_tracing_prog_type(prog_type)) {
19514 			verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
19515 			return -EINVAL;
19516 		}
19517 	}
19518 
19519 	if (btf_record_has_field(map->record, BPF_TIMER)) {
19520 		if (is_tracing_prog_type(prog_type)) {
19521 			verbose(env, "tracing progs cannot use bpf_timer yet\n");
19522 			return -EINVAL;
19523 		}
19524 	}
19525 
19526 	if (btf_record_has_field(map->record, BPF_WORKQUEUE)) {
19527 		if (is_tracing_prog_type(prog_type)) {
19528 			verbose(env, "tracing progs cannot use bpf_wq yet\n");
19529 			return -EINVAL;
19530 		}
19531 	}
19532 
19533 	if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) &&
19534 	    !bpf_offload_prog_map_match(prog, map)) {
19535 		verbose(env, "offload device mismatch between prog and map\n");
19536 		return -EINVAL;
19537 	}
19538 
19539 	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
19540 		verbose(env, "bpf_struct_ops map cannot be used in prog\n");
19541 		return -EINVAL;
19542 	}
19543 
19544 	if (prog->sleepable)
19545 		switch (map->map_type) {
19546 		case BPF_MAP_TYPE_HASH:
19547 		case BPF_MAP_TYPE_LRU_HASH:
19548 		case BPF_MAP_TYPE_ARRAY:
19549 		case BPF_MAP_TYPE_PERCPU_HASH:
19550 		case BPF_MAP_TYPE_PERCPU_ARRAY:
19551 		case BPF_MAP_TYPE_LRU_PERCPU_HASH:
19552 		case BPF_MAP_TYPE_ARRAY_OF_MAPS:
19553 		case BPF_MAP_TYPE_HASH_OF_MAPS:
19554 		case BPF_MAP_TYPE_RINGBUF:
19555 		case BPF_MAP_TYPE_USER_RINGBUF:
19556 		case BPF_MAP_TYPE_INODE_STORAGE:
19557 		case BPF_MAP_TYPE_SK_STORAGE:
19558 		case BPF_MAP_TYPE_TASK_STORAGE:
19559 		case BPF_MAP_TYPE_CGRP_STORAGE:
19560 		case BPF_MAP_TYPE_QUEUE:
19561 		case BPF_MAP_TYPE_STACK:
19562 		case BPF_MAP_TYPE_ARENA:
19563 			break;
19564 		default:
19565 			verbose(env,
19566 				"Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
19567 			return -EINVAL;
19568 		}
19569 
19570 	if (bpf_map_is_cgroup_storage(map) &&
19571 	    bpf_cgroup_storage_assign(env->prog->aux, map)) {
19572 		verbose(env, "only one cgroup storage of each type is allowed\n");
19573 		return -EBUSY;
19574 	}
19575 
19576 	if (map->map_type == BPF_MAP_TYPE_ARENA) {
19577 		if (env->prog->aux->arena) {
19578 			verbose(env, "Only one arena per program\n");
19579 			return -EBUSY;
19580 		}
19581 		if (!env->allow_ptr_leaks || !env->bpf_capable) {
19582 			verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n");
19583 			return -EPERM;
19584 		}
19585 		if (!env->prog->jit_requested) {
19586 			verbose(env, "JIT is required to use arena\n");
19587 			return -EOPNOTSUPP;
19588 		}
19589 		if (!bpf_jit_supports_arena()) {
19590 			verbose(env, "JIT doesn't support arena\n");
19591 			return -EOPNOTSUPP;
19592 		}
19593 		env->prog->aux->arena = (void *)map;
19594 		if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
19595 			verbose(env, "arena's user address must be set via map_extra or mmap()\n");
19596 			return -EINVAL;
19597 		}
19598 	}
19599 
19600 	return 0;
19601 }
19602 
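/* Add the map to the used_maps array and return its index; if the map was
 * already recorded, return the existing index.  On success a reference on
 * the map is taken (plus sleepable_refcnt for sleepable programs).
 */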
19603 static int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map)
19604 {
19605 	int i, err;
19606 
19607 	/* check whether we recorded this map already */
19608 	for (i = 0; i < env->used_map_cnt; i++)
19609 		if (env->used_maps[i] == map)
19610 			return i;
19611 
19612 	if (env->used_map_cnt >= MAX_USED_MAPS) {
19613 		verbose(env, "The total number of maps per program has reached the limit of %u\n",
19614 			MAX_USED_MAPS);
19615 		return -E2BIG;
19616 	}
19617 
19618 	err = check_map_prog_compatibility(env, map, env->prog);
19619 	if (err)
19620 		return err;
19621 
19622 	if (env->prog->sleepable)
19623 		atomic64_inc(&map->sleepable_refcnt);
19624 
19625 	/* Hold the map. If the program is rejected by the verifier,
19626 	 * the map will be released by release_maps(); otherwise it
19627 	 * will be used by the valid program until it's unloaded
19628 	 * and all maps are released in bpf_free_used_maps().
19629 	 */
19630 	bpf_map_inc(map);
19631 
19632 	env->used_maps[env->used_map_cnt++] = map;
19633 
19634 	return env->used_map_cnt - 1;
19635 }
19636 
19637 /* Add map behind fd to used maps list, if it's not already there, and return
19638  * its index.
19639  * Returns <0 on error, or >= 0 index, on success.
19640  */
19641 static int add_used_map(struct bpf_verifier_env *env, int fd)
19642 {
19643 	struct bpf_map *map;
19644 	CLASS(fd, f)(fd);
19645 
19646 	map = __bpf_map_get(f);
19647 	if (IS_ERR(map)) {
19648 		verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
19649 		return PTR_ERR(map);
19650 	}
19651 
19652 	return __add_used_map(env, map);
19653 }
19654 
19655 /* find and rewrite pseudo imm in ld_imm64 instructions:
19656  *
19657  * 1. if it accesses map FD, replace it with actual map pointer.
19658  * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
19659  *
19660  * NOTE: btf_vmlinux is required for converting pseudo btf_id.
19661  */
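/* Illustrative sketch (not from the original source, assuming the usual
 * include/linux/filter.h macros): a loader emits
 *
 *     BPF_LD_MAP_FD(BPF_REG_1, map_fd)
 *
 * i.e. a two-insn ld_imm64 with src_reg == BPF_PSEUDO_MAP_FD; after this pass
 * insn[0].imm/insn[1].imm hold the low/high 32 bits of the struct bpf_map
 * pointer instead of the user-supplied FD.
 */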
19662 static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
19663 {
19664 	struct bpf_insn *insn = env->prog->insnsi;
19665 	int insn_cnt = env->prog->len;
19666 	int i, err;
19667 
19668 	err = bpf_prog_calc_tag(env->prog);
19669 	if (err)
19670 		return err;
19671 
19672 	for (i = 0; i < insn_cnt; i++, insn++) {
19673 		if (BPF_CLASS(insn->code) == BPF_LDX &&
19674 		    ((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_MEMSX) ||
19675 		    insn->imm != 0)) {
19676 			verbose(env, "BPF_LDX uses reserved fields\n");
19677 			return -EINVAL;
19678 		}
19679 
19680 		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
19681 			struct bpf_insn_aux_data *aux;
19682 			struct bpf_map *map;
19683 			int map_idx;
19684 			u64 addr;
19685 			u32 fd;
19686 
19687 			if (i == insn_cnt - 1 || insn[1].code != 0 ||
19688 			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
19689 			    insn[1].off != 0) {
19690 				verbose(env, "invalid bpf_ld_imm64 insn\n");
19691 				return -EINVAL;
19692 			}
19693 
19694 			if (insn[0].src_reg == 0)
19695 				/* valid generic load 64-bit imm */
19696 				goto next_insn;
19697 
19698 			if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
19699 				aux = &env->insn_aux_data[i];
19700 				err = check_pseudo_btf_id(env, insn, aux);
19701 				if (err)
19702 					return err;
19703 				goto next_insn;
19704 			}
19705 
19706 			if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
19707 				aux = &env->insn_aux_data[i];
19708 				aux->ptr_type = PTR_TO_FUNC;
19709 				goto next_insn;
19710 			}
19711 
19712 			/* In final convert_pseudo_ld_imm64() step, this is
19713 			 * converted into regular 64-bit imm load insn.
19714 			 */
19715 			switch (insn[0].src_reg) {
19716 			case BPF_PSEUDO_MAP_VALUE:
19717 			case BPF_PSEUDO_MAP_IDX_VALUE:
19718 				break;
19719 			case BPF_PSEUDO_MAP_FD:
19720 			case BPF_PSEUDO_MAP_IDX:
19721 				if (insn[1].imm == 0)
19722 					break;
19723 				fallthrough;
19724 			default:
19725 				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
19726 				return -EINVAL;
19727 			}
19728 
19729 			switch (insn[0].src_reg) {
19730 			case BPF_PSEUDO_MAP_IDX_VALUE:
19731 			case BPF_PSEUDO_MAP_IDX:
19732 				if (bpfptr_is_null(env->fd_array)) {
19733 					verbose(env, "fd_idx without fd_array is invalid\n");
19734 					return -EPROTO;
19735 				}
19736 				if (copy_from_bpfptr_offset(&fd, env->fd_array,
19737 							    insn[0].imm * sizeof(fd),
19738 							    sizeof(fd)))
19739 					return -EFAULT;
19740 				break;
19741 			default:
19742 				fd = insn[0].imm;
19743 				break;
19744 			}
19745 
19746 			map_idx = add_used_map(env, fd);
19747 			if (map_idx < 0)
19748 				return map_idx;
19749 			map = env->used_maps[map_idx];
19750 
19751 			aux = &env->insn_aux_data[i];
19752 			aux->map_index = map_idx;
19753 
19754 			if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
19755 			    insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
19756 				addr = (unsigned long)map;
19757 			} else {
19758 				u32 off = insn[1].imm;
19759 
19760 				if (off >= BPF_MAX_VAR_OFF) {
19761 					verbose(env, "direct value offset of %u is not allowed\n", off);
19762 					return -EINVAL;
19763 				}
19764 
19765 				if (!map->ops->map_direct_value_addr) {
19766 					verbose(env, "no direct value access support for this map type\n");
19767 					return -EINVAL;
19768 				}
19769 
19770 				err = map->ops->map_direct_value_addr(map, &addr, off);
19771 				if (err) {
19772 					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
19773 						map->value_size, off);
19774 					return err;
19775 				}
19776 
19777 				aux->map_off = off;
19778 				addr += off;
19779 			}
19780 
19781 			insn[0].imm = (u32)addr;
19782 			insn[1].imm = addr >> 32;
19783 
19784 next_insn:
19785 			insn++;
19786 			i++;
19787 			continue;
19788 		}
19789 
19790 		/* Basic sanity check before we invest more work here. */
19791 		if (!bpf_opcode_in_insntable(insn->code)) {
19792 			verbose(env, "unknown opcode %02x\n", insn->code);
19793 			return -EINVAL;
19794 		}
19795 	}
19796 
19797 	/* now all pseudo BPF_LD_IMM64 instructions load valid
19798 	 * 'struct bpf_map *' into a register instead of user map_fd.
19799 	 * These pointers will be used later by verifier to validate map access.
19800 	 */
19801 	return 0;
19802 }
19803 
19804 /* drop refcnt of maps used by the rejected program */
19805 static void release_maps(struct bpf_verifier_env *env)
19806 {
19807 	__bpf_free_used_maps(env->prog->aux, env->used_maps,
19808 			     env->used_map_cnt);
19809 }
19810 
19811 /* drop refcnt of btfs used by the rejected program */
19812 static void release_btfs(struct bpf_verifier_env *env)
19813 {
19814 	__bpf_free_used_btfs(env->used_btfs, env->used_btf_cnt);
19815 }
19816 
19817 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
19818 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
19819 {
19820 	struct bpf_insn *insn = env->prog->insnsi;
19821 	int insn_cnt = env->prog->len;
19822 	int i;
19823 
19824 	for (i = 0; i < insn_cnt; i++, insn++) {
19825 		if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
19826 			continue;
19827 		if (insn->src_reg == BPF_PSEUDO_FUNC)
19828 			continue;
19829 		insn->src_reg = 0;
19830 	}
19831 }
19832 
19833 /* single env->prog->insnsi[off] instruction was replaced with the range
19834  * insnsi[off, off + cnt).  Adjust corresponding insn_aux_data by copying
19835  * [0, off) and [off, end) to new locations, so the patched range stays zero
19836  */
19837 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
19838 				 struct bpf_insn_aux_data *new_data,
19839 				 struct bpf_prog *new_prog, u32 off, u32 cnt)
19840 {
19841 	struct bpf_insn_aux_data *old_data = env->insn_aux_data;
19842 	struct bpf_insn *insn = new_prog->insnsi;
19843 	u32 old_seen = old_data[off].seen;
19844 	u32 prog_len;
19845 	int i;
19846 
19847 	/* aux info at OFF always needs adjustment, no matter whether the fast
19848 	 * path (cnt == 1) is taken or not. There is no guarantee that the insn
19849 	 * at OFF is the original insn of the old prog.
19850 	 */
19851 	old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
19852 
19853 	if (cnt == 1)
19854 		return;
19855 	prog_len = new_prog->len;
19856 
19857 	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
19858 	memcpy(new_data + off + cnt - 1, old_data + off,
19859 	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
19860 	for (i = off; i < off + cnt - 1; i++) {
19861 		/* Expand insni[off]'s seen count to the patched range. */
19862 		new_data[i].seen = old_seen;
19863 		new_data[i].zext_dst = insn_has_def32(env, insn + i);
19864 	}
19865 	env->insn_aux_data = new_data;
19866 	vfree(old_data);
19867 }
19868 
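/* After patching 'len' insns at 'off', shift the start of every subprog
 * (including the fake 'exit' subprog) that begins after 'off'.
 */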
19869 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
19870 {
19871 	int i;
19872 
19873 	if (len == 1)
19874 		return;
19875 	/* NOTE: fake 'exit' subprog should be updated as well. */
19876 	for (i = 0; i <= env->subprog_cnt; i++) {
19877 		if (env->subprog_info[i].start <= off)
19878 			continue;
19879 		env->subprog_info[i].start += len - 1;
19880 	}
19881 }
19882 
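/* Same adjustment for tail-call poke descriptors: move the insn_idx of every
 * descriptor that points past the patched location.
 */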
19883 static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
19884 {
19885 	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
19886 	int i, sz = prog->aux->size_poke_tab;
19887 	struct bpf_jit_poke_descriptor *desc;
19888 
19889 	for (i = 0; i < sz; i++) {
19890 		desc = &tab[i];
19891 		if (desc->insn_idx <= off)
19892 			continue;
19893 		desc->insn_idx += len - 1;
19894 	}
19895 }
19896 
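/* Patch 'len' instructions at 'off' and keep insn_aux_data, subprog starts
 * and poke descriptors in sync with the new instruction indices.
 */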
19897 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
19898 					    const struct bpf_insn *patch, u32 len)
19899 {
19900 	struct bpf_prog *new_prog;
19901 	struct bpf_insn_aux_data *new_data = NULL;
19902 
19903 	if (len > 1) {
19904 		new_data = vzalloc(array_size(env->prog->len + len - 1,
19905 					      sizeof(struct bpf_insn_aux_data)));
19906 		if (!new_data)
19907 			return NULL;
19908 	}
19909 
19910 	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
19911 	if (IS_ERR(new_prog)) {
19912 		if (PTR_ERR(new_prog) == -ERANGE)
19913 			verbose(env,
19914 				"insn %d cannot be patched due to 16-bit range\n",
19915 				env->insn_aux_data[off].orig_idx);
19916 		vfree(new_data);
19917 		return NULL;
19918 	}
19919 	adjust_insn_aux_data(env, new_data, new_prog, off, len);
19920 	adjust_subprog_starts(env, off, len);
19921 	adjust_poke_descs(new_prog, off, len);
19922 	return new_prog;
19923 }
19924 
19925 /*
19926  * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
19927  * jump offset by 'delta'.
19928  */
19929 static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
19930 {
19931 	struct bpf_insn *insn = prog->insnsi;
19932 	u32 insn_cnt = prog->len, i;
19933 	s32 imm;
19934 	s16 off;
19935 
19936 	for (i = 0; i < insn_cnt; i++, insn++) {
19937 		u8 code = insn->code;
19938 
19939 		if (tgt_idx <= i && i < tgt_idx + delta)
19940 			continue;
19941 
19942 		if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
19943 		    BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
19944 			continue;
19945 
19946 		if (insn->code == (BPF_JMP32 | BPF_JA)) {
19947 			if (i + 1 + insn->imm != tgt_idx)
19948 				continue;
19949 			if (check_add_overflow(insn->imm, delta, &imm))
19950 				return -ERANGE;
19951 			insn->imm = imm;
19952 		} else {
19953 			if (i + 1 + insn->off != tgt_idx)
19954 				continue;
19955 			if (check_add_overflow(insn->off, delta, &off))
19956 				return -ERANGE;
19957 			insn->off = off;
19958 		}
19959 	}
19960 	return 0;
19961 }
19962 
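/* After removing 'cnt' insns at 'off', drop subprogs (and their func_info)
 * that were fully contained in the removed range and shift the starts of the
 * remaining ones.
 */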
19963 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
19964 					      u32 off, u32 cnt)
19965 {
19966 	int i, j;
19967 
19968 	/* find first prog starting at or after off (first to remove) */
19969 	for (i = 0; i < env->subprog_cnt; i++)
19970 		if (env->subprog_info[i].start >= off)
19971 			break;
19972 	/* find first prog starting at or after off + cnt (first to stay) */
19973 	for (j = i; j < env->subprog_cnt; j++)
19974 		if (env->subprog_info[j].start >= off + cnt)
19975 			break;
19976 	/* if j doesn't start exactly at off + cnt, we are just removing
19977 	 * the front of previous prog
19978 	 */
19979 	if (env->subprog_info[j].start != off + cnt)
19980 		j--;
19981 
19982 	if (j > i) {
19983 		struct bpf_prog_aux *aux = env->prog->aux;
19984 		int move;
19985 
19986 		/* move fake 'exit' subprog as well */
19987 		move = env->subprog_cnt + 1 - j;
19988 
19989 		memmove(env->subprog_info + i,
19990 			env->subprog_info + j,
19991 			sizeof(*env->subprog_info) * move);
19992 		env->subprog_cnt -= j - i;
19993 
19994 		/* remove func_info */
19995 		if (aux->func_info) {
19996 			move = aux->func_info_cnt - j;
19997 
19998 			memmove(aux->func_info + i,
19999 				aux->func_info + j,
20000 				sizeof(*aux->func_info) * move);
20001 			aux->func_info_cnt -= j - i;
20002 			/* func_info->insn_off is set after all code rewrites,
20003 			 * in adjust_btf_func() - no need to adjust
20004 			 */
20005 		}
20006 	} else {
20007 		/* convert i from "first prog to remove" to "first to adjust" */
20008 		if (env->subprog_info[i].start == off)
20009 			i++;
20010 	}
20011 
20012 	/* update fake 'exit' subprog as well */
20013 	for (; i <= env->subprog_cnt; i++)
20014 		env->subprog_info[i].start -= cnt;
20015 
20016 	return 0;
20017 }
20018 
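/* After removing 'cnt' insns at 'off', drop line info entries covering the
 * removed range, shift the remaining insn_off values back by 'cnt' and fix up
 * the subprogs' linfo_idx.
 */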
20019 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
20020 				      u32 cnt)
20021 {
20022 	struct bpf_prog *prog = env->prog;
20023 	u32 i, l_off, l_cnt, nr_linfo;
20024 	struct bpf_line_info *linfo;
20025 
20026 	nr_linfo = prog->aux->nr_linfo;
20027 	if (!nr_linfo)
20028 		return 0;
20029 
20030 	linfo = prog->aux->linfo;
20031 
20032 	/* find first line info to remove, count lines to be removed */
20033 	for (i = 0; i < nr_linfo; i++)
20034 		if (linfo[i].insn_off >= off)
20035 			break;
20036 
20037 	l_off = i;
20038 	l_cnt = 0;
20039 	for (; i < nr_linfo; i++)
20040 		if (linfo[i].insn_off < off + cnt)
20041 			l_cnt++;
20042 		else
20043 			break;
20044 
20045 	/* If the first live insn doesn't match the first live linfo, it needs to
20046 	 * "inherit" the last removed linfo.  prog is already modified, so prog->len
20047 	 * == off means there are no live instructions after it (the tail was removed).
20048 	 */
20049 	if (prog->len != off && l_cnt &&
20050 	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
20051 		l_cnt--;
20052 		linfo[--i].insn_off = off + cnt;
20053 	}
20054 
20055 	/* remove the line info which refer to the removed instructions */
20056 	if (l_cnt) {
20057 		memmove(linfo + l_off, linfo + i,
20058 			sizeof(*linfo) * (nr_linfo - i));
20059 
20060 		prog->aux->nr_linfo -= l_cnt;
20061 		nr_linfo = prog->aux->nr_linfo;
20062 	}
20063 
20064 	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
20065 	for (i = l_off; i < nr_linfo; i++)
20066 		linfo[i].insn_off -= cnt;
20067 
20068 	/* fix up all subprogs (incl. 'exit') which start >= off */
20069 	for (i = 0; i <= env->subprog_cnt; i++)
20070 		if (env->subprog_info[i].linfo_idx > l_off) {
20071 			/* program may have started in the removed region but
20072 			 * may not be fully removed
20073 			 */
20074 			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
20075 				env->subprog_info[i].linfo_idx -= l_cnt;
20076 			else
20077 				env->subprog_info[i].linfo_idx = l_off;
20078 		}
20079 
20080 	return 0;
20081 }
20082 
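/* Remove 'cnt' instructions at 'off' and keep subprog starts, line info and
 * insn_aux_data consistent with the shortened program.
 */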
20083 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
20084 {
20085 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
20086 	unsigned int orig_prog_len = env->prog->len;
20087 	int err;
20088 
20089 	if (bpf_prog_is_offloaded(env->prog->aux))
20090 		bpf_prog_offload_remove_insns(env, off, cnt);
20091 
20092 	err = bpf_remove_insns(env->prog, off, cnt);
20093 	if (err)
20094 		return err;
20095 
20096 	err = adjust_subprog_starts_after_remove(env, off, cnt);
20097 	if (err)
20098 		return err;
20099 
20100 	err = bpf_adj_linfo_after_remove(env, off, cnt);
20101 	if (err)
20102 		return err;
20103 
20104 	memmove(aux_data + off,	aux_data + off + cnt,
20105 		sizeof(*aux_data) * (orig_prog_len - off - cnt));
20106 
20107 	return 0;
20108 }
20109 
20110 /* The verifier does more data flow analysis than llvm and will not
20111  * explore branches that are dead at run time. Malicious programs can
20112  * have dead code too. Therefore replace all dead at-run-time code
20113  * with 'ja -1'.
20114  *
20115  * Plain nops are not optimal: e.g. if they sat at the end of the
20116  * program and, through another bug, we managed to jump there, we
20117  * would execute beyond program memory. Returning an exception
20118  * code also wouldn't work, since the dead code could be located
20119  * in subprogs.
20120  */
20121 static void sanitize_dead_code(struct bpf_verifier_env *env)
20122 {
20123 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
20124 	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
20125 	struct bpf_insn *insn = env->prog->insnsi;
20126 	const int insn_cnt = env->prog->len;
20127 	int i;
20128 
20129 	for (i = 0; i < insn_cnt; i++) {
20130 		if (aux_data[i].seen)
20131 			continue;
20132 		memcpy(insn + i, &trap, sizeof(trap));
20133 		aux_data[i].zext_dst = false;
20134 	}
20135 }
20136 
20137 static bool insn_is_cond_jump(u8 code)
20138 {
20139 	u8 op;
20140 
20141 	op = BPF_OP(code);
20142 	if (BPF_CLASS(code) == BPF_JMP32)
20143 		return op != BPF_JA;
20144 
20145 	if (BPF_CLASS(code) != BPF_JMP)
20146 		return false;
20147 
20148 	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
20149 }
20150 
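/* Turn conditional jumps whose taken or fall-through successor was never
 * visited by the verifier into unconditional jumps to the live successor.
 */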
20151 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
20152 {
20153 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
20154 	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
20155 	struct bpf_insn *insn = env->prog->insnsi;
20156 	const int insn_cnt = env->prog->len;
20157 	int i;
20158 
20159 	for (i = 0; i < insn_cnt; i++, insn++) {
20160 		if (!insn_is_cond_jump(insn->code))
20161 			continue;
20162 
20163 		if (!aux_data[i + 1].seen)
20164 			ja.off = insn->off;
20165 		else if (!aux_data[i + 1 + insn->off].seen)
20166 			ja.off = 0;
20167 		else
20168 			continue;
20169 
20170 		if (bpf_prog_is_offloaded(env->prog->aux))
20171 			bpf_prog_offload_replace_insn(env, i, &ja);
20172 
20173 		memcpy(insn, &ja, sizeof(ja));
20174 	}
20175 }
20176 
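/* Remove maximal runs of instructions that the verifier never marked as seen. */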
20177 static int opt_remove_dead_code(struct bpf_verifier_env *env)
20178 {
20179 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
20180 	int insn_cnt = env->prog->len;
20181 	int i, err;
20182 
20183 	for (i = 0; i < insn_cnt; i++) {
20184 		int j;
20185 
20186 		j = 0;
20187 		while (i + j < insn_cnt && !aux_data[i + j].seen)
20188 			j++;
20189 		if (!j)
20190 			continue;
20191 
20192 		err = verifier_remove_insns(env, i, j);
20193 		if (err)
20194 			return err;
20195 		insn_cnt = env->prog->len;
20196 	}
20197 
20198 	return 0;
20199 }
20200 
20201 static const struct bpf_insn NOP = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
20202 static const struct bpf_insn MAY_GOTO_0 = BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0, 0);
20203 
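/* Remove 'ja +0' and 'may_goto +0' instructions, which are no-ops. */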
20204 static int opt_remove_nops(struct bpf_verifier_env *env)
20205 {
20206 	struct bpf_insn *insn = env->prog->insnsi;
20207 	int insn_cnt = env->prog->len;
20208 	bool is_may_goto_0, is_ja;
20209 	int i, err;
20210 
20211 	for (i = 0; i < insn_cnt; i++) {
20212 		is_may_goto_0 = !memcmp(&insn[i], &MAY_GOTO_0, sizeof(MAY_GOTO_0));
20213 		is_ja = !memcmp(&insn[i], &NOP, sizeof(NOP));
20214 
20215 		if (!is_may_goto_0 && !is_ja)
20216 			continue;
20217 
20218 		err = verifier_remove_insns(env, i, 1);
20219 		if (err)
20220 			return err;
20221 		insn_cnt--;
20222 		/* Go back one insn to catch may_goto +1; may_goto +0 sequence */
20223 		i -= (is_may_goto_0 && i > 0) ? 2 : 1;
20224 	}
20225 
20226 	return 0;
20227 }
20228 
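/* For instructions that define a 32-bit subregister, insert an explicit
 * zero-extension when the JIT requires it (or for BPF_CMPXCHG).  With
 * BPF_F_TEST_RND_HI32, poison the upper 32 bits of other 32-bit definitions
 * with a random value to flush out programs relying on implicit
 * zero-extension.
 */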
20229 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
20230 					 const union bpf_attr *attr)
20231 {
20232 	struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
20233 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
20234 	int i, patch_len, delta = 0, len = env->prog->len;
20235 	struct bpf_insn *insns = env->prog->insnsi;
20236 	struct bpf_prog *new_prog;
20237 	bool rnd_hi32;
20238 
20239 	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
20240 	zext_patch[1] = BPF_ZEXT_REG(0);
20241 	rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
20242 	rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
20243 	rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
20244 	for (i = 0; i < len; i++) {
20245 		int adj_idx = i + delta;
20246 		struct bpf_insn insn;
20247 		int load_reg;
20248 
20249 		insn = insns[adj_idx];
20250 		load_reg = insn_def_regno(&insn);
20251 		if (!aux[adj_idx].zext_dst) {
20252 			u8 code, class;
20253 			u32 imm_rnd;
20254 
20255 			if (!rnd_hi32)
20256 				continue;
20257 
20258 			code = insn.code;
20259 			class = BPF_CLASS(code);
20260 			if (load_reg == -1)
20261 				continue;
20262 
20263 			/* NOTE: arg "reg" (the fourth one) is only used for
20264 			 *       BPF_STX + SRC_OP, so it is safe to pass NULL
20265 			 *       here.
20266 			 */
20267 			if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
20268 				if (class == BPF_LD &&
20269 				    BPF_MODE(code) == BPF_IMM)
20270 					i++;
20271 				continue;
20272 			}
20273 
20274 			/* ctx load could be transformed into wider load. */
20275 			if (class == BPF_LDX &&
20276 			    aux[adj_idx].ptr_type == PTR_TO_CTX)
20277 				continue;
20278 
20279 			imm_rnd = get_random_u32();
20280 			rnd_hi32_patch[0] = insn;
20281 			rnd_hi32_patch[1].imm = imm_rnd;
20282 			rnd_hi32_patch[3].dst_reg = load_reg;
20283 			patch = rnd_hi32_patch;
20284 			patch_len = 4;
20285 			goto apply_patch_buffer;
20286 		}
20287 
20288 		/* Add in a zero-extend instruction if a) the JIT has requested
20289 		 * it or b) it's a CMPXCHG.
20290 		 *
20291 		 * The latter is because: BPF_CMPXCHG always loads a value into
20292 		 * R0, therefore always zero-extends. However some archs'
20293 		 * equivalent instruction only does this load when the
20294 		 * comparison is successful. This detail of CMPXCHG is
20295 		 * orthogonal to the general zero-extension behaviour of the
20296 		 * CPU, so it's treated independently of bpf_jit_needs_zext.
20297 		 */
20298 		if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
20299 			continue;
20300 
20301 		/* Zero-extension is done by the caller. */
20302 		if (bpf_pseudo_kfunc_call(&insn))
20303 			continue;
20304 
20305 		if (WARN_ON(load_reg == -1)) {
20306 			verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
20307 			return -EFAULT;
20308 		}
20309 
20310 		zext_patch[0] = insn;
20311 		zext_patch[1].dst_reg = load_reg;
20312 		zext_patch[1].src_reg = load_reg;
20313 		patch = zext_patch;
20314 		patch_len = 2;
20315 apply_patch_buffer:
20316 		new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
20317 		if (!new_prog)
20318 			return -ENOMEM;
20319 		env->prog = new_prog;
20320 		insns = new_prog->insnsi;
20321 		aux = env->insn_aux_data;
20322 		delta += patch_len - 1;
20323 	}
20324 
20325 	return 0;
20326 }
20327 
20328 /* convert load instructions that access fields of a context type into a
20329  * sequence of instructions that access fields of the underlying structure:
20330  *     struct __sk_buff    -> struct sk_buff
20331  *     struct bpf_sock_ops -> struct sock
20332  */
20333 static int convert_ctx_accesses(struct bpf_verifier_env *env)
20334 {
20335 	struct bpf_subprog_info *subprogs = env->subprog_info;
20336 	const struct bpf_verifier_ops *ops = env->ops;
20337 	int i, cnt, size, ctx_field_size, delta = 0, epilogue_cnt = 0;
20338 	const int insn_cnt = env->prog->len;
20339 	struct bpf_insn *epilogue_buf = env->epilogue_buf;
20340 	struct bpf_insn *insn_buf = env->insn_buf;
20341 	struct bpf_insn *insn;
20342 	u32 target_size, size_default, off;
20343 	struct bpf_prog *new_prog;
20344 	enum bpf_access_type type;
20345 	bool is_narrower_load;
20346 	int epilogue_idx = 0;
20347 
20348 	if (ops->gen_epilogue) {
20349 		epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
20350 						 -(subprogs[0].stack_depth + 8));
20351 		if (epilogue_cnt >= INSN_BUF_SIZE) {
20352 			verbose(env, "bpf verifier is misconfigured\n");
20353 			return -EINVAL;
20354 		} else if (epilogue_cnt) {
20355 			/* Save the ARG_PTR_TO_CTX for the epilogue to use */
20356 			cnt = 0;
20357 			subprogs[0].stack_depth += 8;
20358 			insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1,
20359 						      -subprogs[0].stack_depth);
20360 			insn_buf[cnt++] = env->prog->insnsi[0];
20361 			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
20362 			if (!new_prog)
20363 				return -ENOMEM;
20364 			env->prog = new_prog;
20365 			delta += cnt - 1;
20366 		}
20367 	}
20368 
20369 	if (ops->gen_prologue || env->seen_direct_write) {
20370 		if (!ops->gen_prologue) {
20371 			verbose(env, "bpf verifier is misconfigured\n");
20372 			return -EINVAL;
20373 		}
20374 		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
20375 					env->prog);
20376 		if (cnt >= INSN_BUF_SIZE) {
20377 			verbose(env, "bpf verifier is misconfigured\n");
20378 			return -EINVAL;
20379 		} else if (cnt) {
20380 			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
20381 			if (!new_prog)
20382 				return -ENOMEM;
20383 
20384 			env->prog = new_prog;
20385 			delta += cnt - 1;
20386 		}
20387 	}
20388 
20389 	if (delta)
20390 		WARN_ON(adjust_jmp_off(env->prog, 0, delta));
20391 
20392 	if (bpf_prog_is_offloaded(env->prog->aux))
20393 		return 0;
20394 
20395 	insn = env->prog->insnsi + delta;
20396 
20397 	for (i = 0; i < insn_cnt; i++, insn++) {
20398 		bpf_convert_ctx_access_t convert_ctx_access;
20399 		u8 mode;
20400 
20401 		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
20402 		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
20403 		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
20404 		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW) ||
20405 		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_B) ||
20406 		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_H) ||
20407 		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_W)) {
20408 			type = BPF_READ;
20409 		} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
20410 			   insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
20411 			   insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
20412 			   insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
20413 			   insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
20414 			   insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
20415 			   insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
20416 			   insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
20417 			type = BPF_WRITE;
20418 		} else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
20419 			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
20420 			   env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
20421 			insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
20422 			env->prog->aux->num_exentries++;
20423 			continue;
20424 		} else if (insn->code == (BPF_JMP | BPF_EXIT) &&
20425 			   epilogue_cnt &&
20426 			   i + delta < subprogs[1].start) {
20427 			/* Generate epilogue for the main prog */
20428 			if (epilogue_idx) {
20429 				/* jump back to the earlier generated epilogue */
20430 				insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1);
20431 				cnt = 1;
20432 			} else {
20433 				memcpy(insn_buf, epilogue_buf,
20434 				       epilogue_cnt * sizeof(*epilogue_buf));
20435 				cnt = epilogue_cnt;
20436 				/* epilogue_idx cannot be 0. It must have at
20437 				 * least one ctx ptr saving insn before the
20438 				 * epilogue.
20439 				 */
20440 				epilogue_idx = i + delta;
20441 			}
20442 			goto patch_insn_buf;
20443 		} else {
20444 			continue;
20445 		}
20446 
20447 		if (type == BPF_WRITE &&
20448 		    env->insn_aux_data[i + delta].sanitize_stack_spill) {
20449 			struct bpf_insn patch[] = {
20450 				*insn,
20451 				BPF_ST_NOSPEC(),
20452 			};
20453 
20454 			cnt = ARRAY_SIZE(patch);
20455 			new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
20456 			if (!new_prog)
20457 				return -ENOMEM;
20458 
20459 			delta    += cnt - 1;
20460 			env->prog = new_prog;
20461 			insn      = new_prog->insnsi + i + delta;
20462 			continue;
20463 		}
20464 
20465 		switch ((int)env->insn_aux_data[i + delta].ptr_type) {
20466 		case PTR_TO_CTX:
20467 			if (!ops->convert_ctx_access)
20468 				continue;
20469 			convert_ctx_access = ops->convert_ctx_access;
20470 			break;
20471 		case PTR_TO_SOCKET:
20472 		case PTR_TO_SOCK_COMMON:
20473 			convert_ctx_access = bpf_sock_convert_ctx_access;
20474 			break;
20475 		case PTR_TO_TCP_SOCK:
20476 			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
20477 			break;
20478 		case PTR_TO_XDP_SOCK:
20479 			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
20480 			break;
20481 		case PTR_TO_BTF_ID:
20482 		case PTR_TO_BTF_ID | PTR_UNTRUSTED:
20483 		/* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
20484 		 * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
20485 		 * be said once it is marked PTR_UNTRUSTED, hence we must handle
20486 		 * any faults for loads into such types. BPF_WRITE is disallowed
20487 		 * for this case.
20488 		 */
20489 		case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
20490 			if (type == BPF_READ) {
20491 				if (BPF_MODE(insn->code) == BPF_MEM)
20492 					insn->code = BPF_LDX | BPF_PROBE_MEM |
20493 						     BPF_SIZE((insn)->code);
20494 				else
20495 					insn->code = BPF_LDX | BPF_PROBE_MEMSX |
20496 						     BPF_SIZE((insn)->code);
20497 				env->prog->aux->num_exentries++;
20498 			}
20499 			continue;
20500 		case PTR_TO_ARENA:
20501 			if (BPF_MODE(insn->code) == BPF_MEMSX) {
20502 				verbose(env, "sign extending loads from arena are not supported yet\n");
20503 				return -EOPNOTSUPP;
20504 			}
20505 			insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
20506 			env->prog->aux->num_exentries++;
20507 			continue;
20508 		default:
20509 			continue;
20510 		}
20511 
20512 		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
20513 		size = BPF_LDST_BYTES(insn);
20514 		mode = BPF_MODE(insn->code);
20515 
20516 		/* If the read access is a narrower load of the field,
20517 		 * convert to a 4/8-byte load, to minimize program type specific
20518 		 * convert_ctx_access changes. If conversion is successful,
20519 		 * we will apply proper mask to the result.
20520 		 */
20521 		is_narrower_load = size < ctx_field_size;
20522 		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
20523 		off = insn->off;
20524 		if (is_narrower_load) {
20525 			u8 size_code;
20526 
20527 			if (type == BPF_WRITE) {
20528 				verbose(env, "bpf verifier narrow ctx access misconfigured\n");
20529 				return -EINVAL;
20530 			}
20531 
20532 			size_code = BPF_H;
20533 			if (ctx_field_size == 4)
20534 				size_code = BPF_W;
20535 			else if (ctx_field_size == 8)
20536 				size_code = BPF_DW;
20537 
20538 			insn->off = off & ~(size_default - 1);
20539 			insn->code = BPF_LDX | BPF_MEM | size_code;
20540 		}
20541 
20542 		target_size = 0;
20543 		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
20544 					 &target_size);
20545 		if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
20546 		    (ctx_field_size && !target_size)) {
20547 			verbose(env, "bpf verifier is misconfigured\n");
20548 			return -EINVAL;
20549 		}
20550 
20551 		if (is_narrower_load && size < target_size) {
20552 			u8 shift = bpf_ctx_narrow_access_offset(
20553 				off, size, size_default) * 8;
20554 			if (shift && cnt + 1 >= INSN_BUF_SIZE) {
20555 				verbose(env, "bpf verifier narrow ctx load misconfigured\n");
20556 				return -EINVAL;
20557 			}
20558 			if (ctx_field_size <= 4) {
20559 				if (shift)
20560 					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
20561 									insn->dst_reg,
20562 									shift);
20563 				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
20564 								(1 << size * 8) - 1);
20565 			} else {
20566 				if (shift)
20567 					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
20568 									insn->dst_reg,
20569 									shift);
20570 				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
20571 								(1ULL << size * 8) - 1);
20572 			}
20573 		}
20574 		if (mode == BPF_MEMSX)
20575 			insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X,
20576 						       insn->dst_reg, insn->dst_reg,
20577 						       size * 8, 0);
20578 
20579 patch_insn_buf:
20580 		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
20581 		if (!new_prog)
20582 			return -ENOMEM;
20583 
20584 		delta += cnt - 1;
20585 
20586 		/* keep walking new program and skip insns we just inserted */
20587 		env->prog = new_prog;
20588 		insn      = new_prog->insnsi + i + delta;
20589 	}
20590 
20591 	return 0;
20592 }
20593 
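/* Split the program into one bpf_prog per subprogram, JIT each of them and
 * rewrite bpf-to-bpf calls to target the JITed images.  On failure the
 * instruction stream is restored so the program can still fall back to the
 * interpreter (unless a hard -EFAULT was hit).
 */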
20594 static int jit_subprogs(struct bpf_verifier_env *env)
20595 {
20596 	struct bpf_prog *prog = env->prog, **func, *tmp;
20597 	int i, j, subprog_start, subprog_end = 0, len, subprog;
20598 	struct bpf_map *map_ptr;
20599 	struct bpf_insn *insn;
20600 	void *old_bpf_func;
20601 	int err, num_exentries;
20602 
20603 	if (env->subprog_cnt <= 1)
20604 		return 0;
20605 
20606 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
20607 		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
20608 			continue;
20609 
20610 		/* Upon error here we cannot fall back to interpreter but
20611 		 * need a hard reject of the program. Thus -EFAULT is
20612 		 * propagated in any case.
20613 		 */
20614 		subprog = find_subprog(env, i + insn->imm + 1);
20615 		if (subprog < 0) {
20616 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
20617 				  i + insn->imm + 1);
20618 			return -EFAULT;
20619 		}
20620 		/* temporarily remember subprog id inside insn instead of
20621 		 * aux_data, since next loop will split up all insns into funcs
20622 		 */
20623 		insn->off = subprog;
20624 		/* remember original imm in case JIT fails and fallback
20625 		 * to interpreter will be needed
20626 		 */
20627 		env->insn_aux_data[i].call_imm = insn->imm;
20628 		/* point imm to __bpf_call_base+1 from JITs point of view */
20629 		insn->imm = 1;
20630 		if (bpf_pseudo_func(insn)) {
20631 #if defined(MODULES_VADDR)
20632 			u64 addr = MODULES_VADDR;
20633 #else
20634 			u64 addr = VMALLOC_START;
20635 #endif
20636 			/* jit (e.g. x86_64) may emit fewer instructions
20637 			 * if it learns a u32 imm is the same as a u64 imm.
20638 			 * Set close enough to possible prog address.
20639 			 */
20640 			insn[0].imm = (u32)addr;
20641 			insn[1].imm = addr >> 32;
20642 		}
20643 	}
20644 
20645 	err = bpf_prog_alloc_jited_linfo(prog);
20646 	if (err)
20647 		goto out_undo_insn;
20648 
20649 	err = -ENOMEM;
20650 	func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
20651 	if (!func)
20652 		goto out_undo_insn;
20653 
20654 	for (i = 0; i < env->subprog_cnt; i++) {
20655 		subprog_start = subprog_end;
20656 		subprog_end = env->subprog_info[i + 1].start;
20657 
20658 		len = subprog_end - subprog_start;
20659 		/* bpf_prog_run() doesn't call subprogs directly,
20660 		 * hence main prog stats include the runtime of subprogs.
20661 		 * subprogs don't have IDs and are not reachable via prog_get_next_id;
20662 		 * func[i]->stats will never be accessed and stays NULL
20663 		 */
20664 		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
20665 		if (!func[i])
20666 			goto out_free;
20667 		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
20668 		       len * sizeof(struct bpf_insn));
20669 		func[i]->type = prog->type;
20670 		func[i]->len = len;
20671 		if (bpf_prog_calc_tag(func[i]))
20672 			goto out_free;
20673 		func[i]->is_func = 1;
20674 		func[i]->sleepable = prog->sleepable;
20675 		func[i]->aux->func_idx = i;
20676 		/* Below members will be freed only at prog->aux */
20677 		func[i]->aux->btf = prog->aux->btf;
20678 		func[i]->aux->func_info = prog->aux->func_info;
20679 		func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
20680 		func[i]->aux->poke_tab = prog->aux->poke_tab;
20681 		func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
20682 
20683 		for (j = 0; j < prog->aux->size_poke_tab; j++) {
20684 			struct bpf_jit_poke_descriptor *poke;
20685 
20686 			poke = &prog->aux->poke_tab[j];
20687 			if (poke->insn_idx < subprog_end &&
20688 			    poke->insn_idx >= subprog_start)
20689 				poke->aux = func[i]->aux;
20690 		}
20691 
20692 		func[i]->aux->name[0] = 'F';
20693 		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
20694 		if (env->subprog_info[i].priv_stack_mode == PRIV_STACK_ADAPTIVE)
20695 			func[i]->aux->jits_use_priv_stack = true;
20696 
20697 		func[i]->jit_requested = 1;
20698 		func[i]->blinding_requested = prog->blinding_requested;
20699 		func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
20700 		func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
20701 		func[i]->aux->linfo = prog->aux->linfo;
20702 		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
20703 		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
20704 		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
20705 		func[i]->aux->arena = prog->aux->arena;
20706 		num_exentries = 0;
20707 		insn = func[i]->insnsi;
20708 		for (j = 0; j < func[i]->len; j++, insn++) {
20709 			if (BPF_CLASS(insn->code) == BPF_LDX &&
20710 			    (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
20711 			     BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
20712 			     BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
20713 				num_exentries++;
20714 			if ((BPF_CLASS(insn->code) == BPF_STX ||
20715 			     BPF_CLASS(insn->code) == BPF_ST) &&
20716 			     BPF_MODE(insn->code) == BPF_PROBE_MEM32)
20717 				num_exentries++;
20718 			if (BPF_CLASS(insn->code) == BPF_STX &&
20719 			     BPF_MODE(insn->code) == BPF_PROBE_ATOMIC)
20720 				num_exentries++;
20721 		}
20722 		func[i]->aux->num_exentries = num_exentries;
20723 		func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
20724 		func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
20725 		func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data;
20726 		if (!i)
20727 			func[i]->aux->exception_boundary = env->seen_exception;
20728 		func[i] = bpf_int_jit_compile(func[i]);
20729 		if (!func[i]->jited) {
20730 			err = -ENOTSUPP;
20731 			goto out_free;
20732 		}
20733 		cond_resched();
20734 	}
20735 
20736 	/* at this point all bpf functions were successfully JITed
20737 	 * now populate all bpf_calls with correct addresses and
20738 	 * run last pass of JIT
20739 	 */
20740 	for (i = 0; i < env->subprog_cnt; i++) {
20741 		insn = func[i]->insnsi;
20742 		for (j = 0; j < func[i]->len; j++, insn++) {
20743 			if (bpf_pseudo_func(insn)) {
20744 				subprog = insn->off;
20745 				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
20746 				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
20747 				continue;
20748 			}
20749 			if (!bpf_pseudo_call(insn))
20750 				continue;
20751 			subprog = insn->off;
20752 			insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
20753 		}
20754 
20755 		/* we use the aux data to keep a list of the start addresses
20756 		 * of the JITed images for each function in the program
20757 		 *
20758 		 * for some architectures, such as powerpc64, the imm field
20759 		 * might not be large enough to hold the offset of the start
20760 		 * address of the callee's JITed image from __bpf_call_base
20761 		 *
20762 		 * in such cases, we can look up the start address of a callee
20763 		 * by using its subprog id, available from the off field of
20764 		 * the call instruction, as an index for this list
20765 		 */
20766 		func[i]->aux->func = func;
20767 		func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
20768 		func[i]->aux->real_func_cnt = env->subprog_cnt;
20769 	}
20770 	for (i = 0; i < env->subprog_cnt; i++) {
20771 		old_bpf_func = func[i]->bpf_func;
20772 		tmp = bpf_int_jit_compile(func[i]);
20773 		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
20774 			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
20775 			err = -ENOTSUPP;
20776 			goto out_free;
20777 		}
20778 		cond_resched();
20779 	}
20780 
20781 	/* finally lock prog and jit images for all functions and
20782 	 * populate kallsyms. Begin at the first subprogram, since
20783 	 * bpf_prog_load will add the kallsyms for the main program.
20784 	 */
20785 	for (i = 1; i < env->subprog_cnt; i++) {
20786 		err = bpf_prog_lock_ro(func[i]);
20787 		if (err)
20788 			goto out_free;
20789 	}
20790 
20791 	for (i = 1; i < env->subprog_cnt; i++)
20792 		bpf_prog_kallsyms_add(func[i]);
20793 
20794 	/* Last step: make now unused interpreter insns from main
20795 	 * prog consistent for later dump requests, so they can
20796 	 * later look the same as if they were interpreted only.
20797 	 */
20798 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
20799 		if (bpf_pseudo_func(insn)) {
20800 			insn[0].imm = env->insn_aux_data[i].call_imm;
20801 			insn[1].imm = insn->off;
20802 			insn->off = 0;
20803 			continue;
20804 		}
20805 		if (!bpf_pseudo_call(insn))
20806 			continue;
20807 		insn->off = env->insn_aux_data[i].call_imm;
20808 		subprog = find_subprog(env, i + insn->off + 1);
20809 		insn->imm = subprog;
20810 	}
20811 
20812 	prog->jited = 1;
20813 	prog->bpf_func = func[0]->bpf_func;
20814 	prog->jited_len = func[0]->jited_len;
20815 	prog->aux->extable = func[0]->aux->extable;
20816 	prog->aux->num_exentries = func[0]->aux->num_exentries;
20817 	prog->aux->func = func;
20818 	prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
20819 	prog->aux->real_func_cnt = env->subprog_cnt;
20820 	prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func;
20821 	prog->aux->exception_boundary = func[0]->aux->exception_boundary;
20822 	bpf_prog_jit_attempt_done(prog);
20823 	return 0;
20824 out_free:
20825 	/* We failed JIT'ing, so at this point we need to unregister poke
20826 	 * descriptors from subprogs, so that the kernel does not attempt to
20827 	 * patch them anymore as we're freeing the subprog JIT memory.
20828 	 */
20829 	for (i = 0; i < prog->aux->size_poke_tab; i++) {
20830 		map_ptr = prog->aux->poke_tab[i].tail_call.map;
20831 		map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
20832 	}
20833 	/* At this point we're guaranteed that poke descriptors are not
20834 	 * live anymore. We can just unlink its descriptor table as it's
20835 	 * released with the main prog.
20836 	 */
20837 	for (i = 0; i < env->subprog_cnt; i++) {
20838 		if (!func[i])
20839 			continue;
20840 		func[i]->aux->poke_tab = NULL;
20841 		bpf_jit_free(func[i]);
20842 	}
20843 	kfree(func);
20844 out_undo_insn:
20845 	/* cleanup main prog to be interpreted */
20846 	prog->jit_requested = 0;
20847 	prog->blinding_requested = 0;
20848 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
20849 		if (!bpf_pseudo_call(insn))
20850 			continue;
20851 		insn->off = 0;
20852 		insn->imm = env->insn_aux_data[i].call_imm;
20853 	}
20854 	bpf_prog_jit_attempt_done(prog);
20855 	return err;
20856 }
20857 
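/* Try jit_subprogs() first; if JITing is not possible, fall back to patching
 * bpf-to-bpf calls for the interpreter and reject features (kfunc calls,
 * callbacks, tail calls combined with subprogs) the interpreter cannot
 * handle.
 */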
20858 static int fixup_call_args(struct bpf_verifier_env *env)
20859 {
20860 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
20861 	struct bpf_prog *prog = env->prog;
20862 	struct bpf_insn *insn = prog->insnsi;
20863 	bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
20864 	int i, depth;
20865 #endif
20866 	int err = 0;
20867 
20868 	if (env->prog->jit_requested &&
20869 	    !bpf_prog_is_offloaded(env->prog->aux)) {
20870 		err = jit_subprogs(env);
20871 		if (err == 0)
20872 			return 0;
20873 		if (err == -EFAULT)
20874 			return err;
20875 	}
20876 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
20877 	if (has_kfunc_call) {
20878 		verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
20879 		return -EINVAL;
20880 	}
20881 	if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
20882 		/* When the JIT fails, progs with bpf2bpf calls and tail_calls
20883 		 * have to be rejected, since the interpreter doesn't support them yet.
20884 		 */
20885 		verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
20886 		return -EINVAL;
20887 	}
20888 	for (i = 0; i < prog->len; i++, insn++) {
20889 		if (bpf_pseudo_func(insn)) {
20890 			/* When the JIT fails, progs with callback calls
20891 			 * have to be rejected, since the interpreter doesn't support them yet.
20892 			 */
20893 			verbose(env, "callbacks are not allowed in non-JITed programs\n");
20894 			return -EINVAL;
20895 		}
20896 
20897 		if (!bpf_pseudo_call(insn))
20898 			continue;
20899 		depth = get_callee_stack_depth(env, insn, i);
20900 		if (depth < 0)
20901 			return depth;
20902 		bpf_patch_call_args(insn, depth);
20903 	}
20904 	err = 0;
20905 #endif
20906 	return err;
20907 }
20908 
20909 /* replace a generic kfunc with a specialized version if necessary */
20910 static void specialize_kfunc(struct bpf_verifier_env *env,
20911 			     u32 func_id, u16 offset, unsigned long *addr)
20912 {
20913 	struct bpf_prog *prog = env->prog;
20914 	bool seen_direct_write;
20915 	void *xdp_kfunc;
20916 	bool is_rdonly;
20917 
20918 	if (bpf_dev_bound_kfunc_id(func_id)) {
20919 		xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
20920 		if (xdp_kfunc) {
20921 			*addr = (unsigned long)xdp_kfunc;
20922 			return;
20923 		}
20924 		/* fall back to the default kfunc when not supported by netdev */
20925 	}
20926 
20927 	if (offset)
20928 		return;
20929 
20930 	if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
20931 		seen_direct_write = env->seen_direct_write;
20932 		is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
20933 
20934 		if (is_rdonly)
20935 			*addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
20936 
20937 		/* restore env->seen_direct_write to its original value, since
20938 		 * may_access_direct_pkt_data mutates it
20939 		 */
20940 		env->seen_direct_write = seen_direct_write;
20941 	}
20942 }
20943 
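/* Helper for the collection-insert kfuncs (list push front/back, rbtree add):
 * prepend two hidden-argument set-ups in front of the original call, loading
 * the btf_struct_meta pointer into struct_meta_reg and the node offset into
 * node_offset_reg, as expected by the corresponding *_impl kernel functions.
 */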
20944 static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
20945 					    u16 struct_meta_reg,
20946 					    u16 node_offset_reg,
20947 					    struct bpf_insn *insn,
20948 					    struct bpf_insn *insn_buf,
20949 					    int *cnt)
20950 {
20951 	struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta;
20952 	struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) };
20953 
20954 	insn_buf[0] = addr[0];
20955 	insn_buf[1] = addr[1];
20956 	insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
20957 	insn_buf[3] = *insn;
20958 	*cnt = 4;
20959 }
20960 
20961 static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
20962 			    struct bpf_insn *insn_buf, int insn_idx, int *cnt)
20963 {
20964 	const struct bpf_kfunc_desc *desc;
20965 
20966 	if (!insn->imm) {
20967 		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
20968 		return -EINVAL;
20969 	}
20970 
20971 	*cnt = 0;
20972 
20973 	/* insn->imm has the btf func_id. Replace it with an offset relative to
20974 	 * __bpf_call_base, unless the JIT needs to call functions that are
20975 	 * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
20976 	 */
20977 	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
20978 	if (!desc) {
20979 		verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
20980 			insn->imm);
20981 		return -EFAULT;
20982 	}
20983 
20984 	if (!bpf_jit_supports_far_kfunc_call())
20985 		insn->imm = BPF_CALL_IMM(desc->addr);
20986 	if (insn->off)
20987 		return 0;
20988 	if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
20989 	    desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
20990 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
20991 		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
20992 		u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
20993 
20994 		if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && kptr_struct_meta) {
20995 			verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
20996 				insn_idx);
20997 			return -EFAULT;
20998 		}
20999 
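		/* Populate the hidden args consumed by bpf_obj_new_impl() and
		 * bpf_percpu_obj_new_impl(): R1 carries the object size computed
		 * by the verifier (replacing the BTF type id the program passed)
		 * and R2 carries the btf_struct_meta pointer (NULL for the
		 * percpu variant, as checked above).
		 */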
21000 		insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
21001 		insn_buf[1] = addr[0];
21002 		insn_buf[2] = addr[1];
21003 		insn_buf[3] = *insn;
21004 		*cnt = 4;
21005 	} else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
21006 		   desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] ||
21007 		   desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
21008 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
21009 		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
21010 
21011 		if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] && kptr_struct_meta) {
21012 			verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
21013 				insn_idx);
21014 			return -EFAULT;
21015 		}
21016 
21017 		if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
21018 		    !kptr_struct_meta) {
21019 			verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
21020 				insn_idx);
21021 			return -EFAULT;
21022 		}
21023 
21024 		insn_buf[0] = addr[0];
21025 		insn_buf[1] = addr[1];
21026 		insn_buf[2] = *insn;
21027 		*cnt = 3;
21028 	} else if (desc->func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
21029 		   desc->func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
21030 		   desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
21031 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
21032 		int struct_meta_reg = BPF_REG_3;
21033 		int node_offset_reg = BPF_REG_4;
21034 
21035 		/* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */
21036 		if (desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
21037 			struct_meta_reg = BPF_REG_4;
21038 			node_offset_reg = BPF_REG_5;
21039 		}
21040 
21041 		if (!kptr_struct_meta) {
21042 			verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
21043 				insn_idx);
21044 			return -EFAULT;
21045 		}
21046 
21047 		__fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
21048 						node_offset_reg, insn, insn_buf, cnt);
21049 	} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
21050 		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
21051 		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
21052 		*cnt = 1;
21053 	} else if (is_bpf_wq_set_callback_impl_kfunc(desc->func_id)) {
21054 		struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(BPF_REG_4, (long)env->prog->aux) };
21055 
21056 		insn_buf[0] = ld_addrs[0];
21057 		insn_buf[1] = ld_addrs[1];
21058 		insn_buf[2] = *insn;
21059 		*cnt = 3;
21060 	}
21061 	return 0;
21062 }
21063 
21064 /* The function requires that first instruction in 'patch' is insnsi[prog->len - 1] */
21065 static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len)
21066 {
21067 	struct bpf_subprog_info *info = env->subprog_info;
21068 	int cnt = env->subprog_cnt;
21069 	struct bpf_prog *prog;
21070 
21071 	/* We only reserve one slot for hidden subprogs in subprog_info. */
21072 	if (env->hidden_subprog_cnt) {
21073 		verbose(env, "verifier internal error: only one hidden subprog supported\n");
21074 		return -EFAULT;
21075 	}
21076 	/* We're not patching any existing instruction, just appending the new
21077 	 * ones for the hidden subprog. Hence all of the adjustment operations
21078 	 * in bpf_patch_insn_data are no-ops.
21079 	 */
21080 	prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len);
21081 	if (!prog)
21082 		return -ENOMEM;
21083 	env->prog = prog;
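	/* info[cnt] currently holds the end-of-program sentinel (already
	 * adjusted to the new length by bpf_patch_insn_data); move it one
	 * slot up and let info[cnt] describe the hidden subprog, which
	 * starts at the first appended insn (patch[0] only replicates the
	 * previous last insn).
	 */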
21084 	info[cnt + 1].start = info[cnt].start;
21085 	info[cnt].start = prog->len - len + 1;
21086 	env->subprog_cnt++;
21087 	env->hidden_subprog_cnt++;
21088 	return 0;
21089 }
21090 
21091 /* Do various post-verification rewrites in a single program pass.
21092  * These rewrites simplify JIT and interpreter implementations.
21093  */
21094 static int do_misc_fixups(struct bpf_verifier_env *env)
21095 {
21096 	struct bpf_prog *prog = env->prog;
21097 	enum bpf_attach_type eatype = prog->expected_attach_type;
21098 	enum bpf_prog_type prog_type = resolve_prog_type(prog);
21099 	struct bpf_insn *insn = prog->insnsi;
21100 	const struct bpf_func_proto *fn;
21101 	const int insn_cnt = prog->len;
21102 	const struct bpf_map_ops *ops;
21103 	struct bpf_insn_aux_data *aux;
21104 	struct bpf_insn *insn_buf = env->insn_buf;
21105 	struct bpf_prog *new_prog;
21106 	struct bpf_map *map_ptr;
21107 	int i, ret, cnt, delta = 0, cur_subprog = 0;
21108 	struct bpf_subprog_info *subprogs = env->subprog_info;
21109 	u16 stack_depth = subprogs[cur_subprog].stack_depth;
21110 	u16 stack_depth_extra = 0;
21111 
21112 	if (env->seen_exception && !env->exception_callback_subprog) {
21113 		struct bpf_insn patch[] = {
21114 			env->prog->insnsi[insn_cnt - 1],
21115 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
21116 			BPF_EXIT_INSN(),
21117 		};
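		/* patch[0] replicates the current last insn, as required by
		 * add_hidden_subprog(); the rest forms the default exception
		 * callback, which just returns the cookie passed in R1
		 * (r0 = r1; exit).
		 */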
21118 
21119 		ret = add_hidden_subprog(env, patch, ARRAY_SIZE(patch));
21120 		if (ret < 0)
21121 			return ret;
21122 		prog = env->prog;
21123 		insn = prog->insnsi;
21124 
21125 		env->exception_callback_subprog = env->subprog_cnt - 1;
21126 		/* Don't update insn_cnt, as add_hidden_subprog always appends insns */
21127 		mark_subprog_exc_cb(env, env->exception_callback_subprog);
21128 	}
21129 
21130 	for (i = 0; i < insn_cnt;) {
21131 		if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) {
21132 			if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == 1) ||
21133 			    (((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
21134 				/* convert to 32-bit mov that clears upper 32-bit */
21135 				insn->code = BPF_ALU | BPF_MOV | BPF_X;
21136 				/* clear off and imm, so it's a normal 'wX = wY' from JIT pov */
21137 				insn->off = 0;
21138 				insn->imm = 0;
21139 			} /* cast from as(0) to as(1) should be handled by JIT */
21140 			goto next_insn;
21141 		}
21142 
21143 		if (env->insn_aux_data[i + delta].needs_zext)
21144 			/* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
21145 			insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
21146 
21147 		/* Make sdiv/smod divide-by-minus-one exceptions impossible. */
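		/* x sdiv -1 is rewritten to x = -x and x smod -1 to x = 0,
		 * matching the expected semantics without a runtime exception.
		 */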
21148 		if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
21149 		     insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
21150 		     insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
21151 		     insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
21152 		    insn->off == 1 && insn->imm == -1) {
21153 			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
21154 			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
21155 			struct bpf_insn *patchlet;
21156 			struct bpf_insn chk_and_sdiv[] = {
21157 				BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
21158 					     BPF_NEG | BPF_K, insn->dst_reg,
21159 					     0, 0, 0),
21160 			};
21161 			struct bpf_insn chk_and_smod[] = {
21162 				BPF_MOV32_IMM(insn->dst_reg, 0),
21163 			};
21164 
21165 			patchlet = isdiv ? chk_and_sdiv : chk_and_smod;
21166 			cnt = isdiv ? ARRAY_SIZE(chk_and_sdiv) : ARRAY_SIZE(chk_and_smod);
21167 
21168 			new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
21169 			if (!new_prog)
21170 				return -ENOMEM;
21171 
21172 			delta    += cnt - 1;
21173 			env->prog = prog = new_prog;
21174 			insn      = new_prog->insnsi + i + delta;
21175 			goto next_insn;
21176 		}
21177 
21178 		/* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
21179 		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
21180 		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
21181 		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
21182 		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
21183 			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
21184 			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
21185 			bool is_sdiv = isdiv && insn->off == 1;
21186 			bool is_smod = !isdiv && insn->off == 1;
21187 			struct bpf_insn *patchlet;
21188 			struct bpf_insn chk_and_div[] = {
21189 				/* [R,W]x div 0 -> 0 */
21190 				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
21191 					     BPF_JNE | BPF_K, insn->src_reg,
21192 					     0, 2, 0),
21193 				BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
21194 				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
21195 				*insn,
21196 			};
21197 			struct bpf_insn chk_and_mod[] = {
21198 				/* [R,W]x mod 0 -> [R,W]x */
21199 				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
21200 					     BPF_JEQ | BPF_K, insn->src_reg,
21201 					     0, 1 + (is64 ? 0 : 1), 0),
21202 				*insn,
21203 				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
21204 				BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
21205 			};
21206 			struct bpf_insn chk_and_sdiv[] = {
21207 				/* [R,W]x sdiv 0 -> 0
21208 				 * LLONG_MIN sdiv -1 -> LLONG_MIN
21209 				 * INT_MIN sdiv -1 -> INT_MIN
21210 				 */
21211 				BPF_MOV64_REG(BPF_REG_AX, insn->src_reg),
21212 				BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
21213 					     BPF_ADD | BPF_K, BPF_REG_AX,
21214 					     0, 0, 1),
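				/* AX = src + 1; if AX > 1 (unsigned), src is
				 * neither 0 nor -1, so jump straight to the
				 * original division below.
				 */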
21215 				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
21216 					     BPF_JGT | BPF_K, BPF_REG_AX,
21217 					     0, 4, 1),
21218 				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
21219 					     BPF_JEQ | BPF_K, BPF_REG_AX,
21220 					     0, 1, 0),
21221 				BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
21222 					     BPF_MOV | BPF_K, insn->dst_reg,
21223 					     0, 0, 0),
21224 				/* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
21225 				BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
21226 					     BPF_NEG | BPF_K, insn->dst_reg,
21227 					     0, 0, 0),
21228 				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
21229 				*insn,
21230 			};
21231 			struct bpf_insn chk_and_smod[] = {
21232 				/* [R,W]x mod 0 -> [R,W]x */
21233 				/* [R,W]x mod -1 -> 0 */
21234 				BPF_MOV64_REG(BPF_REG_AX, insn->src_reg),
21235 				BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
21236 					     BPF_ADD | BPF_K, BPF_REG_AX,
21237 					     0, 0, 1),
21238 				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
21239 					     BPF_JGT | BPF_K, BPF_REG_AX,
21240 					     0, 3, 1),
21241 				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
21242 					     BPF_JEQ | BPF_K, BPF_REG_AX,
21243 					     0, 3 + (is64 ? 0 : 1), 1),
21244 				BPF_MOV32_IMM(insn->dst_reg, 0),
21245 				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
21246 				*insn,
21247 				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
21248 				BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
21249 			};
21250 
21251 			if (is_sdiv) {
21252 				patchlet = chk_and_sdiv;
21253 				cnt = ARRAY_SIZE(chk_and_sdiv);
21254 			} else if (is_smod) {
21255 				patchlet = chk_and_smod;
21256 				cnt = ARRAY_SIZE(chk_and_smod) - (is64 ? 2 : 0);
21257 			} else {
21258 				patchlet = isdiv ? chk_and_div : chk_and_mod;
21259 				cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
21260 					      ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
21261 			}
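			/* The trailing JA + "wX = wX" pair in the mod variants
			 * only zero-extends dst for the 32-bit case when the
			 * divisor is zero; it is dropped for 64-bit.
			 */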
21262 
21263 			new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
21264 			if (!new_prog)
21265 				return -ENOMEM;
21266 
21267 			delta    += cnt - 1;
21268 			env->prog = prog = new_prog;
21269 			insn      = new_prog->insnsi + i + delta;
21270 			goto next_insn;
21271 		}
21272 
21273 		/* Make it impossible to de-reference a userspace address */
21274 		if (BPF_CLASS(insn->code) == BPF_LDX &&
21275 		    (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
21276 		     BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) {
21277 			struct bpf_insn *patch = &insn_buf[0];
21278 			u64 uaddress_limit = bpf_arch_uaddress_limit();
21279 
21280 			if (!uaddress_limit)
21281 				goto next_insn;
21282 
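			/* Compare the upper 32 bits of the effective address
			 * against bpf_arch_uaddress_limit(): addresses at or
			 * below the limit are treated as user addresses, so
			 * skip the load and zero the destination instead.
			 */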
21283 			*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
21284 			if (insn->off)
21285 				*patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off);
21286 			*patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32);
21287 			*patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2);
21288 			*patch++ = *insn;
21289 			*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
21290 			*patch++ = BPF_MOV64_IMM(insn->dst_reg, 0);
21291 
21292 			cnt = patch - insn_buf;
21293 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21294 			if (!new_prog)
21295 				return -ENOMEM;
21296 
21297 			delta    += cnt - 1;
21298 			env->prog = prog = new_prog;
21299 			insn      = new_prog->insnsi + i + delta;
21300 			goto next_insn;
21301 		}
21302 
21303 		/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
21304 		if (BPF_CLASS(insn->code) == BPF_LD &&
21305 		    (BPF_MODE(insn->code) == BPF_ABS ||
21306 		     BPF_MODE(insn->code) == BPF_IND)) {
21307 			cnt = env->ops->gen_ld_abs(insn, insn_buf);
21308 			if (cnt == 0 || cnt >= INSN_BUF_SIZE) {
21309 				verbose(env, "bpf verifier is misconfigured\n");
21310 				return -EINVAL;
21311 			}
21312 
21313 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21314 			if (!new_prog)
21315 				return -ENOMEM;
21316 
21317 			delta    += cnt - 1;
21318 			env->prog = prog = new_prog;
21319 			insn      = new_prog->insnsi + i + delta;
21320 			goto next_insn;
21321 		}
21322 
21323 		/* Rewrite pointer arithmetic to mitigate speculation attacks. */
21324 		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
21325 		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
21326 			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
21327 			const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
21328 			struct bpf_insn *patch = &insn_buf[0];
21329 			bool issrc, isneg, isimm;
21330 			u32 off_reg;
21331 
21332 			aux = &env->insn_aux_data[i + delta];
21333 			if (!aux->alu_state ||
21334 			    aux->alu_state == BPF_ALU_NON_POINTER)
21335 				goto next_insn;
21336 
21337 			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
21338 			issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
21339 				BPF_ALU_SANITIZE_SRC;
21340 			isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
21341 
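			/* In the non-constant case, the masking sequence below
			 * leaves BPF_REG_AX equal to off_reg when it lies within
			 * aux->alu_limit and 0 otherwise, without branching, so
			 * mis-speculated pointer arithmetic cannot leave the
			 * verified bounds.
			 */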
21342 			off_reg = issrc ? insn->src_reg : insn->dst_reg;
21343 			if (isimm) {
21344 				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
21345 			} else {
21346 				if (isneg)
21347 					*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
21348 				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
21349 				*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
21350 				*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
21351 				*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
21352 				*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
21353 				*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
21354 			}
21355 			if (!issrc)
21356 				*patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
21357 			insn->src_reg = BPF_REG_AX;
21358 			if (isneg)
21359 				insn->code = insn->code == code_add ?
21360 					     code_sub : code_add;
21361 			*patch++ = *insn;
21362 			if (issrc && isneg && !isimm)
21363 				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
21364 			cnt = patch - insn_buf;
21365 
21366 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21367 			if (!new_prog)
21368 				return -ENOMEM;
21369 
21370 			delta    += cnt - 1;
21371 			env->prog = prog = new_prog;
21372 			insn      = new_prog->insnsi + i + delta;
21373 			goto next_insn;
21374 		}
21375 
21376 		if (is_may_goto_insn(insn)) {
21377 			int stack_off = -stack_depth - 8;
21378 
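			/* Lower may_goto to: load the iteration counter spilled
			 * at stack_off, take the goto once it reaches zero,
			 * otherwise decrement it and store it back.
			 */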
21379 			stack_depth_extra = 8;
21380 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
21381 			if (insn->off >= 0)
21382 				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
21383 			else
21384 				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
21385 			insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
21386 			insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
21387 			cnt = 4;
21388 
21389 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21390 			if (!new_prog)
21391 				return -ENOMEM;
21392 
21393 			delta += cnt - 1;
21394 			env->prog = prog = new_prog;
21395 			insn = new_prog->insnsi + i + delta;
21396 			goto next_insn;
21397 		}
21398 
21399 		if (insn->code != (BPF_JMP | BPF_CALL))
21400 			goto next_insn;
21401 		if (insn->src_reg == BPF_PSEUDO_CALL)
21402 			goto next_insn;
21403 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
21404 			ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
21405 			if (ret)
21406 				return ret;
21407 			if (cnt == 0)
21408 				goto next_insn;
21409 
21410 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21411 			if (!new_prog)
21412 				return -ENOMEM;
21413 
21414 			delta	 += cnt - 1;
21415 			env->prog = prog = new_prog;
21416 			insn	  = new_prog->insnsi + i + delta;
21417 			goto next_insn;
21418 		}
21419 
21420 		/* Skip inlining the helper call if the JIT does it. */
21421 		if (bpf_jit_inlines_helper_call(insn->imm))
21422 			goto next_insn;
21423 
21424 		if (insn->imm == BPF_FUNC_get_route_realm)
21425 			prog->dst_needed = 1;
21426 		if (insn->imm == BPF_FUNC_get_prandom_u32)
21427 			bpf_user_rnd_init_once();
21428 		if (insn->imm == BPF_FUNC_override_return)
21429 			prog->kprobe_override = 1;
21430 		if (insn->imm == BPF_FUNC_tail_call) {
21431 			/* If we tail call into other programs, we
21432 			 * cannot make any assumptions since they can
21433 			 * be replaced dynamically during runtime in
21434 			 * the program array.
21435 			 */
21436 			prog->cb_access = 1;
21437 			if (!allow_tail_call_in_subprogs(env))
21438 				prog->aux->stack_depth = MAX_BPF_STACK;
21439 			prog->aux->max_pkt_offset = MAX_PACKET_OFF;
21440 
21441 			/* mark bpf_tail_call as a different opcode to avoid a
21442 			 * conditional branch in the interpreter for every normal
21443 			 * call and to prevent accidental JITing by a JIT compiler
21444 			 * that doesn't support bpf_tail_call yet
21445 			 */
21446 			insn->imm = 0;
21447 			insn->code = BPF_JMP | BPF_TAIL_CALL;
21448 
21449 			aux = &env->insn_aux_data[i + delta];
21450 			if (env->bpf_capable && !prog->blinding_requested &&
21451 			    prog->jit_requested &&
21452 			    !bpf_map_key_poisoned(aux) &&
21453 			    !bpf_map_ptr_poisoned(aux) &&
21454 			    !bpf_map_ptr_unpriv(aux)) {
21455 				struct bpf_jit_poke_descriptor desc = {
21456 					.reason = BPF_POKE_REASON_TAIL_CALL,
21457 					.tail_call.map = aux->map_ptr_state.map_ptr,
21458 					.tail_call.key = bpf_map_key_immediate(aux),
21459 					.insn_idx = i + delta,
21460 				};
21461 
21462 				ret = bpf_jit_add_poke_descriptor(prog, &desc);
21463 				if (ret < 0) {
21464 					verbose(env, "adding tail call poke descriptor failed\n");
21465 					return ret;
21466 				}
21467 
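				/* Stash the poke descriptor index biased by one,
				 * so imm == 0 keeps meaning "no descriptor" and
				 * the JIT can fall back to an indirect tail call.
				 */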
21468 				insn->imm = ret + 1;
21469 				goto next_insn;
21470 			}
21471 
21472 			if (!bpf_map_ptr_unpriv(aux))
21473 				goto next_insn;
21474 
21475 			/* instead of changing every JIT dealing with tail_call,
21476 			 * emit two extra insns:
21477 			 * if (index >= max_entries) goto out;
21478 			 * index &= array->index_mask;
21479 			 * to avoid out-of-bounds cpu speculation
21480 			 */
21481 			if (bpf_map_ptr_poisoned(aux)) {
21482 				verbose(env, "tail_call abusing map_ptr\n");
21483 				return -EINVAL;
21484 			}
21485 
21486 			map_ptr = aux->map_ptr_state.map_ptr;
21487 			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
21488 						  map_ptr->max_entries, 2);
21489 			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
21490 						    container_of(map_ptr,
21491 								 struct bpf_array,
21492 								 map)->index_mask);
21493 			insn_buf[2] = *insn;
21494 			cnt = 3;
21495 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21496 			if (!new_prog)
21497 				return -ENOMEM;
21498 
21499 			delta    += cnt - 1;
21500 			env->prog = prog = new_prog;
21501 			insn      = new_prog->insnsi + i + delta;
21502 			goto next_insn;
21503 		}
21504 
21505 		if (insn->imm == BPF_FUNC_timer_set_callback) {
21506 			/* The verifier will process callback_fn as many times as necessary
21507 			 * with different maps and the register states prepared by
21508 			 * set_timer_callback_state will be accurate.
21509 			 *
21510 			 * The following use case is valid:
21511 			 *   map1 is shared by prog1, prog2, prog3.
21512 			 *   prog1 calls bpf_timer_init for some map1 elements
21513 			 *   prog2 calls bpf_timer_set_callback for some map1 elements.
21514 			 *     Those that were not bpf_timer_init-ed will return -EINVAL.
21515 			 *   prog3 calls bpf_timer_start for some map1 elements.
21516 			 *     Those that were not both bpf_timer_init-ed and
21517 			 *     bpf_timer_set_callback-ed will return -EINVAL.
21518 			 */
21519 			struct bpf_insn ld_addrs[2] = {
21520 				BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
21521 			};
21522 
21523 			insn_buf[0] = ld_addrs[0];
21524 			insn_buf[1] = ld_addrs[1];
21525 			insn_buf[2] = *insn;
21526 			cnt = 3;
21527 
21528 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21529 			if (!new_prog)
21530 				return -ENOMEM;
21531 
21532 			delta    += cnt - 1;
21533 			env->prog = prog = new_prog;
21534 			insn      = new_prog->insnsi + i + delta;
21535 			goto patch_call_imm;
21536 		}
21537 
21538 		if (is_storage_get_function(insn->imm)) {
21539 			if (!in_sleepable(env) ||
21540 			    env->insn_aux_data[i + delta].storage_get_func_atomic)
21541 				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
21542 			else
21543 				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
21544 			insn_buf[1] = *insn;
21545 			cnt = 2;
21546 
21547 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21548 			if (!new_prog)
21549 				return -ENOMEM;
21550 
21551 			delta += cnt - 1;
21552 			env->prog = prog = new_prog;
21553 			insn = new_prog->insnsi + i + delta;
21554 			goto patch_call_imm;
21555 		}
21556 
21557 		/* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */
21558 		if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) {
21559 			/* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data,
21560 			 * bpf_mem_alloc() returns a ptr to the percpu data ptr.
21561 			 */
21562 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
21563 			insn_buf[1] = *insn;
21564 			cnt = 2;
21565 
21566 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21567 			if (!new_prog)
21568 				return -ENOMEM;
21569 
21570 			delta += cnt - 1;
21571 			env->prog = prog = new_prog;
21572 			insn = new_prog->insnsi + i + delta;
21573 			goto patch_call_imm;
21574 		}
21575 
21576 		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
21577 		 * and other inlining handlers are currently limited to 64 bit
21578 		 * only.
21579 		 */
21580 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
21581 		    (insn->imm == BPF_FUNC_map_lookup_elem ||
21582 		     insn->imm == BPF_FUNC_map_update_elem ||
21583 		     insn->imm == BPF_FUNC_map_delete_elem ||
21584 		     insn->imm == BPF_FUNC_map_push_elem   ||
21585 		     insn->imm == BPF_FUNC_map_pop_elem    ||
21586 		     insn->imm == BPF_FUNC_map_peek_elem   ||
21587 		     insn->imm == BPF_FUNC_redirect_map    ||
21588 		     insn->imm == BPF_FUNC_for_each_map_elem ||
21589 		     insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
21590 			aux = &env->insn_aux_data[i + delta];
21591 			if (bpf_map_ptr_poisoned(aux))
21592 				goto patch_call_imm;
21593 
21594 			map_ptr = aux->map_ptr_state.map_ptr;
21595 			ops = map_ptr->ops;
21596 			if (insn->imm == BPF_FUNC_map_lookup_elem &&
21597 			    ops->map_gen_lookup) {
21598 				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
21599 				if (cnt == -EOPNOTSUPP)
21600 					goto patch_map_ops_generic;
21601 				if (cnt <= 0 || cnt >= INSN_BUF_SIZE) {
21602 					verbose(env, "bpf verifier is misconfigured\n");
21603 					return -EINVAL;
21604 				}
21605 
21606 				new_prog = bpf_patch_insn_data(env, i + delta,
21607 							       insn_buf, cnt);
21608 				if (!new_prog)
21609 					return -ENOMEM;
21610 
21611 				delta    += cnt - 1;
21612 				env->prog = prog = new_prog;
21613 				insn      = new_prog->insnsi + i + delta;
21614 				goto next_insn;
21615 			}
21616 
21617 			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
21618 				     (void *(*)(struct bpf_map *map, void *key))NULL));
21619 			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
21620 				     (long (*)(struct bpf_map *map, void *key))NULL));
21621 			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
21622 				     (long (*)(struct bpf_map *map, void *key, void *value,
21623 					      u64 flags))NULL));
21624 			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
21625 				     (long (*)(struct bpf_map *map, void *value,
21626 					      u64 flags))NULL));
21627 			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
21628 				     (long (*)(struct bpf_map *map, void *value))NULL));
21629 			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
21630 				     (long (*)(struct bpf_map *map, void *value))NULL));
21631 			BUILD_BUG_ON(!__same_type(ops->map_redirect,
21632 				     (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
21633 			BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
21634 				     (long (*)(struct bpf_map *map,
21635 					      bpf_callback_t callback_fn,
21636 					      void *callback_ctx,
21637 					      u64 flags))NULL));
21638 			BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
21639 				     (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
21640 
21641 patch_map_ops_generic:
21642 			switch (insn->imm) {
21643 			case BPF_FUNC_map_lookup_elem:
21644 				insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
21645 				goto next_insn;
21646 			case BPF_FUNC_map_update_elem:
21647 				insn->imm = BPF_CALL_IMM(ops->map_update_elem);
21648 				goto next_insn;
21649 			case BPF_FUNC_map_delete_elem:
21650 				insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
21651 				goto next_insn;
21652 			case BPF_FUNC_map_push_elem:
21653 				insn->imm = BPF_CALL_IMM(ops->map_push_elem);
21654 				goto next_insn;
21655 			case BPF_FUNC_map_pop_elem:
21656 				insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
21657 				goto next_insn;
21658 			case BPF_FUNC_map_peek_elem:
21659 				insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
21660 				goto next_insn;
21661 			case BPF_FUNC_redirect_map:
21662 				insn->imm = BPF_CALL_IMM(ops->map_redirect);
21663 				goto next_insn;
21664 			case BPF_FUNC_for_each_map_elem:
21665 				insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
21666 				goto next_insn;
21667 			case BPF_FUNC_map_lookup_percpu_elem:
21668 				insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
21669 				goto next_insn;
21670 			}
21671 
21672 			goto patch_call_imm;
21673 		}
21674 
21675 		/* Implement bpf_jiffies64 inline. */
21676 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
21677 		    insn->imm == BPF_FUNC_jiffies64) {
21678 			struct bpf_insn ld_jiffies_addr[2] = {
21679 				BPF_LD_IMM64(BPF_REG_0,
21680 					     (unsigned long)&jiffies),
21681 			};
21682 
21683 			insn_buf[0] = ld_jiffies_addr[0];
21684 			insn_buf[1] = ld_jiffies_addr[1];
21685 			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
21686 						  BPF_REG_0, 0);
21687 			cnt = 3;
21688 
21689 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
21690 						       cnt);
21691 			if (!new_prog)
21692 				return -ENOMEM;
21693 
21694 			delta    += cnt - 1;
21695 			env->prog = prog = new_prog;
21696 			insn      = new_prog->insnsi + i + delta;
21697 			goto next_insn;
21698 		}
21699 
21700 #if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
21701 		/* Implement bpf_get_smp_processor_id() inline. */
21702 		if (insn->imm == BPF_FUNC_get_smp_processor_id &&
21703 		    verifier_inlines_helper_call(env, insn->imm)) {
21704 			/* BPF_FUNC_get_smp_processor_id inlining is an
21705 			 * optimization, so if pcpu_hot.cpu_number is ever
21706 			 * changed in some incompatible and hard to support
21707 			 * way, it's fine to back out this inlining logic
21708 			 */
21709 #ifdef CONFIG_SMP
21710 			insn_buf[0] = BPF_MOV32_IMM(BPF_REG_0, (u32)(unsigned long)&pcpu_hot.cpu_number);
21711 			insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
21712 			insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0);
21713 			cnt = 3;
21714 #else
21715 			insn_buf[0] = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
21716 			cnt = 1;
21717 #endif
21718 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21719 			if (!new_prog)
21720 				return -ENOMEM;
21721 
21722 			delta    += cnt - 1;
21723 			env->prog = prog = new_prog;
21724 			insn      = new_prog->insnsi + i + delta;
21725 			goto next_insn;
21726 		}
21727 #endif
21728 		/* Implement bpf_get_func_arg inline. */
21729 		if (prog_type == BPF_PROG_TYPE_TRACING &&
21730 		    insn->imm == BPF_FUNC_get_func_arg) {
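			/* Inlined body, roughly:
			 *   if (arg_n >= nr_args) return -EINVAL;
			 *   *(u64 *)r3 = ctx[arg_n]; return 0;
			 */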
21731 			/* Load nr_args from ctx - 8 */
21732 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
21733 			insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
21734 			insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
21735 			insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
21736 			insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
21737 			insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
21738 			insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
21739 			insn_buf[7] = BPF_JMP_A(1);
21740 			insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
21741 			cnt = 9;
21742 
21743 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21744 			if (!new_prog)
21745 				return -ENOMEM;
21746 
21747 			delta    += cnt - 1;
21748 			env->prog = prog = new_prog;
21749 			insn      = new_prog->insnsi + i + delta;
21750 			goto next_insn;
21751 		}
21752 
21753 		/* Implement bpf_get_func_ret inline. */
21754 		if (prog_type == BPF_PROG_TYPE_TRACING &&
21755 		    insn->imm == BPF_FUNC_get_func_ret) {
21756 			if (eatype == BPF_TRACE_FEXIT ||
21757 			    eatype == BPF_MODIFY_RETURN) {
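				/* The return value sits right after the nr_args
				 * arguments in ctx, i.e. at ctx[nr_args]; copy it
				 * to *(u64 *)r2 and return 0.
				 */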
21758 				/* Load nr_args from ctx - 8 */
21759 				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
21760 				insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
21761 				insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
21762 				insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
21763 				insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
21764 				insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0);
21765 				cnt = 6;
21766 			} else {
21767 				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
21768 				cnt = 1;
21769 			}
21770 
21771 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21772 			if (!new_prog)
21773 				return -ENOMEM;
21774 
21775 			delta    += cnt - 1;
21776 			env->prog = prog = new_prog;
21777 			insn      = new_prog->insnsi + i + delta;
21778 			goto next_insn;
21779 		}
21780 
21781 		/* Implement get_func_arg_cnt inline. */
21782 		if (prog_type == BPF_PROG_TYPE_TRACING &&
21783 		    insn->imm == BPF_FUNC_get_func_arg_cnt) {
21784 			/* Load nr_args from ctx - 8 */
21785 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
21786 
21787 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
21788 			if (!new_prog)
21789 				return -ENOMEM;
21790 
21791 			env->prog = prog = new_prog;
21792 			insn      = new_prog->insnsi + i + delta;
21793 			goto next_insn;
21794 		}
21795 
21796 		/* Implement bpf_get_func_ip inline. */
21797 		if (prog_type == BPF_PROG_TYPE_TRACING &&
21798 		    insn->imm == BPF_FUNC_get_func_ip) {
21799 			/* Load IP address from ctx - 16 */
21800 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
21801 
21802 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
21803 			if (!new_prog)
21804 				return -ENOMEM;
21805 
21806 			env->prog = prog = new_prog;
21807 			insn      = new_prog->insnsi + i + delta;
21808 			goto next_insn;
21809 		}
21810 
21811 		/* Implement bpf_get_branch_snapshot inline. */
21812 		if (IS_ENABLED(CONFIG_PERF_EVENTS) &&
21813 		    prog->jit_requested && BITS_PER_LONG == 64 &&
21814 		    insn->imm == BPF_FUNC_get_branch_snapshot) {
21815 			/* We are dealing with the following func protos:
21816 			 * u64 bpf_get_branch_snapshot(void *buf, u32 size, u64 flags);
21817 			 * int perf_snapshot_branch_stack(struct perf_branch_entry *entries, u32 cnt);
21818 			 */
21819 			const u32 br_entry_size = sizeof(struct perf_branch_entry);
21820 
21821 			/* struct perf_branch_entry is part of UAPI and is
21822 			 * used as an array element, so extremely unlikely to
21823 			 * ever grow or shrink
21824 			 */
21825 			BUILD_BUG_ON(br_entry_size != 24);
21826 
21827 			/* if (unlikely(flags)) return -EINVAL */
21828 			insn_buf[0] = BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 0, 7);
21829 
21830 			/* Transform size (bytes) into number of entries (cnt = size / 24).
21831 			 * But to avoid expensive division instruction, we implement
21832 			 * divide-by-3 through multiplication, followed by further
21833 			 * division by 8 through 3-bit right shift.
21834 			 * Refer to book "Hacker's Delight, 2nd ed." by Henry S. Warren, Jr.,
21835 			 * p. 227, chapter "Unsigned Division by 3" for details and proofs.
21836 			 *
21837 			 * N / 3 <=> M * N / 2^33, where M = (2^33 + 1) / 3 = 0xaaaaaaab.
21838 			 */
21839 			insn_buf[1] = BPF_MOV32_IMM(BPF_REG_0, 0xaaaaaaab);
21840 			insn_buf[2] = BPF_ALU64_REG(BPF_MUL, BPF_REG_2, BPF_REG_0);
21841 			insn_buf[3] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36);
21842 
21843 			/* call perf_snapshot_branch_stack implementation */
21844 			insn_buf[4] = BPF_EMIT_CALL(static_call_query(perf_snapshot_branch_stack));
21845 			/* if (entry_cnt == 0) return -ENOENT */
21846 			insn_buf[5] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4);
21847 			/* return entry_cnt * sizeof(struct perf_branch_entry) */
21848 			insn_buf[6] = BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, br_entry_size);
21849 			insn_buf[7] = BPF_JMP_A(3);
21850 			/* return -EINVAL; */
21851 			insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
21852 			insn_buf[9] = BPF_JMP_A(1);
21853 			/* return -ENOENT; */
21854 			insn_buf[10] = BPF_MOV64_IMM(BPF_REG_0, -ENOENT);
21855 			cnt = 11;
21856 
21857 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21858 			if (!new_prog)
21859 				return -ENOMEM;
21860 
21861 			delta    += cnt - 1;
21862 			env->prog = prog = new_prog;
21863 			insn      = new_prog->insnsi + i + delta;
21864 			goto next_insn;
21865 		}
21866 
21867 		/* Implement bpf_kptr_xchg inline */
21868 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
21869 		    insn->imm == BPF_FUNC_kptr_xchg &&
21870 		    bpf_jit_supports_ptr_xchg()) {
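			/* bpf_kptr_xchg(dst, new) maps directly onto a single
			 * 64-bit BPF_XCHG: move the new value into R0 and
			 * atomically exchange it with *(u64 *)(R1 + 0), so R0
			 * ends up holding the old kptr.
			 */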
21871 			insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2);
21872 			insn_buf[1] = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0);
21873 			cnt = 2;
21874 
21875 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21876 			if (!new_prog)
21877 				return -ENOMEM;
21878 
21879 			delta    += cnt - 1;
21880 			env->prog = prog = new_prog;
21881 			insn      = new_prog->insnsi + i + delta;
21882 			goto next_insn;
21883 		}
21884 patch_call_imm:
21885 		fn = env->ops->get_func_proto(insn->imm, env->prog);
21886 		/* all functions that have a prototype and that the verifier
21887 		 * allowed programs to call must be real in-kernel functions
21888 		 */
21889 		if (!fn->func) {
21890 			verbose(env,
21891 				"kernel subsystem misconfigured func %s#%d\n",
21892 				func_id_name(insn->imm), insn->imm);
21893 			return -EFAULT;
21894 		}
21895 		insn->imm = fn->func - __bpf_call_base;
21896 next_insn:
21897 		if (subprogs[cur_subprog + 1].start == i + delta + 1) {
21898 			subprogs[cur_subprog].stack_depth += stack_depth_extra;
21899 			subprogs[cur_subprog].stack_extra = stack_depth_extra;
21900 
21901 			stack_depth = subprogs[cur_subprog].stack_depth;
21902 			if (stack_depth > MAX_BPF_STACK && !prog->jit_requested) {
21903 				verbose(env, "stack size %d(extra %d) is too large\n",
21904 					stack_depth, stack_depth_extra);
21905 				return -EINVAL;
21906 			}
21907 			cur_subprog++;
21908 			stack_depth = subprogs[cur_subprog].stack_depth;
21909 			stack_depth_extra = 0;
21910 		}
21911 		i++;
21912 		insn++;
21913 	}
21914 
21915 	env->prog->aux->stack_depth = subprogs[0].stack_depth;
21916 	for (i = 0; i < env->subprog_cnt; i++) {
21917 		int subprog_start = subprogs[i].start;
21918 		int stack_slots = subprogs[i].stack_extra / 8;
21919 
21920 		if (!stack_slots)
21921 			continue;
21922 		if (stack_slots > 1) {
21923 			verbose(env, "verifier bug: stack_slots supports may_goto only\n");
21924 			return -EFAULT;
21925 		}
21926 
21927 		/* Add ST insn to subprog prologue to init extra stack */
21928 		insn_buf[0] = BPF_ST_MEM(BPF_DW, BPF_REG_FP,
21929 					 -subprogs[i].stack_depth, BPF_MAX_LOOPS);
21930 		/* Copy first actual insn to preserve it */
21931 		insn_buf[1] = env->prog->insnsi[subprog_start];
21932 
21933 		new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, 2);
21934 		if (!new_prog)
21935 			return -ENOMEM;
21936 		env->prog = prog = new_prog;
21937 		/*
21938 		 * If may_goto is the first insn of a prog, there could be a jmp
21939 		 * insn that points to it; hence adjust all such jmps to point
21940 		 * to the insn after the BPF_ST that inits the may_goto count.
21941 		 * Adjustment will succeed because bpf_patch_insn_data() didn't fail.
21942 		 */
21943 		WARN_ON(adjust_jmp_off(env->prog, subprog_start, 1));
21944 	}
21945 
21946 	/* Since poke tab is now finalized, publish aux to tracker. */
21947 	for (i = 0; i < prog->aux->size_poke_tab; i++) {
21948 		map_ptr = prog->aux->poke_tab[i].tail_call.map;
21949 		if (!map_ptr->ops->map_poke_track ||
21950 		    !map_ptr->ops->map_poke_untrack ||
21951 		    !map_ptr->ops->map_poke_run) {
21952 			verbose(env, "bpf verifier is misconfigured\n");
21953 			return -EINVAL;
21954 		}
21955 
21956 		ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
21957 		if (ret < 0) {
21958 			verbose(env, "tracking tail call prog failed\n");
21959 			return ret;
21960 		}
21961 	}
21962 
21963 	sort_kfunc_descs_by_imm_off(env->prog);
21964 
21965 	return 0;
21966 }
21967 
21968 static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
21969 					int position,
21970 					s32 stack_base,
21971 					u32 callback_subprogno,
21972 					u32 *total_cnt)
21973 {
21974 	s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
21975 	s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
21976 	s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
21977 	int reg_loop_max = BPF_REG_6;
21978 	int reg_loop_cnt = BPF_REG_7;
21979 	int reg_loop_ctx = BPF_REG_8;
21980 
21981 	struct bpf_insn *insn_buf = env->insn_buf;
21982 	struct bpf_prog *new_prog;
21983 	u32 callback_start;
21984 	u32 call_insn_offset;
21985 	s32 callback_offset;
21986 	u32 cnt = 0;
21987 
21988 	/* This represents an inlined version of bpf_iter.c:bpf_loop;
21989 	 * be careful to keep the two in sync when modifying either.
21990 	 */
21991 
21992 	/* Return error and jump to the end of the patch if
21993 	 * expected number of iterations is too big.
21994 	 */
21995 	insn_buf[cnt++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2);
21996 	insn_buf[cnt++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG);
21997 	insn_buf[cnt++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16);
21998 	/* spill R6, R7, R8 to use these as loop vars */
21999 	insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset);
22000 	insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset);
22001 	insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset);
22002 	/* initialize loop vars */
22003 	insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_max, BPF_REG_1);
22004 	insn_buf[cnt++] = BPF_MOV32_IMM(reg_loop_cnt, 0);
22005 	insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3);
22006 	/* loop header,
22007 	 * if reg_loop_cnt >= reg_loop_max skip the loop body
22008 	 */
22009 	insn_buf[cnt++] = BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5);
22010 	/* callback call,
22011 	 * correct callback offset would be set after patching
22012 	 */
22013 	insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt);
22014 	insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx);
22015 	insn_buf[cnt++] = BPF_CALL_REL(0);
22016 	/* increment loop counter */
22017 	insn_buf[cnt++] = BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1);
22018 	/* jump to loop header if callback returned 0 */
22019 	insn_buf[cnt++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6);
22020 	/* return value of bpf_loop,
22021 	 * set R0 to the number of iterations
22022 	 */
22023 	insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt);
22024 	/* restore original values of R6, R7, R8 */
22025 	insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset);
22026 	insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset);
22027 	insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset);
22028 
22029 	*total_cnt = cnt;
22030 	new_prog = bpf_patch_insn_data(env, position, insn_buf, cnt);
22031 	if (!new_prog)
22032 		return new_prog;
22033 
22034 	/* callback start is known only after patching */
22035 	callback_start = env->subprog_info[callback_subprogno].start;
22036 	/* Note: insn_buf[12] is the position of the BPF_CALL_REL instruction within the patch */
22037 	call_insn_offset = position + 12;
22038 	callback_offset = callback_start - call_insn_offset - 1;
22039 	new_prog->insnsi[call_insn_offset].imm = callback_offset;
22040 
22041 	return new_prog;
22042 }
22043 
22044 static bool is_bpf_loop_call(struct bpf_insn *insn)
22045 {
22046 	return insn->code == (BPF_JMP | BPF_CALL) &&
22047 		insn->src_reg == 0 &&
22048 		insn->imm == BPF_FUNC_loop;
22049 }
22050 
22051 /* For all sub-programs in the program (including main) check
22052  * insn_aux_data to see if there are bpf_loop calls that require
22053  * inlining. If such calls are found, they are replaced with a
22054  * sequence of instructions produced by `inline_bpf_loop` and the
22055  * subprog stack_depth is increased by the size of 3 registers.
22056  * This stack space is used to spill the values of R6, R7 and R8,
22057  * which are used to store the loop bound, counter and context
22058  * variables.
22059  */
22060 static int optimize_bpf_loop(struct bpf_verifier_env *env)
22061 {
22062 	struct bpf_subprog_info *subprogs = env->subprog_info;
22063 	int i, cur_subprog = 0, cnt, delta = 0;
22064 	struct bpf_insn *insn = env->prog->insnsi;
22065 	int insn_cnt = env->prog->len;
22066 	u16 stack_depth = subprogs[cur_subprog].stack_depth;
22067 	u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
22068 	u16 stack_depth_extra = 0;
22069 
22070 	for (i = 0; i < insn_cnt; i++, insn++) {
22071 		struct bpf_loop_inline_state *inline_state =
22072 			&env->insn_aux_data[i + delta].loop_inline_state;
22073 
22074 		if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
22075 			struct bpf_prog *new_prog;
22076 
22077 			stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
22078 			new_prog = inline_bpf_loop(env,
22079 						   i + delta,
22080 						   -(stack_depth + stack_depth_extra),
22081 						   inline_state->callback_subprogno,
22082 						   &cnt);
22083 			if (!new_prog)
22084 				return -ENOMEM;
22085 
22086 			delta     += cnt - 1;
22087 			env->prog  = new_prog;
22088 			insn       = new_prog->insnsi + i + delta;
22089 		}
22090 
22091 		if (subprogs[cur_subprog + 1].start == i + delta + 1) {
22092 			subprogs[cur_subprog].stack_depth += stack_depth_extra;
22093 			cur_subprog++;
22094 			stack_depth = subprogs[cur_subprog].stack_depth;
22095 			stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
22096 			stack_depth_extra = 0;
22097 		}
22098 	}
22099 
22100 	env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
22101 
22102 	return 0;
22103 }
22104 
22105 /* Remove unnecessary spill/fill pairs that are members of the fastcall
22106  * pattern, and adjust subprogram stack depth where possible.
22107  */
22108 static int remove_fastcall_spills_fills(struct bpf_verifier_env *env)
22109 {
22110 	struct bpf_subprog_info *subprog = env->subprog_info;
22111 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
22112 	struct bpf_insn *insn = env->prog->insnsi;
22113 	int insn_cnt = env->prog->len;
22114 	u32 spills_num;
22115 	bool modified = false;
22116 	int i, j;
22117 
22118 	for (i = 0; i < insn_cnt; i++, insn++) {
22119 		if (aux[i].fastcall_spills_num > 0) {
22120 			spills_num = aux[i].fastcall_spills_num;
22121 			/* NOPs would be removed by opt_remove_nops() */
22122 			for (j = 1; j <= spills_num; ++j) {
22123 				*(insn - j) = NOP;
22124 				*(insn + j) = NOP;
22125 			}
22126 			modified = true;
22127 		}
22128 		if ((subprog + 1)->start == i + 1) {
22129 			if (modified && !subprog->keep_fastcall_stack)
22130 				subprog->stack_depth = -subprog->fastcall_stack_off;
22131 			subprog++;
22132 			modified = false;
22133 		}
22134 	}
22135 
22136 	return 0;
22137 }
22138 
22139 static void free_states(struct bpf_verifier_env *env)
22140 {
22141 	struct bpf_verifier_state_list *sl, *sln;
22142 	int i;
22143 
22144 	sl = env->free_list;
22145 	while (sl) {
22146 		sln = sl->next;
22147 		free_verifier_state(&sl->state, false);
22148 		kfree(sl);
22149 		sl = sln;
22150 	}
22151 	env->free_list = NULL;
22152 
22153 	if (!env->explored_states)
22154 		return;
22155 
22156 	for (i = 0; i < state_htab_size(env); i++) {
22157 		sl = env->explored_states[i];
22158 
22159 		while (sl) {
22160 			sln = sl->next;
22161 			free_verifier_state(&sl->state, false);
22162 			kfree(sl);
22163 			sl = sln;
22164 		}
22165 		env->explored_states[i] = NULL;
22166 	}
22167 }
22168 
22169 static int do_check_common(struct bpf_verifier_env *env, int subprog)
22170 {
22171 	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
22172 	struct bpf_subprog_info *sub = subprog_info(env, subprog);
22173 	struct bpf_verifier_state *state;
22174 	struct bpf_reg_state *regs;
22175 	int ret, i;
22176 
22177 	env->prev_linfo = NULL;
22178 	env->pass_cnt++;
22179 
22180 	state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
22181 	if (!state)
22182 		return -ENOMEM;
22183 	state->curframe = 0;
22184 	state->speculative = false;
22185 	state->branches = 1;
22186 	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
22187 	if (!state->frame[0]) {
22188 		kfree(state);
22189 		return -ENOMEM;
22190 	}
22191 	env->cur_state = state;
22192 	init_func_state(env, state->frame[0],
22193 			BPF_MAIN_FUNC /* callsite */,
22194 			0 /* frameno */,
22195 			subprog);
22196 	state->first_insn_idx = env->subprog_info[subprog].start;
22197 	state->last_insn_idx = -1;
22198 
22199 	regs = state->frame[state->curframe]->regs;
22200 	if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
22201 		const char *sub_name = subprog_name(env, subprog);
22202 		struct bpf_subprog_arg_info *arg;
22203 		struct bpf_reg_state *reg;
22204 
22205 		verbose(env, "Validating %s() func#%d...\n", sub_name, subprog);
22206 		ret = btf_prepare_func_args(env, subprog);
22207 		if (ret)
22208 			goto out;
22209 
22210 		if (subprog_is_exc_cb(env, subprog)) {
22211 			state->frame[0]->in_exception_callback_fn = true;
22212 			/* We have already ensured that the callback returns an integer, just
22213 			 * like all global subprogs. We need to determine it only has a single
22214 			 * scalar argument.
22215 			 */
22216 			if (sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_ANYTHING) {
22217 				verbose(env, "exception cb only supports single integer argument\n");
22218 				ret = -EINVAL;
22219 				goto out;
22220 			}
22221 		}
22222 		for (i = BPF_REG_1; i <= sub->arg_cnt; i++) {
22223 			arg = &sub->args[i - BPF_REG_1];
22224 			reg = &regs[i];
22225 
22226 			if (arg->arg_type == ARG_PTR_TO_CTX) {
22227 				reg->type = PTR_TO_CTX;
22228 				mark_reg_known_zero(env, regs, i);
22229 			} else if (arg->arg_type == ARG_ANYTHING) {
22230 				reg->type = SCALAR_VALUE;
22231 				mark_reg_unknown(env, regs, i);
22232 			} else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) {
22233 				/* assume unspecial LOCAL dynptr type */
22234 				__mark_dynptr_reg(reg, BPF_DYNPTR_TYPE_LOCAL, true, ++env->id_gen);
22235 			} else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
22236 				reg->type = PTR_TO_MEM;
22237 				if (arg->arg_type & PTR_MAYBE_NULL)
22238 					reg->type |= PTR_MAYBE_NULL;
22239 				mark_reg_known_zero(env, regs, i);
22240 				reg->mem_size = arg->mem_size;
22241 				reg->id = ++env->id_gen;
22242 			} else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
22243 				reg->type = PTR_TO_BTF_ID;
22244 				if (arg->arg_type & PTR_MAYBE_NULL)
22245 					reg->type |= PTR_MAYBE_NULL;
22246 				if (arg->arg_type & PTR_UNTRUSTED)
22247 					reg->type |= PTR_UNTRUSTED;
22248 				if (arg->arg_type & PTR_TRUSTED)
22249 					reg->type |= PTR_TRUSTED;
22250 				mark_reg_known_zero(env, regs, i);
22251 				reg->btf = bpf_get_btf_vmlinux(); /* can't fail at this point */
22252 				reg->btf_id = arg->btf_id;
22253 				reg->id = ++env->id_gen;
22254 			} else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
22255 				/* caller can pass either PTR_TO_ARENA or SCALAR */
22256 				mark_reg_unknown(env, regs, i);
22257 			} else {
22258 				WARN_ONCE(1, "BUG: unhandled arg#%d type %d\n",
22259 					  i - BPF_REG_1, arg->arg_type);
22260 				ret = -EFAULT;
22261 				goto out;
22262 			}
22263 		}
22264 	} else {
22265 		/* if the main BPF program has associated BTF info, validate
22266 		 * that it matches the expected signature, and otherwise mark
22267 		 * the BTF info for the main program as unreliable
22268 		 */
22269 		if (env->prog->aux->func_info_aux) {
22270 			ret = btf_prepare_func_args(env, 0);
22271 			if (ret || sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_PTR_TO_CTX)
22272 				env->prog->aux->func_info_aux[0].unreliable = true;
22273 		}
22274 
22275 		/* 1st arg to a function */
22276 		regs[BPF_REG_1].type = PTR_TO_CTX;
22277 		mark_reg_known_zero(env, regs, BPF_REG_1);
22278 	}
22279 
22280 	ret = do_check(env);
22281 out:
22282 	/* The check for NULL is necessary, since cur_state can be freed inside
22283 	 * do_check() under memory pressure.
22284 	 */
22285 	if (env->cur_state) {
22286 		free_verifier_state(env->cur_state, true);
22287 		env->cur_state = NULL;
22288 	}
22289 	while (!pop_stack(env, NULL, NULL, false));
22290 	if (!ret && pop_log)
22291 		bpf_vlog_reset(&env->log, 0);
22292 	free_states(env);
22293 	return ret;
22294 }
22295 
22296 /* Lazily verify all global functions based on their BTF, if they are called
22297  * from the main BPF program or any of its subprograms, transitively.
22298  * BPF global subprogs called from dead code are not validated.
22299  * All callable global functions must pass verification.
22300  * Otherwise the whole program is rejected.
22301  * Consider:
22302  * int bar(int);
22303  * int foo(int f)
22304  * {
22305  *    return bar(f);
22306  * }
22307  * int bar(int b)
22308  * {
22309  *    ...
22310  * }
22311  * foo() will be verified first for R1=any_scalar_value. During verification it
22312  * will be assumed that bar() already verified successfully and call to bar()
22313  * from foo() will be checked for type match only. Later bar() will be verified
22314  * independently to check that it's safe for R1=any_scalar_value.
22315  */
22316 static int do_check_subprogs(struct bpf_verifier_env *env)
22317 {
22318 	struct bpf_prog_aux *aux = env->prog->aux;
22319 	struct bpf_func_info_aux *sub_aux;
22320 	int i, ret, new_cnt;
22321 
22322 	if (!aux->func_info)
22323 		return 0;
22324 
22325 	/* exception callback is presumed to be always called */
22326 	if (env->exception_callback_subprog)
22327 		subprog_aux(env, env->exception_callback_subprog)->called = true;
22328 
22329 again:
22330 	new_cnt = 0;
22331 	for (i = 1; i < env->subprog_cnt; i++) {
22332 		if (!subprog_is_global(env, i))
22333 			continue;
22334 
22335 		sub_aux = subprog_aux(env, i);
22336 		if (!sub_aux->called || sub_aux->verified)
22337 			continue;
22338 
22339 		env->insn_idx = env->subprog_info[i].start;
22340 		WARN_ON_ONCE(env->insn_idx == 0);
22341 		ret = do_check_common(env, i);
22342 		if (ret) {
22343 			return ret;
22344 		} else if (env->log.level & BPF_LOG_LEVEL) {
22345 			verbose(env, "Func#%d ('%s') is safe for any args that match its prototype\n",
22346 				i, subprog_name(env, i));
22347 		}
22348 
22349 		/* We verified a new global subprog; it might have called some
22350 		 * more global subprogs that we haven't verified yet, so we
22351 		 * need to do another pass over subprogs to verify those.
22352 		 */
22353 		sub_aux->verified = true;
22354 		new_cnt++;
22355 	}
22356 
22357 	/* We can't loop forever as we verify at least one global subprog on
22358 	 * each pass.
22359 	 */
22360 	if (new_cnt)
22361 		goto again;
22362 
22363 	return 0;
22364 }
22365 
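/* Verify the main program (subprog 0) starting at instruction 0 and, on
 * success, record its stack depth in prog->aux->stack_depth.
 */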
22366 static int do_check_main(struct bpf_verifier_env *env)
22367 {
22368 	int ret;
22369 
22370 	env->insn_idx = 0;
22371 	ret = do_check_common(env, 0);
22372 	if (!ret)
22373 		env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
22374 	return ret;
22375 }
22376 
22377 
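/* Dump verification statistics to the log: verification time and per-subprog
 * stack depths when BPF_LOG_STATS is requested, followed by the processed
 * instruction count and state-tracking counters.
 */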
22378 static void print_verification_stats(struct bpf_verifier_env *env)
22379 {
22380 	int i;
22381 
22382 	if (env->log.level & BPF_LOG_STATS) {
22383 		verbose(env, "verification time %lld usec\n",
22384 			div_u64(env->verification_time, 1000));
22385 		verbose(env, "stack depth ");
22386 		for (i = 0; i < env->subprog_cnt; i++) {
22387 			u32 depth = env->subprog_info[i].stack_depth;
22388 
22389 			verbose(env, "%d", depth);
22390 			if (i + 1 < env->subprog_cnt)
22391 				verbose(env, "+");
22392 		}
22393 		verbose(env, "\n");
22394 	}
22395 	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
22396 		"total_states %d peak_states %d mark_read %d\n",
22397 		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
22398 		env->max_states_per_insn, env->total_states,
22399 		env->peak_states, env->longest_mark_read_walk);
22400 }
22401 
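/* For BPF_PROG_TYPE_STRUCT_OPS programs: resolve attach_btf_id to a
 * registered struct_ops type, validate the member selected by
 * expected_attach_type, and install its func_proto, ctx arg info and
 * verifier_ops for the rest of verification.
 */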
22402 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
22403 {
22404 	const struct btf_type *t, *func_proto;
22405 	const struct bpf_struct_ops_desc *st_ops_desc;
22406 	const struct bpf_struct_ops *st_ops;
22407 	const struct btf_member *member;
22408 	struct bpf_prog *prog = env->prog;
22409 	u32 btf_id, member_idx;
22410 	struct btf *btf;
22411 	const char *mname;
22412 	int err;
22413 
22414 	if (!prog->gpl_compatible) {
22415 		verbose(env, "struct ops programs must have a GPL compatible license\n");
22416 		return -EINVAL;
22417 	}
22418 
22419 	if (!prog->aux->attach_btf_id)
22420 		return -ENOTSUPP;
22421 
22422 	btf = prog->aux->attach_btf;
22423 	if (btf_is_module(btf)) {
22424 		/* Make sure st_ops is valid through the lifetime of env */
22425 		env->attach_btf_mod = btf_try_get_module(btf);
22426 		if (!env->attach_btf_mod) {
22427 			verbose(env, "struct_ops module %s is not found\n",
22428 				btf_get_name(btf));
22429 			return -ENOTSUPP;
22430 		}
22431 	}
22432 
22433 	btf_id = prog->aux->attach_btf_id;
22434 	st_ops_desc = bpf_struct_ops_find(btf, btf_id);
22435 	if (!st_ops_desc) {
22436 		verbose(env, "attach_btf_id %u is not a supported struct\n",
22437 			btf_id);
22438 		return -ENOTSUPP;
22439 	}
22440 	st_ops = st_ops_desc->st_ops;
22441 
22442 	t = st_ops_desc->type;
22443 	member_idx = prog->expected_attach_type;
22444 	if (member_idx >= btf_type_vlen(t)) {
22445 		verbose(env, "attach to invalid member idx %u of struct %s\n",
22446 			member_idx, st_ops->name);
22447 		return -EINVAL;
22448 	}
22449 
22450 	member = &btf_type_member(t)[member_idx];
22451 	mname = btf_name_by_offset(btf, member->name_off);
22452 	func_proto = btf_type_resolve_func_ptr(btf, member->type,
22453 					       NULL);
22454 	if (!func_proto) {
22455 		verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
22456 			mname, member_idx, st_ops->name);
22457 		return -EINVAL;
22458 	}
22459 
22460 	err = bpf_struct_ops_supported(st_ops, __btf_member_bit_offset(t, member) / 8);
22461 	if (err) {
22462 		verbose(env, "attach to unsupported member %s of struct %s\n",
22463 			mname, st_ops->name);
22464 		return err;
22465 	}
22466 
22467 	if (st_ops->check_member) {
22468 		err = st_ops->check_member(t, member, prog);
22469 
22470 		if (err) {
22471 			verbose(env, "attach to unsupported member %s of struct %s\n",
22472 				mname, st_ops->name);
22473 			return err;
22474 		}
22475 	}
22476 
22477 	if (prog->aux->priv_stack_requested && !bpf_jit_supports_private_stack()) {
22478 		verbose(env, "Private stack not supported by jit\n");
22479 		return -EACCES;
22480 	}
22481 
22482 	/* btf_ctx_access() uses this to provide argument type info */
22483 	prog->aux->ctx_arg_info =
22484 		st_ops_desc->arg_info[member_idx].info;
22485 	prog->aux->ctx_arg_info_size =
22486 		st_ops_desc->arg_info[member_idx].cnt;
22487 
22488 	prog->aux->attach_func_proto = func_proto;
22489 	prog->aux->attach_func_name = mname;
22490 	env->ops = st_ops->verifier_ops;
22491 
22492 	return 0;
22493 }
22494 #define SECURITY_PREFIX "security_"
22495 
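/* BPF_MODIFY_RETURN may only attach to functions on the error injection
 * list or to security_*() hooks.
 */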
22496 static int check_attach_modify_return(unsigned long addr, const char *func_name)
22497 {
22498 	if (within_error_injection_list(addr) ||
22499 	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
22500 		return 0;
22501 
22502 	return -EINVAL;
22503 }
22504 
22505 /* list of non-sleepable functions that are otherwise on
22506  * the ALLOW_ERROR_INJECTION list
22507  */
22508 BTF_SET_START(btf_non_sleepable_error_inject)
22509 /* The three functions below can be called from both sleepable and non-sleepable context.
22510  * Assume non-sleepable from a BPF safety point of view.
22511  */
22512 BTF_ID(func, __filemap_add_folio)
22513 #ifdef CONFIG_FAIL_PAGE_ALLOC
22514 BTF_ID(func, should_fail_alloc_page)
22515 #endif
22516 #ifdef CONFIG_FAILSLAB
22517 BTF_ID(func, should_failslab)
22518 #endif
22519 BTF_SET_END(btf_non_sleepable_error_inject)
22520 
22521 static int check_non_sleepable_error_inject(u32 btf_id)
22522 {
22523 	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
22524 }
22525 
22526 int bpf_check_attach_target(struct bpf_verifier_log *log,
22527 			    const struct bpf_prog *prog,
22528 			    const struct bpf_prog *tgt_prog,
22529 			    u32 btf_id,
22530 			    struct bpf_attach_target_info *tgt_info)
22531 {
22532 	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
22533 	bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING;
22534 	char trace_symbol[KSYM_SYMBOL_LEN];
22535 	const char prefix[] = "btf_trace_";
22536 	struct bpf_raw_event_map *btp;
22537 	int ret = 0, subprog = -1, i;
22538 	const struct btf_type *t;
22539 	bool conservative = true;
22540 	const char *tname, *fname;
22541 	struct btf *btf;
22542 	long addr = 0;
22543 	struct module *mod = NULL;
22544 
22545 	if (!btf_id) {
22546 		bpf_log(log, "Tracing programs must provide btf_id\n");
22547 		return -EINVAL;
22548 	}
22549 	btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
22550 	if (!btf) {
22551 		bpf_log(log,
22552 			"FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
22553 		return -EINVAL;
22554 	}
22555 	t = btf_type_by_id(btf, btf_id);
22556 	if (!t) {
22557 		bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
22558 		return -EINVAL;
22559 	}
22560 	tname = btf_name_by_offset(btf, t->name_off);
22561 	if (!tname) {
22562 		bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
22563 		return -EINVAL;
22564 	}
22565 	if (tgt_prog) {
22566 		struct bpf_prog_aux *aux = tgt_prog->aux;
22567 		bool tgt_changes_pkt_data;
22568 
22569 		if (bpf_prog_is_dev_bound(prog->aux) &&
22570 		    !bpf_prog_dev_bound_match(prog, tgt_prog)) {
22571 			bpf_log(log, "Target program bound device mismatch\n");
22572 			return -EINVAL;
22573 		}
22574 
22575 		for (i = 0; i < aux->func_info_cnt; i++)
22576 			if (aux->func_info[i].type_id == btf_id) {
22577 				subprog = i;
22578 				break;
22579 			}
22580 		if (subprog == -1) {
22581 			bpf_log(log, "Subprog %s doesn't exist\n", tname);
22582 			return -EINVAL;
22583 		}
22584 		if (aux->func && aux->func[subprog]->aux->exception_cb) {
22585 			bpf_log(log,
22586 				"%s programs cannot attach to exception callback\n",
22587 				prog_extension ? "Extension" : "FENTRY/FEXIT");
22588 			return -EINVAL;
22589 		}
22590 		conservative = aux->func_info_aux[subprog].unreliable;
22591 		if (prog_extension) {
22592 			if (conservative) {
22593 				bpf_log(log,
22594 					"Cannot replace static functions\n");
22595 				return -EINVAL;
22596 			}
22597 			if (!prog->jit_requested) {
22598 				bpf_log(log,
22599 					"Extension programs should be JITed\n");
22600 				return -EINVAL;
22601 			}
22602 			tgt_changes_pkt_data = aux->func
22603 					       ? aux->func[subprog]->aux->changes_pkt_data
22604 					       : aux->changes_pkt_data;
22605 			if (prog->aux->changes_pkt_data && !tgt_changes_pkt_data) {
22606 				bpf_log(log,
22607 					"Extension program changes packet data, while original does not\n");
22608 				return -EINVAL;
22609 			}
22610 		}
22611 		if (!tgt_prog->jited) {
22612 			bpf_log(log, "Can attach to only JITed progs\n");
22613 			return -EINVAL;
22614 		}
22615 		if (prog_tracing) {
22616 			if (aux->attach_tracing_prog) {
22617 				/*
22618 				 * Target program is an fentry/fexit which is already attached
22619 				 * to another tracing program. More levels of nesting
22620 				 * attachment are not allowed.
22621 				 */
22622 				bpf_log(log, "Cannot nest tracing program attach more than once\n");
22623 				return -EINVAL;
22624 			}
22625 		} else if (tgt_prog->type == prog->type) {
22626 			/*
22627 			 * To avoid potential call chain cycles, prevent attaching of a
22628 			 * program extension to another extension. It's ok to attach
22629 			 * fentry/fexit to extension program.
22630 			 */
22631 			bpf_log(log, "Cannot recursively attach\n");
22632 			return -EINVAL;
22633 		}
22634 		if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
22635 		    prog_extension &&
22636 		    (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
22637 		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
22638 			/* Program extensions can extend all program types
22639 			 * except fentry/fexit, for the following reason.
22640 			 * The fentry/fexit programs are used for performance
22641 			 * analysis and stats and can be attached to any program
22642 			 * type. When an extension program replaces an XDP function,
22643 			 * it is necessary to allow performance analysis of all
22644 			 * functions: both the original XDP program and its
22645 			 * extension. Hence attaching fentry/fexit to
22646 			 * BPF_PROG_TYPE_EXT is allowed. If extending
22647 			 * fentry/fexit were allowed, it would be possible to create
22648 			 * a long call chain fentry->extension->fentry->extension
22649 			 * beyond reasonable stack size. Hence extending fentry
22650 			 * is not allowed.
22651 			 */
22652 			bpf_log(log, "Cannot extend fentry/fexit\n");
22653 			return -EINVAL;
22654 		}
22655 	} else {
22656 		if (prog_extension) {
22657 			bpf_log(log, "Cannot replace kernel functions\n");
22658 			return -EINVAL;
22659 		}
22660 	}
22661 
22662 	switch (prog->expected_attach_type) {
22663 	case BPF_TRACE_RAW_TP:
22664 		if (tgt_prog) {
22665 			bpf_log(log,
22666 				"Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
22667 			return -EINVAL;
22668 		}
22669 		if (!btf_type_is_typedef(t)) {
22670 			bpf_log(log, "attach_btf_id %u is not a typedef\n",
22671 				btf_id);
22672 			return -EINVAL;
22673 		}
22674 		if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
22675 			bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
22676 				btf_id, tname);
22677 			return -EINVAL;
22678 		}
22679 		tname += sizeof(prefix) - 1;
22680 
22681 		/* The func_proto of "btf_trace_##tname" is generated from a typedef without argument
22682 		 * names, so use bpf_raw_event_map to get the argument names.
22683 		 */
22684 		btp = bpf_get_raw_tracepoint(tname);
22685 		if (!btp)
22686 			return -EINVAL;
22687 		fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL,
22688 					trace_symbol);
22689 		bpf_put_raw_tracepoint(btp);
22690 
22691 		if (fname)
22692 			ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC);
22693 
22694 		if (!fname || ret < 0) {
22695 			bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n",
22696 				prefix, tname);
22697 			t = btf_type_by_id(btf, t->type);
22698 			if (!btf_type_is_ptr(t))
22699 				/* should never happen in valid vmlinux build */
22700 				return -EINVAL;
22701 		} else {
22702 			t = btf_type_by_id(btf, ret);
22703 			if (!btf_type_is_func(t))
22704 				/* should never happen in valid vmlinux build */
22705 				return -EINVAL;
22706 		}
22707 
22708 		t = btf_type_by_id(btf, t->type);
22709 		if (!btf_type_is_func_proto(t))
22710 			/* should never happen in valid vmlinux build */
22711 			return -EINVAL;
22712 
22713 		break;
22714 	case BPF_TRACE_ITER:
22715 		if (!btf_type_is_func(t)) {
22716 			bpf_log(log, "attach_btf_id %u is not a function\n",
22717 				btf_id);
22718 			return -EINVAL;
22719 		}
22720 		t = btf_type_by_id(btf, t->type);
22721 		if (!btf_type_is_func_proto(t))
22722 			return -EINVAL;
22723 		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
22724 		if (ret)
22725 			return ret;
22726 		break;
22727 	default:
22728 		if (!prog_extension)
22729 			return -EINVAL;
22730 		fallthrough;
22731 	case BPF_MODIFY_RETURN:
22732 	case BPF_LSM_MAC:
22733 	case BPF_LSM_CGROUP:
22734 	case BPF_TRACE_FENTRY:
22735 	case BPF_TRACE_FEXIT:
22736 		if (!btf_type_is_func(t)) {
22737 			bpf_log(log, "attach_btf_id %u is not a function\n",
22738 				btf_id);
22739 			return -EINVAL;
22740 		}
22741 		if (prog_extension &&
22742 		    btf_check_type_match(log, prog, btf, t))
22743 			return -EINVAL;
22744 		t = btf_type_by_id(btf, t->type);
22745 		if (!btf_type_is_func_proto(t))
22746 			return -EINVAL;
22747 
22748 		if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
22749 		    (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
22750 		     prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
22751 			return -EINVAL;
22752 
22753 		if (tgt_prog && conservative)
22754 			t = NULL;
22755 
22756 		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
22757 		if (ret < 0)
22758 			return ret;
22759 
22760 		if (tgt_prog) {
22761 			if (subprog == 0)
22762 				addr = (long) tgt_prog->bpf_func;
22763 			else
22764 				addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
22765 		} else {
22766 			if (btf_is_module(btf)) {
22767 				mod = btf_try_get_module(btf);
22768 				if (mod)
22769 					addr = find_kallsyms_symbol_value(mod, tname);
22770 				else
22771 					addr = 0;
22772 			} else {
22773 				addr = kallsyms_lookup_name(tname);
22774 			}
22775 			if (!addr) {
22776 				module_put(mod);
22777 				bpf_log(log,
22778 					"The address of function %s cannot be found\n",
22779 					tname);
22780 				return -ENOENT;
22781 			}
22782 		}
22783 
22784 		if (prog->sleepable) {
22785 			ret = -EINVAL;
22786 			switch (prog->type) {
22787 			case BPF_PROG_TYPE_TRACING:
22788 
22789 				/* fentry/fexit/fmod_ret progs can be sleepable if they are
22790 				 * attached to functions on the ALLOW_ERROR_INJECTION list and are not in the denylist above.
22791 				 */
22792 				if (!check_non_sleepable_error_inject(btf_id) &&
22793 				    within_error_injection_list(addr))
22794 					ret = 0;
22795 				/* fentry/fexit/fmod_ret progs can also be sleepable if they are
22796 				 * in the fmodret id set with the KF_SLEEPABLE flag.
22797 				 */
22798 				else {
22799 					u32 *flags = btf_kfunc_is_modify_return(btf, btf_id,
22800 										prog);
22801 
22802 					if (flags && (*flags & KF_SLEEPABLE))
22803 						ret = 0;
22804 				}
22805 				break;
22806 			case BPF_PROG_TYPE_LSM:
22807 				/* LSM progs check that they are attached to bpf_lsm_*() funcs.
22808 				 * Only some of them are sleepable.
22809 				 */
22810 				if (bpf_lsm_is_sleepable_hook(btf_id))
22811 					ret = 0;
22812 				break;
22813 			default:
22814 				break;
22815 			}
22816 			if (ret) {
22817 				module_put(mod);
22818 				bpf_log(log, "%s is not sleepable\n", tname);
22819 				return ret;
22820 			}
22821 		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
22822 			if (tgt_prog) {
22823 				module_put(mod);
22824 				bpf_log(log, "can't modify return codes of BPF programs\n");
22825 				return -EINVAL;
22826 			}
22827 			ret = -EINVAL;
22828 			if (btf_kfunc_is_modify_return(btf, btf_id, prog) ||
22829 			    !check_attach_modify_return(addr, tname))
22830 				ret = 0;
22831 			if (ret) {
22832 				module_put(mod);
22833 				bpf_log(log, "%s() is not modifiable\n", tname);
22834 				return ret;
22835 			}
22836 		}
22837 
22838 		break;
22839 	}
22840 	tgt_info->tgt_addr = addr;
22841 	tgt_info->tgt_name = tname;
22842 	tgt_info->tgt_type = t;
22843 	tgt_info->tgt_mod = mod;
22844 	return 0;
22845 }
22846 
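/* BTF IDs that tracing programs are not allowed to attach to (enforced in
 * check_attach_btf_id()). These helpers run as part of BPF program dispatch
 * itself, so the denial presumably avoids recursive attachment issues.
 */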
22847 BTF_SET_START(btf_id_deny)
22848 BTF_ID_UNUSED
22849 #ifdef CONFIG_SMP
22850 BTF_ID(func, migrate_disable)
22851 BTF_ID(func, migrate_enable)
22852 #endif
22853 #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
22854 BTF_ID(func, rcu_read_unlock_strict)
22855 #endif
22856 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
22857 BTF_ID(func, preempt_count_add)
22858 BTF_ID(func, preempt_count_sub)
22859 #endif
22860 #ifdef CONFIG_PREEMPT_RCU
22861 BTF_ID(func, __rcu_read_lock)
22862 BTF_ID(func, __rcu_read_unlock)
22863 #endif
22864 BTF_SET_END(btf_id_deny)
22865 
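/* Program/attach type combinations that are allowed to be loaded as sleepable. */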
22866 static bool can_be_sleepable(struct bpf_prog *prog)
22867 {
22868 	if (prog->type == BPF_PROG_TYPE_TRACING) {
22869 		switch (prog->expected_attach_type) {
22870 		case BPF_TRACE_FENTRY:
22871 		case BPF_TRACE_FEXIT:
22872 		case BPF_MODIFY_RETURN:
22873 		case BPF_TRACE_ITER:
22874 			return true;
22875 		default:
22876 			return false;
22877 		}
22878 	}
22879 	return prog->type == BPF_PROG_TYPE_LSM ||
22880 	       prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
22881 	       prog->type == BPF_PROG_TYPE_STRUCT_OPS;
22882 }
22883 
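/* Validate attach_btf_id for tracing, LSM, freplace and struct_ops programs,
 * resolve the attach target, and acquire the bpf_trampoline used for
 * trampoline-based attachments.
 */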
22884 static int check_attach_btf_id(struct bpf_verifier_env *env)
22885 {
22886 	struct bpf_prog *prog = env->prog;
22887 	struct bpf_prog *tgt_prog = prog->aux->dst_prog;
22888 	struct bpf_attach_target_info tgt_info = {};
22889 	u32 btf_id = prog->aux->attach_btf_id;
22890 	struct bpf_trampoline *tr;
22891 	int ret;
22892 	u64 key;
22893 
22894 	if (prog->type == BPF_PROG_TYPE_SYSCALL) {
22895 		if (prog->sleepable)
22896 			/* attach_btf_id checked to be zero already */
22897 			return 0;
22898 		verbose(env, "Syscall programs can only be sleepable\n");
22899 		return -EINVAL;
22900 	}
22901 
22902 	if (prog->sleepable && !can_be_sleepable(prog)) {
22903 		verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
22904 		return -EINVAL;
22905 	}
22906 
22907 	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
22908 		return check_struct_ops_btf_id(env);
22909 
22910 	if (prog->type != BPF_PROG_TYPE_TRACING &&
22911 	    prog->type != BPF_PROG_TYPE_LSM &&
22912 	    prog->type != BPF_PROG_TYPE_EXT)
22913 		return 0;
22914 
22915 	ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
22916 	if (ret)
22917 		return ret;
22918 
22919 	if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
22920 		/* to make freplace programs equivalent to their targets, they need to
22921 		 * inherit env->ops and expected_attach_type for the rest of the
22922 		 * verification
22923 		 */
22924 		env->ops = bpf_verifier_ops[tgt_prog->type];
22925 		prog->expected_attach_type = tgt_prog->expected_attach_type;
22926 	}
22927 
22928 	/* store info about the attachment target that will be used later */
22929 	prog->aux->attach_func_proto = tgt_info.tgt_type;
22930 	prog->aux->attach_func_name = tgt_info.tgt_name;
22931 	prog->aux->mod = tgt_info.tgt_mod;
22932 
22933 	if (tgt_prog) {
22934 		prog->aux->saved_dst_prog_type = tgt_prog->type;
22935 		prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
22936 	}
22937 
22938 	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
22939 		prog->aux->attach_btf_trace = true;
22940 		return 0;
22941 	} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
22942 		if (!bpf_iter_prog_supported(prog))
22943 			return -EINVAL;
22944 		return 0;
22945 	}
22946 
22947 	if (prog->type == BPF_PROG_TYPE_LSM) {
22948 		ret = bpf_lsm_verify_prog(&env->log, prog);
22949 		if (ret < 0)
22950 			return ret;
22951 	} else if (prog->type == BPF_PROG_TYPE_TRACING &&
22952 		   btf_id_set_contains(&btf_id_deny, btf_id)) {
22953 		return -EINVAL;
22954 	}
22955 
22956 	key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
22957 	tr = bpf_trampoline_get(key, &tgt_info);
22958 	if (!tr)
22959 		return -ENOMEM;
22960 
22961 	if (tgt_prog && tgt_prog->aux->tail_call_reachable)
22962 		tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX;
22963 
22964 	prog->aux->dst_trampoline = tr;
22965 	return 0;
22966 }
22967 
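/* Lazily parse vmlinux BTF on first use; serialized by bpf_verifier_lock. */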
22968 struct btf *bpf_get_btf_vmlinux(void)
22969 {
22970 	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
22971 		mutex_lock(&bpf_verifier_lock);
22972 		if (!btf_vmlinux)
22973 			btf_vmlinux = btf_parse_vmlinux();
22974 		mutex_unlock(&bpf_verifier_lock);
22975 	}
22976 	return btf_vmlinux;
22977 }
22978 
22979 /*
22980  * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In
22981  * this case expect that every file descriptor in the array is either a map or
22982  * a BTF. Everything else is considered to be trash.
22983  */
22984 static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
22985 {
22986 	struct bpf_map *map;
22987 	struct btf *btf;
22988 	CLASS(fd, f)(fd);
22989 	int err;
22990 
22991 	map = __bpf_map_get(f);
22992 	if (!IS_ERR(map)) {
22993 		err = __add_used_map(env, map);
22994 		if (err < 0)
22995 			return err;
22996 		return 0;
22997 	}
22998 
22999 	btf = __btf_get_by_fd(f);
23000 	if (!IS_ERR(btf)) {
23001 		err = __add_used_btf(env, btf);
23002 		if (err < 0)
23003 			return err;
23004 		return 0;
23005 	}
23006 
23007 	verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
23008 	return PTR_ERR(map);
23009 }
23010 
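/* Record the user-supplied fd_array and, when fd_array_cnt is non-zero,
 * eagerly resolve every fd in it into a used map or BTF.
 */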
23011 static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
23012 {
23013 	size_t size = sizeof(int);
23014 	int ret;
23015 	int fd;
23016 	u32 i;
23017 
23018 	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
23019 
23020 	/*
23021 	 * The only difference between old (no fd_array_cnt is given) and new
23022 	 * APIs is that in the latter case the fd_array is expected to be
23023 	 * contiguous and is scanned for map and BTF fds right away
23024 	 */
23025 	if (!attr->fd_array_cnt)
23026 		return 0;
23027 
23028 	/* Check for integer overflow */
23029 	if (attr->fd_array_cnt >= (U32_MAX / size)) {
23030 		verbose(env, "fd_array_cnt is too big (%u)\n", attr->fd_array_cnt);
23031 		return -EINVAL;
23032 	}
23033 
23034 	for (i = 0; i < attr->fd_array_cnt; i++) {
23035 		if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size))
23036 			return -EFAULT;
23037 
23038 		ret = add_fd_from_fd_array(env, fd);
23039 		if (ret)
23040 			return ret;
23041 	}
23042 
23043 	return 0;
23044 }
23045 
23046 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
23047 {
23048 	u64 start_time = ktime_get_ns();
23049 	struct bpf_verifier_env *env;
23050 	int i, len, ret = -EINVAL, err;
23051 	u32 log_true_size;
23052 	bool is_priv;
23053 
23054 	/* no program is valid */
23055 	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
23056 		return -EINVAL;
23057 
23058 	/* 'struct bpf_verifier_env' can be global, but since it's not small,
23059 	 * allocate/free it every time bpf_check() is called
23060 	 */
23061 	env = kvzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
23062 	if (!env)
23063 		return -ENOMEM;
23064 
23065 	env->bt.env = env;
23066 
23067 	len = (*prog)->len;
23068 	env->insn_aux_data =
23069 		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
23070 	ret = -ENOMEM;
23071 	if (!env->insn_aux_data)
23072 		goto err_free_env;
23073 	for (i = 0; i < len; i++)
23074 		env->insn_aux_data[i].orig_idx = i;
23075 	env->prog = *prog;
23076 	env->ops = bpf_verifier_ops[env->prog->type];
23077 
23078 	env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token);
23079 	env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token);
23080 	env->bypass_spec_v1 = bpf_bypass_spec_v1(env->prog->aux->token);
23081 	env->bypass_spec_v4 = bpf_bypass_spec_v4(env->prog->aux->token);
23082 	env->bpf_capable = is_priv = bpf_token_capable(env->prog->aux->token, CAP_BPF);
23083 
23084 	bpf_get_btf_vmlinux();
23085 
23086 	/* grab the mutex to protect a few globals used by the verifier */
23087 	if (!is_priv)
23088 		mutex_lock(&bpf_verifier_lock);
23089 
23090 	/* user could have requested verbose verifier output
23091 	 * and supplied a buffer to store the verification trace
23092 	 */
23093 	ret = bpf_vlog_init(&env->log, attr->log_level,
23094 			    (char __user *) (unsigned long) attr->log_buf,
23095 			    attr->log_size);
23096 	if (ret)
23097 		goto err_unlock;
23098 
23099 	ret = process_fd_array(env, attr, uattr);
23100 	if (ret)
23101 		goto skip_full_check;
23102 
23103 	mark_verifier_state_clean(env);
23104 
23105 	if (IS_ERR(btf_vmlinux)) {
23106 		/* Either gcc or pahole or the kernel is broken. */
23107 		verbose(env, "in-kernel BTF is malformed\n");
23108 		ret = PTR_ERR(btf_vmlinux);
23109 		goto skip_full_check;
23110 	}
23111 
23112 	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
23113 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
23114 		env->strict_alignment = true;
23115 	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
23116 		env->strict_alignment = false;
23117 
23118 	if (is_priv)
23119 		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
23120 	env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS;
23121 
23122 	env->explored_states = kvcalloc(state_htab_size(env),
23123 				       sizeof(struct bpf_verifier_state_list *),
23124 				       GFP_USER);
23125 	ret = -ENOMEM;
23126 	if (!env->explored_states)
23127 		goto skip_full_check;
23128 
23129 	ret = check_btf_info_early(env, attr, uattr);
23130 	if (ret < 0)
23131 		goto skip_full_check;
23132 
23133 	ret = add_subprog_and_kfunc(env);
23134 	if (ret < 0)
23135 		goto skip_full_check;
23136 
23137 	ret = check_subprogs(env);
23138 	if (ret < 0)
23139 		goto skip_full_check;
23140 
23141 	ret = check_btf_info(env, attr, uattr);
23142 	if (ret < 0)
23143 		goto skip_full_check;
23144 
23145 	ret = resolve_pseudo_ldimm64(env);
23146 	if (ret < 0)
23147 		goto skip_full_check;
23148 
23149 	if (bpf_prog_is_offloaded(env->prog->aux)) {
23150 		ret = bpf_prog_offload_verifier_prep(env->prog);
23151 		if (ret)
23152 			goto skip_full_check;
23153 	}
23154 
23155 	ret = check_cfg(env);
23156 	if (ret < 0)
23157 		goto skip_full_check;
23158 
23159 	ret = check_attach_btf_id(env);
23160 	if (ret)
23161 		goto skip_full_check;
23162 
23163 	ret = mark_fastcall_patterns(env);
23164 	if (ret < 0)
23165 		goto skip_full_check;
23166 
23167 	ret = do_check_main(env);
23168 	ret = ret ?: do_check_subprogs(env);
23169 
23170 	if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
23171 		ret = bpf_prog_offload_finalize(env);
23172 
23173 skip_full_check:
23174 	kvfree(env->explored_states);
23175 
23176 	/* this might decrease stack depth, so keep it before passes that
23177 	 * allocate additional stack slots.
23178 	 */
23179 	if (ret == 0)
23180 		ret = remove_fastcall_spills_fills(env);
23181 
23182 	if (ret == 0)
23183 		ret = check_max_stack_depth(env);
23184 
23185 	/* instruction rewrites happen after this point */
23186 	if (ret == 0)
23187 		ret = optimize_bpf_loop(env);
23188 
23189 	if (is_priv) {
23190 		if (ret == 0)
23191 			opt_hard_wire_dead_code_branches(env);
23192 		if (ret == 0)
23193 			ret = opt_remove_dead_code(env);
23194 		if (ret == 0)
23195 			ret = opt_remove_nops(env);
23196 	} else {
23197 		if (ret == 0)
23198 			sanitize_dead_code(env);
23199 	}
23200 
23201 	if (ret == 0)
23202 		/* program is valid, convert *(u32*)(ctx + off) accesses */
23203 		ret = convert_ctx_accesses(env);
23204 
23205 	if (ret == 0)
23206 		ret = do_misc_fixups(env);
23207 
23208 	/* do 32-bit optimization after insn patching is done, so that the patched
23209 	 * insns are handled correctly.
23210 	 */
23211 	if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
23212 		ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
23213 		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
23214 								     : false;
23215 	}
23216 
23217 	if (ret == 0)
23218 		ret = fixup_call_args(env);
23219 
23220 	env->verification_time = ktime_get_ns() - start_time;
23221 	print_verification_stats(env);
23222 	env->prog->aux->verified_insns = env->insn_processed;
23223 
23224 	/* preserve original error even if log finalization is successful */
23225 	err = bpf_vlog_finalize(&env->log, &log_true_size);
23226 	if (err)
23227 		ret = err;
23228 
23229 	if (uattr_size >= offsetofend(union bpf_attr, log_true_size) &&
23230 	    copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
23231 				  &log_true_size, sizeof(log_true_size))) {
23232 		ret = -EFAULT;
23233 		goto err_release_maps;
23234 	}
23235 
23236 	if (ret)
23237 		goto err_release_maps;
23238 
23239 	if (env->used_map_cnt) {
23240 		/* if program passed verifier, update used_maps in bpf_prog_info */
23241 		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
23242 							  sizeof(env->used_maps[0]),
23243 							  GFP_KERNEL);
23244 
23245 		if (!env->prog->aux->used_maps) {
23246 			ret = -ENOMEM;
23247 			goto err_release_maps;
23248 		}
23249 
23250 		memcpy(env->prog->aux->used_maps, env->used_maps,
23251 		       sizeof(env->used_maps[0]) * env->used_map_cnt);
23252 		env->prog->aux->used_map_cnt = env->used_map_cnt;
23253 	}
23254 	if (env->used_btf_cnt) {
23255 		/* if program passed verifier, update used_btfs in bpf_prog_aux */
23256 		env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
23257 							  sizeof(env->used_btfs[0]),
23258 							  GFP_KERNEL);
23259 		if (!env->prog->aux->used_btfs) {
23260 			ret = -ENOMEM;
23261 			goto err_release_maps;
23262 		}
23263 
23264 		memcpy(env->prog->aux->used_btfs, env->used_btfs,
23265 		       sizeof(env->used_btfs[0]) * env->used_btf_cnt);
23266 		env->prog->aux->used_btf_cnt = env->used_btf_cnt;
23267 	}
23268 	if (env->used_map_cnt || env->used_btf_cnt) {
23269 		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
23270 		 * bpf_ld_imm64 instructions
23271 		 */
23272 		convert_pseudo_ld_imm64(env);
23273 	}
23274 
23275 	adjust_btf_func(env);
23276 
23277 err_release_maps:
23278 	if (!env->prog->aux->used_maps)
23279 		/* if we didn't copy map pointers into bpf_prog_info, release
23280 		 * them now. Otherwise free_used_maps() will release them.
23281 		 */
23282 		release_maps(env);
23283 	if (!env->prog->aux->used_btfs)
23284 		release_btfs(env);
23285 
23286 	/* extension progs temporarily inherit the attach_type of their targets
23287 	 * for verification purposes, so set it back to zero before returning
23288 	 */
23289 	if (env->prog->type == BPF_PROG_TYPE_EXT)
23290 		env->prog->expected_attach_type = 0;
23291 
23292 	*prog = env->prog;
23293 
23294 	module_put(env->attach_btf_mod);
23295 err_unlock:
23296 	if (!is_priv)
23297 		mutex_unlock(&bpf_verifier_lock);
23298 	vfree(env->insn_aux_data);
23299 	kvfree(env->insn_hist);
23300 err_free_env:
23301 	kvfree(env);
23302 	return ret;
23303 }
23304