// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 * Authors:	Alexey Kuznetsov, <[email protected]>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <[email protected]> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>

/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than ram size).
	Really, this limit will never be reached
	if RED works correctly.
 */
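
/* Illustrative userspace configuration (a sketch, not taken from this file;
 * the iproute2 syntax and the numbers below are assumptions): values chosen
 * so the constraint above holds, i.e. limit > qth_max + burst:
 *
 *	tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *		avpkt 1000 burst 55 probability 0.02 bandwidth 10mbit \
 *		ecn adaptative
 */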

struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */

	unsigned char		flags;
	/* Non-flags in tc_red_qopt.flags. */
	unsigned char		userbits;

	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
	struct tcf_qevent	qe_early_drop;
	struct tcf_qevent	qe_mark;
};

#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

static int red_use_nodrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_NODROP;
}

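/* Enqueue path: update the average queue length (qavg) from the child's
 * backlog, then act on red_action()'s verdict.  RED_DONT_MARK queues the
 * packet unchanged; RED_PROB_MARK and RED_HARD_MARK either set the CE mark
 * (ECN mode, ECT packets) or drop early, subject to the harddrop and nodrop
 * flags.  Marked and early-dropped packets are run through the "mark" and
 * "early_drop" qevent blocks, whose filters may consume the skb.
 */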
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_CONGESTED;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	unsigned int len;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			q->stats.prob_mark++;
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;

	case RED_HARD_MARK:
		reason = SKB_DROP_REASON_QDISC_OVERLIMIT;
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			q->stats.forced_mark++;
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;
	}

	len = qdisc_pkt_len(skb);
	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		sch->qstats.backlog += len;
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
	if (!skb)
		return NET_XMIT_CN | ret;

	qdisc_drop_reason(skb, sch, to_free, reason);
	return NET_XMIT_CN;
}

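/* Dequeue from the child qdisc.  When the child runs empty, record the start
 * of an idle period so red_calc_qavg() can decay qavg for the time the link
 * sat idle before the next enqueue.
 */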
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	red_restart(&q->vars);
}

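/* Sync the qdisc with hardware offload via ndo_setup_tc(TC_SETUP_QDISC_RED):
 * TC_RED_REPLACE pushes the current parameters (thresholds converted back
 * from the Wlog fixed-point representation), TC_RED_DESTROY tears the
 * offloaded instance down.  Returns -EOPNOTSUPP when the device cannot
 * offload.
 */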
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.limit = q->limit;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.is_harddrop = red_use_harddrop(q);
		opt.set.is_nodrop = red_use_nodrop(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcf_qevent_destroy(&q->qe_mark, sch);
	tcf_qevent_destroy(&q->qe_early_drop, sch);
	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
	[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
	[TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
	[TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
};

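/* Common worker for init and change: validate the already-parsed attributes,
 * then apply them under the qdisc tree lock.  A non-zero limit allocates a
 * fresh bfifo child sized to that limit and swaps it in; the replaced child
 * is released only after the tree lock is dropped.  Offload state is
 * refreshed at the end.
 */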
static int __red_change(struct Qdisc *sch, struct nlattr **tb,
			struct netlink_ext_ack *extack)
{
	struct Qdisc *old_child = NULL, *child = NULL;
	struct red_sched_data *q = qdisc_priv(sch);
	struct nla_bitfield32 flags_bf;
	struct tc_red_qopt *ctl;
	unsigned char userbits;
	unsigned char flags;
	int err;
	u32 max_P;
	u8 *stab;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = nla_get_u32_default(tb[TCA_RED_MAX_P], 0);

	ctl = nla_data(tb[TCA_RED_PARMS]);
	stab = nla_data(tb[TCA_RED_STAB]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
			      ctl->Scell_log, stab))
		return -EINVAL;

	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
			    tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
			    &flags_bf, &userbits, extack);
	if (err)
		return err;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);

	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
	err = red_validate_flags(flags, extack);
	if (err)
		goto unlock_out;

	q->flags = flags;
	q->userbits = userbits;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_flush_backlog(q->qdisc);
		old_child = q->qdisc;
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      stab,
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);

	red_offload(sch, true);

	if (old_child)
		qdisc_put(old_child);
	return 0;

unlock_out:
	sch_tree_unlock(sch);
	if (child)
		qdisc_put(child);
	return err;
}

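/* Adaptive RED: when TC_RED_ADAPTATIVE is set, this timer fires roughly every
 * 500 ms and lets red_adaptative_algo() adjust max_P so the average queue
 * length tends to stay between the configured thresholds.
 */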
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock;

	rcu_read_lock();
	root_lock = qdisc_lock(qdisc_root_sleeping(sch));
	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
	rcu_read_unlock();
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	err = __red_change(sch, tb, extack);
	if (err)
		return err;

	err = tcf_qevent_init(&q->qe_early_drop, sch,
			      FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
			      tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		return err;

	return tcf_qevent_init(&q->qe_mark, sch,
			       FLOW_BLOCK_BINDER_TYPE_RED_MARK,
			       tb[TCA_RED_MARK_BLOCK], extack);
}

static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	err = tcf_qevent_validate_change(&q->qe_early_drop,
					 tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		return err;

	err = tcf_qevent_validate_change(&q->qe_mark,
					 tb[TCA_RED_MARK_BLOCK], extack);
	if (err)
		return err;

	return __red_change(sch, tb, extack);
}

static int red_dump_offload_stats(struct Qdisc *sch)
{
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
				  q->userbits,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
	    nla_put_bitfield32(skb, TCA_RED_FLAGS,
			       q->flags, TC_RED_SUPPORTED_FLAGS) ||
	    tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
	    tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = q->stats.prob_drop + q->stats.forced_drop;
	st.pdrop = q->stats.pdrop;
	st.marked = q->stats.prob_mark + q->stats.forced_mark;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static void red_graft_offload(struct Qdisc *sch,
			      struct Qdisc *new, struct Qdisc *old,
			      struct netlink_ext_ack *extack)
{
	struct tc_red_qopt_offload graft_offload = {
		.handle		= sch->handle,
		.parent		= sch->parent,
		.child_handle	= new->handle,
		.command	= TC_RED_GRAFT,
	};

	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
				   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);

	red_graft_offload(sch, new, *old, extack);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		tc_qdisc_stats_dump(sch, 1, walker);
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};
MODULE_ALIAS_NET_SCH("red");

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Random Early Detection qdisc");