// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:	ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than ram size).
	Really, this limit will never be reached
	if RED works correctly.
 */
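
/* Illustrative setup from userspace (example values only):
 *
 *	tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *		avpkt 1000 burst 55 ecn adaptative bandwidth 10Mbit \
 *		probability 0.02
 *
 * Note how limit (the hard byte limit enforced here) is chosen well above
 * max (qth_max) so that short bursts are queued rather than tail-dropped.
 */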

struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */

	unsigned char		flags;
	/* Non-flags in tc_red_qopt.flags. */
	unsigned char		userbits;

	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
	struct tcf_qevent	qe_early_drop;
	struct tcf_qevent	qe_mark;
};

#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)

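/* Helpers for the qdisc's flag byte: ECN marking instead of dropping,
 * forced drops above the upper threshold even with ECN, and "nodrop"
 * (enqueue rather than drop non-ECT packets that would have been marked).
 */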
static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

static int red_use_nodrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_NODROP;
}

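/* Enqueue path: refresh the EWMA average queue size from the child qdisc's
 * backlog, then let red_action() decide between queueing the packet
 * unchanged, probabilistically marking/dropping it, or unconditionally
 * marking/dropping it above the upper threshold.
 */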
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_CONGESTED;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	unsigned int len;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			q->stats.prob_mark++;
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;

	case RED_HARD_MARK:
		reason = SKB_DROP_REASON_QDISC_OVERLIMIT;
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			q->stats.forced_mark++;
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;
	}

	len = qdisc_pkt_len(skb);
	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		sch->qstats.backlog += len;
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
	if (!skb)
		return NET_XMIT_CN | ret;

	qdisc_drop_reason(skb, sch, to_free, reason);
	return NET_XMIT_CN;
}

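/* Dequeue from the child qdisc; when the child runs empty, record the start
 * of an idle period so the average queue size keeps decaying while the link
 * is unused.
 */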
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	red_restart(&q->vars);
}

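/* Propagate the current RED configuration to (or remove it from) capable
 * hardware via ndo_setup_tc(TC_SETUP_QDISC_RED).
 */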
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.limit = q->limit;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.is_harddrop = red_use_harddrop(q);
		opt.set.is_nodrop = red_use_nodrop(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcf_qevent_destroy(&q->qe_mark, sch);
	tcf_qevent_destroy(&q->qe_early_drop, sch);
	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
	[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
	[TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
	[TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
};

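/* Shared by init and change: validate the TCA_RED_* attributes, optionally
 * build a bfifo child sized to the byte limit, then update parameters,
 * flags and the adaptative timer under the tree lock.
 */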
static int __red_change(struct Qdisc *sch, struct nlattr **tb,
			struct netlink_ext_ack *extack)
{
	struct Qdisc *old_child = NULL, *child = NULL;
	struct red_sched_data *q = qdisc_priv(sch);
	struct nla_bitfield32 flags_bf;
	struct tc_red_qopt *ctl;
	unsigned char userbits;
	unsigned char flags;
	int err;
	u32 max_P;
	u8 *stab;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = nla_get_u32_default(tb[TCA_RED_MAX_P], 0);

	ctl = nla_data(tb[TCA_RED_PARMS]);
	stab = nla_data(tb[TCA_RED_STAB]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
			      ctl->Scell_log, stab))
		return -EINVAL;

	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
			    tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
			    &flags_bf, &userbits, extack);
	if (err)
		return err;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);

	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
	err = red_validate_flags(flags, extack);
	if (err)
		goto unlock_out;

	q->flags = flags;
	q->userbits = userbits;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_flush_backlog(q->qdisc);
		old_child = q->qdisc;
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      stab,
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);

	red_offload(sch, true);

	if (old_child)
		qdisc_put(old_child);
	return 0;

unlock_out:
	sch_tree_unlock(sch);
	if (child)
		qdisc_put(child);
	return err;
}

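/* Adaptative mode: re-tune max_P every 500ms under the root qdisc lock so
 * the marking probability tracks the observed average queue length.
 */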
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock;

	rcu_read_lock();
	root_lock = qdisc_lock(qdisc_root_sleeping(sch));
	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
	rcu_read_unlock();
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	err = __red_change(sch, tb, extack);
	if (err)
		return err;

	err = tcf_qevent_init(&q->qe_early_drop, sch,
			      FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
			      tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		return err;

	return tcf_qevent_init(&q->qe_mark, sch,
			       FLOW_BLOCK_BINDER_TYPE_RED_MARK,
			       tb[TCA_RED_MARK_BLOCK], extack);
}

static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	err = tcf_qevent_validate_change(&q->qe_early_drop,
					 tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		return err;

	err = tcf_qevent_validate_change(&q->qe_mark,
					 tb[TCA_RED_MARK_BLOCK], extack);
	if (err)
		return err;

	return __red_change(sch, tb, extack);
}

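/* Ask an offloading driver to refresh bstats/qstats from hardware before
 * they are dumped to userspace.
 */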
static int red_dump_offload_stats(struct Qdisc *sch)
{
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
				  q->userbits,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
	    nla_put_bitfield32(skb, TCA_RED_FLAGS,
			       q->flags, TC_RED_SUPPORTED_FLAGS) ||
	    tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
	    tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = q->stats.prob_drop + q->stats.forced_drop;
	st.pdrop = q->stats.pdrop;
	st.marked = q->stats.prob_mark + q->stats.forced_mark;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static void red_graft_offload(struct Qdisc *sch,
			      struct Qdisc *new, struct Qdisc *old,
			      struct netlink_ext_ack *extack)
{
	struct tc_red_qopt_offload graft_offload = {
		.handle		= sch->handle,
		.parent		= sch->parent,
		.child_handle	= new->handle,
		.command	= TC_RED_GRAFT,
	};

	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
				   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);

	red_graft_offload(sch, new, *old, extack);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);

	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop)
		tc_qdisc_stats_dump(sch, 1, walker);
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};
MODULE_ALIAS_NET_SCH("red");

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Random Early Detection qdisc");