1# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
2
---
# Identity of this netlink family spec and its one-line summary.
name: netdev

doc:
  netdev configuration over generic netlink.
7
8definitions:
9  -
10    type: flags
11    name: xdp-act
12    render-max: true
13    entries:
14      -
15        name: basic
16        doc:
17          XDP features set supported by all drivers
18          (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX)
19      -
20        name: redirect
21        doc:
22          The netdev supports XDP_REDIRECT
23      -
24        name: ndo-xmit
25        doc:
26          This feature informs if netdev implements ndo_xdp_xmit callback.
27      -
28        name: xsk-zerocopy
29        doc:
30          This feature informs if netdev supports AF_XDP in zero copy mode.
31      -
32        name: hw-offload
33        doc:
34         This feature informs if netdev supports XDP hw offloading.
35      -
36        name: rx-sg
37        doc:
38          This feature informs if netdev implements non-linear XDP buffer
39          support in the driver napi callback.
40      -
41        name: ndo-xmit-sg
42        doc:
43          This feature informs if netdev implements non-linear XDP buffer
44          support in ndo_xdp_xmit callback.
45  -
46    type: flags
47    name: xdp-rx-metadata
48    entries:
49      -
50        name: timestamp
51        doc:
52          Device is capable of exposing receive HW timestamp via bpf_xdp_metadata_rx_timestamp().
53      -
54        name: hash
55        doc:
56          Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash().
57      -
58        name: vlan-tag
59        doc:
60          Device is capable of exposing receive packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag().
61  -
62    type: flags
63    name: xsk-flags
64    entries:
65      -
66        name: tx-timestamp
67        doc:
68          HW timestamping egress packets is supported by the driver.
69      -
70        name: tx-checksum
71        doc:
72          L3 checksum HW offload is supported by the driver.
73      -
74        name: tx-launch-time-fifo
75        doc:
76          Launch time HW offload is supported by the driver.
77  -
78    name: queue-type
79    type: enum
80    entries: [ rx, tx ]
81  -
82    name: qstats-scope
83    type: flags
84    entries: [ queue ]
85
86attribute-sets:
87  -
88    name: dev
89    attributes:
90      -
91        name: ifindex
92        doc: netdev ifindex
93        type: u32
94        checks:
95          min: 1
96      -
97        name: pad
98        type: pad
99      -
100        name: xdp-features
101        doc: Bitmask of enabled xdp-features.
102        type: u64
103        enum: xdp-act
104      -
105        name: xdp-zc-max-segs
106        doc: max fragment count supported by ZC driver
107        type: u32
108        checks:
109          min: 1
110      -
111        name: xdp-rx-metadata-features
112        doc: Bitmask of supported XDP receive metadata features.
113             See Documentation/networking/xdp-rx-metadata.rst for more details.
114        type: u64
115        enum: xdp-rx-metadata
116      -
117        name: xsk-features
118        doc: Bitmask of enabled AF_XDP features.
119        type: u64
120        enum: xsk-flags
121  -
122    name: page-pool
123    attributes:
124      -
125        name: id
126        doc: Unique ID of a Page Pool instance.
127        type: uint
128        checks:
129          min: 1
130          max: u32-max
131      -
132        name: ifindex
133        doc: |
134          ifindex of the netdev to which the pool belongs.
135          May be reported as 0 if the page pool was allocated for a netdev
136          which got destroyed already (page pools may outlast their netdevs
137          because they wait for all memory to be returned).
138        type: u32
139        checks:
140          min: 1
141          max: s32-max
142      -
143        name: napi-id
144        doc: Id of NAPI using this Page Pool instance.
145        type: uint
146        checks:
147          min: 1
148          max: u32-max
149      -
150        name: inflight
151        type: uint
152        doc: |
153          Number of outstanding references to this page pool (allocated
154          but yet to be freed pages). Allocated pages may be held in
155          socket receive queues, driver receive ring, page pool recycling
156          ring, the page pool cache, etc.
157      -
158        name: inflight-mem
159        type: uint
160        doc: |
161          Amount of memory held by inflight pages.
162      -
163        name: detach-time
164        type: uint
165        doc: |
166          Seconds in CLOCK_BOOTTIME of when Page Pool was detached by
167          the driver. Once detached Page Pool can no longer be used to
168          allocate memory.
169          Page Pools wait for all the memory allocated from them to be freed
170          before truly disappearing. "Detached" Page Pools cannot be
171          "re-attached", they are just waiting to disappear.
172          Attribute is absent if Page Pool has not been detached, and
173          can still be used to allocate new memory.
174      -
175        name: dmabuf
176        doc: ID of the dmabuf this page-pool is attached to.
177        type: u32
178  -
179    name: page-pool-info
180    subset-of: page-pool
181    attributes:
182      -
183        name: id
184      -
185        name: ifindex
186  -
187    name: page-pool-stats
188    doc: |
189      Page pool statistics, see docs for struct page_pool_stats
190      for information about individual statistics.
191    attributes:
192      -
193        name: info
194        doc: Page pool identifying information.
195        type: nest
196        nested-attributes: page-pool-info
197      -
198        name: alloc-fast
199        type: uint
200        value: 8 # reserve some attr ids in case we need more metadata later
201      -
202        name: alloc-slow
203        type: uint
204      -
205        name: alloc-slow-high-order
206        type: uint
207      -
208        name: alloc-empty
209        type: uint
210      -
211        name: alloc-refill
212        type: uint
213      -
214        name: alloc-waive
215        type: uint
216      -
217        name: recycle-cached
218        type: uint
219      -
220        name: recycle-cache-full
221        type: uint
222      -
223        name: recycle-ring
224        type: uint
225      -
226        name: recycle-ring-full
227        type: uint
228      -
229        name: recycle-released-refcnt
230        type: uint
231
232  -
233    name: napi
234    attributes:
235      -
236        name: ifindex
237        doc: ifindex of the netdevice to which NAPI instance belongs.
238        type: u32
239        checks:
240          min: 1
241      -
242        name: id
243        doc: ID of the NAPI instance.
244        type: u32
245      -
246        name: irq
247        doc: The associated interrupt vector number for the napi
248        type: u32
249      -
250        name: pid
251        doc: PID of the napi thread, if NAPI is configured to operate in
252             threaded mode. If NAPI is not in threaded mode (i.e. uses normal
253             softirq context), the attribute will be absent.
254        type: u32
255      -
256        name: defer-hard-irqs
257        doc: The number of consecutive empty polls before IRQ deferral ends
258             and hardware IRQs are re-enabled.
259        type: u32
260        checks:
261          max: s32-max
262      -
263        name: gro-flush-timeout
264        doc: The timeout, in nanoseconds, of when to trigger the NAPI watchdog
265             timer which schedules NAPI processing. Additionally, a non-zero
266             value will also prevent GRO from flushing recent super-frames at
267             the end of a NAPI cycle. This may add receive latency in exchange
268             for reducing the number of frames processed by the network stack.
269        type: uint
270      -
271        name: irq-suspend-timeout
272        doc: The timeout, in nanoseconds, of how long to suspend irq
273             processing, if event polling finds events
274        type: uint
275  -
276    name: queue
277    attributes:
278      -
279        name: id
280        doc: Queue index; most queue types are indexed like a C array, with
281             indexes starting at 0 and ending at queue count - 1. Queue indexes
282             are scoped to an interface and queue type.
283        type: u32
284      -
285        name: ifindex
286        doc: ifindex of the netdevice to which the queue belongs.
287        type: u32
288        checks:
289          min: 1
290      -
291        name: type
292        doc: Queue type as rx, tx. Each queue type defines a separate ID space.
293        type: u32
294        enum: queue-type
295      -
296        name: napi-id
297        doc: ID of the NAPI instance which services this queue.
298        type: u32
299      -
300        name: dmabuf
301        doc: ID of the dmabuf attached to this queue, if any.
302        type: u32
303
304  -
305    name: qstats
306    doc: |
307      Get device statistics, scoped to a device or a queue.
308      These statistics extend (and partially duplicate) statistics available
309      in struct rtnl_link_stats64.
310      Value of the `scope` attribute determines how statistics are
311      aggregated. When aggregated for the entire device the statistics
312      represent the total number of events since last explicit reset of
313      the device (i.e. not a reconfiguration like changing queue count).
314      When reported per-queue, however, the statistics may not add
315      up to the total number of events, will only be reported for currently
316      active objects, and will likely report the number of events since last
317      reconfiguration.
318    attributes:
319      -
320        name: ifindex
321        doc: ifindex of the netdevice to which stats belong.
322        type: u32
323        checks:
324          min: 1
325      -
326        name: queue-type
327        doc: Queue type as rx, tx, for queue-id.
328        type: u32
329        enum: queue-type
330      -
331        name: queue-id
332        doc: Queue ID, if stats are scoped to a single queue instance.
333        type: u32
334      -
335        name: scope
336        doc: |
337          What object type should be used to iterate over the stats.
338        type: uint
339        enum: qstats-scope
340      -
341        name: rx-packets
342        doc: |
343          Number of wire packets successfully received and passed to the stack.
344          For drivers supporting XDP, XDP is considered the first layer
345          of the stack, so packets consumed by XDP are still counted here.
346        type: uint
347        value: 8 # reserve some attr ids in case we need more metadata later
348      -
349        name: rx-bytes
350        doc: Successfully received bytes, see `rx-packets`.
351        type: uint
352      -
353        name: tx-packets
354        doc: |
355          Number of wire packets successfully sent. Packet is considered to be
356          successfully sent once it is in device memory (usually this means
357          the device has issued a DMA completion for the packet).
358        type: uint
359      -
360        name: tx-bytes
361        doc: Successfully sent bytes, see `tx-packets`.
362        type: uint
363      -
364        name: rx-alloc-fail
365        doc: |
366          Number of times skb or buffer allocation failed on the Rx datapath.
367          Allocation failure may, or may not result in a packet drop, depending
368          on driver implementation and whether system recovers quickly.
369        type: uint
370      -
371        name: rx-hw-drops
372        doc: |
373          Number of all packets which entered the device, but never left it,
374          including but not limited to: packets dropped due to lack of buffer
375          space, processing errors, explicit or implicit policies and packet
376          filters.
377        type: uint
378      -
379        name: rx-hw-drop-overruns
380        doc: |
381          Number of packets dropped due to transient lack of resources, such as
382          buffer space, host descriptors etc.
383        type: uint
384      -
385        name: rx-csum-complete
386        doc: Number of packets that were marked as CHECKSUM_COMPLETE.
387        type: uint
388      -
389        name: rx-csum-unnecessary
390        doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
391        type: uint
392      -
393        name: rx-csum-none
394        doc: Number of packets that were not checksummed by device.
395        type: uint
396      -
397        name: rx-csum-bad
398        doc: |
399          Number of packets with bad checksum. The packets are not discarded,
400          but still delivered to the stack.
401        type: uint
402      -
403        name: rx-hw-gro-packets
404        doc: |
405          Number of packets that were coalesced from smaller packets by the device.
406          Counts only packets coalesced with the HW-GRO netdevice feature,
407          LRO-coalesced packets are not counted.
408        type: uint
409      -
410        name: rx-hw-gro-bytes
411        doc: See `rx-hw-gro-packets`.
412        type: uint
413      -
414        name: rx-hw-gro-wire-packets
415        doc: |
416          Number of packets that were coalesced to bigger packetss with the HW-GRO
417          netdevice feature. LRO-coalesced packets are not counted.
418        type: uint
419      -
420        name: rx-hw-gro-wire-bytes
421        doc: See `rx-hw-gro-wire-packets`.
422        type: uint
423      -
424        name: rx-hw-drop-ratelimits
425        doc: |
426          Number of the packets dropped by the device due to the received
427          packets bitrate exceeding the device rate limit.
428        type: uint
429      -
430        name: tx-hw-drops
431        doc: |
432          Number of packets that arrived at the device but never left it,
433          encompassing packets dropped for reasons such as processing errors, as
434          well as those affected by explicitly defined policies and packet
435          filtering criteria.
436        type: uint
437      -
438        name: tx-hw-drop-errors
439        doc: Number of packets dropped because they were invalid or malformed.
440        type: uint
441      -
442        name: tx-csum-none
443        doc: |
444          Number of packets that did not require the device to calculate the
445          checksum.
446        type: uint
447      -
448        name: tx-needs-csum
449        doc: |
450          Number of packets that required the device to calculate the checksum.
451        type: uint
452      -
453        name: tx-hw-gso-packets
454        doc: |
455          Number of packets that necessitated segmentation into smaller packets
456          by the device.
457        type: uint
458      -
459        name: tx-hw-gso-bytes
460        doc: See `tx-hw-gso-packets`.
461        type: uint
462      -
463        name: tx-hw-gso-wire-packets
464        doc: |
465          Number of wire-sized packets generated by processing
466          `tx-hw-gso-packets`
467        type: uint
468      -
469        name: tx-hw-gso-wire-bytes
470        doc: See `tx-hw-gso-wire-packets`.
471        type: uint
472      -
473        name: tx-hw-drop-ratelimits
474        doc: |
475          Number of the packets dropped by the device due to the transmit
476          packets bitrate exceeding the device rate limit.
477        type: uint
478      -
479        name: tx-stop
480        doc: |
481          Number of times driver paused accepting new tx packets
482          from the stack to this queue, because the queue was full.
483          Note that if BQL is supported and enabled on the device
484          the networking stack will avoid queuing a lot of data at once.
485        type: uint
486      -
487        name: tx-wake
488        doc: |
489          Number of times driver re-started accepting send
490          requests to this queue from the stack.
491        type: uint
492  -
493    name: queue-id
494    subset-of: queue
495    attributes:
496      -
497        name: id
498      -
499        name: type
500  -
501    name: dmabuf
502    attributes:
503      -
504        name: ifindex
505        doc: netdev ifindex to bind the dmabuf to.
506        type: u32
507        checks:
508          min: 1
509      -
510        name: queues
511        doc: receive queues to bind the dmabuf to.
512        type: nest
513        nested-attributes: queue-id
514        multi-attr: true
515      -
516        name: fd
517        doc: dmabuf file descriptor to bind.
518        type: u32
519      -
520        name: id
521        doc: id of the dmabuf binding
522        type: u32
523        checks:
524          min: 1
525
526operations:
527  list:
528    -
529      name: dev-get
530      doc: Get / dump information about a netdev.
531      attribute-set: dev
532      do:
533        request:
534          attributes:
535            - ifindex
536        reply: &dev-all
537          attributes:
538            - ifindex
539            - xdp-features
540            - xdp-zc-max-segs
541            - xdp-rx-metadata-features
542            - xsk-features
543      dump:
544        reply: *dev-all
545    -
546      name: dev-add-ntf
547      doc: Notification about device appearing.
548      notify: dev-get
549      mcgrp: mgmt
550    -
551      name: dev-del-ntf
552      doc: Notification about device disappearing.
553      notify: dev-get
554      mcgrp: mgmt
555    -
556      name: dev-change-ntf
557      doc: Notification about device configuration being changed.
558      notify: dev-get
559      mcgrp: mgmt
560    -
561      name: page-pool-get
562      doc: |
563        Get / dump information about Page Pools.
564        (Only Page Pools associated with a net_device can be listed.)
565      attribute-set: page-pool
566      do:
567        request:
568          attributes:
569            - id
570        reply: &pp-reply
571          attributes:
572            - id
573            - ifindex
574            - napi-id
575            - inflight
576            - inflight-mem
577            - detach-time
578            - dmabuf
579      dump:
580        reply: *pp-reply
581      config-cond: page-pool
582    -
583      name: page-pool-add-ntf
584      doc: Notification about page pool appearing.
585      notify: page-pool-get
586      mcgrp: page-pool
587      config-cond: page-pool
588    -
589      name: page-pool-del-ntf
590      doc: Notification about page pool disappearing.
591      notify: page-pool-get
592      mcgrp: page-pool
593      config-cond: page-pool
594    -
595      name: page-pool-change-ntf
596      doc: Notification about page pool configuration being changed.
597      notify: page-pool-get
598      mcgrp: page-pool
599      config-cond: page-pool
600    -
601      name: page-pool-stats-get
602      doc: Get page pool statistics.
603      attribute-set: page-pool-stats
604      do:
605        request:
606          attributes:
607            - info
608        reply: &pp-stats-reply
609          attributes:
610            - info
611            - alloc-fast
612            - alloc-slow
613            - alloc-slow-high-order
614            - alloc-empty
615            - alloc-refill
616            - alloc-waive
617            - recycle-cached
618            - recycle-cache-full
619            - recycle-ring
620            - recycle-ring-full
621            - recycle-released-refcnt
622      dump:
623        reply: *pp-stats-reply
624      config-cond: page-pool-stats
625    -
626      name: queue-get
627      doc: Get queue information from the kernel.
628           Only configured queues will be reported (as opposed to all available
629           hardware queues).
630      attribute-set: queue
631      do:
632        request:
633          attributes:
634            - ifindex
635            - type
636            - id
637        reply: &queue-get-op
638          attributes:
639            - id
640            - type
641            - napi-id
642            - ifindex
643            - dmabuf
644      dump:
645        request:
646          attributes:
647            - ifindex
648        reply: *queue-get-op
649    -
650      name: napi-get
651      doc: Get information about NAPI instances configured on the system.
652      attribute-set: napi
653      do:
654        request:
655          attributes:
656            - id
657        reply: &napi-get-op
658          attributes:
659            - id
660            - ifindex
661            - irq
662            - pid
663            - defer-hard-irqs
664            - gro-flush-timeout
665            - irq-suspend-timeout
666      dump:
667        request:
668          attributes:
669            - ifindex
670        reply: *napi-get-op
671    -
672      name: qstats-get
673      doc: |
674        Get / dump fine grained statistics. Which statistics are reported
675        depends on the device and the driver, and whether the driver stores
676        software counters per-queue.
677      attribute-set: qstats
678      dump:
679        request:
680          attributes:
681            - ifindex
682            - scope
683        reply:
684          attributes:
685            - ifindex
686            - queue-type
687            - queue-id
688            - rx-packets
689            - rx-bytes
690            - tx-packets
691            - tx-bytes
692    -
693      name: bind-rx
694      doc: Bind dmabuf to netdev
695      attribute-set: dmabuf
696      flags: [ admin-perm ]
697      do:
698        request:
699          attributes:
700            - ifindex
701            - fd
702            - queues
703        reply:
704          attributes:
705            - id
706    -
707      name: napi-set
708      doc: Set configurable NAPI instance settings.
709      attribute-set: napi
710      flags: [ admin-perm ]
711      do:
712        request:
713          attributes:
714            - id
715            - defer-hard-irqs
716            - gro-flush-timeout
717            - irq-suspend-timeout
718
# NOTE(review): presumably kernel-only code generation settings, with
# `sock-priv` naming a per-socket private data type declared by the listed
# header - confirm against the genetlink spec schema.
kernel-family:
  headers: [ "linux/list.h" ]
  sock-priv: struct list_head
722
# Multicast groups named by the `mcgrp` keys of the notifications:
# `mgmt` for the dev-*-ntf operations, `page-pool` for page-pool-*-ntf.
mcast-groups:
  list:
    -
      name: mgmt
    -
      name: page-pool
729