1 // Copyright 2022 The gRPC Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef GRPC_SRC_CORE_LIB_EVENT_ENGINE_POSIX_ENGINE_INTERNAL_ERRQUEUE_H
16 #define GRPC_SRC_CORE_LIB_EVENT_ENGINE_POSIX_ENGINE_INTERNAL_ERRQUEUE_H
17 
18 #include <grpc/support/port_platform.h>
19 
20 #include <stdint.h>
21 
22 #include "src/core/lib/iomgr/port.h"
23 
24 #ifdef GRPC_POSIX_SOCKET_TCP
25 
26 #include <time.h>
27 
28 #ifdef GRPC_LINUX_ERRQUEUE
29 #include <linux/errqueue.h>  // IWYU pragma: keep
30 #include <sys/socket.h>
31 #endif  // GRPC_LINUX_ERRQUEUE
32 
33 namespace grpc_event_engine {
34 namespace experimental {
35 
36 #ifdef GRPC_LINUX_ERRQUEUE
37 
// Local stand-in for the scm_timestamping struct that <linux/errqueue.h>
// defines. It is declared the same way here so that this code still compiles
// on systems whose headers do not provide it.
struct scm_timestamping {
  timespec ts[3];
};
// The timestamp type identifiers are redefined locally as well.
// Timestamp type: the driver passed the skb to the NIC, or HW.
constexpr int SCM_TSTAMP_SND{0};
// Timestamp type: the data entered the packet scheduler.
constexpr int SCM_TSTAMP_SCHED{1};
// Timestamp type: the data was acknowledged by the peer.
constexpr int SCM_TSTAMP_ACK{2};
50 
// Control message type containing OPT_STATS. Defined here only when none of
// the headers included so far has already provided it.
#if !defined(SCM_TIMESTAMPING_OPT_STATS)
#define SCM_TIMESTAMPING_OPT_STATS 54
#endif  // !defined(SCM_TIMESTAMPING_OPT_STATS)
55 
// Local copies of the constants required from <linux/net_tstamp.h>.
// Each one is a single-bit mask; the bit position is noted alongside.
constexpr uint32_t SOF_TIMESTAMPING_TX_SOFTWARE = 0x0002u;  // bit 1
constexpr uint32_t SOF_TIMESTAMPING_SOFTWARE = 0x0010u;     // bit 4
constexpr uint32_t SOF_TIMESTAMPING_OPT_ID = 0x0080u;       // bit 7
constexpr uint32_t SOF_TIMESTAMPING_TX_SCHED = 0x0100u;     // bit 8
constexpr uint32_t SOF_TIMESTAMPING_TX_ACK = 0x0200u;       // bit 9
constexpr uint32_t SOF_TIMESTAMPING_OPT_TSONLY = 0x0800u;   // bit 11
constexpr uint32_t SOF_TIMESTAMPING_OPT_STATS = 0x1000u;    // bit 12
64 
65 constexpr uint32_t kTimestampingSocketOptions =
66     SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID |
67     SOF_TIMESTAMPING_OPT_TSONLY | SOF_TIMESTAMPING_OPT_STATS;
68 constexpr uint32_t kTimestampingRecordingOptions =
69     SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE |
70     SOF_TIMESTAMPING_TX_ACK;
71 
// Netlink attribute types used for TCP opt stats. The numeric value of every
// enumerator is spelled out; the values are consecutive, starting from zero.
enum TCPOptStats {
  TCP_NLA_PAD = 0,
  TCP_NLA_BUSY = 1,             // Time (usec) spent busy sending data.
  TCP_NLA_RWND_LIMITED = 2,     // Time (usec) limited by the receive window.
  TCP_NLA_SNDBUF_LIMITED = 3,   // Time (usec) limited by the send buffer.
  TCP_NLA_DATA_SEGS_OUT = 4,    // Data pkts sent, including retransmissions.
  TCP_NLA_TOTAL_RETRANS = 5,    // Data pkts retransmitted.
  TCP_NLA_PACING_RATE = 6,      // Pacing rate in Bps.
  TCP_NLA_DELIVERY_RATE = 7,    // Delivery rate in Bps.
  TCP_NLA_SND_CWND = 8,         // Sending congestion window.
  TCP_NLA_REORDERING = 9,       // Reordering metric.
  TCP_NLA_MIN_RTT = 10,         // Minimum RTT.
  TCP_NLA_RECUR_RETRANS = 11,   // Recurring retransmits for the current pkt.
  TCP_NLA_DELIVERY_RATE_APP_LMT = 12,  // Delivery rate application limited?
  TCP_NLA_SNDQ_SIZE = 13,       // Data (bytes) pending in the send queue.
  TCP_NLA_CA_STATE = 14,        // ca_state of the socket.
  TCP_NLA_SND_SSTHRESH = 15,    // Slow start size threshold.
  TCP_NLA_DELIVERED = 16,       // Data pkts delivered, incl. out-of-order.
  TCP_NLA_DELIVERED_CE = 17,    // Like DELIVERED, but only ones w/ CE marks.
  TCP_NLA_BYTES_SENT = 18,      // Data bytes sent, including retransmissions.
  TCP_NLA_BYTES_RETRANS = 19,   // Data bytes retransmitted.
  TCP_NLA_DSACK_DUPS = 20,      // DSACK blocks received.
  TCP_NLA_REORD_SEEN = 21,      // Reordering events seen.
  TCP_NLA_SRTT = 22,            // Smoothed RTT in usecs.
};
98 
// tcp_info, as taken from linux/tcp.h. The members are declared in the same
// order and with the same types as before; only the grouping and comments are
// new. The final member, `length`, holds the length of the struct returned by
// the kernel.
struct tcp_info {
  // Single-byte fields.
  uint8_t tcpi_state;
  uint8_t tcpi_ca_state;
  uint8_t tcpi_retransmits;
  uint8_t tcpi_probes;
  uint8_t tcpi_backoff;
  uint8_t tcpi_options;
  // Bit-fields: two 4-bit window scale values, then a 1-bit flag.
  uint8_t tcpi_snd_wscale : 4;
  uint8_t tcpi_rcv_wscale : 4;
  uint8_t tcpi_delivery_rate_app_limited : 1;

  uint32_t tcpi_rto;
  uint32_t tcpi_ato;
  uint32_t tcpi_snd_mss;
  uint32_t tcpi_rcv_mss;

  uint32_t tcpi_unacked;
  uint32_t tcpi_sacked;
  uint32_t tcpi_lost;
  uint32_t tcpi_retrans;
  uint32_t tcpi_fackets;

  // Times.
  uint32_t tcpi_last_data_sent;
  uint32_t tcpi_last_ack_sent;  // Not remembered, sorry.
  uint32_t tcpi_last_data_recv;
  uint32_t tcpi_last_ack_recv;

  // Metrics.
  uint32_t tcpi_pmtu;
  uint32_t tcpi_rcv_ssthresh;
  uint32_t tcpi_rtt;
  uint32_t tcpi_rttvar;
  uint32_t tcpi_snd_ssthresh;
  uint32_t tcpi_snd_cwnd;
  uint32_t tcpi_advmss;
  uint32_t tcpi_reordering;

  uint32_t tcpi_rcv_rtt;
  uint32_t tcpi_rcv_space;

  uint32_t tcpi_total_retrans;

  uint64_t tcpi_pacing_rate;
  uint64_t tcpi_max_pacing_rate;
  // RFC4898 tcpEStatsAppHCThruOctetsAcked
  uint64_t tcpi_bytes_acked;
  // RFC4898 tcpEStatsAppHCThruOctetsReceived
  uint64_t tcpi_bytes_received;

  // RFC4898 tcpEStatsPerfSegsOut
  uint32_t tcpi_segs_out;
  // RFC4898 tcpEStatsPerfSegsIn
  uint32_t tcpi_segs_in;
  uint32_t tcpi_notsent_bytes;
  uint32_t tcpi_min_rtt;

  // RFC4898 tcpEStatsDataSegsIn
  uint32_t tcpi_data_segs_in;
  // RFC4898 tcpEStatsDataSegsOut
  uint32_t tcpi_data_segs_out;

  uint64_t tcpi_delivery_rate;
  // Time (usec) busy sending data
  uint64_t tcpi_busy_time;
  // Time (usec) limited by receive window
  uint64_t tcpi_rwnd_limited;
  // Time (usec) limited by send buffer
  uint64_t tcpi_sndbuf_limited;

  uint32_t tcpi_delivered;
  uint32_t tcpi_delivered_ce;
  // RFC4898 tcpEStatsPerfHCDataOctetsOut
  uint64_t tcpi_bytes_sent;
  // RFC4898 tcpEStatsPerfOctetsRetrans
  uint64_t tcpi_bytes_retrans;
  // RFC4898 tcpEStatsStackDSACKDups
  uint32_t tcpi_dsack_dups;
  // Reordering events seen
  uint32_t tcpi_reord_seen;

  // Length of struct returned by kernel
  socklen_t length;
};
161 
// Fallback value for TCP_INFO, used only when no header included so far has
// already defined it.
#if !defined(TCP_INFO)
#define TCP_INFO 11
#endif  // !defined(TCP_INFO)
165 
// Declared here; the definition lives outside this header. Takes a pointer to
// the locally declared tcp_info struct and a file descriptor `fd`, and returns
// an int.
// NOTE(review): presumably fills `*info` with the TCP_INFO data of the socket
// `fd` and returns a status code — confirm the return-value convention
// against the implementation.
int GetSocketTcpInfo(tcp_info* info, int fd);
167 
168 #endif  // GRPC_LINUX_ERRQUEUE
169 
// Returns true if the kernel is capable of supporting errqueue and
// timestamping. Currently only Linux kernels above 4.0.0 are allowed.
// This declaration sits outside the GRPC_LINUX_ERRQUEUE block, so it is
// visible whenever GRPC_POSIX_SOCKET_TCP is defined.
bool KernelSupportsErrqueue();
173 
174 }  // namespace experimental
175 }  // namespace grpc_event_engine
176 
177 #endif  // GRPC_POSIX_SOCKET_TCP
178 
179 #endif  // GRPC_SRC_CORE_LIB_EVENT_ENGINE_POSIX_ENGINE_INTERNAL_ERRQUEUE_H
180