xref: /aosp_15_r20/external/webrtc/rtc_tools/rtc_event_log_visualizer/alerts.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "rtc_tools/rtc_event_log_visualizer/alerts.h"
12 
13 #include <stdio.h>
14 
15 #include <algorithm>
16 #include <limits>
17 #include <map>
18 #include <string>
19 
20 #include "logging/rtc_event_log/rtc_event_processor.h"
21 #include "rtc_base/checks.h"
22 #include "rtc_base/logging.h"
23 #include "rtc_base/numerics/sequence_number_util.h"
24 #include "rtc_base/strings/string_builder.h"
25 
26 namespace webrtc {
27 
AnalyzeStreamGaps(const ParsedRtcEventLog & parsed_log,PacketDirection direction)28 void TriageHelper::AnalyzeStreamGaps(const ParsedRtcEventLog& parsed_log,
29                                      PacketDirection direction) {
30   // With 100 packets/s (~800kbps), false positives would require 10 s without
31   // data.
32   constexpr int64_t kMaxSeqNumJump = 1000;
33   // With a 90 kHz clock, false positives would require 10 s without data.
34   constexpr int64_t kTicksPerMillisec = 90;
35   constexpr int64_t kCaptureTimeGraceMs = 10000;
36 
37   std::string seq_num_explanation =
38       direction == kIncomingPacket
39           ? "Incoming RTP sequence number jumps more than 1000. Counter may "
40             "have been reset or rewritten incorrectly in a group call."
41           : "Outgoing RTP sequence number jumps more than 1000. Counter may "
42             "have been reset.";
43   std::string capture_time_explanation =
44       direction == kIncomingPacket ? "Incoming capture time jumps more than "
45                                      "10s. Clock might have been reset."
46                                    : "Outgoing capture time jumps more than "
47                                      "10s. Clock might have been reset.";
48   TriageAlertType seq_num_alert = direction == kIncomingPacket
49                                       ? TriageAlertType::kIncomingSeqNumJump
50                                       : TriageAlertType::kOutgoingSeqNumJump;
51   TriageAlertType capture_time_alert =
52       direction == kIncomingPacket ? TriageAlertType::kIncomingCaptureTimeJump
53                                    : TriageAlertType::kOutgoingCaptureTimeJump;
54 
55   const int64_t segment_end_us = parsed_log.first_log_segment().stop_time_us();
56 
57   // Check for gaps in sequence numbers and capture timestamps.
58   for (const auto& stream : parsed_log.rtp_packets_by_ssrc(direction)) {
59     if (IsRtxSsrc(parsed_log, direction, stream.ssrc)) {
60       continue;
61     }
62     auto packets = stream.packet_view;
63     if (packets.empty()) {
64       continue;
65     }
66     SeqNumUnwrapper<uint16_t> seq_num_unwrapper;
67     int64_t last_seq_num =
68         seq_num_unwrapper.Unwrap(packets[0].header.sequenceNumber);
69     SeqNumUnwrapper<uint32_t> capture_time_unwrapper;
70     int64_t last_capture_time =
71         capture_time_unwrapper.Unwrap(packets[0].header.timestamp);
72     int64_t last_log_time_ms = packets[0].log_time_ms();
73     for (const auto& packet : packets) {
74       if (packet.log_time_us() > segment_end_us) {
75         // Only process the first (LOG_START, LOG_END) segment.
76         break;
77       }
78 
79       int64_t seq_num = seq_num_unwrapper.Unwrap(packet.header.sequenceNumber);
80       if (std::abs(seq_num - last_seq_num) > kMaxSeqNumJump) {
81         Alert(seq_num_alert, config_.GetCallTimeSec(packet.log_time()),
82               seq_num_explanation);
83       }
84       last_seq_num = seq_num;
85 
86       int64_t capture_time =
87           capture_time_unwrapper.Unwrap(packet.header.timestamp);
88       if (std::abs(capture_time - last_capture_time) >
89           kTicksPerMillisec *
90               (kCaptureTimeGraceMs + packet.log_time_ms() - last_log_time_ms)) {
91         Alert(capture_time_alert, config_.GetCallTimeSec(packet.log_time()),
92               capture_time_explanation);
93       }
94       last_capture_time = capture_time;
95     }
96   }
97 }
98 
AnalyzeTransmissionGaps(const ParsedRtcEventLog & parsed_log,PacketDirection direction)99 void TriageHelper::AnalyzeTransmissionGaps(const ParsedRtcEventLog& parsed_log,
100                                            PacketDirection direction) {
101   constexpr int64_t kMaxRtpTransmissionGap = 500000;
102   constexpr int64_t kMaxRtcpTransmissionGap = 3000000;
103   std::string rtp_explanation =
104       direction == kIncomingPacket
105           ? "No RTP packets received for more than 500ms. This indicates a "
106             "network problem. Temporary video freezes and choppy or robotic "
107             "audio is unavoidable. Unnecessary BWE drops is a known issue."
108           : "No RTP packets sent for more than 500 ms. This might be an issue "
109             "with the pacer.";
110   std::string rtcp_explanation =
111       direction == kIncomingPacket
112           ? "No RTCP packets received for more than 3 s. Could be a longer "
113             "connection outage"
114           : "No RTCP packets sent for more than 3 s. This is most likely a "
115             "bug.";
116   TriageAlertType rtp_alert = direction == kIncomingPacket
117                                   ? TriageAlertType::kIncomingRtpGap
118                                   : TriageAlertType::kOutgoingRtpGap;
119   TriageAlertType rtcp_alert = direction == kIncomingPacket
120                                    ? TriageAlertType::kIncomingRtcpGap
121                                    : TriageAlertType::kOutgoingRtcpGap;
122 
123   const int64_t segment_end_us = parsed_log.first_log_segment().stop_time_us();
124 
125   // TODO(terelius): The parser could provide a list of all packets, ordered
126   // by time, for each direction.
127   std::multimap<int64_t, const LoggedRtpPacket*> rtp_in_direction;
128   for (const auto& stream : parsed_log.rtp_packets_by_ssrc(direction)) {
129     for (const LoggedRtpPacket& rtp_packet : stream.packet_view)
130       rtp_in_direction.emplace(rtp_packet.log_time_us(), &rtp_packet);
131   }
132   absl::optional<int64_t> last_rtp_time;
133   for (const auto& kv : rtp_in_direction) {
134     int64_t timestamp = kv.first;
135     if (timestamp > segment_end_us) {
136       // Only process the first (LOG_START, LOG_END) segment.
137       break;
138     }
139     int64_t duration = timestamp - last_rtp_time.value_or(0);
140     if (last_rtp_time.has_value() && duration > kMaxRtpTransmissionGap) {
141       // No packet sent/received for more than 500 ms.
142       Alert(rtp_alert, config_.GetCallTimeSec(Timestamp::Micros(timestamp)),
143             rtp_explanation);
144     }
145     last_rtp_time.emplace(timestamp);
146   }
147 
148   absl::optional<int64_t> last_rtcp_time;
149   if (direction == kIncomingPacket) {
150     for (const auto& rtcp : parsed_log.incoming_rtcp_packets()) {
151       if (rtcp.log_time_us() > segment_end_us) {
152         // Only process the first (LOG_START, LOG_END) segment.
153         break;
154       }
155       int64_t duration = rtcp.log_time_us() - last_rtcp_time.value_or(0);
156       if (last_rtcp_time.has_value() && duration > kMaxRtcpTransmissionGap) {
157         // No feedback sent/received for more than 2000 ms.
158         Alert(rtcp_alert, config_.GetCallTimeSec(rtcp.log_time()),
159               rtcp_explanation);
160       }
161       last_rtcp_time.emplace(rtcp.log_time_us());
162     }
163   } else {
164     for (const auto& rtcp : parsed_log.outgoing_rtcp_packets()) {
165       if (rtcp.log_time_us() > segment_end_us) {
166         // Only process the first (LOG_START, LOG_END) segment.
167         break;
168       }
169       int64_t duration = rtcp.log_time_us() - last_rtcp_time.value_or(0);
170       if (last_rtcp_time.has_value() && duration > kMaxRtcpTransmissionGap) {
171         // No feedback sent/received for more than 2000 ms.
172         Alert(rtcp_alert, config_.GetCallTimeSec(rtcp.log_time()),
173               rtcp_explanation);
174       }
175       last_rtcp_time.emplace(rtcp.log_time_us());
176     }
177   }
178 }
179 
180 // TODO(terelius): Notifications could possibly be generated by the same code
181 // that produces the graphs. There is some code duplication that could be
182 // avoided, but that might be solved anyway when we move functionality from the
183 // analyzer to the parser.
AnalyzeLog(const ParsedRtcEventLog & parsed_log)184 void TriageHelper::AnalyzeLog(const ParsedRtcEventLog& parsed_log) {
185   AnalyzeStreamGaps(parsed_log, kIncomingPacket);
186   AnalyzeStreamGaps(parsed_log, kOutgoingPacket);
187   AnalyzeTransmissionGaps(parsed_log, kIncomingPacket);
188   AnalyzeTransmissionGaps(parsed_log, kOutgoingPacket);
189 
190   const int64_t segment_end_us = parsed_log.first_log_segment().stop_time_us();
191 
192   Timestamp first_occurrence = parsed_log.last_timestamp();
193   constexpr double kMaxLossFraction = 0.05;
194   // Loss feedback
195   int64_t total_lost_packets = 0;
196   int64_t total_expected_packets = 0;
197   for (auto& bwe_update : parsed_log.bwe_loss_updates()) {
198     if (bwe_update.log_time_us() > segment_end_us) {
199       // Only process the first (LOG_START, LOG_END) segment.
200       break;
201     }
202     int64_t lost_packets = static_cast<double>(bwe_update.fraction_lost) / 255 *
203                            bwe_update.expected_packets;
204     total_lost_packets += lost_packets;
205     total_expected_packets += bwe_update.expected_packets;
206     if (bwe_update.fraction_lost >= 255 * kMaxLossFraction) {
207       first_occurrence = std::min(first_occurrence, bwe_update.log_time());
208     }
209   }
210   double avg_outgoing_loss =
211       static_cast<double>(total_lost_packets) / total_expected_packets;
212   if (avg_outgoing_loss > kMaxLossFraction) {
213     Alert(TriageAlertType::kOutgoingHighLoss,
214           config_.GetCallTimeSec(first_occurrence),
215           "More than 5% of outgoing packets lost.");
216   }
217 }
218 
Print(FILE * file)219 void TriageHelper::Print(FILE* file) {
220   fprintf(file, "========== TRIAGE NOTIFICATIONS ==========\n");
221   for (const auto& alert : triage_alerts_) {
222     fprintf(file, "%d %s. First occurrence at %3.3lf\n", alert.second.count,
223             alert.second.explanation.c_str(), alert.second.first_occurrence);
224   }
225   fprintf(file, "========== END TRIAGE NOTIFICATIONS ==========\n");
226 }
227 
ProcessAlerts(std::function<void (int,float,std::string)> f)228 void TriageHelper::ProcessAlerts(
229     std::function<void(int, float, std::string)> f) {
230   for (const auto& alert : triage_alerts_) {
231     f(alert.second.count, alert.second.first_occurrence,
232       alert.second.explanation);
233   }
234 }
235 
236 }  // namespace webrtc
237