xref: /aosp_15_r20/external/webrtc/modules/video_coding/timing/frame_delay_variation_kalman_filter.h (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef MODULES_VIDEO_CODING_TIMING_FRAME_DELAY_VARIATION_KALMAN_FILTER_H_
12 #define MODULES_VIDEO_CODING_TIMING_FRAME_DELAY_VARIATION_KALMAN_FILTER_H_
13 
14 #include "api/units/data_size.h"
15 #include "api/units/time_delta.h"
16 
17 namespace webrtc {
18 
// This class uses a linear Kalman filter (see
// https://en.wikipedia.org/wiki/Kalman_filter) to estimate the frame delay
// variation (i.e., the difference in transmission time between a frame and the
// prior frame) for a frame, given its size variation in bytes (i.e., the
// difference in size between a frame and the prior frame).
//
// Rationale: given a fixed link bandwidth, a larger frame (in bytes) would
// take proportionally longer to arrive than a correspondingly smaller frame.
// Using the variations of frame delay and frame size, the underlying bandwidth
// and queuing delay variation of the network link can be estimated.
//
// Filter inputs:
//  * The frame delay variation: the difference between the actual inter-frame
//    arrival time and the expected inter-frame arrival time (based on RTP
//    timestamp). It is seen as the measurement.
//  * The frame size variation: the inter-frame size delta for a single frame.
//    It is used in the observation model.
//
// The hidden state of the filter is the link bandwidth and queuing delay
// buildup. The estimated state can be used to get the expected frame delay
// variation for a frame, given its frame size variation. This information can
// then be used to estimate the frame delay variation coming from network
// jitter.
//
// Mathematical details (symbols follow the Wikipedia notation):
//  * The state (`x`) is a 2x1 vector comprising the reciprocal of link
//    bandwidth [1 / bytes per ms] and the link queuing delay buildup [ms].
//  * The state transition matrix (`F`) is the 2x2 identity matrix, meaning that
//    link bandwidth and link queuing delay buildup are modeled as independent.
//  * The measurement (`z`) is the (scalar) frame delay variation [ms].
//  * The observation matrix (`H`) is a 1x2 vector set as
//    `{frame_size_variation [bytes], 1.0}`.
//  * The state estimate covariance (`P`) is a symmetric 2x2 matrix.
//  * The process noise covariance (`Q`) is a constant 2x2 diagonal matrix
//    [(1 / bytes per ms)^2, ms^2].
//  * The observation noise covariance (`r`) is a scalar [ms^2] that is
//    determined externally to this class.
class FrameDelayVariationKalmanFilter {
 public:
  // The constructor is defined out of line; the initial filter state is set
  // there and is not visible from this header.
  FrameDelayVariationKalmanFilter();
  ~FrameDelayVariationKalmanFilter() = default;

  // Predicts and updates the filter, given a new pair of frame delay variation
  // and frame size variation.
  //
  // Inputs:
  // `frame_delay_variation_ms`:
  //    Frame delay variation as calculated by the `InterFrameDelay` estimator.
  //    This is the measurement (`z`).
  //
  // `frame_size_variation_bytes`:
  //    Frame size variation, i.e., the current frame size minus the previous
  //    frame size (in bytes). Note that this quantity may be negative.
  //
  // `max_frame_size_bytes`:
  //    Filtered largest frame size received since the last reset.
  //
  // `var_noise`:
  //    Variance of the estimated random jitter.
  //    NOTE(review): presumably this is the externally determined observation
  //    noise covariance (`r`, [ms^2]) -- confirm against the implementation.
  //
  // TODO(bugs.webrtc.org/14381): For now use doubles as input parameters as
  // units defined in api/units have insufficient underlying precision for
  // jitter estimation.
  void PredictAndUpdate(double frame_delay_variation_ms,
                        double frame_size_variation_bytes,
                        double max_frame_size_bytes,
                        double var_noise);

  // Given a frame size variation, returns the estimated frame delay variation
  // explained by the link bandwidth alone.
  //
  // `frame_size_variation_bytes`:
  //    Frame size variation (in bytes) to evaluate the estimate for.
  //
  // Does not modify the filter (const). The result is presumably in ms, the
  // unit of the measurement -- confirm against the implementation.
  double GetFrameDelayVariationEstimateSizeBased(
      double frame_size_variation_bytes) const;

  // Given a frame size variation, returns the estimated frame delay variation
  // explained by both link bandwidth and link queuing delay buildup.
  //
  // `frame_size_variation_bytes`:
  //    Frame size variation (in bytes) to evaluate the estimate for.
  //
  // Does not modify the filter (const). The result is presumably in ms, the
  // unit of the measurement -- confirm against the implementation.
  double GetFrameDelayVariationEstimateTotal(
      double frame_size_variation_bytes) const;

 private:
  // State estimate (`x`): {reciprocal of link bandwidth [1 / bytes per ms],
  // link queuing delay buildup [ms]}.
  double estimate_[2];

  // State estimate covariance (`P`), a symmetric 2x2 matrix.
  double estimate_cov_[2][2];

  // Process noise covariance (`Q`). This is a diagonal matrix, so we only
  // store the diagonal entries.
  double process_noise_cov_diag_[2];
};
103 
104 }  // namespace webrtc
105 
106 #endif  // MODULES_VIDEO_CODING_TIMING_FRAME_DELAY_VARIATION_KALMAN_FILTER_H_
107