xref: /aosp_15_r20/external/webrtc/rtc_tools/frame_analyzer/video_temporal_aligner.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "rtc_tools/frame_analyzer/video_temporal_aligner.h"
12 
13 #include <algorithm>
14 #include <cmath>
15 #include <cstddef>
16 #include <deque>
17 #include <iterator>
18 #include <limits>
19 #include <vector>
20 
21 #include "api/make_ref_counted.h"
22 #include "api/video/i420_buffer.h"
23 #include "api/video/video_frame_buffer.h"
24 #include "rtc_tools/frame_analyzer/video_quality_analysis.h"
25 
26 namespace webrtc {
27 namespace test {
28 
29 namespace {
30 
// Size of the look-ahead window used when searching for the reference frame
// that matches the next test frame. The search may still cover a larger gap
// than this value, because the window is restarted every time a better match
// is found; it only ends once a whole window passes without improvement.
// Typically, the SSIM keeps improving the closer we get to the real match.
constexpr int kNumberOfFramesLookAhead = 60;
38 
39 // Helper class that takes a video and generates an infinite looping video.
40 class LoopingVideo : public Video {
41  public:
LoopingVideo(const rtc::scoped_refptr<Video> & video)42   explicit LoopingVideo(const rtc::scoped_refptr<Video>& video)
43       : video_(video) {}
44 
width() const45   int width() const override { return video_->width(); }
height() const46   int height() const override { return video_->height(); }
number_of_frames() const47   size_t number_of_frames() const override {
48     return std::numeric_limits<size_t>::max();
49   }
50 
GetFrame(size_t index) const51   rtc::scoped_refptr<I420BufferInterface> GetFrame(
52       size_t index) const override {
53     return video_->GetFrame(index % video_->number_of_frames());
54   }
55 
56  private:
57   const rtc::scoped_refptr<Video> video_;
58 };
59 
60 // Helper class that take a vector of frame indices and a video and produces a
61 // new video where the frames have been reshuffled.
62 class ReorderedVideo : public Video {
63  public:
ReorderedVideo(const rtc::scoped_refptr<Video> & video,const std::vector<size_t> & indices)64   ReorderedVideo(const rtc::scoped_refptr<Video>& video,
65                  const std::vector<size_t>& indices)
66       : video_(video), indices_(indices) {}
67 
width() const68   int width() const override { return video_->width(); }
height() const69   int height() const override { return video_->height(); }
number_of_frames() const70   size_t number_of_frames() const override { return indices_.size(); }
71 
GetFrame(size_t index) const72   rtc::scoped_refptr<I420BufferInterface> GetFrame(
73       size_t index) const override {
74     return video_->GetFrame(indices_.at(index));
75   }
76 
77  private:
78   const rtc::scoped_refptr<Video> video_;
79   const std::vector<size_t> indices_;
80 };
81 
82 // Helper class that takes a video and produces a downscaled video.
83 class DownscaledVideo : public Video {
84  public:
DownscaledVideo(float scale_factor,const rtc::scoped_refptr<Video> & video)85   DownscaledVideo(float scale_factor, const rtc::scoped_refptr<Video>& video)
86       : downscaled_width_(
87             static_cast<int>(std::round(scale_factor * video->width()))),
88         downscaled_height_(
89             static_cast<int>(std::round(scale_factor * video->height()))),
90         video_(video) {}
91 
width() const92   int width() const override { return downscaled_width_; }
height() const93   int height() const override { return downscaled_height_; }
number_of_frames() const94   size_t number_of_frames() const override {
95     return video_->number_of_frames();
96   }
97 
GetFrame(size_t index) const98   rtc::scoped_refptr<I420BufferInterface> GetFrame(
99       size_t index) const override {
100     const rtc::scoped_refptr<I420BufferInterface> frame =
101         video_->GetFrame(index);
102     rtc::scoped_refptr<I420Buffer> downscaled_frame =
103         I420Buffer::Create(downscaled_width_, downscaled_height_);
104     downscaled_frame->ScaleFrom(*frame);
105     return downscaled_frame;
106   }
107 
108  private:
109   const int downscaled_width_;
110   const int downscaled_height_;
111   const rtc::scoped_refptr<Video> video_;
112 };
113 
114 // Helper class that takes a video and caches the latest frame access. This
115 // improves performance a lot since the original source is often from a file.
116 class CachedVideo : public Video {
117  public:
CachedVideo(int max_cache_size,const rtc::scoped_refptr<Video> & video)118   CachedVideo(int max_cache_size, const rtc::scoped_refptr<Video>& video)
119       : max_cache_size_(max_cache_size), video_(video) {}
120 
width() const121   int width() const override { return video_->width(); }
height() const122   int height() const override { return video_->height(); }
number_of_frames() const123   size_t number_of_frames() const override {
124     return video_->number_of_frames();
125   }
126 
GetFrame(size_t index) const127   rtc::scoped_refptr<I420BufferInterface> GetFrame(
128       size_t index) const override {
129     for (const CachedFrame& cached_frame : cache_) {
130       if (cached_frame.index == index)
131         return cached_frame.frame;
132     }
133 
134     rtc::scoped_refptr<I420BufferInterface> frame = video_->GetFrame(index);
135     cache_.push_front({index, frame});
136     if (cache_.size() > max_cache_size_)
137       cache_.pop_back();
138 
139     return frame;
140   }
141 
142  private:
143   struct CachedFrame {
144     size_t index;
145     rtc::scoped_refptr<I420BufferInterface> frame;
146   };
147 
148   const size_t max_cache_size_;
149   const rtc::scoped_refptr<Video> video_;
150   mutable std::deque<CachedFrame> cache_;
151 };
152 
153 // Try matching the test frame against all frames in the reference video and
154 // return the index of the best matching frame.
FindBestMatch(const rtc::scoped_refptr<I420BufferInterface> & test_frame,const Video & reference_video)155 size_t FindBestMatch(const rtc::scoped_refptr<I420BufferInterface>& test_frame,
156                      const Video& reference_video) {
157   std::vector<double> ssim;
158   for (const auto& ref_frame : reference_video)
159     ssim.push_back(Ssim(test_frame, ref_frame));
160   return std::distance(ssim.begin(),
161                        std::max_element(ssim.begin(), ssim.end()));
162 }
163 
164 // Find and return the index of the frame matching the test frame. The search
165 // starts at the starting index and continues until there is no better match
166 // within the next kNumberOfFramesLookAhead frames.
FindNextMatch(const rtc::scoped_refptr<I420BufferInterface> & test_frame,const Video & reference_video,size_t start_index)167 size_t FindNextMatch(const rtc::scoped_refptr<I420BufferInterface>& test_frame,
168                      const Video& reference_video,
169                      size_t start_index) {
170   const double start_ssim =
171       Ssim(test_frame, reference_video.GetFrame(start_index));
172   for (int i = 1; i < kNumberOfFramesLookAhead; ++i) {
173     const size_t next_index = start_index + i;
174     // If we find a better match, restart the search at that point.
175     if (start_ssim < Ssim(test_frame, reference_video.GetFrame(next_index)))
176       return FindNextMatch(test_frame, reference_video, next_index);
177   }
178   // The starting index was the best match.
179   return start_index;
180 }
181 
182 }  // namespace
183 
FindMatchingFrameIndices(const rtc::scoped_refptr<Video> & reference_video,const rtc::scoped_refptr<Video> & test_video)184 std::vector<size_t> FindMatchingFrameIndices(
185     const rtc::scoped_refptr<Video>& reference_video,
186     const rtc::scoped_refptr<Video>& test_video) {
187   // This is done to get a 10x speedup. We don't need the full resolution in
188   // order to match frames, and we should limit file access and not read the
189   // same memory tens of times.
190   const float kScaleFactor = 0.25f;
191   const rtc::scoped_refptr<Video> cached_downscaled_reference_video =
192       rtc::make_ref_counted<CachedVideo>(kNumberOfFramesLookAhead,
193                                          rtc::make_ref_counted<DownscaledVideo>(
194                                              kScaleFactor, reference_video));
195   const rtc::scoped_refptr<Video> downscaled_test_video =
196       rtc::make_ref_counted<DownscaledVideo>(kScaleFactor, test_video);
197 
198   // Assume the video is looping around.
199   const rtc::scoped_refptr<Video> looping_reference_video =
200       rtc::make_ref_counted<LoopingVideo>(cached_downscaled_reference_video);
201 
202   std::vector<size_t> match_indices;
203   for (const rtc::scoped_refptr<I420BufferInterface>& test_frame :
204        *downscaled_test_video) {
205     if (match_indices.empty()) {
206       // First frame.
207       match_indices.push_back(
208           FindBestMatch(test_frame, *cached_downscaled_reference_video));
209     } else {
210       match_indices.push_back(FindNextMatch(
211           test_frame, *looping_reference_video, match_indices.back()));
212     }
213   }
214 
215   return match_indices;
216 }
217 
// Builds a video whose i-th frame is frame indices[i] of `video`. The source
// is wrapped in a looping video first, so indices past the end of `video`
// wrap around instead of being out of range.
rtc::scoped_refptr<Video> ReorderVideo(const rtc::scoped_refptr<Video>& video,
                                       const std::vector<size_t>& indices) {
  const rtc::scoped_refptr<LoopingVideo> looping_source =
      rtc::make_ref_counted<LoopingVideo>(video);
  return rtc::make_ref_counted<ReorderedVideo>(looping_source, indices);
}
223 
// Produces a version of the reference video aligned to the test video: the
// matching reference index is computed for every test frame and the
// reference video is then reordered according to those indices.
rtc::scoped_refptr<Video> GenerateAlignedReferenceVideo(
    const rtc::scoped_refptr<Video>& reference_video,
    const rtc::scoped_refptr<Video>& test_video) {
  const std::vector<size_t> matching_indices =
      FindMatchingFrameIndices(reference_video, test_video);
  return GenerateAlignedReferenceVideo(reference_video, matching_indices);
}
230 
// Produces an aligned reference video from precomputed matching indices by
// reordering the frames of the reference video accordingly.
rtc::scoped_refptr<Video> GenerateAlignedReferenceVideo(
    const rtc::scoped_refptr<Video>& reference_video,
    const std::vector<size_t>& indices) {
  rtc::scoped_refptr<Video> aligned_video =
      ReorderVideo(reference_video, indices);
  return aligned_video;
}
236 
237 }  // namespace test
238 }  // namespace webrtc
239