/*
 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 *
 */

#import "RTCVideoEncoderH264.h"

#import <VideoToolbox/VideoToolbox.h>
#include <vector>

#if defined(WEBRTC_IOS)
#import "helpers/UIDevice+RTCDevice.h"
#endif
#import "RTCCodecSpecificInfoH264.h"
#import "RTCH264ProfileLevelId.h"
#import "api/peerconnection/RTCVideoCodecInfo+Private.h"
#import "base/RTCCodecSpecificInfo.h"
#import "base/RTCI420Buffer.h"
#import "base/RTCVideoEncoder.h"
#import "base/RTCVideoFrame.h"
#import "base/RTCVideoFrameBuffer.h"
#import "components/video_frame_buffer/RTCCVPixelBuffer.h"
#import "helpers.h"

#include "api/video_codecs/h264_profile_level_id.h"
#include "common_video/h264/h264_bitstream_parser.h"
#include "common_video/include/bitrate_adjuster.h"
#include "modules/video_coding/include/video_error_codes.h"
#include "rtc_base/buffer.h"
#include "rtc_base/logging.h"
#include "rtc_base/time_utils.h"
#include "sdk/objc/components/video_codec/nalu_rewriter.h"
#include "third_party/libyuv/include/libyuv/convert_from.h"

@interface RTC_OBJC_TYPE (RTCVideoEncoderH264) ()

- (void)frameWasEncoded:(OSStatus)status
                  flags:(VTEncodeInfoFlags)infoFlags
           sampleBuffer:(CMSampleBufferRef)sampleBuffer
      codecSpecificInfo:(id<RTC_OBJC_TYPE(RTCCodecSpecificInfo)>)codecSpecificInfo
                  width:(int32_t)width
                 height:(int32_t)height
           renderTimeMs:(int64_t)renderTimeMs
              timestamp:(uint32_t)timestamp
               rotation:(RTCVideoRotation)rotation;

@end

namespace {  // anonymous namespace

// The ratio between kVTCompressionPropertyKey_DataRateLimits and
// kVTCompressionPropertyKey_AverageBitRate. The data rate limit is set higher
// than the average bit rate to avoid undershooting the target.
const float kLimitToAverageBitRateFactor = 1.5f;
// These thresholds deviate from the default h264 QP thresholds, as they
// have been found to work better on devices that support VideoToolbox.
const int kLowH264QpThreshold = 28;
const int kHighH264QpThreshold = 39;

const OSType kNV12PixelFormat = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange;

// Struct that we pass to the encoder per frame to encode. We receive it again
// in the encoder callback.
struct RTCFrameEncodeParams {
  RTCFrameEncodeParams(RTC_OBJC_TYPE(RTCVideoEncoderH264) * e,
                       RTC_OBJC_TYPE(RTCCodecSpecificInfoH264) * csi,
                       int32_t w,
                       int32_t h,
                       int64_t rtms,
                       uint32_t ts,
                       RTCVideoRotation r)
      : encoder(e), width(w), height(h), render_time_ms(rtms), timestamp(ts), rotation(r) {
    if (csi) {
      codecSpecificInfo = csi;
    } else {
      codecSpecificInfo = [[RTC_OBJC_TYPE(RTCCodecSpecificInfoH264) alloc] init];
    }
  }

  RTC_OBJC_TYPE(RTCVideoEncoderH264) * encoder;
  RTC_OBJC_TYPE(RTCCodecSpecificInfoH264) * codecSpecificInfo;
  int32_t width;
  int32_t height;
  int64_t render_time_ms;
  uint32_t timestamp;
  RTCVideoRotation rotation;
};

// We receive I420Frames as input, but we need to feed CVPixelBuffers into the
// encoder. This performs the copy and format conversion.
// TODO(tkchin): See if encoder will accept i420 frames and compare performance.
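// Note: NV12 (kCVPixelFormatType_420YpCbCr8BiPlanarFullRange) stores the luma
// samples in plane 0 and the interleaved Cb/Cr samples in plane 1, which is
// why the conversion below writes to exactly two destination planes.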
bool CopyVideoFrameToNV12PixelBuffer(id<RTC_OBJC_TYPE(RTCI420Buffer)> frameBuffer,
                                     CVPixelBufferRef pixelBuffer) {
  RTC_DCHECK(pixelBuffer);
  RTC_DCHECK_EQ(CVPixelBufferGetPixelFormatType(pixelBuffer), kNV12PixelFormat);
  RTC_DCHECK_EQ(CVPixelBufferGetHeightOfPlane(pixelBuffer, 0), frameBuffer.height);
  RTC_DCHECK_EQ(CVPixelBufferGetWidthOfPlane(pixelBuffer, 0), frameBuffer.width);

  CVReturn cvRet = CVPixelBufferLockBaseAddress(pixelBuffer, 0);
  if (cvRet != kCVReturnSuccess) {
    RTC_LOG(LS_ERROR) << "Failed to lock base address: " << cvRet;
    return false;
  }
  uint8_t *dstY = reinterpret_cast<uint8_t *>(CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0));
  int dstStrideY = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 0);
  uint8_t *dstUV = reinterpret_cast<uint8_t *>(CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 1));
  int dstStrideUV = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 1);
  // Convert I420 to NV12.
  int ret = libyuv::I420ToNV12(frameBuffer.dataY,
                               frameBuffer.strideY,
                               frameBuffer.dataU,
                               frameBuffer.strideU,
                               frameBuffer.dataV,
                               frameBuffer.strideV,
                               dstY,
                               dstStrideY,
                               dstUV,
                               dstStrideUV,
                               frameBuffer.width,
                               frameBuffer.height);
  CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);
  if (ret) {
    RTC_LOG(LS_ERROR) << "Error converting I420 VideoFrame to NV12: " << ret;
    return false;
  }
  return true;
}

CVPixelBufferRef CreatePixelBuffer(CVPixelBufferPoolRef pixel_buffer_pool) {
  if (!pixel_buffer_pool) {
    RTC_LOG(LS_ERROR) << "Failed to get pixel buffer pool.";
    return nullptr;
  }
  CVPixelBufferRef pixel_buffer;
  CVReturn ret = CVPixelBufferPoolCreatePixelBuffer(nullptr, pixel_buffer_pool, &pixel_buffer);
  if (ret != kCVReturnSuccess) {
    RTC_LOG(LS_ERROR) << "Failed to create pixel buffer: " << ret;
    // We probably want to drop frames here, since failure probably means
    // that the pool is empty.
    return nullptr;
  }
  return pixel_buffer;
}

// This is the callback function that VideoToolbox calls when encode is
// complete. From inspection this happens on its own queue.
void compressionOutputCallback(void *encoder,
                               void *params,
                               OSStatus status,
                               VTEncodeInfoFlags infoFlags,
                               CMSampleBufferRef sampleBuffer) {
  if (!params) {
    // This can happen if there are pending callbacks when the encoder is destroyed.
    return;
  }
  std::unique_ptr<RTCFrameEncodeParams> encodeParams(
      reinterpret_cast<RTCFrameEncodeParams *>(params));
  [encodeParams->encoder frameWasEncoded:status
                                   flags:infoFlags
                            sampleBuffer:sampleBuffer
                       codecSpecificInfo:encodeParams->codecSpecificInfo
                                   width:encodeParams->width
                                  height:encodeParams->height
                            renderTimeMs:encodeParams->render_time_ms
                               timestamp:encodeParams->timestamp
                                rotation:encodeParams->rotation];
}

// Maps the webrtc::H264ProfileLevelId parsed from the SDP format to a
// VideoToolbox profile-level constant. If there is no specific VideoToolbox
// profile for the specified level, AutoLevel will be returned. The user must
// initialize the encoder with a resolution and framerate conforming to the
// selected H264 level regardless.
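// For example, the common SDP parameter profile-level-id=42e01f parses to
// Constrained Baseline profile at level 3.1, which the switch below maps to
// kVTProfileLevel_H264_Baseline_3_1.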
CFStringRef ExtractProfile(const webrtc::H264ProfileLevelId &profile_level_id) {
  switch (profile_level_id.profile) {
    case webrtc::H264Profile::kProfileConstrainedBaseline:
    case webrtc::H264Profile::kProfileBaseline:
      switch (profile_level_id.level) {
        case webrtc::H264Level::kLevel3:
          return kVTProfileLevel_H264_Baseline_3_0;
        case webrtc::H264Level::kLevel3_1:
          return kVTProfileLevel_H264_Baseline_3_1;
        case webrtc::H264Level::kLevel3_2:
          return kVTProfileLevel_H264_Baseline_3_2;
        case webrtc::H264Level::kLevel4:
          return kVTProfileLevel_H264_Baseline_4_0;
        case webrtc::H264Level::kLevel4_1:
          return kVTProfileLevel_H264_Baseline_4_1;
        case webrtc::H264Level::kLevel4_2:
          return kVTProfileLevel_H264_Baseline_4_2;
        case webrtc::H264Level::kLevel5:
          return kVTProfileLevel_H264_Baseline_5_0;
        case webrtc::H264Level::kLevel5_1:
          return kVTProfileLevel_H264_Baseline_5_1;
        case webrtc::H264Level::kLevel5_2:
          return kVTProfileLevel_H264_Baseline_5_2;
        case webrtc::H264Level::kLevel1:
        case webrtc::H264Level::kLevel1_b:
        case webrtc::H264Level::kLevel1_1:
        case webrtc::H264Level::kLevel1_2:
        case webrtc::H264Level::kLevel1_3:
        case webrtc::H264Level::kLevel2:
        case webrtc::H264Level::kLevel2_1:
        case webrtc::H264Level::kLevel2_2:
          return kVTProfileLevel_H264_Baseline_AutoLevel;
      }

    case webrtc::H264Profile::kProfileMain:
      switch (profile_level_id.level) {
        case webrtc::H264Level::kLevel3:
          return kVTProfileLevel_H264_Main_3_0;
        case webrtc::H264Level::kLevel3_1:
          return kVTProfileLevel_H264_Main_3_1;
        case webrtc::H264Level::kLevel3_2:
          return kVTProfileLevel_H264_Main_3_2;
        case webrtc::H264Level::kLevel4:
          return kVTProfileLevel_H264_Main_4_0;
        case webrtc::H264Level::kLevel4_1:
          return kVTProfileLevel_H264_Main_4_1;
        case webrtc::H264Level::kLevel4_2:
          return kVTProfileLevel_H264_Main_4_2;
        case webrtc::H264Level::kLevel5:
          return kVTProfileLevel_H264_Main_5_0;
        case webrtc::H264Level::kLevel5_1:
          return kVTProfileLevel_H264_Main_5_1;
        case webrtc::H264Level::kLevel5_2:
          return kVTProfileLevel_H264_Main_5_2;
        case webrtc::H264Level::kLevel1:
        case webrtc::H264Level::kLevel1_b:
        case webrtc::H264Level::kLevel1_1:
        case webrtc::H264Level::kLevel1_2:
        case webrtc::H264Level::kLevel1_3:
        case webrtc::H264Level::kLevel2:
        case webrtc::H264Level::kLevel2_1:
        case webrtc::H264Level::kLevel2_2:
          return kVTProfileLevel_H264_Main_AutoLevel;
      }

    case webrtc::H264Profile::kProfileConstrainedHigh:
    case webrtc::H264Profile::kProfileHigh:
    case webrtc::H264Profile::kProfilePredictiveHigh444:
      switch (profile_level_id.level) {
        case webrtc::H264Level::kLevel3:
          return kVTProfileLevel_H264_High_3_0;
        case webrtc::H264Level::kLevel3_1:
          return kVTProfileLevel_H264_High_3_1;
        case webrtc::H264Level::kLevel3_2:
          return kVTProfileLevel_H264_High_3_2;
        case webrtc::H264Level::kLevel4:
          return kVTProfileLevel_H264_High_4_0;
        case webrtc::H264Level::kLevel4_1:
          return kVTProfileLevel_H264_High_4_1;
        case webrtc::H264Level::kLevel4_2:
          return kVTProfileLevel_H264_High_4_2;
        case webrtc::H264Level::kLevel5:
          return kVTProfileLevel_H264_High_5_0;
        case webrtc::H264Level::kLevel5_1:
          return kVTProfileLevel_H264_High_5_1;
        case webrtc::H264Level::kLevel5_2:
          return kVTProfileLevel_H264_High_5_2;
        case webrtc::H264Level::kLevel1:
        case webrtc::H264Level::kLevel1_b:
        case webrtc::H264Level::kLevel1_1:
        case webrtc::H264Level::kLevel1_2:
        case webrtc::H264Level::kLevel1_3:
        case webrtc::H264Level::kLevel2:
        case webrtc::H264Level::kLevel2_1:
        case webrtc::H264Level::kLevel2_2:
          return kVTProfileLevel_H264_High_AutoLevel;
      }
  }
}

// Returns the max allowed sample rate (pixels per second) that can be
// processed by an encoder configured with `profile_level_id`.
// See https://www.itu.int/rec/dologin_pub.asp?lang=e&id=T-REC-H.264-201610-S!!PDF-E&type=items
// for details.
NSUInteger GetMaxSampleRate(const webrtc::H264ProfileLevelId &profile_level_id) {
  switch (profile_level_id.level) {
    case webrtc::H264Level::kLevel3:
      return 10368000;
    case webrtc::H264Level::kLevel3_1:
      return 27648000;
    case webrtc::H264Level::kLevel3_2:
      return 55296000;
    case webrtc::H264Level::kLevel4:
    case webrtc::H264Level::kLevel4_1:
      return 62914560;
    case webrtc::H264Level::kLevel4_2:
      return 133693440;
    case webrtc::H264Level::kLevel5:
      return 150994944;
    case webrtc::H264Level::kLevel5_1:
      return 251658240;
    case webrtc::H264Level::kLevel5_2:
      return 530841600;
    case webrtc::H264Level::kLevel1:
    case webrtc::H264Level::kLevel1_b:
    case webrtc::H264Level::kLevel1_1:
    case webrtc::H264Level::kLevel1_2:
    case webrtc::H264Level::kLevel1_3:
    case webrtc::H264Level::kLevel2:
    case webrtc::H264Level::kLevel2_1:
    case webrtc::H264Level::kLevel2_2:
      // Zero means auto rate setting.
      return 0;
  }
}
}  // namespace

@implementation RTC_OBJC_TYPE (RTCVideoEncoderH264) {
  RTC_OBJC_TYPE(RTCVideoCodecInfo) * _codecInfo;
  std::unique_ptr<webrtc::BitrateAdjuster> _bitrateAdjuster;
  uint32_t _targetBitrateBps;
  uint32_t _encoderBitrateBps;
  uint32_t _encoderFrameRate;
  uint32_t _maxAllowedFrameRate;
  RTCH264PacketizationMode _packetizationMode;
  absl::optional<webrtc::H264ProfileLevelId> _profile_level_id;
  RTCVideoEncoderCallback _callback;
  int32_t _width;
  int32_t _height;
  VTCompressionSessionRef _compressionSession;
  CVPixelBufferPoolRef _pixelBufferPool;
  RTCVideoCodecMode _mode;

  webrtc::H264BitstreamParser _h264BitstreamParser;
  std::vector<uint8_t> _frameScaleBuffer;
}

// .5 is set as a minimum to prevent overcompensating for large temporary
// overshoots. We don't want to degrade video quality too badly.
// .95 is set to prevent oscillations. When a lower bitrate is set on the
// encoder than previously set, its output seems to have a brief period of
// drastically reduced bitrate, so we want to avoid that. In steady state
// conditions, 0.95 seems to give us better overall bitrate over long periods
// of time.
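// A rough illustration, assuming webrtc::BitrateAdjuster clamps its adjusted
// output to these fractions of the target: with a 1 Mbps target, the bitrate
// handed to the encoder stays within roughly 500 kbps and 950 kbps.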
- (instancetype)initWithCodecInfo:(RTC_OBJC_TYPE(RTCVideoCodecInfo) *)codecInfo {
  if (self = [super init]) {
    _codecInfo = codecInfo;
    _bitrateAdjuster.reset(new webrtc::BitrateAdjuster(.5, .95));
    _packetizationMode = RTCH264PacketizationModeNonInterleaved;
    _profile_level_id =
        webrtc::ParseSdpForH264ProfileLevelId([codecInfo nativeSdpVideoFormat].parameters);
    RTC_DCHECK(_profile_level_id);
    RTC_LOG(LS_INFO) << "Using profile " << CFStringToString(ExtractProfile(*_profile_level_id));
    RTC_CHECK([codecInfo.name isEqualToString:kRTCVideoCodecH264Name]);
  }
  return self;
}

- (void)dealloc {
  [self destroyCompressionSession];
}

- (NSInteger)startEncodeWithSettings:(RTC_OBJC_TYPE(RTCVideoEncoderSettings) *)settings
                       numberOfCores:(int)numberOfCores {
  RTC_DCHECK(settings);
  RTC_DCHECK([settings.name isEqualToString:kRTCVideoCodecH264Name]);

  _width = settings.width;
  _height = settings.height;
  _mode = settings.mode;

  uint32_t aligned_width = (((_width + 15) >> 4) << 4);
  uint32_t aligned_height = (((_height + 15) >> 4) << 4);
  _maxAllowedFrameRate = static_cast<uint32_t>(GetMaxSampleRate(*_profile_level_id) /
                                               (aligned_width * aligned_height));

  // We can only set average bitrate on the HW encoder.
  _targetBitrateBps = settings.startBitrate * 1000;  // startBitrate is in kbps.
  _bitrateAdjuster->SetTargetBitrateBps(_targetBitrateBps);
  _encoderFrameRate = MIN(settings.maxFramerate, _maxAllowedFrameRate);
  if (settings.maxFramerate > _maxAllowedFrameRate && _maxAllowedFrameRate > 0) {
    RTC_LOG(LS_WARNING) << "Initial encoder frame rate setting " << settings.maxFramerate
                        << " is larger than the "
                        << "maximal allowed frame rate " << _maxAllowedFrameRate << ".";
  }

  // TODO(tkchin): Try setting payload size via
  // kVTCompressionPropertyKey_MaxH264SliceBytes.

  return [self resetCompressionSessionWithPixelFormat:kNV12PixelFormat];
}

- (NSInteger)encode:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame
    codecSpecificInfo:(nullable id<RTC_OBJC_TYPE(RTCCodecSpecificInfo)>)codecSpecificInfo
           frameTypes:(NSArray<NSNumber *> *)frameTypes {
  if (!_callback || !_compressionSession) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  BOOL isKeyframeRequired = NO;

  // Get a pixel buffer from the pool and copy frame data over.
  if ([self resetCompressionSessionIfNeededWithFrame:frame]) {
    isKeyframeRequired = YES;
  }

  CVPixelBufferRef pixelBuffer = nullptr;
  if ([frame.buffer isKindOfClass:[RTC_OBJC_TYPE(RTCCVPixelBuffer) class]]) {
    // Native frame buffer.
    RTC_OBJC_TYPE(RTCCVPixelBuffer) *rtcPixelBuffer =
        (RTC_OBJC_TYPE(RTCCVPixelBuffer) *)frame.buffer;
    if (![rtcPixelBuffer requiresCropping]) {
      // This pixel buffer might have a higher resolution than what the
      // compression session is configured to. The compression session can
      // handle that and will output encoded frames in the configured
      // resolution regardless of the input pixel buffer resolution.
      pixelBuffer = rtcPixelBuffer.pixelBuffer;
      CVBufferRetain(pixelBuffer);
    } else {
      // Cropping required, we need to crop and scale to a new pixel buffer.
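      // Note: buffers vended by _pixelBufferPool have the compression
      // session's configured dimensions, so cropping/scaling into one of
      // them below yields a frame matching the encoder configuration.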
      pixelBuffer = CreatePixelBuffer(_pixelBufferPool);
      if (!pixelBuffer) {
        return WEBRTC_VIDEO_CODEC_ERROR;
      }
      int dstWidth = CVPixelBufferGetWidth(pixelBuffer);
      int dstHeight = CVPixelBufferGetHeight(pixelBuffer);
      if ([rtcPixelBuffer requiresScalingToWidth:dstWidth height:dstHeight]) {
        int size =
            [rtcPixelBuffer bufferSizeForCroppingAndScalingToWidth:dstWidth height:dstHeight];
        _frameScaleBuffer.resize(size);
      } else {
        _frameScaleBuffer.clear();
      }
      _frameScaleBuffer.shrink_to_fit();
      if (![rtcPixelBuffer cropAndScaleTo:pixelBuffer withTempBuffer:_frameScaleBuffer.data()]) {
        CVBufferRelease(pixelBuffer);
        return WEBRTC_VIDEO_CODEC_ERROR;
      }
    }
  }

  if (!pixelBuffer) {
    // We did not have a native frame buffer.
    RTC_DCHECK_EQ(frame.width, _width);
    RTC_DCHECK_EQ(frame.height, _height);
    pixelBuffer = CreatePixelBuffer(_pixelBufferPool);
    if (!pixelBuffer) {
      return WEBRTC_VIDEO_CODEC_ERROR;
    }
    RTC_DCHECK(pixelBuffer);
    if (!CopyVideoFrameToNV12PixelBuffer([frame.buffer toI420], pixelBuffer)) {
      RTC_LOG(LS_ERROR) << "Failed to copy frame data.";
      CVBufferRelease(pixelBuffer);
      return WEBRTC_VIDEO_CODEC_ERROR;
    }
  }

  // Check if we need a keyframe.
  if (!isKeyframeRequired && frameTypes) {
    for (NSNumber *frameType in frameTypes) {
      if ((RTCFrameType)frameType.intValue == RTCFrameTypeVideoFrameKey) {
        isKeyframeRequired = YES;
        break;
      }
    }
  }

  CMTime presentationTimeStamp = CMTimeMake(frame.timeStampNs / rtc::kNumNanosecsPerMillisec, 1000);
  CFDictionaryRef frameProperties = nullptr;
  if (isKeyframeRequired) {
    CFTypeRef keys[] = {kVTEncodeFrameOptionKey_ForceKeyFrame};
    CFTypeRef values[] = {kCFBooleanTrue};
    frameProperties = CreateCFTypeDictionary(keys, values, 1);
  }

  std::unique_ptr<RTCFrameEncodeParams> encodeParams;
  encodeParams.reset(new RTCFrameEncodeParams(self,
                                              codecSpecificInfo,
                                              _width,
                                              _height,
                                              frame.timeStampNs / rtc::kNumNanosecsPerMillisec,
                                              frame.timeStamp,
                                              frame.rotation));
  encodeParams->codecSpecificInfo.packetizationMode = _packetizationMode;

  // Update the bitrate if needed.
  [self setBitrateBps:_bitrateAdjuster->GetAdjustedBitrateBps() frameRate:_encoderFrameRate];

  OSStatus status = VTCompressionSessionEncodeFrame(_compressionSession,
                                                    pixelBuffer,
                                                    presentationTimeStamp,
                                                    kCMTimeInvalid,
                                                    frameProperties,
                                                    encodeParams.release(),
                                                    nullptr);
  if (frameProperties) {
    CFRelease(frameProperties);
  }
  if (pixelBuffer) {
    CVBufferRelease(pixelBuffer);
  }

  if (status == kVTInvalidSessionErr) {
    // This error occurs when entering foreground after backgrounding the app.
    RTC_LOG(LS_ERROR) << "Invalid compression session, resetting.";
    [self resetCompressionSessionWithPixelFormat:[self pixelFormatOfFrame:frame]];

    return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
  } else if (status == kVTVideoEncoderMalfunctionErr) {
    // Sometimes the encoder malfunctions and needs to be restarted.
    RTC_LOG(LS_ERROR)
        << "Encountered video encoder malfunction error. Resetting compression session.";
    [self resetCompressionSessionWithPixelFormat:[self pixelFormatOfFrame:frame]];

    return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
  } else if (status != noErr) {
    RTC_LOG(LS_ERROR) << "Failed to encode frame with code: " << status;
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  return WEBRTC_VIDEO_CODEC_OK;
}

- (void)setCallback:(RTCVideoEncoderCallback)callback {
  _callback = callback;
}

- (int)setBitrate:(uint32_t)bitrateKbit framerate:(uint32_t)framerate {
  _targetBitrateBps = 1000 * bitrateKbit;
  _bitrateAdjuster->SetTargetBitrateBps(_targetBitrateBps);
  if (framerate > _maxAllowedFrameRate && _maxAllowedFrameRate > 0) {
    RTC_LOG(LS_WARNING) << "Encoder frame rate setting " << framerate << " is larger than the "
                        << "maximal allowed frame rate " << _maxAllowedFrameRate << ".";
  }
  framerate = MIN(framerate, _maxAllowedFrameRate);
  [self setBitrateBps:_bitrateAdjuster->GetAdjustedBitrateBps() frameRate:framerate];
  return WEBRTC_VIDEO_CODEC_OK;
}

- (NSInteger)resolutionAlignment {
  return 1;
}

- (BOOL)applyAlignmentToAllSimulcastLayers {
  return NO;
}

- (BOOL)supportsNativeHandle {
  return YES;
}

#pragma mark - Private

- (NSInteger)releaseEncoder {
  // Need to destroy so that the session is invalidated and won't use the
  // callback anymore. Do not remove callback until the session is invalidated
  // since async encoder callbacks can occur until invalidation.
  [self destroyCompressionSession];
  _callback = nullptr;
  return WEBRTC_VIDEO_CODEC_OK;
}

- (OSType)pixelFormatOfFrame:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame {
  // Use NV12 for non-native frames.
  if ([frame.buffer isKindOfClass:[RTC_OBJC_TYPE(RTCCVPixelBuffer) class]]) {
    RTC_OBJC_TYPE(RTCCVPixelBuffer) *rtcPixelBuffer =
        (RTC_OBJC_TYPE(RTCCVPixelBuffer) *)frame.buffer;
    return CVPixelBufferGetPixelFormatType(rtcPixelBuffer.pixelBuffer);
  }

  return kNV12PixelFormat;
}

- (BOOL)resetCompressionSessionIfNeededWithFrame:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame {
  BOOL resetCompressionSession = NO;

  // If we're capturing native frames in another pixel format than the compression session is
  // configured with, make sure the compression session is reset using the correct pixel format.
  OSType framePixelFormat = [self pixelFormatOfFrame:frame];

  if (_compressionSession) {
    // The pool attribute `kCVPixelBufferPixelFormatTypeKey` can contain either an array of pixel
    // formats or a single pixel format.
    NSDictionary *poolAttributes =
        (__bridge NSDictionary *)CVPixelBufferPoolGetPixelBufferAttributes(_pixelBufferPool);
    id pixelFormats =
        [poolAttributes objectForKey:(__bridge NSString *)kCVPixelBufferPixelFormatTypeKey];
    NSArray<NSNumber *> *compressionSessionPixelFormats = nil;
    if ([pixelFormats isKindOfClass:[NSArray class]]) {
      compressionSessionPixelFormats = (NSArray *)pixelFormats;
    } else if ([pixelFormats isKindOfClass:[NSNumber class]]) {
      compressionSessionPixelFormats = @[ (NSNumber *)pixelFormats ];
    }

    if (![compressionSessionPixelFormats
            containsObject:[NSNumber numberWithLong:framePixelFormat]]) {
      resetCompressionSession = YES;
      RTC_LOG(LS_INFO) << "Resetting compression session due to non-matching pixel format.";
    }
  } else {
    resetCompressionSession = YES;
  }

  if (resetCompressionSession) {
    [self resetCompressionSessionWithPixelFormat:framePixelFormat];
  }
  return resetCompressionSession;
}

- (int)resetCompressionSessionWithPixelFormat:(OSType)framePixelFormat {
  [self destroyCompressionSession];

  // Set source image buffer attributes. These attributes will be present on
  // buffers retrieved from the encoder's pixel buffer pool.
  NSDictionary *sourceAttributes = @{
#if defined(WEBRTC_IOS) && (TARGET_OS_MACCATALYST || TARGET_OS_SIMULATOR)
    (NSString *)kCVPixelBufferMetalCompatibilityKey : @(YES),
#elif defined(WEBRTC_IOS)
    (NSString *)kCVPixelBufferOpenGLESCompatibilityKey : @(YES),
#elif defined(WEBRTC_MAC) && !defined(WEBRTC_ARCH_ARM64)
    (NSString *)kCVPixelBufferOpenGLCompatibilityKey : @(YES),
#endif
    (NSString *)kCVPixelBufferIOSurfacePropertiesKey : @{},
    (NSString *)kCVPixelBufferPixelFormatTypeKey : @(framePixelFormat),
  };

  NSDictionary *encoder_specs;
#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
  // Currently hw acceleration is only supported above 360p on macOS; below
  // 360p the compression session will be created with hw acceleration disabled.
  encoder_specs = @{
    (NSString *)kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder : @(YES),
  };
#endif
  OSStatus status = VTCompressionSessionCreate(
      nullptr,  // use default allocator
      _width,
      _height,
      kCMVideoCodecType_H264,
      (__bridge CFDictionaryRef)encoder_specs,  // use hardware accelerated encoder if available
      (__bridge CFDictionaryRef)sourceAttributes,
      nullptr,  // use default compressed data allocator
      compressionOutputCallback,
      nullptr,
      &_compressionSession);
  if (status != noErr) {
    RTC_LOG(LS_ERROR) << "Failed to create compression session: " << status;
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
  CFBooleanRef hwaccl_enabled = nullptr;
  status = VTSessionCopyProperty(_compressionSession,
                                 kVTCompressionPropertyKey_UsingHardwareAcceleratedVideoEncoder,
                                 nullptr,
                                 &hwaccl_enabled);
  if (status == noErr && (CFBooleanGetValue(hwaccl_enabled))) {
    RTC_LOG(LS_INFO) << "Compression session created with hw accl enabled";
  } else {
    RTC_LOG(LS_INFO) << "Compression session created with hw accl disabled";
  }
#endif
  [self configureCompressionSession];

  // The pixel buffer pool is dependent on the compression session so if the session is reset, the
  // pool should be reset as well.
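  // Note: VTCompressionSessionGetPixelBufferPool follows the Core Foundation
  // "Get" rule, so the pool is not retained here; destroyCompressionSession
  // only nulls out the reference rather than releasing it.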
  _pixelBufferPool = VTCompressionSessionGetPixelBufferPool(_compressionSession);

  return WEBRTC_VIDEO_CODEC_OK;
}

- (void)configureCompressionSession {
  RTC_DCHECK(_compressionSession);
  SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_RealTime, true);
  SetVTSessionProperty(_compressionSession,
                       kVTCompressionPropertyKey_ProfileLevel,
                       ExtractProfile(*_profile_level_id));
  SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_AllowFrameReordering, false);
  [self setEncoderBitrateBps:_targetBitrateBps frameRate:_encoderFrameRate];
  // TODO(tkchin): Look at entropy mode and colorspace matrices.
  // TODO(tkchin): Investigate to see if there's any way to make this work.
  // May need it to interop with Android. Currently this call just fails.
  // On inspecting encoder output on iOS8, this value is set to 6.
  // internal::SetVTSessionProperty(compression_session_,
  //     kVTCompressionPropertyKey_MaxFrameDelayCount,
  //     1);

  // Set a relatively large value for keyframe emission (7200 frames or 4 minutes).
  SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_MaxKeyFrameInterval, 7200);
  SetVTSessionProperty(
      _compressionSession, kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration, 240);
}

- (void)destroyCompressionSession {
  if (_compressionSession) {
    VTCompressionSessionInvalidate(_compressionSession);
    CFRelease(_compressionSession);
    _compressionSession = nullptr;
    _pixelBufferPool = nullptr;
  }
}

- (NSString *)implementationName {
  return @"VideoToolbox";
}

- (void)setBitrateBps:(uint32_t)bitrateBps frameRate:(uint32_t)frameRate {
  if (_encoderBitrateBps != bitrateBps || _encoderFrameRate != frameRate) {
    [self setEncoderBitrateBps:bitrateBps frameRate:frameRate];
  }
}

- (void)setEncoderBitrateBps:(uint32_t)bitrateBps frameRate:(uint32_t)frameRate {
  if (_compressionSession) {
    SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_AverageBitRate, bitrateBps);

    // With zero `_maxAllowedFrameRate`, we fall back to automatic frame rate detection.
    if (_maxAllowedFrameRate > 0) {
      SetVTSessionProperty(
          _compressionSession, kVTCompressionPropertyKey_ExpectedFrameRate, frameRate);
    }

    // TODO(tkchin): Add a helper method to set array value.
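    // Worked example: with bitrateBps = 1,000,000 and
    // kLimitToAverageBitRateFactor = 1.5, the limit below evaluates to
    // 1,000,000 * 1.5 / 8 = 187,500 bytes over a one-second window.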
    int64_t dataLimitBytesPerSecondValue =
        static_cast<int64_t>(bitrateBps * kLimitToAverageBitRateFactor / 8);
    CFNumberRef bytesPerSecond =
        CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &dataLimitBytesPerSecondValue);
    int64_t oneSecondValue = 1;
    CFNumberRef oneSecond =
        CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &oneSecondValue);
    const void *nums[2] = {bytesPerSecond, oneSecond};
    CFArrayRef dataRateLimits = CFArrayCreate(nullptr, nums, 2, &kCFTypeArrayCallBacks);
    OSStatus status = VTSessionSetProperty(
        _compressionSession, kVTCompressionPropertyKey_DataRateLimits, dataRateLimits);
    if (bytesPerSecond) {
      CFRelease(bytesPerSecond);
    }
    if (oneSecond) {
      CFRelease(oneSecond);
    }
    if (dataRateLimits) {
      CFRelease(dataRateLimits);
    }
    if (status != noErr) {
      RTC_LOG(LS_ERROR) << "Failed to set data rate limit with code: " << status;
    }

    _encoderBitrateBps = bitrateBps;
    _encoderFrameRate = frameRate;
  }
}

- (void)frameWasEncoded:(OSStatus)status
                  flags:(VTEncodeInfoFlags)infoFlags
           sampleBuffer:(CMSampleBufferRef)sampleBuffer
      codecSpecificInfo:(id<RTC_OBJC_TYPE(RTCCodecSpecificInfo)>)codecSpecificInfo
                  width:(int32_t)width
                 height:(int32_t)height
           renderTimeMs:(int64_t)renderTimeMs
              timestamp:(uint32_t)timestamp
               rotation:(RTCVideoRotation)rotation {
  RTCVideoEncoderCallback callback = _callback;
  if (!callback) {
    return;
  }
  if (status != noErr) {
    RTC_LOG(LS_ERROR) << "H264 encode failed with code: " << status;
    return;
  }
  if (infoFlags & kVTEncodeInfo_FrameDropped) {
    RTC_LOG(LS_INFO) << "H264 encode dropped frame.";
    return;
  }

  BOOL isKeyframe = NO;
  CFArrayRef attachments = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, 0);
  if (attachments != nullptr && CFArrayGetCount(attachments)) {
    CFDictionaryRef attachment =
        static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(attachments, 0));
    isKeyframe = !CFDictionaryContainsKey(attachment, kCMSampleAttachmentKey_NotSync);
  }

  if (isKeyframe) {
    RTC_LOG(LS_INFO) << "Generated keyframe";
  }

  __block std::unique_ptr<rtc::Buffer> buffer = std::make_unique<rtc::Buffer>();
  if (!webrtc::H264CMSampleBufferToAnnexBBuffer(sampleBuffer, isKeyframe, buffer.get())) {
    return;
  }

  RTC_OBJC_TYPE(RTCEncodedImage) *frame = [[RTC_OBJC_TYPE(RTCEncodedImage) alloc] init];
  // This assumes ownership of `buffer` and is responsible for freeing it when done.
  frame.buffer = [[NSData alloc] initWithBytesNoCopy:buffer->data()
                                              length:buffer->size()
                                         deallocator:^(void *bytes, NSUInteger size) {
                                           buffer.reset();
                                         }];
  frame.encodedWidth = width;
  frame.encodedHeight = height;
  frame.frameType = isKeyframe ? RTCFrameTypeVideoFrameKey : RTCFrameTypeVideoFrameDelta;
  frame.captureTimeMs = renderTimeMs;
  frame.timeStamp = timestamp;
  frame.rotation = rotation;
  frame.contentType = (_mode == RTCVideoCodecModeScreensharing) ? RTCVideoContentTypeScreenshare :
                                                                  RTCVideoContentTypeUnspecified;
  frame.flags = webrtc::VideoSendTiming::kInvalid;

  _h264BitstreamParser.ParseBitstream(*buffer);
  frame.qp = @(_h264BitstreamParser.GetLastSliceQp().value_or(-1));

  BOOL res = callback(frame, codecSpecificInfo);
  if (!res) {
    RTC_LOG(LS_ERROR) << "Encode callback failed";
    return;
  }
  _bitrateAdjuster->Update(frame.buffer.length);
}

- (nullable RTC_OBJC_TYPE(RTCVideoEncoderQpThresholds) *)scalingSettings {
  return [[RTC_OBJC_TYPE(RTCVideoEncoderQpThresholds) alloc]
      initWithThresholdsLow:kLowH264QpThreshold
                       high:kHighH264QpThreshold];
}

@end