/* * Copyright (C) 2018 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "mediautils/TimerThread.h" #define LOG_TAG "TimeCheck" #include #include #include #include #include #include #include #include #include #include #include #if defined(__ANDROID__) #include "debuggerd/handler.h" #endif namespace android::mediautils { // Note: The sum of kDefaultTimeOutDurationMs and kDefaultSecondChanceDurationMs // should be no less than 2 seconds, otherwise spurious timeouts // may occur with system suspend. static constexpr int kDefaultTimeoutDurationMs = 3000; // Due to suspend abort not incrementing the monotonic clock, // we allow another second chance timeout after the first timeout expires. // // The total timeout is therefore kDefaultTimeoutDuration + kDefaultSecondChanceDuration, // and the result is more stable when the monotonic clock increments during suspend. // static constexpr int kDefaultSecondChanceDurationMs = 2000; /* static */ TimeCheck::Duration TimeCheck::getDefaultTimeoutDuration() { static constinit std::atomic defaultTimeoutDurationMs{}; auto defaultMs = defaultTimeoutDurationMs.load(std::memory_order_relaxed); if (defaultMs == 0) { defaultMs = property_get_int32( "audio.timecheck.timeout_duration_ms", kDefaultTimeoutDurationMs); if (defaultMs < 1) defaultMs = kDefaultTimeoutDurationMs; defaultTimeoutDurationMs.store(defaultMs, std::memory_order_relaxed); } return std::chrono::milliseconds(defaultMs); } /* static */ TimeCheck::Duration TimeCheck::getDefaultSecondChanceDuration() { static constinit std::atomic defaultSecondChanceDurationMs{}; auto defaultMs = defaultSecondChanceDurationMs.load(std::memory_order_relaxed); if (defaultMs == 0) { defaultMs = property_get_int32( "audio.timecheck.second_chance_duration_ms", kDefaultSecondChanceDurationMs); if (defaultMs < 1) defaultMs = kDefaultSecondChanceDurationMs; defaultSecondChanceDurationMs.store(defaultMs, std::memory_order_relaxed); } return std::chrono::milliseconds(defaultMs); } // This function appropriately signals a pid to dump a backtrace if we are // running on device (and the HAL exists). If we are not running on an Android // device, there is no HAL to signal (so we do nothing). static inline void signalAudioHAL([[maybe_unused]] pid_t pid) { #if defined(__ANDROID__) sigqueue(pid, DEBUGGER_SIGNAL, {.sival_int = 0}); #endif } /** * Returns the std::string "HH:MM:SS.MSc" from a system_clock time_point. */ std::string formatTime(std::chrono::system_clock::time_point t) { auto time_string = audio_utils_time_string_from_ns( std::chrono::nanoseconds(t.time_since_epoch()).count()); // The time string is 19 characters (including null termination). // Example: "03-27 16:47:06.187" // MM DD HH MM SS MS // We offset by 6 to get HH:MM:SS.MSc // return time_string.time + 6; // offset to remove month/day. } /** * Finds the end of the common time prefix. * * This is as an option to remove the common time prefix to avoid * unnecessary duplicated strings. * * \param time1 a time string * \param time2 a time string * \return the position where the common time prefix ends. For abbreviated * printing of time2, offset the character pointer by this position. */ static size_t commonTimePrefixPosition(std::string_view time1, std::string_view time2) { const size_t endPos = std::min(time1.size(), time2.size()); size_t i; // Find location of the first mismatch between strings for (i = 0; ; ++i) { if (i == endPos) { return i; // strings match completely to the length of one of the strings. } if (time1[i] != time2[i]) { break; } if (time1[i] == '\0') { return i; // "printed" strings match completely. No need to check further. } } // Go backwards until we find a delimeter or space. for (; i > 0 && isdigit(time1[i]) // still a number && time1[i - 1] != ' ' ; --i) { } return i; } /** * Returns the unique suffix of time2 that isn't present in time1. * * If time2 is identical to time1, then an empty string_view is returned. * This method is used to elide the common prefix when printing times. */ std::string_view timeSuffix(std::string_view time1, std::string_view time2) { const size_t pos = commonTimePrefixPosition(time1, time2); return time2.substr(pos); } // Audio HAL server pids vector used to generate audio HAL processes tombstone // when audioserver watchdog triggers. // We use a lockless storage to avoid potential deadlocks in the context of watchdog // trigger. // Protection again simultaneous writes is not needed given one update takes place // during AudioFlinger construction and other comes necessarily later once the IAudioFlinger // interface is available. // The use of an atomic index just guaranties that current vector is fully initialized // when read. /* static */ void TimeCheck::accessAudioHalPids(std::vector* pids, bool update) { static constexpr int kNumAudioHalPidsVectors = 3; static std::vector audioHalPids[kNumAudioHalPidsVectors]; static std::atomic curAudioHalPids = 0; if (update) { audioHalPids[(curAudioHalPids++ + 1) % kNumAudioHalPidsVectors] = *pids; } else { *pids = audioHalPids[curAudioHalPids % kNumAudioHalPidsVectors]; } } /* static */ void TimeCheck::setAudioHalPids(const std::vector& pids) { accessAudioHalPids(&(const_cast&>(pids)), true); } /* static */ std::vector TimeCheck::getAudioHalPids() { std::vector pids; accessAudioHalPids(&pids, false); return pids; } /* static */ std::string TimeCheck::signalAudioHals() { std::vector pids = getAudioHalPids(); std::string halPids; if (pids.size() != 0) { for (const auto& pid : pids) { ALOGI("requesting tombstone for pid: %d", pid); halPids.append(std::to_string(pid)).append(" "); signalAudioHAL(pid); } // Allow time to complete, usually the caller is forcing restart afterwards. sleep(1); } return halPids; } /* static */ TimerThread& TimeCheck::getTimeCheckThread() { static TimerThread sTimeCheckThread{}; return sTimeCheckThread; } /* static */ std::string TimeCheck::toString() { // note pending and retired are individually locked for maximum concurrency, // snapshot is not instantaneous at a single time. return getTimeCheckThread().getSnapshotAnalysis().toString(); } TimeCheck::TimeCheck(std::string_view tag, OnTimerFunc&& onTimer, Duration requestedTimeoutDuration, Duration secondChanceDuration, bool crashOnTimeout) : mTimeCheckHandler{ std::make_shared( tag, std::move(onTimer), crashOnTimeout, requestedTimeoutDuration, secondChanceDuration, std::chrono::system_clock::now(), getThreadIdWrapper()) } , mTimerHandle(requestedTimeoutDuration.count() == 0 /* for TimeCheck we don't consider a non-zero secondChanceDuration here */ ? getTimeCheckThread().trackTask(mTimeCheckHandler->tag) : getTimeCheckThread().scheduleTask( mTimeCheckHandler->tag, // Pass in all the arguments by value to this task for safety. // The thread could call the callback before the constructor is finished. // The destructor is not blocked on callback. [ timeCheckHandler = mTimeCheckHandler ](TimerThread::Handle timerHandle) { timeCheckHandler->onTimeout(timerHandle); }, requestedTimeoutDuration, secondChanceDuration)) {} TimeCheck::~TimeCheck() { if (mTimeCheckHandler) { mTimeCheckHandler->onCancel(mTimerHandle); } } /* static */ std::string TimeCheck::analyzeTimeouts( float requestedTimeoutMs, float secondChanceMs, float elapsedSteadyMs, float elapsedSystemMs) { // Track any OS clock issues with suspend. // It is possible that the elapsedSystemMs is much greater than elapsedSteadyMs if // a suspend occurs; however, we always expect the timeout ms should always be slightly // less than the elapsed steady ms regardless of whether a suspend occurs or not. const float totalTimeoutMs = requestedTimeoutMs + secondChanceMs; std::string s = std::format( "Timeout ms {:.2f} ({:.2f} + {:.2f})" " elapsed steady ms {:.4f} elapsed system ms {:.4f}", totalTimeoutMs, requestedTimeoutMs, secondChanceMs, elapsedSteadyMs, elapsedSystemMs); // Is there something unusual? static constexpr float TOLERANCE_CONTEXT_SWITCH_MS = 200.f; if (totalTimeoutMs > elapsedSteadyMs || totalTimeoutMs > elapsedSystemMs) { s.append("\nError: early expiration - " "totalTimeoutMs should be less than elapsed time"); } if (elapsedSteadyMs > elapsedSystemMs + TOLERANCE_CONTEXT_SWITCH_MS) { s.append("\nWarning: steady time should not advance faster than system time"); } // This has been found in suspend stress testing. if (elapsedSteadyMs > totalTimeoutMs + TOLERANCE_CONTEXT_SWITCH_MS) { s.append("\nWarning: steady time significantly exceeds timeout " "- possible thread stall or aborted suspend"); } // This has been found in suspend stress testing. if (elapsedSystemMs > totalTimeoutMs + TOLERANCE_CONTEXT_SWITCH_MS) { s.append("\nInformation: system time significantly exceeds timeout " "- possible suspend"); } return s; } // To avoid any potential race conditions, the timer handle // (expiration = clock steady start + timeout) is passed into the callback. void TimeCheck::TimeCheckHandler::onCancel(TimerThread::Handle timerHandle) const { if (TimeCheck::getTimeCheckThread().cancelTask(timerHandle) && onTimer) { const std::chrono::steady_clock::time_point endSteadyTime = std::chrono::steady_clock::now(); const float elapsedSteadyMs = std::chrono::duration_cast( endSteadyTime - timerHandle + timeoutDuration).count(); // send the elapsed steady time for statistics. onTimer(false /* timeout */, elapsedSteadyMs); } } // To avoid any potential race conditions, the timer handle // (expiration = clock steady start + timeout) is passed into the callback. void TimeCheck::TimeCheckHandler::onTimeout(TimerThread::Handle timerHandle) const { const std::chrono::steady_clock::time_point endSteadyTime = std::chrono::steady_clock::now(); const std::chrono::system_clock::time_point endSystemTime = std::chrono::system_clock::now(); // timerHandle incorporates the timeout const float elapsedSteadyMs = std::chrono::duration_cast( endSteadyTime - (timerHandle - timeoutDuration)).count(); const float elapsedSystemMs = std::chrono::duration_cast( endSystemTime - startSystemTime).count(); const float requestedTimeoutMs = std::chrono::duration_cast( timeoutDuration).count(); const float secondChanceMs = std::chrono::duration_cast( secondChanceDuration).count(); if (onTimer) { onTimer(true /* timeout */, elapsedSteadyMs); } if (!crashOnTimeout) return; // Generate the TimerThread summary string early before sending signals to the // HAL processes which can affect thread behavior. const auto snapshotAnalysis = getTimeCheckThread().getSnapshotAnalysis(4 /* retiredCount */); // Generate audio HAL processes tombstones. std::string halPids = signalAudioHals(); if (!halPids.empty()) { halPids = "HAL pids [ " + halPids + "]"; } else { halPids = "No HAL process pids available"; ALOGI("%s", (halPids + ", skipping tombstones").c_str()); } LOG_EVENT_STRING(LOGTAG_AUDIO_BINDER_TIMEOUT, tag.c_str()); // Create abort message string - caution: this can be very large. const std::string abortMessage = std::string("TimeCheck timeout for ") .append(tag) .append(" scheduled ").append(formatTime(startSystemTime)) .append(" on thread ").append(std::to_string(tid)).append("\n") .append(analyzeTimeouts(requestedTimeoutMs, secondChanceMs, elapsedSteadyMs, elapsedSystemMs)).append("\n") .append(halPids).append("\n") .append(snapshotAnalysis.toString()); // In many cases, the initial timeout stack differs from the abort backtrace because // (1) the time difference between initial timeout and the final abort signal // and (2) signalling the HAL audio service may cause // the thread to unblock and continue. // Note: LOG_ALWAYS_FATAL limits the size of the string - per log/log.h: // Log message text may be truncated to less than an // implementation-specific limit (1023 bytes). // // Here, we send the string through android-base/logging.h LOG() // to avoid the size limitation. LOG(FATAL) does an abort whereas // LOG(FATAL_WITHOUT_ABORT) does not abort. static constexpr pid_t invalidPid = TimerThread::SnapshotAnalysis::INVALID_PID; pid_t tidToAbort = invalidPid; if (snapshotAnalysis.suspectTid != invalidPid) { tidToAbort = snapshotAnalysis.suspectTid; } else if (snapshotAnalysis.timeoutTid != invalidPid) { tidToAbort = snapshotAnalysis.timeoutTid; } LOG(FATAL_WITHOUT_ABORT) << abortMessage; const auto ret = abortTid(tidToAbort); if (ret < 0) { LOG(FATAL) << "TimeCheck thread signal failed, aborting process. " "errno: " << errno << base::ErrnoNumberAsString(errno); } } // Automatically create a TimeCheck class for a class and method. // This is used for Audio HAL support. mediautils::TimeCheck makeTimeCheckStatsForClassMethod( std::string_view className, std::string_view methodName) { std::shared_ptr> statistics = mediautils::getStatisticsForClass(className); if (!statistics) return {}; // empty TimeCheck. return mediautils::TimeCheck( FixedString62(className).append("::").append(methodName), [ safeMethodName = FixedString30(methodName), stats = std::move(statistics) ] (bool timeout, float elapsedMs) { if (timeout) { ; // ignored, there is no timeout value. } else { stats->event(safeMethodName.asStringView(), elapsedMs); } }, {} /* timeoutDuration */, {} /* secondChanceDuration */, false /* crashOnTimeout */); } } // namespace android::mediautils