xref: /aosp_15_r20/system/core/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_transitions.cpp (revision 00c7fec1bb09f3284aad6a6f96d2f63dfc3650ad)
1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "snapuserd_core.h"
18 
19 /*
20  * Readahead is used to optimize the merge of COPY and XOR Ops.
21  *
22  * We create a scratch space of 2MB to store the read-ahead data in the COW
23  * device.
24  *
25  *      +-----------------------+
26  *      |     Header (fixed)    |
27  *      +-----------------------+
28  *      |    Scratch space      |  <-- 2MB
29  *      +-----------------------+
30  *
31  *      Scratch space is as follows:
32  *
33  *      +-----------------------+
34  *      |       Metadata        | <- 4k page
35  *      +-----------------------+
36  *      |       Metadata        | <- 4k page
37  *      +-----------------------+
38  *      |                       |
39  *      |    Read-ahead data    |
40  *      |                       |
41  *      +-----------------------+
42  *
43  *
44  * * ===================================================================
45  *
46  * Example:
47  *
48  * We have 6 copy operations to be executed in OTA. Update-engine
49  * will write to COW file as follows:
50  *
51  * Op-1: 20 -> 23
52  * Op-2: 19 -> 22
53  * Op-3: 18 -> 21
54  * Op-4: 17 -> 20
55  * Op-5: 16 -> 19
56  * Op-6: 15 -> 18
57  *
58  * Read-ahead thread will read all the 6 source blocks and store the data in the
59  * scratch space. Metadata will contain the destination block numbers. Thus,
60  * scratch space will look something like this:
61  *
62  * +--------------+
63  * | Block   23   |
64  * | offset - 1   |
65  * +--------------+
66  * | Block   22   |
67  * | offset - 2   |
68  * +--------------+
69  * | Block   21   |
70  * | offset - 3   |
71  * +--------------+
72  *    ...
73  *    ...
74  * +--------------+
75  * | Data-Block 20| <-- offset - 1
76  * +--------------+
77  * | Data-Block 19| <-- offset - 2
78  * +--------------+
79  * | Data-Block 18| <-- offset - 3
80  * +--------------+
81  *     ...
82  *     ...
83  *
84  * ====================================================================
85  *
86  *
87  *  Read-ahead thread will process the COW Ops in fixed set. Consider
88  *  the following example:
89  *
90  *  +--------------------------+
91  *  |op-1|op-2|op-3|....|op-510|
92  *  +--------------------------+
93  *
94  *  <------ One RA Block ------>
95  *
96  *  RA thread will read 510 ordered COW ops at a time and will store
97  *  the data in the scratch space.
98  *
99  *  RA thread and Merge thread will go lock-step wherein RA thread
100  *  will make sure that 510 COW operation data are read upfront
 *  and is in memory. Thus, the merge thread will pick up the data
 *  directly from memory and write it back to the base device.
103  *
104  *
105  *  +--------------------------+------------------------------------+
106  *  |op-1|op-2|op-3|....|op-510|op-511|op-512|op-513........|op-1020|
107  *  +--------------------------+------------------------------------+
108  *
109  *  <------Merge 510 Blocks----><-Prepare 510 blocks for merge by RA->
110  *           ^                                  ^
111  *           |                                  |
112  *      Merge thread                        RA thread
113  *
114  * Both Merge and RA thread will strive to work in parallel.
115  *
116  * ===========================================================================
117  *
118  * State transitions and communication between RA thread and Merge thread:
119  *
120  *  Merge Thread                                      RA Thread
121  *  ----------------------------------------------------------------------------
122  *
123  *          |                                         |
124  *    WAIT for RA Block N                     READ one RA Block (N)
125  *        for merge                                   |
126  *          |                                         |
127  *          |                                         |
128  *          <--------------MERGE BEGIN--------READ Block N done(copy to scratch)
129  *          |                                         |
130  *          |                                         |
131  *    Merge Begin Block N                     READ one RA BLock (N+1)
132  *          |                                         |
133  *          |                                         |
134  *          |                                  READ done. Wait for merge complete
135  *          |                                         |
136  *          |                                        WAIT
137  *          |                                         |
138  *    Merge done Block N                              |
139  *          ----------------MERGE READY-------------->|
140  *    WAIT for RA Block N+1                     Copy RA Block (N+1)
141  *        for merge                              to scratch space
142  *          |                                         |
143  *          <---------------MERGE BEGIN---------BLOCK N+1 Done
144  *          |                                         |
145  *          |                                         |
146  *    Merge Begin Block N+1                   READ one RA BLock (N+2)
147  *          |                                         |
148  *          |                                         |
149  *          |                                  READ done. Wait for merge complete
150  *          |                                         |
151  *          |                                        WAIT
152  *          |                                         |
153  *    Merge done Block N+1                            |
154  *          ----------------MERGE READY-------------->|
155  *    WAIT for RA Block N+2                     Copy RA Block (N+2)
156  *        for merge                              to scratch space
157  *          |                                         |
158  *          <---------------MERGE BEGIN---------BLOCK N+2 Done
159  */
160 
161 namespace android {
162 namespace snapshot {
163 
164 using namespace android;
165 using namespace android::dm;
166 using android::base::unique_fd;
167 
MonitorMerge()168 void SnapshotHandler::MonitorMerge() {
169     {
170         std::lock_guard<std::mutex> lock(lock_);
171         merge_monitored_ = true;
172     }
173 }
174 
175 // This is invoked once primarily by update-engine to initiate
176 // the merge
InitiateMerge()177 void SnapshotHandler::InitiateMerge() {
178     {
179         std::lock_guard<std::mutex> lock(lock_);
180         merge_initiated_ = true;
181 
182         // If there are only REPLACE ops to be merged, then we need
183         // to explicitly set the state to MERGE_BEGIN as there
184         // is no read-ahead thread
185         if (!ra_thread_) {
186             io_state_ = MERGE_IO_TRANSITION::MERGE_BEGIN;
187         }
188     }
189     cv.notify_all();
190 }
191 
IsMergeBeginError(MERGE_IO_TRANSITION io_state)192 static inline bool IsMergeBeginError(MERGE_IO_TRANSITION io_state) {
193     return io_state == MERGE_IO_TRANSITION::READ_AHEAD_FAILURE ||
194            io_state == MERGE_IO_TRANSITION::IO_TERMINATED;
195 }
196 
197 // Invoked by Merge thread - Waits on RA thread to resume merging. Will
198 // be waken up RA thread.
WaitForMergeBegin()199 bool SnapshotHandler::WaitForMergeBegin() {
200     std::unique_lock<std::mutex> lock(lock_);
201 
202     cv.wait(lock, [this]() -> bool { return MergeInitiated() || IsMergeBeginError(io_state_); });
203 
204     if (IsMergeBeginError(io_state_)) {
205         SNAP_LOG(VERBOSE) << "WaitForMergeBegin failed with state: " << io_state_;
206         return false;
207     }
208 
209     cv.wait(lock, [this]() -> bool {
210         return io_state_ == MERGE_IO_TRANSITION::MERGE_BEGIN || IsMergeBeginError(io_state_);
211     });
212 
213     if (IsMergeBeginError(io_state_)) {
214         SNAP_LOG(ERROR) << "WaitForMergeBegin failed with state: " << io_state_;
215         return false;
216     }
217     return true;
218 }
219 
220 // Invoked by RA thread - Flushes the RA block to scratch space if necessary
221 // and then notifies the merge thread to resume merging
ReadAheadIOCompleted(bool sync)222 bool SnapshotHandler::ReadAheadIOCompleted(bool sync) {
223     if (sync) {
224         // Flush the entire buffer region
225         int ret = msync(mapped_addr_, total_mapped_addr_length_, MS_SYNC);
226         if (ret < 0) {
227             PLOG(ERROR) << "msync failed after ReadAheadIOCompleted: " << ret;
228             return false;
229         }
230 
231         // Metadata and data are synced. Now, update the state.
232         // We need to update the state after flushing data; if there is a crash
233         // when read-ahead IO is in progress, the state of data in the COW file
234         // is unknown. kCowReadAheadDone acts as a checkpoint wherein the data
235         // in the scratch space is good and during next reboot, read-ahead thread
236         // can safely re-construct the data.
237         struct BufferState* ra_state = GetBufferState();
238         ra_state->read_ahead_state = kCowReadAheadDone;
239 
240         ret = msync(mapped_addr_, BLOCK_SZ, MS_SYNC);
241         if (ret < 0) {
242             PLOG(ERROR) << "msync failed to flush Readahead completion state...";
243             return false;
244         }
245     }
246 
247     // Notify the merge thread to resume merging
248     {
249         std::lock_guard<std::mutex> lock(lock_);
250         if (io_state_ != MERGE_IO_TRANSITION::IO_TERMINATED &&
251             io_state_ != MERGE_IO_TRANSITION::MERGE_FAILED) {
252             io_state_ = MERGE_IO_TRANSITION::MERGE_BEGIN;
253         }
254     }
255 
256     cv.notify_all();
257     return true;
258 }
259 
260 // Invoked by RA thread - Waits for merge thread to finish merging
261 // RA Block N - RA thread would be ready will with Block N+1 but
262 // will wait to merge thread to finish Block N. Once Block N
263 // is merged, RA thread will be woken up by Merge thread and will
264 // flush the data of Block N+1 to scratch space
WaitForMergeReady()265 bool SnapshotHandler::WaitForMergeReady() {
266     {
267         std::unique_lock<std::mutex> lock(lock_);
268         while (!(io_state_ == MERGE_IO_TRANSITION::MERGE_READY ||
269                  io_state_ == MERGE_IO_TRANSITION::MERGE_FAILED ||
270                  io_state_ == MERGE_IO_TRANSITION::MERGE_COMPLETE ||
271                  io_state_ == MERGE_IO_TRANSITION::IO_TERMINATED)) {
272             cv.wait(lock);
273         }
274 
275         // Check if merge failed
276         if (io_state_ == MERGE_IO_TRANSITION::MERGE_FAILED ||
277             io_state_ == MERGE_IO_TRANSITION::MERGE_COMPLETE ||
278             io_state_ == MERGE_IO_TRANSITION::IO_TERMINATED) {
279             if (io_state_ == MERGE_IO_TRANSITION::MERGE_FAILED) {
280                 SNAP_LOG(ERROR) << "Wait for merge ready failed: " << io_state_;
281             }
282             return false;
283         }
284         return true;
285     }
286 }
287 
288 // Invoked by Merge thread - Notify RA thread about Merge completion
289 // for Block N and wake up
NotifyRAForMergeReady()290 void SnapshotHandler::NotifyRAForMergeReady() {
291     {
292         std::lock_guard<std::mutex> lock(lock_);
293         if (io_state_ != MERGE_IO_TRANSITION::IO_TERMINATED &&
294             io_state_ != MERGE_IO_TRANSITION::READ_AHEAD_FAILURE) {
295             io_state_ = MERGE_IO_TRANSITION::MERGE_READY;
296         }
297     }
298 
299     cv.notify_all();
300 }
301 
302 // The following transitions are mostly in the failure paths
MergeFailed()303 void SnapshotHandler::MergeFailed() {
304     {
305         std::lock_guard<std::mutex> lock(lock_);
306         io_state_ = MERGE_IO_TRANSITION::MERGE_FAILED;
307     }
308 
309     cv.notify_all();
310 }
311 
MergeCompleted()312 void SnapshotHandler::MergeCompleted() {
313     {
314         std::lock_guard<std::mutex> lock(lock_);
315         io_state_ = MERGE_IO_TRANSITION::MERGE_COMPLETE;
316     }
317 
318     cv.notify_all();
319 }
320 
321 // This is invoked by worker threads.
322 //
323 // Worker threads are terminated either by two scenarios:
324 //
325 // 1: If dm-user device is destroyed
326 // 2: We had an I/O failure when reading root partitions
327 //
328 // In case (1), this would be a graceful shutdown. In this case, merge
329 // thread and RA thread should have _already_ terminated by this point. We will be
330 // destroying the dm-user device only _after_ merge is completed.
331 //
332 // In case (2), if merge thread had started, then it will be
333 // continuing to merge; however, since we had an I/O failure and the
334 // I/O on root partitions are no longer served, we will terminate the
335 // merge.
336 //
337 // This functions is about handling case (2)
NotifyIOTerminated()338 void SnapshotHandler::NotifyIOTerminated() {
339     {
340         std::lock_guard<std::mutex> lock(lock_);
341         io_state_ = MERGE_IO_TRANSITION::IO_TERMINATED;
342     }
343 
344     cv.notify_all();
345 }
346 
IsIOTerminated()347 bool SnapshotHandler::IsIOTerminated() {
348     std::lock_guard<std::mutex> lock(lock_);
349     return (io_state_ == MERGE_IO_TRANSITION::IO_TERMINATED);
350 }
351 
352 // Invoked by RA thread
ReadAheadIOFailed()353 void SnapshotHandler::ReadAheadIOFailed() {
354     {
355         std::lock_guard<std::mutex> lock(lock_);
356         io_state_ = MERGE_IO_TRANSITION::READ_AHEAD_FAILURE;
357     }
358 
359     cv.notify_all();
360 }
361 
WaitForMergeComplete()362 void SnapshotHandler::WaitForMergeComplete() {
363     std::unique_lock<std::mutex> lock(lock_);
364     while (!(io_state_ == MERGE_IO_TRANSITION::MERGE_COMPLETE ||
365              io_state_ == MERGE_IO_TRANSITION::MERGE_FAILED ||
366              io_state_ == MERGE_IO_TRANSITION::IO_TERMINATED)) {
367         cv.wait(lock);
368     }
369 }
370 
RaThreadStarted()371 void SnapshotHandler::RaThreadStarted() {
372     std::unique_lock<std::mutex> lock(lock_);
373     ra_thread_started_ = true;
374 }
375 
WaitForRaThreadToStart()376 void SnapshotHandler::WaitForRaThreadToStart() {
377     auto now = std::chrono::system_clock::now();
378     auto deadline = now + 3s;
379     {
380         std::unique_lock<std::mutex> lock(lock_);
381         while (!ra_thread_started_) {
382             auto status = cv.wait_until(lock, deadline);
383             if (status == std::cv_status::timeout) {
384                 SNAP_LOG(ERROR) << "Read-ahead thread did not start";
385                 return;
386             }
387         }
388     }
389 }
390 
MarkMergeComplete()391 void SnapshotHandler::MarkMergeComplete() {
392     std::lock_guard<std::mutex> lock(lock_);
393     merge_complete_ = true;
394 }
395 
GetMergeStatus()396 std::string SnapshotHandler::GetMergeStatus() {
397     bool merge_not_initiated = false;
398     bool merge_monitored = false;
399     bool merge_failed = false;
400     bool merge_complete = false;
401 
402     {
403         std::lock_guard<std::mutex> lock(lock_);
404 
405         if (MergeMonitored()) {
406             merge_monitored = true;
407         }
408 
409         if (!MergeInitiated()) {
410             merge_not_initiated = true;
411         }
412 
413         if (io_state_ == MERGE_IO_TRANSITION::MERGE_FAILED) {
414             merge_failed = true;
415         }
416 
417         merge_complete = merge_complete_;
418     }
419 
420     if (merge_not_initiated) {
421         // Merge was not initiated yet; however, we have merge completion
422         // recorded in the COW Header. This can happen if the device was
423         // rebooted during merge. During next reboot, libsnapshot will
424         // query the status and if the merge is completed, then snapshot-status
425         // file will be deleted
426         if (merge_complete) {
427             return "snapshot-merge-complete";
428         }
429 
430         // Merge monitor thread is tracking the merge but the merge thread
431         // is not started yet.
432         if (merge_monitored) {
433             return "snapshot-merge";
434         }
435 
436         // Return the state as "snapshot". If the device was rebooted during
437         // merge, we will return the status as "snapshot". This is ok, as
438         // libsnapshot will explicitly resume the merge. This is slightly
439         // different from kernel snapshot wherein once the snapshot was switched
440         // to merge target, during next boot, we immediately switch to merge
441         // target. We don't do that here because, during first stage init, we
442         // don't want to initiate the merge. The problem is that we have daemon
443         // transition between first and second stage init. If the merge was
444         // started, then we will have to quiesce the merge before switching
445         // the dm tables. Instead, we just wait until second stage daemon is up
446         // before resuming the merge.
447         return "snapshot";
448     }
449 
450     if (merge_failed) {
451         return "snapshot-merge-failed";
452     }
453 
454     if (merge_complete) {
455         return "snapshot-merge-complete";
456     }
457 
458     // Merge is in-progress
459     return "snapshot-merge";
460 }
461 
462 //========== End of Read-ahead state transition functions ====================
463 
464 /*
465  * Root partitions are mounted off dm-user and the I/O's are served
466  * by snapuserd worker threads.
467  *
468  * When there is an I/O request to be served by worker threads, we check
469  * if the corresponding sector is "changed" due to OTA by doing a lookup.
 * If the lookup succeeds then the sector has been changed and it falls
 * into one of 4 COW operations viz: COPY, XOR, REPLACE and ZERO.
472  *
473  * For the case of REPLACE and ZERO ops, there is not much of a concern
474  * as there is no dependency between blocks. Hence all the I/O request
475  * mapped to these two COW operations will be served by reading the COW device.
476  *
477  * However, COPY and XOR ops are tricky. Since the merge operations are
478  * in-progress, we cannot just go and read from the source device. We need
479  * to be in sync with the state of the merge thread before serving the I/O.
480  *
481  * Given that we know merge thread processes a set of COW ops called as RA
482  * Blocks - These set of COW ops are fixed size wherein each Block comprises
483  * of 510 COW ops.
484  *
485  *  +--------------------------+
486  *  |op-1|op-2|op-3|....|op-510|
487  *  +--------------------------+
488  *
489  *  <------ Merge Group Block N ------>
490  *
491  * Thus, a Merge Group Block N, will fall into one of these states and will
492  * transition the states in the following order:
493  *
 * 1: GROUP_MERGE_PENDING
 * 2: GROUP_MERGE_RA_READY
 * 3: GROUP_MERGE_IN_PROGRESS
 * 4: GROUP_MERGE_COMPLETED
 * 5: GROUP_MERGE_FAILED
499  *
500  * Let's say that we have the I/O request from dm-user whose sector gets mapped
501  * to a COPY operation with op-10 in the above "Merge Group Block N".
502  *
503  * 1: If the Group is in "GROUP_MERGE_PENDING" state:
504  *
505  *    Just read the data from source block based on COW op->source field. Note,
506  *    that we will take a ref count on "Block N". This ref count will prevent
507  *    merge thread to begin merging if there are any pending I/Os. Once the I/O
508  *    is completed, ref count on "Group N" is decremented. Merge thread will
509  *    resume merging "Group N" if there are no pending I/Os.
510  *
511  * 2: If the Group is in "GROUP_MERGE_IN_PROGRESS" or "GROUP_MERGE_RA_READY" state:
512  *
513  *    When the merge thread is ready to process a "Group", it will first move
514  *    the state to GROUP_MERGE_PENDING -> GROUP_MERGE_RA_READY. From this point
515  *    onwards, I/O will be served from Read-ahead buffer. However, merge thread
516  *    cannot start merging this "Group" immediately. If there were any in-flight
517  *    I/O requests, merge thread should wait and allow those I/O's to drain.
518  *    Once all the in-flight I/O's are completed, merge thread will move the
519  *    state from "GROUP_MERGE_RA_READY" -> "GROUP_MERGE_IN_PROGRESS". I/O will
520  *    be continued to serve from Read-ahead buffer during the entire duration
521  *    of the merge.
522  *
523  *    See SetMergeInProgress().
524  *
525  * 3: If the Group is in "GROUP_MERGE_COMPLETED" state:
526  *
527  *    This is straightforward. We just read the data directly from "Base"
528  *    device. We should not be reading the COW op->source field.
529  *
530  * 4: If the Block is in "GROUP_MERGE_FAILED" state:
531  *
532  *    Terminate the I/O with an I/O error as we don't know which "op" in the
533  *    "Group" failed.
534  *
535  *    Transition ensures that the I/O from root partitions are never made to
536  *    wait and are processed immediately. Thus the state transition for any
537  *    "Group" is:
538  *
539  *    GROUP_MERGE_PENDING
540  *          |
541  *          |
542  *          v
543  *    GROUP_MERGE_RA_READY
544  *          |
545  *          |
546  *          v
547  *    GROUP_MERGE_IN_PROGRESS
548  *          |
549  *          |----------------------------(on failure)
550  *          |                           |
551  *          v                           v
552  *    GROUP_MERGE_COMPLETED           GROUP_MERGE_FAILED
553  *
554  */
555 
556 // Invoked by Merge thread
SetMergeCompleted(size_t ra_index)557 void SnapshotHandler::SetMergeCompleted(size_t ra_index) {
558     MergeGroupState* blk_state = merge_blk_state_[ra_index].get();
559     {
560         std::lock_guard<std::mutex> lock(blk_state->m_lock);
561 
562         CHECK(blk_state->merge_state_ == MERGE_GROUP_STATE::GROUP_MERGE_IN_PROGRESS);
563         CHECK(blk_state->num_ios_in_progress == 0);
564 
565         // Merge is complete - All I/O henceforth should be read directly
566         // from base device
567         blk_state->merge_state_ = MERGE_GROUP_STATE::GROUP_MERGE_COMPLETED;
568     }
569 }
570 
571 // Invoked by Merge thread. This is called just before the beginning
572 // of merging a given Block of 510 ops. If there are any in-flight I/O's
573 // from dm-user then wait for them to complete.
SetMergeInProgress(size_t ra_index)574 void SnapshotHandler::SetMergeInProgress(size_t ra_index) {
575     MergeGroupState* blk_state = merge_blk_state_[ra_index].get();
576     {
577         std::unique_lock<std::mutex> lock(blk_state->m_lock);
578 
579         // We may have fallback from Async-merge to synchronous merging
580         // on the existing block. There is no need to reset as the
581         // merge is already in progress.
582         if (blk_state->merge_state_ == MERGE_GROUP_STATE::GROUP_MERGE_IN_PROGRESS) {
583             return;
584         }
585 
586         CHECK(blk_state->merge_state_ == MERGE_GROUP_STATE::GROUP_MERGE_PENDING);
587 
588         // First set the state to RA_READY so that in-flight I/O will drain
589         // and any new I/O will start reading from RA buffer
590         blk_state->merge_state_ = MERGE_GROUP_STATE::GROUP_MERGE_RA_READY;
591 
592         // Wait if there are any in-flight I/O's - we cannot merge at this point
593         while (!(blk_state->num_ios_in_progress == 0)) {
594             blk_state->m_cv.wait(lock);
595         }
596 
597         blk_state->merge_state_ = MERGE_GROUP_STATE::GROUP_MERGE_IN_PROGRESS;
598     }
599 }
600 
601 // Invoked by Merge thread on failure
SetMergeFailed(size_t ra_index)602 void SnapshotHandler::SetMergeFailed(size_t ra_index) {
603     MergeGroupState* blk_state = merge_blk_state_[ra_index].get();
604     {
605         std::unique_lock<std::mutex> lock(blk_state->m_lock);
606 
607         blk_state->merge_state_ = MERGE_GROUP_STATE::GROUP_MERGE_FAILED;
608     }
609 }
610 
611 // Invoked by worker threads when I/O is complete on a "MERGE_PENDING"
612 // Block. If there are no more in-flight I/Os, wake up merge thread
613 // to resume merging.
NotifyIOCompletion(uint64_t new_block)614 void SnapshotHandler::NotifyIOCompletion(uint64_t new_block) {
615     auto it = block_to_ra_index_.find(new_block);
616     CHECK(it != block_to_ra_index_.end()) << " invalid block: " << new_block;
617 
618     bool pending_ios = true;
619 
620     int ra_index = it->second;
621     MergeGroupState* blk_state = merge_blk_state_[ra_index].get();
622     {
623         std::unique_lock<std::mutex> lock(blk_state->m_lock);
624 
625         blk_state->num_ios_in_progress -= 1;
626         if (blk_state->num_ios_in_progress == 0) {
627             pending_ios = false;
628         }
629     }
630 
631     // Give a chance to merge-thread to resume merge
632     // as there are no pending I/O.
633     if (!pending_ios) {
634         blk_state->m_cv.notify_all();
635     }
636 }
637 
GetRABuffer(std::unique_lock<std::mutex> * lock,uint64_t block,void * buffer)638 bool SnapshotHandler::GetRABuffer(std::unique_lock<std::mutex>* lock, uint64_t block,
639                                   void* buffer) {
640     if (!lock->owns_lock()) {
641         SNAP_LOG(ERROR) << "GetRABuffer - Lock not held";
642         return false;
643     }
644     std::unordered_map<uint64_t, void*>::iterator it = read_ahead_buffer_map_.find(block);
645 
646     if (it == read_ahead_buffer_map_.end()) {
647         return false;
648     }
649 
650     memcpy(buffer, it->second, BLOCK_SZ);
651     return true;
652 }
653 
654 // Invoked by worker threads in the I/O path. This is called when a sector
655 // is mapped to a COPY/XOR COW op.
ProcessMergingBlock(uint64_t new_block,void * buffer)656 MERGE_GROUP_STATE SnapshotHandler::ProcessMergingBlock(uint64_t new_block, void* buffer) {
657     auto it = block_to_ra_index_.find(new_block);
658     if (it == block_to_ra_index_.end()) {
659         return MERGE_GROUP_STATE::GROUP_INVALID;
660     }
661 
662     int ra_index = it->second;
663     MergeGroupState* blk_state = merge_blk_state_[ra_index].get();
664     {
665         std::unique_lock<std::mutex> lock(blk_state->m_lock);
666 
667         MERGE_GROUP_STATE state = blk_state->merge_state_;
668         switch (state) {
669             case MERGE_GROUP_STATE::GROUP_MERGE_PENDING: {
670                 // If this is a merge-resume path, check if the data is
671                 // available from scratch space. Data from scratch space takes
672                 // higher precedence than from source device for overlapping
673                 // blocks.
674                 if (resume_merge_ && GetRABuffer(&lock, new_block, buffer)) {
675                     return (MERGE_GROUP_STATE::GROUP_MERGE_IN_PROGRESS);
676                 }
677                 blk_state->num_ios_in_progress += 1;  // ref count
678                 [[fallthrough]];
679             }
680             case MERGE_GROUP_STATE::GROUP_MERGE_COMPLETED: {
681                 [[fallthrough]];
682             }
683             case MERGE_GROUP_STATE::GROUP_MERGE_FAILED: {
684                 return state;
685             }
686             // Fetch the data from RA buffer.
687             case MERGE_GROUP_STATE::GROUP_MERGE_RA_READY: {
688                 [[fallthrough]];
689             }
690             case MERGE_GROUP_STATE::GROUP_MERGE_IN_PROGRESS: {
691                 if (!GetRABuffer(&lock, new_block, buffer)) {
692                     return MERGE_GROUP_STATE::GROUP_INVALID;
693                 }
694                 return state;
695             }
696             default: {
697                 return MERGE_GROUP_STATE::GROUP_INVALID;
698             }
699         }
700     }
701 }
702 
// Streams the symbolic name of a MERGE_IO_TRANSITION value; values without
// a known name are written as "unknown".
std::ostream& operator<<(std::ostream& os, MERGE_IO_TRANSITION value) {
    const char* label = "unknown";
    switch (value) {
        case MERGE_IO_TRANSITION::INVALID:
            label = "INVALID";
            break;
        case MERGE_IO_TRANSITION::MERGE_READY:
            label = "MERGE_READY";
            break;
        case MERGE_IO_TRANSITION::MERGE_BEGIN:
            label = "MERGE_BEGIN";
            break;
        case MERGE_IO_TRANSITION::MERGE_FAILED:
            label = "MERGE_FAILED";
            break;
        case MERGE_IO_TRANSITION::MERGE_COMPLETE:
            label = "MERGE_COMPLETE";
            break;
        case MERGE_IO_TRANSITION::IO_TERMINATED:
            label = "IO_TERMINATED";
            break;
        case MERGE_IO_TRANSITION::READ_AHEAD_FAILURE:
            label = "READ_AHEAD_FAILURE";
            break;
        default:
            break;
    }
    return os << label;
}
723 
724 }  // namespace snapshot
725 }  // namespace android
726