1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <libsnapshot/snapshot.h>
16
17 #include <dirent.h>
18 #include <fcntl.h>
19 #include <math.h>
20 #include <sys/file.h>
21 #include <sys/types.h>
22 #include <sys/unistd.h>
23 #include <sys/xattr.h>
24
25 #include <chrono>
26 #include <filesystem>
27 #include <optional>
28 #include <thread>
29
30 #include <android-base/file.h>
31 #include <android-base/logging.h>
32 #include <android-base/parseint.h>
33 #include <android-base/properties.h>
34 #include <android-base/stringprintf.h>
35 #include <android-base/strings.h>
36 #include <android-base/unique_fd.h>
37 #include <cutils/sockets.h>
38 #include <ext4_utils/ext4_utils.h>
39 #include <fs_mgr.h>
40 #include <fs_mgr/file_wait.h>
41 #include <fs_mgr_dm_linear.h>
42 #include <fstab/fstab.h>
43 #include <libdm/dm.h>
44 #include <libfiemap/image_manager.h>
45 #include <liblp/liblp.h>
46
47 #include <android/snapshot/snapshot.pb.h>
48 #include <libsnapshot/snapshot_stats.h>
49 #include "device_info.h"
50 #include "partition_cow_creator.h"
51 #include "scratch_super.h"
52 #include "snapshot_metadata_updater.h"
53 #include "utility.h"
54
55 namespace android {
56 namespace snapshot {
57
58 using aidl::android::hardware::boot::MergeStatus;
59 using android::base::unique_fd;
60 using android::dm::DeviceMapper;
61 using android::dm::DmDeviceState;
62 using android::dm::DmTable;
63 using android::dm::DmTargetLinear;
64 using android::dm::DmTargetSnapshot;
65 using android::dm::DmTargetUser;
66 using android::dm::kSectorSize;
67 using android::dm::SnapshotStorageMode;
68 using android::fiemap::FiemapStatus;
69 using android::fiemap::IImageManager;
70 using android::fs_mgr::CreateDmTable;
71 using android::fs_mgr::CreateLogicalPartition;
72 using android::fs_mgr::CreateLogicalPartitionParams;
73 using android::fs_mgr::GetPartitionGroupName;
74 using android::fs_mgr::GetPartitionName;
75 using android::fs_mgr::LpMetadata;
76 using android::fs_mgr::MetadataBuilder;
77 using android::fs_mgr::SlotNumberForSlotSuffix;
78 using chromeos_update_engine::DeltaArchiveManifest;
79 using chromeos_update_engine::Extent;
80 using chromeos_update_engine::FileDescriptor;
81 using chromeos_update_engine::PartitionUpdate;
82 template <typename T>
83 using RepeatedPtrField = google::protobuf::RepeatedPtrField<T>;
84 using std::chrono::duration_cast;
85 using namespace std::chrono_literals;
86 using namespace std::string_literals;
87 using android::base::Realpath;
88 using android::base::StringPrintf;
89
// Well-known files under /metadata/ota, plus two tunables.
// NOTE(review): the code that consumes these constants is not in this part of
// the file; the per-constant notes below are inferred from the names and
// values only and should be confirmed against the users.

// Presumably marks that snapshots are booted on the current slot without a
// slot switch - TODO confirm.
static constexpr char kBootSnapshotsWithoutSlotSwitch[] =
        "/metadata/ota/snapshot-boot-without-slot-switch";
// Presumably the snapshot boot indicator file - TODO confirm.
static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot";
// Presumably the rollback indicator file - TODO confirm.
static constexpr char kRollbackIndicatorPath[] = "/metadata/ota/rollback-indicator";
// Presumably marks that snapuserd should be taken from system - TODO confirm.
static constexpr char kSnapuserdFromSystem[] = "/metadata/ota/snapuserd-from-system";
// Polling interval of two seconds (std::chrono literal).
static constexpr auto kUpdateStateCheckInterval = 2s;
// SELinux-style context string; looks like the label for OTA metadata files.
static constexpr char kOtaFileContext[] = "u:object_r:ota_metadata_file:s0";
97
/*
 * The readahead size is set to 32 KiB so that there is no significant memory
 * pressure (/proc/pressure/memory) during boot.
 *
 * After an OTA, partitions are scanned during boot before the slot is marked
 * as successful. This scan triggers readahead on both the source and the COW
 * block devices, which drives the number of Inactive(file) pages very high.
 *
 * A lower value may reduce memory pressure further, but it will increase the
 * boot time. Devices that do not care about OTA boot time can instead use the
 * O_DIRECT functionality, wherein I/O to the source block device is done with
 * O_DIRECT.
 */
static constexpr auto kReadAheadSizeKb = 32;
111
112 // Note: IImageManager is an incomplete type in the header, so the default
113 // destructor doesn't work.
~SnapshotManager()114 SnapshotManager::~SnapshotManager() {}
115
New(IDeviceInfo * info)116 std::unique_ptr<SnapshotManager> SnapshotManager::New(IDeviceInfo* info) {
117 if (!info) {
118 info = new DeviceInfo();
119 }
120
121 auto sm = std::unique_ptr<SnapshotManager>(new SnapshotManager(info));
122 if (info->IsTempMetadata()) {
123 LOG(INFO) << "Using temp metadata from super";
124 }
125 return sm;
126 }
127
NewForFirstStageMount(IDeviceInfo * info)128 std::unique_ptr<SnapshotManager> SnapshotManager::NewForFirstStageMount(IDeviceInfo* info) {
129 if (!info) {
130 DeviceInfo* impl = new DeviceInfo();
131 impl->set_first_stage_init(true);
132 info = impl;
133 }
134 auto sm = New(info);
135
136 // The first-stage version of snapuserd is explicitly started by init. Do
137 // not attempt to using it during tests (which run in normal AOSP).
138 if (!sm->device()->IsTestDevice()) {
139 sm->use_first_stage_snapuserd_ = true;
140 }
141 return sm;
142 }
143
SnapshotManager(IDeviceInfo * device)144 SnapshotManager::SnapshotManager(IDeviceInfo* device)
145 : dm_(device->GetDeviceMapper()), device_(device), metadata_dir_(device_->GetMetadataDir()) {}
146
// Returns the COW device name for |snapshot_name|: the name with "-cow"
// appended.
static std::string GetCowName(const std::string& snapshot_name) {
    std::string cow_name(snapshot_name);
    cow_name.append("-cow");
    return cow_name;
}
150
GetSnapshotDriver(LockedFile * lock)151 SnapshotManager::SnapshotDriver SnapshotManager::GetSnapshotDriver(LockedFile* lock) {
152 if (UpdateUsesUserSnapshots(lock)) {
153 return SnapshotManager::SnapshotDriver::DM_USER;
154 } else {
155 return SnapshotManager::SnapshotDriver::DM_SNAPSHOT;
156 }
157 }
158
GetDmUserCowName(const std::string & snapshot_name,SnapshotManager::SnapshotDriver driver)159 static std::string GetDmUserCowName(const std::string& snapshot_name,
160 SnapshotManager::SnapshotDriver driver) {
161 // dm-user block device will act as a snapshot device. We identify it with
162 // the same partition name so that when partitions can be mounted off
163 // dm-user.
164
165 switch (driver) {
166 case SnapshotManager::SnapshotDriver::DM_USER: {
167 return snapshot_name;
168 }
169
170 case SnapshotManager::SnapshotDriver::DM_SNAPSHOT: {
171 return snapshot_name + "-user-cow";
172 }
173
174 default: {
175 LOG(ERROR) << "Invalid snapshot driver";
176 return "";
177 }
178 }
179 }
180
// Returns the COW image device name for |snapshot_name|: the name with
// "-cow-img" appended.
static std::string GetCowImageDeviceName(const std::string& snapshot_name) {
    std::string image_name(snapshot_name);
    image_name += "-cow-img";
    return image_name;
}
184
// Returns the base device name for |partition_name|: the name with "-base"
// appended.
static std::string GetBaseDeviceName(const std::string& partition_name) {
    std::string base_name(partition_name);
    base_name += "-base";
    return base_name;
}
188
// Returns the source device name for |partition_name|: the name with "-src"
// appended.
static std::string GetSourceDeviceName(const std::string& partition_name) {
    std::string source_name(partition_name);
    source_name += "-src";
    return source_name;
}
192
BeginUpdate()193 bool SnapshotManager::BeginUpdate() {
194 bool needs_merge = false;
195 if (!TryCancelUpdate(&needs_merge)) {
196 return false;
197 }
198 if (needs_merge) {
199 LOG(INFO) << "Wait for merge (if any) before beginning a new update.";
200 auto state = ProcessUpdateState();
201 LOG(INFO) << "Merged with state = " << state;
202 }
203
204 auto file = LockExclusive();
205 if (!file) return false;
206
207 // Purge the ImageManager just in case there is a corrupt lp_metadata file
208 // lying around. (NB: no need to return false on an error, we can let the
209 // update try to progress.)
210 if (EnsureImageManager()) {
211 images_->RemoveAllImages();
212 }
213
214 // Clear any cached metadata (this allows re-using one manager across tests).
215 old_partition_metadata_ = nullptr;
216
217 auto state = ReadUpdateState(file.get());
218 if (state != UpdateState::None) {
219 LOG(ERROR) << "An update is already in progress, cannot begin a new update";
220 return false;
221 }
222 return WriteUpdateState(file.get(), UpdateState::Initiated);
223 }
224
CancelUpdate()225 bool SnapshotManager::CancelUpdate() {
226 bool needs_merge = false;
227 if (!TryCancelUpdate(&needs_merge)) {
228 return false;
229 }
230 if (needs_merge) {
231 LOG(ERROR) << "Cannot cancel update after it has completed or started merging";
232 }
233 return !needs_merge;
234 }
235
TryCancelUpdate(bool * needs_merge)236 bool SnapshotManager::TryCancelUpdate(bool* needs_merge) {
237 *needs_merge = false;
238
239 auto file = LockExclusive();
240 if (!file) return false;
241
242 if (IsSnapshotWithoutSlotSwitch()) {
243 LOG(ERROR) << "Cannot cancel the snapshots as partitions are mounted off the snapshots on "
244 "current slot.";
245 return false;
246 }
247
248 UpdateState state = ReadUpdateState(file.get());
249 if (state == UpdateState::None) {
250 RemoveInvalidSnapshots(file.get());
251 return true;
252 }
253
254 if (state == UpdateState::Initiated) {
255 LOG(INFO) << "Update has been initiated, now canceling";
256 return RemoveAllUpdateState(file.get());
257 }
258
259 if (state == UpdateState::Unverified) {
260 // We completed an update, but it can still be canceled if we haven't booted into it.
261 auto slot = GetCurrentSlot();
262 if (slot != Slot::Target) {
263 LOG(INFO) << "Canceling previously completed updates (if any)";
264 return RemoveAllUpdateState(file.get());
265 }
266 }
267 *needs_merge = true;
268 return true;
269 }
270
ReadUpdateSourceSlotSuffix()271 std::string SnapshotManager::ReadUpdateSourceSlotSuffix() {
272 auto boot_file = GetSnapshotBootIndicatorPath();
273 std::string contents;
274 if (!android::base::ReadFileToString(boot_file, &contents)) {
275 return {};
276 }
277 return contents;
278 }
279
GetCurrentSlot()280 SnapshotManager::Slot SnapshotManager::GetCurrentSlot() {
281 auto contents = ReadUpdateSourceSlotSuffix();
282 if (contents.empty()) {
283 return Slot::Unknown;
284 }
285 if (device_->GetSlotSuffix() == contents) {
286 return Slot::Source;
287 }
288 return Slot::Target;
289 }
290
GetSnapshotSlotSuffix()291 std::string SnapshotManager::GetSnapshotSlotSuffix() {
292 switch (GetCurrentSlot()) {
293 case Slot::Target:
294 return device_->GetSlotSuffix();
295 default:
296 return device_->GetOtherSlotSuffix();
297 }
298 }
299
RemoveFileIfExists(const std::string & path)300 static bool RemoveFileIfExists(const std::string& path) {
301 std::string message;
302 if (!android::base::RemoveFileIfExists(path, &message)) {
303 LOG(ERROR) << "Remove failed: " << path << ": " << message;
304 return false;
305 }
306 return true;
307 }
308
RemoveAllUpdateState(LockedFile * lock,const std::function<bool ()> & prolog)309 bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock, const std::function<bool()>& prolog) {
310 if (prolog && !prolog()) {
311 LOG(WARNING) << "Can't RemoveAllUpdateState: prolog failed.";
312 return false;
313 }
314
315 LOG(INFO) << "Removing all update state.";
316
317 if (!RemoveAllSnapshots(lock)) {
318 LOG(ERROR) << "Could not remove all snapshots";
319 return false;
320 }
321
322 // It's okay if these fail:
323 // - For SnapshotBoot and Rollback, first-stage init performs a deeper check after
324 // reading the indicator file, so it's not a problem if it still exists
325 // after the update completes.
326 // - For ForwardMerge, FinishedSnapshotWrites asserts that the existence of the indicator
327 // matches the incoming update.
328 std::vector<std::string> files = {
329 GetSnapshotBootIndicatorPath(), GetRollbackIndicatorPath(),
330 GetForwardMergeIndicatorPath(), GetOldPartitionMetadataPath(),
331 GetBootSnapshotsWithoutSlotSwitchPath(), GetSnapuserdFromSystemPath(),
332 };
333 for (const auto& file : files) {
334 RemoveFileIfExists(file);
335 }
336
337 // If this fails, we'll keep trying to remove the update state (as the
338 // device reboots or starts a new update) until it finally succeeds.
339 return WriteUpdateState(lock, UpdateState::None);
340 }
341
FinishedSnapshotWrites(bool wipe)342 bool SnapshotManager::FinishedSnapshotWrites(bool wipe) {
343 auto lock = LockExclusive();
344 if (!lock) return false;
345
346 auto update_state = ReadUpdateState(lock.get());
347 if (update_state == UpdateState::Unverified) {
348 LOG(INFO) << "FinishedSnapshotWrites already called before. Ignored.";
349 return true;
350 }
351
352 if (update_state != UpdateState::Initiated) {
353 LOG(ERROR) << "Can only transition to the Unverified state from the Initiated state.";
354 return false;
355 }
356
357 if (!EnsureNoOverflowSnapshot(lock.get())) {
358 LOG(ERROR) << "Cannot ensure there are no overflow snapshots.";
359 return false;
360 }
361
362 if (!UpdateForwardMergeIndicator(wipe)) {
363 return false;
364 }
365
366 // This file is written on boot to detect whether a rollback occurred. It
367 // MUST NOT exist before rebooting, otherwise, we're at risk of deleting
368 // snapshots too early.
369 if (!RemoveFileIfExists(GetRollbackIndicatorPath())) {
370 return false;
371 }
372
373 // This file acts as both a quick indicator for init (it can use access(2)
374 // to decide how to do first-stage mounts), and it stores the old slot, so
375 // we can tell whether or not we performed a rollback.
376 auto contents = device_->GetSlotSuffix();
377 auto boot_file = GetSnapshotBootIndicatorPath();
378 if (!WriteStringToFileAtomic(contents, boot_file)) {
379 PLOG(ERROR) << "write failed: " << boot_file;
380 return false;
381 }
382 return WriteUpdateState(lock.get(), UpdateState::Unverified);
383 }
384
CreateSnapshot(LockedFile * lock,PartitionCowCreator * cow_creator,SnapshotStatus * status)385 bool SnapshotManager::CreateSnapshot(LockedFile* lock, PartitionCowCreator* cow_creator,
386 SnapshotStatus* status) {
387 CHECK(lock);
388 CHECK(lock->lock_mode() == LOCK_EX);
389 CHECK(status);
390
391 if (status->name().empty()) {
392 LOG(ERROR) << "SnapshotStatus has no name.";
393 return false;
394 }
395 // Check these sizes. Like liblp, we guarantee the partition size is
396 // respected, which means it has to be sector-aligned. (This guarantee is
397 // useful for locating avb footers correctly). The COW file size, however,
398 // can be arbitrarily larger than specified, so we can safely round it up.
399 if (status->device_size() % kSectorSize != 0) {
400 LOG(ERROR) << "Snapshot " << status->name()
401 << " device size is not a multiple of the sector size: "
402 << status->device_size();
403 return false;
404 }
405 if (status->snapshot_size() % kSectorSize != 0) {
406 LOG(ERROR) << "Snapshot " << status->name()
407 << " snapshot size is not a multiple of the sector size: "
408 << status->snapshot_size();
409 return false;
410 }
411 if (status->cow_partition_size() % kSectorSize != 0) {
412 LOG(ERROR) << "Snapshot " << status->name()
413 << " cow partition size is not a multiple of the sector size: "
414 << status->cow_partition_size();
415 return false;
416 }
417 if (status->cow_file_size() % kSectorSize != 0) {
418 LOG(ERROR) << "Snapshot " << status->name()
419 << " cow file size is not a multiple of the sector size: "
420 << status->cow_file_size();
421 return false;
422 }
423
424 status->set_state(SnapshotState::CREATED);
425 status->set_sectors_allocated(0);
426 status->set_metadata_sectors(0);
427 status->set_using_snapuserd(cow_creator->using_snapuserd);
428 status->set_compression_algorithm(cow_creator->compression_algorithm);
429 status->set_compression_factor(cow_creator->compression_factor);
430 status->set_read_ahead_size(cow_creator->read_ahead_size);
431 if (cow_creator->enable_threading) {
432 status->set_enable_threading(cow_creator->enable_threading);
433 }
434 if (cow_creator->batched_writes) {
435 status->set_batched_writes(cow_creator->batched_writes);
436 }
437
438 if (!WriteSnapshotStatus(lock, *status)) {
439 PLOG(ERROR) << "Could not write snapshot status: " << status->name();
440 return false;
441 }
442 return true;
443 }
444
CreateCowImage(LockedFile * lock,const std::string & name)445 Return SnapshotManager::CreateCowImage(LockedFile* lock, const std::string& name) {
446 CHECK(lock);
447 CHECK(lock->lock_mode() == LOCK_EX);
448 if (!EnsureImageManager()) return Return::Error();
449
450 SnapshotStatus status;
451 if (!ReadSnapshotStatus(lock, name, &status)) {
452 return Return::Error();
453 }
454
455 // The COW file size should have been rounded up to the nearest sector in CreateSnapshot.
456 if (status.cow_file_size() % kSectorSize != 0) {
457 LOG(ERROR) << "Snapshot " << name << " COW file size is not a multiple of the sector size: "
458 << status.cow_file_size();
459 return Return::Error();
460 }
461
462 std::string cow_image_name = GetCowImageDeviceName(name);
463 int cow_flags = IImageManager::CREATE_IMAGE_DEFAULT;
464 return Return(images_->CreateBackingImage(cow_image_name, status.cow_file_size(), cow_flags));
465 }
466
MapDmUserCow(LockedFile * lock,const std::string & name,const std::string & cow_file,const std::string & base_device,const std::string & base_path_merge,const std::chrono::milliseconds & timeout_ms,std::string * path)467 bool SnapshotManager::MapDmUserCow(LockedFile* lock, const std::string& name,
468 const std::string& cow_file, const std::string& base_device,
469 const std::string& base_path_merge,
470 const std::chrono::milliseconds& timeout_ms, std::string* path) {
471 CHECK(lock);
472
473 if (UpdateUsesUserSnapshots(lock)) {
474 SnapshotStatus status;
475 if (!ReadSnapshotStatus(lock, name, &status)) {
476 LOG(ERROR) << "MapDmUserCow: ReadSnapshotStatus failed...";
477 return false;
478 }
479
480 if (status.state() == SnapshotState::NONE ||
481 status.state() == SnapshotState::MERGE_COMPLETED) {
482 LOG(ERROR) << "Should not create a snapshot device for " << name
483 << " after merging has completed.";
484 return false;
485 }
486
487 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
488 if (update_status.state() == UpdateState::MergeCompleted ||
489 update_status.state() == UpdateState::MergeNeedsReboot) {
490 LOG(ERROR) << "Should not create a snapshot device for " << name
491 << " after global merging has completed.";
492 return false;
493 }
494 }
495
496 // Use an extra decoration for first-stage init, so we can transition
497 // to a new table entry in second-stage.
498 std::string misc_name = name;
499 if (use_first_stage_snapuserd_) {
500 misc_name += "-init";
501 }
502
503 if (!EnsureSnapuserdConnected()) {
504 return false;
505 }
506
507 uint64_t base_sectors = 0;
508 if (!UpdateUsesUserSnapshots(lock)) {
509 base_sectors = snapuserd_client_->InitDmUserCow(misc_name, cow_file, base_device);
510 if (base_sectors == 0) {
511 LOG(ERROR) << "Failed to retrieve base_sectors from Snapuserd";
512 return false;
513 }
514 } else if (IsSnapshotWithoutSlotSwitch()) {
515 // When snapshots are on current slot, we determine the size
516 // of block device based on the number of COW operations. We cannot
517 // use base device as it will be from older image.
518 unique_fd fd(open(cow_file.c_str(), O_RDONLY | O_CLOEXEC));
519 if (fd < 0) {
520 PLOG(ERROR) << "Failed to open " << cow_file;
521 return false;
522 }
523
524 CowReader reader;
525 if (!reader.Parse(std::move(fd))) {
526 LOG(ERROR) << "Failed to parse cow " << cow_file;
527 return false;
528 }
529
530 uint64_t dev_sz = 0;
531 const auto& header = reader.GetHeader();
532 if (header.prefix.major_version == 2) {
533 const size_t num_ops = reader.get_num_total_data_ops();
534 dev_sz = (num_ops * header.block_size);
535 } else {
536 // create_snapshot will skip in-place copy ops. Hence, fetch this
537 // information directly from v3 header.
538 const auto& v3_header = reader.header_v3();
539 dev_sz = v3_header.op_count_max * v3_header.block_size;
540 }
541
542 base_sectors = dev_sz >> 9;
543 } else {
544 // For userspace snapshots, the size of the base device is taken as the
545 // size of the dm-user block device. Since there is no pseudo mapping
546 // created in the daemon, we no longer need to rely on the daemon for
547 // sizing the dm-user block device.
548 unique_fd fd(TEMP_FAILURE_RETRY(open(base_path_merge.c_str(), O_RDONLY | O_CLOEXEC)));
549 if (fd < 0) {
550 LOG(ERROR) << "Cannot open block device: " << base_path_merge;
551 return false;
552 }
553
554 uint64_t dev_sz = get_block_device_size(fd.get());
555 if (!dev_sz) {
556 LOG(ERROR) << "Failed to find block device size: " << base_path_merge;
557 return false;
558 }
559
560 base_sectors = dev_sz >> 9;
561 }
562
563 DmTable table;
564 table.Emplace<DmTargetUser>(0, base_sectors, misc_name);
565 if (!dm_.CreateDevice(name, table, path, timeout_ms)) {
566 LOG(ERROR) << " dm-user: CreateDevice failed... ";
567 return false;
568 }
569 if (!WaitForDevice(*path, timeout_ms)) {
570 LOG(ERROR) << " dm-user: timeout: Failed to create block device for: " << name;
571 return false;
572 }
573
574 auto control_device = "/dev/dm-user/" + misc_name;
575 if (!WaitForDevice(control_device, timeout_ms)) {
576 return false;
577 }
578
579 if (UpdateUsesUserSnapshots(lock)) {
580 // Now that the dm-user device is created, initialize the daemon and
581 // spin up the worker threads.
582 if (!snapuserd_client_->InitDmUserCow(misc_name, cow_file, base_device, base_path_merge)) {
583 LOG(ERROR) << "InitDmUserCow failed";
584 return false;
585 }
586 }
587
588 return snapuserd_client_->AttachDmUser(misc_name);
589 }
590
MapSnapshot(LockedFile * lock,const std::string & name,const std::string & base_device,const std::string & cow_device,const std::chrono::milliseconds & timeout_ms,std::string * dev_path)591 bool SnapshotManager::MapSnapshot(LockedFile* lock, const std::string& name,
592 const std::string& base_device, const std::string& cow_device,
593 const std::chrono::milliseconds& timeout_ms,
594 std::string* dev_path) {
595 CHECK(lock);
596
597 SnapshotStatus status;
598 if (!ReadSnapshotStatus(lock, name, &status)) {
599 return false;
600 }
601 if (status.state() == SnapshotState::NONE || status.state() == SnapshotState::MERGE_COMPLETED) {
602 LOG(ERROR) << "Should not create a snapshot device for " << name
603 << " after merging has completed.";
604 return false;
605 }
606
607 // Validate the block device size, as well as the requested snapshot size.
608 // Note that during first-stage init, we don't have the device paths.
609 if (android::base::StartsWith(base_device, "/")) {
610 unique_fd fd(open(base_device.c_str(), O_RDONLY | O_CLOEXEC));
611 if (fd < 0) {
612 PLOG(ERROR) << "open failed: " << base_device;
613 return false;
614 }
615 auto dev_size = get_block_device_size(fd);
616 if (!dev_size) {
617 PLOG(ERROR) << "Could not determine block device size: " << base_device;
618 return false;
619 }
620 if (status.device_size() != dev_size) {
621 LOG(ERROR) << "Block device size for " << base_device << " does not match"
622 << "(expected " << status.device_size() << ", got " << dev_size << ")";
623 return false;
624 }
625 }
626 if (status.device_size() % kSectorSize != 0) {
627 LOG(ERROR) << "invalid blockdev size for " << base_device << ": " << status.device_size();
628 return false;
629 }
630 if (status.snapshot_size() % kSectorSize != 0 ||
631 status.snapshot_size() > status.device_size()) {
632 LOG(ERROR) << "Invalid snapshot size for " << base_device << ": " << status.snapshot_size();
633 return false;
634 }
635 if (status.device_size() != status.snapshot_size()) {
636 LOG(ERROR) << "Device size and snapshot size must be the same (device size = "
637 << status.device_size() << ", snapshot size = " << status.snapshot_size();
638 return false;
639 }
640
641 uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
642
643 // Note that merging is a global state. We do track whether individual devices
644 // have completed merging, but the start of the merge process is considered
645 // atomic.
646 SnapshotStorageMode mode;
647 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
648 switch (update_status.state()) {
649 case UpdateState::MergeCompleted:
650 case UpdateState::MergeNeedsReboot:
651 LOG(ERROR) << "Should not create a snapshot device for " << name
652 << " after global merging has completed.";
653 return false;
654 case UpdateState::Merging:
655 case UpdateState::MergeFailed:
656 // Note: MergeFailed indicates that a merge is in progress, but
657 // is possibly stalled. We still have to honor the merge.
658 if (DecideMergePhase(status) == update_status.merge_phase()) {
659 mode = SnapshotStorageMode::Merge;
660 } else {
661 mode = SnapshotStorageMode::Persistent;
662 }
663 break;
664 default:
665 mode = SnapshotStorageMode::Persistent;
666 break;
667 }
668
669 if (mode == SnapshotStorageMode::Persistent && status.state() == SnapshotState::MERGING) {
670 LOG(ERROR) << "Snapshot: " << name
671 << " has snapshot status Merging but mode set to Persistent."
672 << " Changing mode to Snapshot-Merge.";
673 mode = SnapshotStorageMode::Merge;
674 }
675
676 DmTable table;
677 table.Emplace<DmTargetSnapshot>(0, snapshot_sectors, base_device, cow_device, mode,
678 kSnapshotChunkSize);
679 if (!dm_.CreateDevice(name, table, dev_path, timeout_ms)) {
680 LOG(ERROR) << "Could not create snapshot device: " << name;
681 return false;
682 }
683 return true;
684 }
685
MapCowImage(const std::string & name,const std::chrono::milliseconds & timeout_ms)686 std::optional<std::string> SnapshotManager::MapCowImage(
687 const std::string& name, const std::chrono::milliseconds& timeout_ms) {
688 if (!EnsureImageManager()) return std::nullopt;
689 auto cow_image_name = GetCowImageDeviceName(name);
690
691 bool ok;
692 std::string cow_dev;
693 if (device_->IsRecovery() || device_->IsFirstStageInit()) {
694 const auto& opener = device_->GetPartitionOpener();
695 ok = images_->MapImageWithDeviceMapper(opener, cow_image_name, &cow_dev);
696 } else {
697 ok = images_->MapImageDevice(cow_image_name, timeout_ms, &cow_dev);
698 }
699
700 if (ok) {
701 LOG(INFO) << "Mapped " << cow_image_name << " to " << cow_dev;
702 return cow_dev;
703 }
704 LOG(ERROR) << "Could not map image device: " << cow_image_name;
705 return std::nullopt;
706 }
707
MapSourceDevice(LockedFile * lock,const std::string & name,const std::chrono::milliseconds & timeout_ms,std::string * path)708 bool SnapshotManager::MapSourceDevice(LockedFile* lock, const std::string& name,
709 const std::chrono::milliseconds& timeout_ms,
710 std::string* path) {
711 CHECK(lock);
712
713 auto metadata = ReadOldPartitionMetadata(lock);
714 if (!metadata) {
715 LOG(ERROR) << "Could not map source device due to missing or corrupt metadata";
716 return false;
717 }
718
719 auto old_name = GetOtherPartitionName(name);
720 auto slot_suffix = device_->GetSlotSuffix();
721 auto slot = SlotNumberForSlotSuffix(slot_suffix);
722
723 CreateLogicalPartitionParams params = {
724 .block_device = device_->GetSuperDevice(slot),
725 .metadata = metadata,
726 .partition_name = old_name,
727 .timeout_ms = timeout_ms,
728 .device_name = GetSourceDeviceName(name),
729 .partition_opener = &device_->GetPartitionOpener(),
730 };
731 if (!CreateLogicalPartition(std::move(params), path)) {
732 LOG(ERROR) << "Could not create source device for snapshot " << name;
733 return false;
734 }
735 return true;
736 }
737
UnmapSnapshot(LockedFile * lock,const std::string & name)738 bool SnapshotManager::UnmapSnapshot(LockedFile* lock, const std::string& name) {
739 CHECK(lock);
740
741 if (UpdateUsesUserSnapshots(lock)) {
742 if (!UnmapUserspaceSnapshotDevice(lock, name)) {
743 return false;
744 }
745 } else {
746 if (!DeleteDeviceIfExists(name)) {
747 LOG(ERROR) << "Could not delete snapshot device: " << name;
748 return false;
749 }
750 }
751 return true;
752 }
753
UnmapCowImage(const std::string & name)754 bool SnapshotManager::UnmapCowImage(const std::string& name) {
755 if (!EnsureImageManager()) return false;
756 return images_->UnmapImageIfExists(GetCowImageDeviceName(name));
757 }
758
DeleteSnapshot(LockedFile * lock,const std::string & name)759 bool SnapshotManager::DeleteSnapshot(LockedFile* lock, const std::string& name) {
760 CHECK(lock);
761 CHECK(lock->lock_mode() == LOCK_EX);
762 if (!EnsureImageManager()) return false;
763
764 if (!UnmapCowDevices(lock, name)) {
765 return false;
766 }
767
768 // We can't delete snapshots in recovery. The only way we'd try is it we're
769 // completing or canceling a merge in preparation for a data wipe, in which
770 // case, we don't care if the file sticks around.
771 if (device_->IsRecovery()) {
772 LOG(INFO) << "Skipping delete of snapshot " << name << " in recovery.";
773 return true;
774 }
775
776 auto cow_image_name = GetCowImageDeviceName(name);
777 if (images_->BackingImageExists(cow_image_name)) {
778 if (!images_->DeleteBackingImage(cow_image_name)) {
779 return false;
780 }
781 }
782
783 std::string error;
784 auto file_path = GetSnapshotStatusFilePath(name);
785 if (!android::base::RemoveFileIfExists(file_path, &error)) {
786 LOG(ERROR) << "Failed to remove status file " << file_path << ": " << error;
787 return false;
788 }
789
790 // This path may never exist. If it is present, then it's a stale
791 // snapshot status file. Just remove the file and log the message.
792 const std::string tmp_path = file_path + ".tmp";
793 if (!android::base::RemoveFileIfExists(tmp_path, &error)) {
794 LOG(ERROR) << "Failed to remove stale snapshot file " << tmp_path;
795 }
796
797 return true;
798 }
799
InitiateMerge()800 bool SnapshotManager::InitiateMerge() {
801 auto lock = LockExclusive();
802 if (!lock) return false;
803
804 UpdateState state = ReadUpdateState(lock.get());
805 if (state != UpdateState::Unverified) {
806 LOG(ERROR) << "Cannot begin a merge if an update has not been verified";
807 return false;
808 }
809
810 auto slot = GetCurrentSlot();
811 if (slot != Slot::Target) {
812 LOG(ERROR) << "Device cannot merge while not booting from new slot";
813 return false;
814 }
815
816 std::vector<std::string> snapshots;
817 if (!ListSnapshots(lock.get(), &snapshots)) {
818 LOG(ERROR) << "Could not list snapshots";
819 return false;
820 }
821
822 auto current_slot_suffix = device_->GetSlotSuffix();
823
824 for (const auto& snapshot : snapshots) {
825 if (!android::base::EndsWith(snapshot, current_slot_suffix)) {
826 // Allow the merge to continue, but log this unexpected case.
827 LOG(ERROR) << "Unexpected snapshot found during merge: " << snapshot;
828 continue;
829 }
830
831 // The device has to be mapped, since everything should be merged at
832 // the same time. This is a fairly serious error. We could forcefully
833 // map everything here, but it should have been mapped during first-
834 // stage init.
835 if (dm_.GetState(snapshot) == DmDeviceState::INVALID) {
836 LOG(ERROR) << "Cannot begin merge; device " << snapshot << " is not mapped.";
837 return false;
838 }
839 }
840
841 auto metadata = ReadCurrentMetadata();
842 for (auto it = snapshots.begin(); it != snapshots.end();) {
843 switch (GetMetadataPartitionState(*metadata, *it)) {
844 case MetadataPartitionState::Flashed:
845 LOG(WARNING) << "Detected re-flashing for partition " << *it
846 << ". Skip merging it.";
847 [[fallthrough]];
848 case MetadataPartitionState::None: {
849 LOG(WARNING) << "Deleting snapshot for partition " << *it;
850 if (!DeleteSnapshot(lock.get(), *it)) {
851 LOG(WARNING) << "Cannot delete snapshot for partition " << *it
852 << ". Skip merging it anyways.";
853 }
854 it = snapshots.erase(it);
855 } break;
856 case MetadataPartitionState::Updated: {
857 ++it;
858 } break;
859 }
860 }
861
862 bool using_snapuserd = false;
863
864 std::vector<std::string> first_merge_group;
865
866 DmTargetSnapshot::Status initial_target_values = {};
867 for (const auto& snapshot : snapshots) {
868 if (!UpdateUsesUserSnapshots(lock.get())) {
869 DmTargetSnapshot::Status current_status;
870 if (!QuerySnapshotStatus(snapshot, nullptr, ¤t_status)) {
871 return false;
872 }
873 initial_target_values.sectors_allocated += current_status.sectors_allocated;
874 initial_target_values.total_sectors += current_status.total_sectors;
875 initial_target_values.metadata_sectors += current_status.metadata_sectors;
876 }
877
878 SnapshotStatus snapshot_status;
879 if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
880 return false;
881 }
882
883 using_snapuserd |= snapshot_status.using_snapuserd();
884 if (DecideMergePhase(snapshot_status) == MergePhase::FIRST_PHASE) {
885 first_merge_group.emplace_back(snapshot);
886 }
887 }
888
889 SnapshotUpdateStatus initial_status = ReadSnapshotUpdateStatus(lock.get());
890 initial_status.set_state(UpdateState::Merging);
891 initial_status.set_using_snapuserd(using_snapuserd);
892
893 if (!UpdateUsesUserSnapshots(lock.get())) {
894 initial_status.set_sectors_allocated(initial_target_values.sectors_allocated);
895 initial_status.set_total_sectors(initial_target_values.total_sectors);
896 initial_status.set_metadata_sectors(initial_target_values.metadata_sectors);
897 }
898
899 // If any partitions shrunk, we need to merge them before we merge any other
900 // partitions (see b/177935716). Otherwise, a merge from another partition
901 // may overwrite the source block of a copy operation.
902 const std::vector<std::string>* merge_group;
903 if (first_merge_group.empty()) {
904 merge_group = &snapshots;
905 initial_status.set_merge_phase(MergePhase::SECOND_PHASE);
906 } else {
907 merge_group = &first_merge_group;
908 initial_status.set_merge_phase(MergePhase::FIRST_PHASE);
909 }
910
911 // Point of no return - mark that we're starting a merge. From now on every
912 // eligible snapshot must be a merge target.
913 if (!WriteSnapshotUpdateStatus(lock.get(), initial_status)) {
914 return false;
915 }
916
917 auto reported_code = MergeFailureCode::Ok;
918 for (const auto& snapshot : *merge_group) {
919 // If this fails, we have no choice but to continue. Everything must
920 // be merged. This is not an ideal state to be in, but it is safe,
921 // because we the next boot will try again.
922 auto code = SwitchSnapshotToMerge(lock.get(), snapshot);
923 if (code != MergeFailureCode::Ok) {
924 LOG(ERROR) << "Failed to switch snapshot to a merge target: " << snapshot;
925 if (reported_code == MergeFailureCode::Ok) {
926 reported_code = code;
927 }
928 }
929 }
930
931 // If we couldn't switch everything to a merge target, pre-emptively mark
932 // this merge as failed. It will get acknowledged when WaitForMerge() is
933 // called.
934 if (reported_code != MergeFailureCode::Ok) {
935 WriteUpdateState(lock.get(), UpdateState::MergeFailed, reported_code);
936 }
937
938 // Return true no matter what, because a merge was initiated.
939 return true;
940 }
941
SwitchSnapshotToMerge(LockedFile * lock,const std::string & name)942 MergeFailureCode SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& name) {
943 SnapshotStatus status;
944 if (!ReadSnapshotStatus(lock, name, &status)) {
945 return MergeFailureCode::ReadStatus;
946 }
947 if (status.state() != SnapshotState::CREATED) {
948 LOG(WARNING) << "Snapshot " << name
949 << " has unexpected state: " << SnapshotState_Name(status.state());
950 }
951
952 if (UpdateUsesUserSnapshots(lock)) {
953 if (EnsureSnapuserdConnected()) {
954 // This is the point where we inform the daemon to initiate/resume
955 // the merge
956 if (!snapuserd_client_->InitiateMerge(name)) {
957 return MergeFailureCode::UnknownTable;
958 }
959 } else {
960 LOG(ERROR) << "Failed to connect to snapuserd daemon to initiate merge";
961 return MergeFailureCode::UnknownTable;
962 }
963 } else {
964 // After this, we return true because we technically did switch to a merge
965 // target. Everything else we do here is just informational.
966 if (auto code = RewriteSnapshotDeviceTable(name); code != MergeFailureCode::Ok) {
967 return code;
968 }
969 }
970
971 status.set_state(SnapshotState::MERGING);
972
973 if (!UpdateUsesUserSnapshots(lock)) {
974 DmTargetSnapshot::Status dm_status;
975 if (!QuerySnapshotStatus(name, nullptr, &dm_status)) {
976 LOG(ERROR) << "Could not query merge status for snapshot: " << name;
977 }
978 status.set_sectors_allocated(dm_status.sectors_allocated);
979 status.set_metadata_sectors(dm_status.metadata_sectors);
980 }
981
982 if (!WriteSnapshotStatus(lock, status)) {
983 LOG(ERROR) << "Could not update status file for snapshot: " << name;
984 }
985 return MergeFailureCode::Ok;
986 }
987
RewriteSnapshotDeviceTable(const std::string & name)988 MergeFailureCode SnapshotManager::RewriteSnapshotDeviceTable(const std::string& name) {
989 std::vector<DeviceMapper::TargetInfo> old_targets;
990 if (!dm_.GetTableInfo(name, &old_targets)) {
991 LOG(ERROR) << "Could not read snapshot device table: " << name;
992 return MergeFailureCode::GetTableInfo;
993 }
994 if (old_targets.size() != 1 || DeviceMapper::GetTargetType(old_targets[0].spec) != "snapshot") {
995 LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << name;
996 return MergeFailureCode::UnknownTable;
997 }
998
999 std::string base_device, cow_device;
1000 if (!DmTargetSnapshot::GetDevicesFromParams(old_targets[0].data, &base_device, &cow_device)) {
1001 LOG(ERROR) << "Could not derive underlying devices for snapshot: " << name;
1002 return MergeFailureCode::GetTableParams;
1003 }
1004
1005 DmTable table;
1006 table.Emplace<DmTargetSnapshot>(0, old_targets[0].spec.length, base_device, cow_device,
1007 SnapshotStorageMode::Merge, kSnapshotChunkSize);
1008 if (!dm_.LoadTableAndActivate(name, table)) {
1009 LOG(ERROR) << "Could not swap device-mapper tables on snapshot device " << name;
1010 return MergeFailureCode::ActivateNewTable;
1011 }
1012 LOG(INFO) << "Successfully switched snapshot device to a merge target: " << name;
1013 return MergeFailureCode::Ok;
1014 }
1015
GetSingleTarget(const std::string & dm_name,TableQuery query,DeviceMapper::TargetInfo * target)1016 bool SnapshotManager::GetSingleTarget(const std::string& dm_name, TableQuery query,
1017 DeviceMapper::TargetInfo* target) {
1018 if (dm_.GetState(dm_name) == DmDeviceState::INVALID) {
1019 return false;
1020 }
1021
1022 std::vector<DeviceMapper::TargetInfo> targets;
1023 bool result;
1024 if (query == TableQuery::Status) {
1025 result = dm_.GetTableStatus(dm_name, &targets);
1026 } else {
1027 result = dm_.GetTableInfo(dm_name, &targets);
1028 }
1029 if (!result) {
1030 LOG(ERROR) << "Could not query device: " << dm_name;
1031 return false;
1032 }
1033 if (targets.size() != 1) {
1034 return false;
1035 }
1036
1037 *target = std::move(targets[0]);
1038 return true;
1039 }
1040
IsSnapshotDevice(const std::string & dm_name,TargetInfo * target)1041 bool SnapshotManager::IsSnapshotDevice(const std::string& dm_name, TargetInfo* target) {
1042 DeviceMapper::TargetInfo snap_target;
1043 if (!GetSingleTarget(dm_name, TableQuery::Status, &snap_target)) {
1044 return false;
1045 }
1046 auto type = DeviceMapper::GetTargetType(snap_target.spec);
1047
1048 // If this is not a user-snapshot device then it should either
1049 // be a dm-snapshot or dm-snapshot-merge target
1050 if (type != "user") {
1051 if (type != "snapshot" && type != "snapshot-merge") {
1052 return false;
1053 }
1054 }
1055
1056 if (target) {
1057 *target = std::move(snap_target);
1058 }
1059 return true;
1060 }
1061
UpdateStateToStr(const enum UpdateState state)1062 auto SnapshotManager::UpdateStateToStr(const enum UpdateState state) {
1063 switch (state) {
1064 case None:
1065 return "None";
1066 case Initiated:
1067 return "Initiated";
1068 case Unverified:
1069 return "Unverified";
1070 case Merging:
1071 return "Merging";
1072 case MergeNeedsReboot:
1073 return "MergeNeedsReboot";
1074 case MergeCompleted:
1075 return "MergeCompleted";
1076 case MergeFailed:
1077 return "MergeFailed";
1078 case Cancelled:
1079 return "Cancelled";
1080 default:
1081 return "Unknown";
1082 }
1083 }
1084
QuerySnapshotStatus(const std::string & dm_name,std::string * target_type,DmTargetSnapshot::Status * status)1085 bool SnapshotManager::QuerySnapshotStatus(const std::string& dm_name, std::string* target_type,
1086 DmTargetSnapshot::Status* status) {
1087 DeviceMapper::TargetInfo target;
1088 if (!IsSnapshotDevice(dm_name, &target)) {
1089 LOG(ERROR) << "Device " << dm_name << " is not a snapshot or snapshot-merge device";
1090 return false;
1091 }
1092 if (!DmTargetSnapshot::ParseStatusText(target.data, status)) {
1093 LOG(ERROR) << "Could not parse snapshot status text: " << dm_name;
1094 return false;
1095 }
1096 if (target_type) {
1097 *target_type = DeviceMapper::GetTargetType(target.spec);
1098 }
1099 if (!status->error.empty()) {
1100 LOG(ERROR) << "Snapshot: " << dm_name << " returned error code: " << status->error;
1101 return false;
1102 }
1103 return true;
1104 }
1105
1106 // Note that when a merge fails, we will *always* try again to complete the
1107 // merge each time the device boots. There is no harm in doing so, and if
1108 // the problem was transient, we might manage to get a new outcome.
ProcessUpdateState(const std::function<bool ()> & callback,const std::function<bool ()> & before_cancel)1109 UpdateState SnapshotManager::ProcessUpdateState(const std::function<bool()>& callback,
1110 const std::function<bool()>& before_cancel) {
1111 while (true) {
1112 auto result = CheckMergeState(before_cancel);
1113 LOG(INFO) << "ProcessUpdateState handling state: " << UpdateStateToStr(result.state);
1114
1115 if (result.state == UpdateState::MergeFailed) {
1116 AcknowledgeMergeFailure(result.failure_code);
1117 }
1118
1119 if (result.state == UpdateState::MergeCompleted) {
1120 if (device_->IsTempMetadata()) {
1121 CleanupScratchOtaMetadataIfPresent();
1122 }
1123 }
1124
1125 if (result.state != UpdateState::Merging) {
1126 // Either there is no merge, or the merge was finished, so no need
1127 // to keep waiting.
1128 return result.state;
1129 }
1130
1131 if (callback && !callback()) {
1132 return result.state;
1133 }
1134
1135 // This wait is not super time sensitive, so we have a relatively
1136 // low polling frequency.
1137 std::this_thread::sleep_for(kUpdateStateCheckInterval);
1138 }
1139 }
1140
CheckMergeState(const std::function<bool ()> & before_cancel)1141 auto SnapshotManager::CheckMergeState(const std::function<bool()>& before_cancel) -> MergeResult {
1142 auto lock = LockExclusive();
1143 if (!lock) {
1144 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::AcquireLock);
1145 }
1146
1147 auto result = CheckMergeState(lock.get(), before_cancel);
1148 LOG(INFO) << "CheckMergeState for snapshots returned: " << UpdateStateToStr(result.state);
1149
1150 if (result.state == UpdateState::MergeCompleted) {
1151 // Do this inside the same lock. Failures get acknowledged without the
1152 // lock, because flock() might have failed.
1153 AcknowledgeMergeSuccess(lock.get());
1154 } else if (result.state == UpdateState::Cancelled) {
1155 if (!device_->IsRecovery() && !RemoveAllUpdateState(lock.get(), before_cancel)) {
1156 LOG(ERROR) << "Failed to remove all update state after acknowleding cancelled update.";
1157 }
1158 }
1159 return result;
1160 }
1161
CheckMergeState(LockedFile * lock,const std::function<bool ()> & before_cancel)1162 auto SnapshotManager::CheckMergeState(LockedFile* lock,
1163 const std::function<bool()>& before_cancel) -> MergeResult {
1164 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
1165 switch (update_status.state()) {
1166 case UpdateState::None:
1167 case UpdateState::MergeCompleted:
1168 // Harmless races are allowed between two callers of WaitForMerge,
1169 // so in both of these cases we just propagate the state.
1170 return MergeResult(update_status.state());
1171
1172 case UpdateState::Merging:
1173 case UpdateState::MergeNeedsReboot:
1174 case UpdateState::MergeFailed:
1175 // We'll poll each snapshot below. Note that for the NeedsReboot
1176 // case, we always poll once to give cleanup another opportunity to
1177 // run.
1178 break;
1179
1180 case UpdateState::Unverified:
1181 // This is an edge case. Normally cancelled updates are detected
1182 // via the merge poll below, but if we never started a merge, we
1183 // need to also check here.
1184 if (HandleCancelledUpdate(lock, before_cancel)) {
1185 return MergeResult(UpdateState::Cancelled);
1186 }
1187 return MergeResult(update_status.state());
1188
1189 default:
1190 return MergeResult(update_status.state());
1191 }
1192
1193 std::vector<std::string> snapshots;
1194 if (!ListSnapshots(lock, &snapshots)) {
1195 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ListSnapshots);
1196 }
1197
1198 auto current_slot_suffix = device_->GetSlotSuffix();
1199
1200 bool cancelled = false;
1201 bool merging = false;
1202 bool needs_reboot = false;
1203 bool wrong_phase = false;
1204 MergeFailureCode failure_code = MergeFailureCode::Ok;
1205 for (const auto& snapshot : snapshots) {
1206 if (!android::base::EndsWith(snapshot, current_slot_suffix)) {
1207 // This will have triggered an error message in InitiateMerge already.
1208 LOG(ERROR) << "Skipping merge validation of unexpected snapshot: " << snapshot;
1209 continue;
1210 }
1211
1212 auto result = CheckTargetMergeState(lock, snapshot, update_status);
1213 LOG(INFO) << "CheckTargetMergeState for " << snapshot
1214 << " returned: " << UpdateStateToStr(result.state);
1215
1216 switch (result.state) {
1217 case UpdateState::MergeFailed:
1218 // Take the first failure code in case other failures compound.
1219 if (failure_code == MergeFailureCode::Ok) {
1220 failure_code = result.failure_code;
1221 }
1222 break;
1223 case UpdateState::Merging:
1224 merging = true;
1225 break;
1226 case UpdateState::MergeNeedsReboot:
1227 needs_reboot = true;
1228 break;
1229 case UpdateState::MergeCompleted:
1230 break;
1231 case UpdateState::Cancelled:
1232 cancelled = true;
1233 break;
1234 case UpdateState::None:
1235 wrong_phase = true;
1236 break;
1237 default:
1238 LOG(ERROR) << "Unknown merge status for \"" << snapshot << "\": " << "\""
1239 << result.state << "\"";
1240 if (failure_code == MergeFailureCode::Ok) {
1241 failure_code = MergeFailureCode::UnexpectedMergeState;
1242 }
1243 break;
1244 }
1245 }
1246
1247 if (merging) {
1248 // Note that we handle "Merging" before we handle anything else. We
1249 // want to poll until *nothing* is merging if we can, so everything has
1250 // a chance to get marked as completed or failed.
1251 return MergeResult(UpdateState::Merging);
1252 }
1253 if (failure_code != MergeFailureCode::Ok) {
1254 // Note: since there are many drop-out cases for failure, we acknowledge
1255 // it in WaitForMerge rather than here and elsewhere.
1256 return MergeResult(UpdateState::MergeFailed, failure_code);
1257 }
1258 if (wrong_phase) {
1259 // If we got here, no other partitions are being merged, and nothing
1260 // failed to merge. It's safe to move to the next merge phase.
1261 auto code = MergeSecondPhaseSnapshots(lock);
1262 if (code != MergeFailureCode::Ok) {
1263 return MergeResult(UpdateState::MergeFailed, code);
1264 }
1265 return MergeResult(UpdateState::Merging);
1266 }
1267 if (needs_reboot) {
1268 WriteUpdateState(lock, UpdateState::MergeNeedsReboot);
1269 return MergeResult(UpdateState::MergeNeedsReboot);
1270 }
1271 if (cancelled) {
1272 // This is an edge case, that we handle as correctly as we sensibly can.
1273 // The underlying partition has changed behind update_engine, and we've
1274 // removed the snapshot as a result. The exact state of the update is
1275 // undefined now, but this can only happen on an unlocked device where
1276 // partitions can be flashed without wiping userdata.
1277 return MergeResult(UpdateState::Cancelled);
1278 }
1279 return MergeResult(UpdateState::MergeCompleted);
1280 }
1281
CheckTargetMergeState(LockedFile * lock,const std::string & name,const SnapshotUpdateStatus & update_status)1282 auto SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name,
1283 const SnapshotUpdateStatus& update_status)
1284 -> MergeResult {
1285 SnapshotStatus snapshot_status;
1286 if (!ReadSnapshotStatus(lock, name, &snapshot_status)) {
1287 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ReadStatus);
1288 }
1289
1290 std::unique_ptr<LpMetadata> current_metadata;
1291
1292 if (!IsSnapshotDevice(name)) {
1293 if (!current_metadata) {
1294 current_metadata = ReadCurrentMetadata();
1295 }
1296
1297 if (!current_metadata ||
1298 GetMetadataPartitionState(*current_metadata, name) != MetadataPartitionState::Updated) {
1299 DeleteSnapshot(lock, name);
1300 return MergeResult(UpdateState::Cancelled);
1301 }
1302
1303 // During a check, we decided the merge was complete, but we were unable to
1304 // collapse the device-mapper stack and perform COW cleanup. If we haven't
1305 // rebooted after this check, the device will still be a snapshot-merge
1306 // target. If we have rebooted, the device will now be a linear target,
1307 // and we can try cleanup again.
1308 if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
1309 // NB: It's okay if this fails now, we gave cleanup our best effort.
1310 OnSnapshotMergeComplete(lock, name, snapshot_status);
1311 return MergeResult(UpdateState::MergeCompleted);
1312 }
1313
1314 LOG(ERROR) << "Expected snapshot or snapshot-merge for device: " << name;
1315 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
1316 }
1317
1318 // This check is expensive so it is only enabled for debugging.
1319 DCHECK((current_metadata = ReadCurrentMetadata()) &&
1320 GetMetadataPartitionState(*current_metadata, name) == MetadataPartitionState::Updated);
1321
1322 if (UpdateUsesUserSnapshots(lock)) {
1323 if (!EnsureSnapuserdConnected()) {
1324 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::QuerySnapshotStatus);
1325 }
1326
1327 // Query the snapshot status from the daemon
1328 const auto merge_status = snapuserd_client_->QuerySnapshotStatus(name);
1329 if (merge_status == "snapshot-merge-failed") {
1330 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
1331 }
1332
1333 // This is the case when device reboots during merge. Once the device boots,
1334 // snapuserd daemon will not resume merge immediately in first stage init.
1335 // This is slightly different as compared to dm-snapshot-merge; In this
1336 // case, metadata file will have "MERGING" state whereas the daemon will be
1337 // waiting to resume the merge. Thus, we resume the merge at this point.
1338 if (merge_status == "snapshot" && snapshot_status.state() == SnapshotState::MERGING) {
1339 if (!snapuserd_client_->InitiateMerge(name)) {
1340 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
1341 }
1342 return MergeResult(UpdateState::Merging);
1343 }
1344
1345 if (merge_status == "snapshot" &&
1346 DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE) {
1347 if (update_status.merge_phase() == MergePhase::FIRST_PHASE) {
1348 // The snapshot is not being merged because it's in the wrong phase.
1349 return MergeResult(UpdateState::None);
1350 } else {
1351 // update_status is already in second phase but the
1352 // snapshot_status is still not set to SnapshotState::MERGING.
1353 //
1354 // Resume the merge at this point. see b/374225913
1355 LOG(INFO) << "SwitchSnapshotToMerge: " << name << " after resuming merge";
1356 auto code = SwitchSnapshotToMerge(lock, name);
1357 if (code != MergeFailureCode::Ok) {
1358 LOG(ERROR) << "Failed to switch snapshot: " << name
1359 << " to merge during second phase";
1360 return MergeResult(UpdateState::MergeFailed,
1361 MergeFailureCode::UnknownTargetType);
1362 }
1363 return MergeResult(UpdateState::Merging);
1364 }
1365 }
1366
1367 if (merge_status == "snapshot-merge") {
1368 if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
1369 LOG(ERROR) << "Snapshot " << name
1370 << " is merging after being marked merge-complete.";
1371 return MergeResult(UpdateState::MergeFailed,
1372 MergeFailureCode::UnmergedSectorsAfterCompletion);
1373 }
1374 return MergeResult(UpdateState::Merging);
1375 }
1376
1377 if (merge_status != "snapshot-merge-complete") {
1378 LOG(ERROR) << "Snapshot " << name << " has incorrect status: " << merge_status;
1379 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ExpectedMergeTarget);
1380 }
1381 } else {
1382 // dm-snapshot in the kernel
1383 std::string target_type;
1384 DmTargetSnapshot::Status status;
1385 if (!QuerySnapshotStatus(name, &target_type, &status)) {
1386 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::QuerySnapshotStatus);
1387 }
1388 if (target_type == "snapshot" &&
1389 DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE &&
1390 update_status.merge_phase() == MergePhase::FIRST_PHASE) {
1391 // The snapshot is not being merged because it's in the wrong phase.
1392 return MergeResult(UpdateState::None);
1393 }
1394 if (target_type != "snapshot-merge") {
1395 // We can get here if we failed to rewrite the target type in
1396 // InitiateMerge(). If we failed to create the target in first-stage
1397 // init, boot would not succeed.
1398 LOG(ERROR) << "Snapshot " << name << " has incorrect target type: " << target_type;
1399 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ExpectedMergeTarget);
1400 }
1401
1402 // These two values are equal when merging is complete.
1403 if (status.sectors_allocated != status.metadata_sectors) {
1404 if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
1405 LOG(ERROR) << "Snapshot " << name
1406 << " is merging after being marked merge-complete.";
1407 return MergeResult(UpdateState::MergeFailed,
1408 MergeFailureCode::UnmergedSectorsAfterCompletion);
1409 }
1410 return MergeResult(UpdateState::Merging);
1411 }
1412 }
1413
1414 // Merging is done. First, update the status file to indicate the merge
1415 // is complete. We do this before calling OnSnapshotMergeComplete, even
1416 // though this means the write is potentially wasted work (since in the
1417 // ideal case we'll immediately delete the file).
1418 //
1419 // This makes it simpler to reason about the next reboot: no matter what
1420 // part of cleanup failed, first-stage init won't try to create another
1421 // snapshot device for this partition.
1422 snapshot_status.set_state(SnapshotState::MERGE_COMPLETED);
1423 if (!WriteSnapshotStatus(lock, snapshot_status)) {
1424 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::WriteStatus);
1425 }
1426 if (!OnSnapshotMergeComplete(lock, name, snapshot_status)) {
1427 return MergeResult(UpdateState::MergeNeedsReboot);
1428 }
1429 return MergeResult(UpdateState::MergeCompleted, MergeFailureCode::Ok);
1430 }
1431
1432 // This returns the backing device, not the dm-user layer.
GetMappedCowDeviceName(const std::string & snapshot,const SnapshotStatus & status)1433 static std::string GetMappedCowDeviceName(const std::string& snapshot,
1434 const SnapshotStatus& status) {
1435 // If no partition was created (the COW exists entirely on /data), the
1436 // device-mapper layering is different than if we had a partition.
1437 if (status.cow_partition_size() == 0) {
1438 return GetCowImageDeviceName(snapshot);
1439 }
1440 return GetCowName(snapshot);
1441 }
1442
MergeSecondPhaseSnapshots(LockedFile * lock)1443 MergeFailureCode SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) {
1444 std::vector<std::string> snapshots;
1445 if (!ListSnapshots(lock, &snapshots)) {
1446 return MergeFailureCode::ListSnapshots;
1447 }
1448
1449 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
1450 CHECK(update_status.state() == UpdateState::Merging ||
1451 update_status.state() == UpdateState::MergeFailed);
1452 CHECK(update_status.merge_phase() == MergePhase::FIRST_PHASE);
1453
1454 update_status.set_state(UpdateState::Merging);
1455 update_status.set_merge_phase(MergePhase::SECOND_PHASE);
1456 if (!WriteSnapshotUpdateStatus(lock, update_status)) {
1457 return MergeFailureCode::WriteStatus;
1458 }
1459
1460 auto current_slot_suffix = device_->GetSlotSuffix();
1461 MergeFailureCode result = MergeFailureCode::Ok;
1462 for (const auto& snapshot : snapshots) {
1463 if (!android::base::EndsWith(snapshot, current_slot_suffix)) {
1464 LOG(ERROR) << "Skipping invalid snapshot: " << snapshot
1465 << " during MergeSecondPhaseSnapshots";
1466 continue;
1467 }
1468 SnapshotStatus snapshot_status;
1469 if (!ReadSnapshotStatus(lock, snapshot, &snapshot_status)) {
1470 return MergeFailureCode::ReadStatus;
1471 }
1472 if (DecideMergePhase(snapshot_status) != MergePhase::SECOND_PHASE) {
1473 continue;
1474 }
1475 auto code = SwitchSnapshotToMerge(lock, snapshot);
1476 if (code != MergeFailureCode::Ok) {
1477 LOG(ERROR) << "Failed to switch snapshot to a second-phase merge target: " << snapshot;
1478 if (result == MergeFailureCode::Ok) {
1479 result = code;
1480 }
1481 }
1482 }
1483 return result;
1484 }
1485
GetBootSnapshotsWithoutSlotSwitchPath()1486 std::string SnapshotManager::GetBootSnapshotsWithoutSlotSwitchPath() {
1487 return metadata_dir_ + "/" + android::base::Basename(kBootSnapshotsWithoutSlotSwitch);
1488 }
1489
GetSnapshotBootIndicatorPath()1490 std::string SnapshotManager::GetSnapshotBootIndicatorPath() {
1491 return metadata_dir_ + "/" + android::base::Basename(kBootIndicatorPath);
1492 }
1493
GetRollbackIndicatorPath()1494 std::string SnapshotManager::GetRollbackIndicatorPath() {
1495 return metadata_dir_ + "/" + android::base::Basename(kRollbackIndicatorPath);
1496 }
1497
GetSnapuserdFromSystemPath()1498 std::string SnapshotManager::GetSnapuserdFromSystemPath() {
1499 return metadata_dir_ + "/" + android::base::Basename(kSnapuserdFromSystem);
1500 }
1501
GetForwardMergeIndicatorPath()1502 std::string SnapshotManager::GetForwardMergeIndicatorPath() {
1503 return metadata_dir_ + "/allow-forward-merge";
1504 }
1505
GetOldPartitionMetadataPath()1506 std::string SnapshotManager::GetOldPartitionMetadataPath() {
1507 return metadata_dir_ + "/old-partition-metadata";
1508 }
1509
AcknowledgeMergeSuccess(LockedFile * lock)1510 void SnapshotManager::AcknowledgeMergeSuccess(LockedFile* lock) {
1511 // It's not possible to remove update state in recovery, so write an
1512 // indicator that cleanup is needed on reboot. If a factory data reset
1513 // was requested, it doesn't matter, everything will get wiped anyway.
1514 // To make testing easier we consider a /data wipe as cleaned up.
1515 if (device_->IsRecovery()) {
1516 WriteUpdateState(lock, UpdateState::MergeCompleted);
1517 return;
1518 }
1519
1520 RemoveAllUpdateState(lock);
1521
1522 if (UpdateUsesUserSnapshots(lock) && !device()->IsTestDevice()) {
1523 if (snapuserd_client_) {
1524 snapuserd_client_->DetachSnapuserd();
1525 snapuserd_client_->RemoveTransitionedDaemonIndicator();
1526 snapuserd_client_ = nullptr;
1527 }
1528 }
1529 }
1530
AcknowledgeMergeFailure(MergeFailureCode failure_code)1531 void SnapshotManager::AcknowledgeMergeFailure(MergeFailureCode failure_code) {
1532 // Log first, so worst case, we always have a record of why the calls below
1533 // were being made.
1534 LOG(ERROR) << "Merge could not be completed and will be marked as failed.";
1535
1536 auto lock = LockExclusive();
1537 if (!lock) return;
1538
1539 // Since we released the lock in between WaitForMerge and here, it's
1540 // possible (1) the merge successfully completed or (2) was already
1541 // marked as a failure. So make sure to check the state again, and
1542 // only mark as a failure if appropriate.
1543 UpdateState state = ReadUpdateState(lock.get());
1544 if (state != UpdateState::Merging && state != UpdateState::MergeNeedsReboot) {
1545 return;
1546 }
1547
1548 WriteUpdateState(lock.get(), UpdateState::MergeFailed, failure_code);
1549 }
1550
OnSnapshotMergeComplete(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1551 bool SnapshotManager::OnSnapshotMergeComplete(LockedFile* lock, const std::string& name,
1552 const SnapshotStatus& status) {
1553 if (!UpdateUsesUserSnapshots(lock)) {
1554 if (IsSnapshotDevice(name)) {
1555 // We are extra-cautious here, to avoid deleting the wrong table.
1556 std::string target_type;
1557 DmTargetSnapshot::Status dm_status;
1558 if (!QuerySnapshotStatus(name, &target_type, &dm_status)) {
1559 return false;
1560 }
1561 if (target_type != "snapshot-merge") {
1562 LOG(ERROR) << "Unexpected target type " << target_type
1563 << " for snapshot device: " << name;
1564 return false;
1565 }
1566 if (dm_status.sectors_allocated != dm_status.metadata_sectors) {
1567 LOG(ERROR) << "Merge is unexpectedly incomplete for device " << name;
1568 return false;
1569 }
1570 if (!CollapseSnapshotDevice(lock, name, status)) {
1571 LOG(ERROR) << "Unable to collapse snapshot: " << name;
1572 return false;
1573 }
1574 }
1575 } else {
1576 // Just collapse the device - no need to query again as we just did
1577 // prior to calling this function
1578 if (!CollapseSnapshotDevice(lock, name, status)) {
1579 LOG(ERROR) << "Unable to collapse snapshot: " << name;
1580 return false;
1581 }
1582 }
1583
1584 // Note that collapsing is implicitly an Unmap, so we don't need to
1585 // unmap the snapshot.
1586
1587 if (!DeleteSnapshot(lock, name)) {
1588 LOG(ERROR) << "Could not delete snapshot: " << name;
1589 return false;
1590 }
1591 return true;
1592 }
1593
CollapseSnapshotDevice(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1594 bool SnapshotManager::CollapseSnapshotDevice(LockedFile* lock, const std::string& name,
1595 const SnapshotStatus& status) {
1596 if (!UpdateUsesUserSnapshots(lock)) {
1597 // Verify we have a snapshot-merge device.
1598 DeviceMapper::TargetInfo target;
1599 if (!GetSingleTarget(name, TableQuery::Table, &target)) {
1600 return false;
1601 }
1602 if (DeviceMapper::GetTargetType(target.spec) != "snapshot-merge") {
1603 // This should be impossible, it was checked earlier.
1604 LOG(ERROR) << "Snapshot device has invalid target type: " << name;
1605 return false;
1606 }
1607
1608 std::string base_device, cow_device;
1609 if (!DmTargetSnapshot::GetDevicesFromParams(target.data, &base_device, &cow_device)) {
1610 LOG(ERROR) << "Could not parse snapshot device " << name
1611 << " parameters: " << target.data;
1612 return false;
1613 }
1614 }
1615
1616 uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
1617 if (snapshot_sectors * kSectorSize != status.snapshot_size()) {
1618 LOG(ERROR) << "Snapshot " << name
1619 << " size is not sector aligned: " << status.snapshot_size();
1620 return false;
1621 }
1622
1623 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1624 // Create a DmTable that is identical to the base device.
1625 CreateLogicalPartitionParams base_device_params{
1626 .block_device = device_->GetSuperDevice(slot),
1627 .metadata_slot = slot,
1628 .partition_name = name,
1629 .partition_opener = &device_->GetPartitionOpener(),
1630 };
1631 DmTable table;
1632 if (!CreateDmTable(base_device_params, &table)) {
1633 LOG(ERROR) << "Could not create a DmTable for partition: " << name;
1634 return false;
1635 }
1636
1637 if (!dm_.LoadTableAndActivate(name, table)) {
1638 return false;
1639 }
1640
1641 if (!UpdateUsesUserSnapshots(lock)) {
1642 // Attempt to delete the snapshot device if one still exists. Nothing
1643 // should be depending on the device, and device-mapper should have
1644 // flushed remaining I/O. We could in theory replace with dm-zero (or
1645 // re-use the table above), but for now it's better to know why this
1646 // would fail.
1647 //
1648 // Furthermore, we should not be trying to unmap for userspace snapshot
1649 // as unmap will fail since dm-user itself was a snapshot device prior
1650 // to switching of tables. Unmap will fail as the device will be mounted
1651 // by system partitions
1652 if (status.using_snapuserd()) {
1653 auto dm_user_name = GetDmUserCowName(name, GetSnapshotDriver(lock));
1654 UnmapDmUserDevice(dm_user_name);
1655 }
1656 }
1657
1658 // We can't delete base device immediately as daemon holds a reference.
1659 // Make sure we wait for all the worker threads to terminate and release
1660 // the reference
1661 if (UpdateUsesUserSnapshots(lock) && EnsureSnapuserdConnected()) {
1662 if (!snapuserd_client_->WaitForDeviceDelete(name)) {
1663 LOG(ERROR) << "Failed to wait for " << name << " control device to delete";
1664 }
1665 }
1666
1667 auto base_name = GetBaseDeviceName(name);
1668 if (!DeleteDeviceIfExists(base_name)) {
1669 LOG(ERROR) << "Unable to delete base device for snapshot: " << base_name;
1670 }
1671
1672 if (!DeleteDeviceIfExists(GetSourceDeviceName(name), 4000ms)) {
1673 LOG(ERROR) << "Unable to delete source device for snapshot: " << GetSourceDeviceName(name);
1674 }
1675
1676 return true;
1677 }
1678
HandleCancelledUpdate(LockedFile * lock,const std::function<bool ()> & before_cancel)1679 bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock,
1680 const std::function<bool()>& before_cancel) {
1681 auto slot = GetCurrentSlot();
1682 if (slot == Slot::Unknown) {
1683 return false;
1684 }
1685
1686 // If all snapshots were reflashed, then cancel the entire update.
1687 if (AreAllSnapshotsCancelled(lock)) {
1688 LOG(WARNING) << "Detected re-flashing, cancelling unverified update.";
1689 return RemoveAllUpdateState(lock, before_cancel);
1690 }
1691
1692 // If update has been rolled back, then cancel the entire update.
1693 // Client (update_engine) is responsible for doing additional cleanup work on its own states
1694 // when ProcessUpdateState() returns UpdateState::Cancelled.
1695 auto current_slot = GetCurrentSlot();
1696 if (current_slot != Slot::Source) {
1697 LOG(INFO) << "Update state is being processed while booting at " << current_slot
1698 << " slot, taking no action.";
1699 return false;
1700 }
1701
1702 // current_slot == Source. Attempt to detect rollbacks.
1703 if (access(GetRollbackIndicatorPath().c_str(), F_OK) != 0) {
1704 // This unverified update is not attempted. Take no action.
1705 PLOG(INFO) << "Rollback indicator not detected. "
1706 << "Update state is being processed before reboot, taking no action.";
1707 return false;
1708 }
1709
1710 LOG(WARNING) << "Detected rollback, cancelling unverified update.";
1711 return RemoveAllUpdateState(lock, before_cancel);
1712 }
1713
PerformInitTransition(InitTransition transition,std::vector<std::string> * snapuserd_argv)1714 bool SnapshotManager::PerformInitTransition(InitTransition transition,
1715 std::vector<std::string>* snapuserd_argv) {
1716 LOG(INFO) << "Performing transition for snapuserd.";
1717
1718 // Don't use EnsureSnapuserdConnected() because this is called from init,
1719 // and attempting to do so will deadlock.
1720 if (!snapuserd_client_ && transition != InitTransition::SELINUX_DETACH) {
1721 snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s);
1722 if (!snapuserd_client_) {
1723 LOG(ERROR) << "Unable to connect to snapuserd";
1724 return false;
1725 }
1726 }
1727
1728 auto lock = LockExclusive();
1729 if (!lock) return false;
1730
1731 std::vector<std::string> snapshots;
1732 if (!ListSnapshots(lock.get(), &snapshots)) {
1733 LOG(ERROR) << "Failed to list snapshots.";
1734 return false;
1735 }
1736
1737 if (UpdateUsesUserSnapshots(lock.get()) && transition == InitTransition::SELINUX_DETACH) {
1738 snapuserd_argv->emplace_back("-user_snapshot");
1739 if (UpdateUsesIouring(lock.get())) {
1740 snapuserd_argv->emplace_back("-io_uring");
1741 }
1742 if (UpdateUsesODirect(lock.get())) {
1743 snapuserd_argv->emplace_back("-o_direct");
1744 }
1745 uint cow_op_merge_size = GetUpdateCowOpMergeSize(lock.get());
1746 if (cow_op_merge_size != 0) {
1747 snapuserd_argv->emplace_back("-cow_op_merge_size=" + std::to_string(cow_op_merge_size));
1748 }
1749 uint32_t worker_count = GetUpdateWorkerCount(lock.get());
1750 if (worker_count != 0) {
1751 snapuserd_argv->emplace_back("-worker_count=" + std::to_string(worker_count));
1752 }
1753 }
1754
1755 size_t num_cows = 0;
1756 size_t ok_cows = 0;
1757 for (const auto& snapshot : snapshots) {
1758 std::string user_cow_name = GetDmUserCowName(snapshot, GetSnapshotDriver(lock.get()));
1759
1760 if (dm_.GetState(user_cow_name) == DmDeviceState::INVALID) {
1761 continue;
1762 }
1763
1764 DeviceMapper::TargetInfo target;
1765 if (!GetSingleTarget(user_cow_name, TableQuery::Table, &target)) {
1766 continue;
1767 }
1768
1769 auto target_type = DeviceMapper::GetTargetType(target.spec);
1770 if (target_type != "user") {
1771 LOG(ERROR) << "Unexpected target type for " << user_cow_name << ": " << target_type;
1772 continue;
1773 }
1774
1775 num_cows++;
1776
1777 SnapshotStatus snapshot_status;
1778 if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
1779 LOG(ERROR) << "Unable to read snapshot status: " << snapshot;
1780 continue;
1781 }
1782
1783 auto misc_name = user_cow_name;
1784
1785 std::string source_device_name;
1786 if (snapshot_status.old_partition_size() > 0) {
1787 source_device_name = GetSourceDeviceName(snapshot);
1788 } else {
1789 source_device_name = GetBaseDeviceName(snapshot);
1790 }
1791
1792 std::string source_device;
1793 if (!dm_.GetDmDevicePathByName(source_device_name, &source_device)) {
1794 LOG(ERROR) << "Could not get device path for " << GetSourceDeviceName(snapshot);
1795 continue;
1796 }
1797
1798 std::string base_path_merge;
1799 if (!dm_.GetDmDevicePathByName(GetBaseDeviceName(snapshot), &base_path_merge)) {
1800 LOG(ERROR) << "Could not get device path for " << GetSourceDeviceName(snapshot);
1801 continue;
1802 }
1803
1804 std::string cow_image_name = GetMappedCowDeviceName(snapshot, snapshot_status);
1805
1806 std::string cow_image_device;
1807 if (!dm_.GetDmDevicePathByName(cow_image_name, &cow_image_device)) {
1808 LOG(ERROR) << "Could not get device path for " << cow_image_name;
1809 continue;
1810 }
1811
1812 if (transition == InitTransition::SELINUX_DETACH) {
1813 if (!UpdateUsesUserSnapshots(lock.get())) {
1814 auto message = misc_name + "," + cow_image_device + "," + source_device;
1815 snapuserd_argv->emplace_back(std::move(message));
1816 } else {
1817 auto message = misc_name + "," + cow_image_device + "," + source_device + "," +
1818 base_path_merge;
1819 snapuserd_argv->emplace_back(std::move(message));
1820 }
1821 SetReadAheadSize(cow_image_device, snapshot_status.read_ahead_size());
1822 SetReadAheadSize(source_device, snapshot_status.read_ahead_size());
1823
1824 // Do not attempt to connect to the new snapuserd yet, it hasn't
1825 // been started. We do however want to wait for the misc device
1826 // to have been created.
1827 ok_cows++;
1828 continue;
1829 }
1830
1831 DmTable table;
1832 table.Emplace<DmTargetUser>(0, target.spec.length, misc_name);
1833 if (!dm_.LoadTableAndActivate(user_cow_name, table)) {
1834 LOG(ERROR) << "Unable to swap tables for " << misc_name;
1835 continue;
1836 }
1837
1838 // Wait for ueventd to acknowledge and create the control device node.
1839 std::string control_device = "/dev/dm-user/" + misc_name;
1840 if (!WaitForDevice(control_device, 10s)) {
1841 LOG(ERROR) << "dm-user control device no found: " << misc_name;
1842 continue;
1843 }
1844
1845 uint64_t base_sectors;
1846 if (!UpdateUsesUserSnapshots(lock.get())) {
1847 base_sectors =
1848 snapuserd_client_->InitDmUserCow(misc_name, cow_image_device, source_device);
1849 } else {
1850 base_sectors = snapuserd_client_->InitDmUserCow(misc_name, cow_image_device,
1851 source_device, base_path_merge);
1852 }
1853
1854 if (base_sectors == 0) {
1855 // Unrecoverable as metadata reads from cow device failed
1856 LOG(FATAL) << "Failed to retrieve base_sectors from Snapuserd";
1857 return false;
1858 }
1859
1860 CHECK(base_sectors <= target.spec.length);
1861
1862 if (!snapuserd_client_->AttachDmUser(misc_name)) {
1863 // This error is unrecoverable. We cannot proceed because reads to
1864 // the underlying device will fail.
1865 LOG(FATAL) << "Could not initialize snapuserd for " << user_cow_name;
1866 return false;
1867 }
1868
1869 ok_cows++;
1870 }
1871
1872 if (ok_cows != num_cows) {
1873 LOG(ERROR) << "Could not transition all snapuserd consumers.";
1874 return false;
1875 }
1876 return true;
1877 }
1878
ReadCurrentMetadata()1879 std::unique_ptr<LpMetadata> SnapshotManager::ReadCurrentMetadata() {
1880 const auto& opener = device_->GetPartitionOpener();
1881 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1882 auto super_device = device_->GetSuperDevice(slot);
1883 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1884 if (!metadata) {
1885 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1886 return nullptr;
1887 }
1888 return metadata;
1889 }
1890
GetMetadataPartitionState(const LpMetadata & metadata,const std::string & name)1891 SnapshotManager::MetadataPartitionState SnapshotManager::GetMetadataPartitionState(
1892 const LpMetadata& metadata, const std::string& name) {
1893 auto partition = android::fs_mgr::FindPartition(metadata, name);
1894 if (!partition) return MetadataPartitionState::None;
1895 if (partition->attributes & LP_PARTITION_ATTR_UPDATED) {
1896 return MetadataPartitionState::Updated;
1897 }
1898 return MetadataPartitionState::Flashed;
1899 }
1900
AreAllSnapshotsCancelled(LockedFile * lock)1901 bool SnapshotManager::AreAllSnapshotsCancelled(LockedFile* lock) {
1902 std::vector<std::string> snapshots;
1903 if (!ListSnapshots(lock, &snapshots)) {
1904 LOG(WARNING) << "Failed to list snapshots to determine whether device has been flashed "
1905 << "after applying an update. Assuming no snapshots.";
1906 // Let HandleCancelledUpdate resets UpdateState.
1907 return true;
1908 }
1909
1910 std::map<std::string, bool> flashing_status;
1911
1912 if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1913 LOG(WARNING) << "Failed to determine whether partitions have been flashed. Not"
1914 << "removing update states.";
1915 return false;
1916 }
1917
1918 bool all_snapshots_cancelled = std::all_of(flashing_status.begin(), flashing_status.end(),
1919 [](const auto& pair) { return pair.second; });
1920
1921 if (all_snapshots_cancelled) {
1922 LOG(WARNING) << "All partitions are re-flashed after update, removing all update states.";
1923 }
1924 return all_snapshots_cancelled;
1925 }
1926
GetSnapshotFlashingStatus(LockedFile * lock,const std::vector<std::string> & snapshots,std::map<std::string,bool> * out)1927 bool SnapshotManager::GetSnapshotFlashingStatus(LockedFile* lock,
1928 const std::vector<std::string>& snapshots,
1929 std::map<std::string, bool>* out) {
1930 CHECK(lock);
1931
1932 auto source_slot_suffix = ReadUpdateSourceSlotSuffix();
1933 if (source_slot_suffix.empty()) {
1934 return false;
1935 }
1936 uint32_t source_slot = SlotNumberForSlotSuffix(source_slot_suffix);
1937 uint32_t target_slot = (source_slot == 0) ? 1 : 0;
1938
1939 // Attempt to detect re-flashing on each partition.
1940 // - If all partitions are re-flashed, we can proceed to cancel the whole update.
1941 // - If only some of the partitions are re-flashed, snapshots for re-flashed partitions are
1942 // deleted. Caller is responsible for merging the rest of the snapshots.
1943 // - If none of the partitions are re-flashed, caller is responsible for merging the snapshots.
1944 //
1945 // Note that we use target slot metadata, since if an OTA has been applied
1946 // to the target slot, we can detect the UPDATED flag. Any kind of flash
1947 // operation against dynamic partitions ensures that all copies of the
1948 // metadata are in sync, so flashing all partitions on the source slot will
1949 // remove the UPDATED flag on the target slot as well.
1950 const auto& opener = device_->GetPartitionOpener();
1951 auto super_device = device_->GetSuperDevice(target_slot);
1952 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, target_slot);
1953 if (!metadata) {
1954 return false;
1955 }
1956
1957 for (const auto& snapshot_name : snapshots) {
1958 if (GetMetadataPartitionState(*metadata, snapshot_name) ==
1959 MetadataPartitionState::Updated) {
1960 out->emplace(snapshot_name, false);
1961 } else {
1962 // Delete snapshots for partitions that are re-flashed after the update.
1963 LOG(WARNING) << "Detected re-flashing of partition " << snapshot_name << ".";
1964 out->emplace(snapshot_name, true);
1965 }
1966 }
1967 return true;
1968 }
1969
RemoveInvalidSnapshots(LockedFile * lock)1970 void SnapshotManager::RemoveInvalidSnapshots(LockedFile* lock) {
1971 std::vector<std::string> snapshots;
1972
1973 // Remove the stale snapshot metadata
1974 //
1975 // We make sure that all the three cases
1976 // are valid before removing the snapshot metadata:
1977 //
1978 // 1: dm state is active
1979 // 2: Root fs is not mounted off as a snapshot device
1980 // 3: Snapshot slot suffix should match current device slot
1981 if (!ListSnapshots(lock, &snapshots, device_->GetSlotSuffix()) || snapshots.empty()) {
1982 return;
1983 }
1984
1985 // We indeed have some invalid snapshots
1986 for (const auto& name : snapshots) {
1987 if (dm_.GetState(name) == DmDeviceState::ACTIVE && !IsSnapshotDevice(name)) {
1988 if (!DeleteSnapshot(lock, name)) {
1989 LOG(ERROR) << "Failed to delete invalid snapshot: " << name;
1990 } else {
1991 LOG(INFO) << "Invalid snapshot: " << name << " deleted";
1992 }
1993 }
1994 }
1995 }
1996
RemoveAllSnapshots(LockedFile * lock)1997 bool SnapshotManager::RemoveAllSnapshots(LockedFile* lock) {
1998 std::vector<std::string> snapshots;
1999 if (!ListSnapshots(lock, &snapshots)) {
2000 LOG(ERROR) << "Could not list snapshots";
2001 return false;
2002 }
2003
2004 std::map<std::string, bool> flashing_status;
2005 if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
2006 LOG(WARNING) << "Failed to get flashing status";
2007 }
2008
2009 auto current_slot = GetCurrentSlot();
2010 bool ok = true;
2011 bool has_mapped_cow_images = false;
2012 for (const auto& name : snapshots) {
2013 // If booting off source slot, it is okay to unmap and delete all the snapshots.
2014 // If boot indicator is missing, update state is None or Initiated, so
2015 // it is also okay to unmap and delete all the snapshots.
2016 // If booting off target slot,
2017 // - should not unmap because:
2018 // - In Android mode, snapshots are not mapped, but
2019 // filesystems are mounting off dm-linear targets directly.
2020 // - In recovery mode, assume nothing is mapped, so it is optional to unmap.
2021 // - If partition is flashed or unknown, it is okay to delete snapshots.
2022 // Otherwise (UPDATED flag), only delete snapshots if they are not mapped
2023 // as dm-snapshot (for example, after merge completes).
2024 bool should_unmap = current_slot != Slot::Target;
2025 bool should_delete = ShouldDeleteSnapshot(flashing_status, current_slot, name);
2026 if (should_unmap && android::base::EndsWith(name, device_->GetSlotSuffix())) {
2027 // Something very unexpected has happened - we want to unmap this
2028 // snapshot, but it's on the wrong slot. We can't unmap an active
2029 // partition. If this is not really a snapshot, skip the unmap
2030 // step.
2031 if (dm_.GetState(name) == DmDeviceState::INVALID || !IsSnapshotDevice(name)) {
2032 LOG(ERROR) << "Detected snapshot " << name << " on " << current_slot << " slot"
2033 << " for source partition; removing without unmap.";
2034 should_unmap = false;
2035 }
2036 }
2037
2038 bool partition_ok = true;
2039 if (should_unmap && !UnmapPartitionWithSnapshot(lock, name)) {
2040 partition_ok = false;
2041 }
2042 if (partition_ok && should_delete && !DeleteSnapshot(lock, name)) {
2043 partition_ok = false;
2044 }
2045
2046 if (!partition_ok) {
2047 // Remember whether or not we were able to unmap the cow image.
2048 auto cow_image_device = GetCowImageDeviceName(name);
2049 has_mapped_cow_images |=
2050 (EnsureImageManager() && images_->IsImageMapped(cow_image_device));
2051
2052 ok = false;
2053 }
2054 }
2055
2056 if (ok || !has_mapped_cow_images) {
2057 // Delete any image artifacts as a precaution, in case an update is
2058 // being cancelled due to some corrupted state in an lp_metadata file.
2059 // Note that we do not do this if some cow images are still mapped,
2060 // since we must not remove backing storage if it's in use.
2061 if (!EnsureImageManager() || !images_->RemoveAllImages()) {
2062 LOG(ERROR) << "Could not remove all snapshot artifacts";
2063 return false;
2064 }
2065 }
2066 return ok;
2067 }
2068
2069 // See comments in RemoveAllSnapshots().
ShouldDeleteSnapshot(const std::map<std::string,bool> & flashing_status,Slot current_slot,const std::string & name)2070 bool SnapshotManager::ShouldDeleteSnapshot(const std::map<std::string, bool>& flashing_status,
2071 Slot current_slot, const std::string& name) {
2072 if (current_slot != Slot::Target) {
2073 return true;
2074 }
2075 auto it = flashing_status.find(name);
2076 if (it == flashing_status.end()) {
2077 LOG(WARNING) << "Can't determine flashing status for " << name;
2078 return true;
2079 }
2080 if (it->second) {
2081 // partition flashed, okay to delete obsolete snapshots
2082 return true;
2083 }
2084 return !IsSnapshotDevice(name);
2085 }
2086
GetUpdateState(double * progress)2087 UpdateState SnapshotManager::GetUpdateState(double* progress) {
2088 // If we've never started an update, the state file won't exist.
2089 auto state_file = GetStateFilePath();
2090 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
2091 return UpdateState::None;
2092 }
2093
2094 auto lock = LockShared();
2095 if (!lock) {
2096 return UpdateState::None;
2097 }
2098
2099 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
2100 auto state = update_status.state();
2101 if (progress == nullptr) {
2102 return state;
2103 }
2104
2105 if (state == UpdateState::MergeCompleted) {
2106 *progress = 100.0;
2107 return state;
2108 }
2109
2110 *progress = 0.0;
2111 if (state != UpdateState::Merging) {
2112 return state;
2113 }
2114
2115 if (!UpdateUsesUserSnapshots(lock.get())) {
2116 // Sum all the snapshot states as if the system consists of a single huge
2117 // snapshots device, then compute the merge completion percentage of that
2118 // device.
2119 std::vector<std::string> snapshots;
2120 if (!ListSnapshots(lock.get(), &snapshots)) {
2121 LOG(ERROR) << "Could not list snapshots";
2122 return state;
2123 }
2124
2125 DmTargetSnapshot::Status fake_snapshots_status = {};
2126 for (const auto& snapshot : snapshots) {
2127 DmTargetSnapshot::Status current_status;
2128
2129 if (!IsSnapshotDevice(snapshot)) continue;
2130 if (!QuerySnapshotStatus(snapshot, nullptr, ¤t_status)) continue;
2131
2132 fake_snapshots_status.sectors_allocated += current_status.sectors_allocated;
2133 fake_snapshots_status.total_sectors += current_status.total_sectors;
2134 fake_snapshots_status.metadata_sectors += current_status.metadata_sectors;
2135 }
2136
2137 *progress = DmTargetSnapshot::MergePercent(fake_snapshots_status,
2138 update_status.sectors_allocated());
2139 } else {
2140 if (EnsureSnapuserdConnected()) {
2141 *progress = snapuserd_client_->GetMergePercent();
2142 }
2143 }
2144
2145 return state;
2146 }
2147
IsSnapshotWithoutSlotSwitch()2148 bool SnapshotManager::IsSnapshotWithoutSlotSwitch() {
2149 return (access(GetBootSnapshotsWithoutSlotSwitchPath().c_str(), F_OK) == 0);
2150 }
2151
UpdateUsesCompression()2152 bool SnapshotManager::UpdateUsesCompression() {
2153 auto lock = LockShared();
2154 if (!lock) return false;
2155 return UpdateUsesCompression(lock.get());
2156 }
2157
UpdateUsesCompression(LockedFile * lock)2158 bool SnapshotManager::UpdateUsesCompression(LockedFile* lock) {
2159 // This returns true even if compression is "none", since update_engine is
2160 // really just trying to see if snapuserd is in use.
2161 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2162 return update_status.using_snapuserd();
2163 }
2164
UpdateUsesIouring(LockedFile * lock)2165 bool SnapshotManager::UpdateUsesIouring(LockedFile* lock) {
2166 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2167 return update_status.io_uring_enabled();
2168 }
2169
UpdateUsesODirect(LockedFile * lock)2170 bool SnapshotManager::UpdateUsesODirect(LockedFile* lock) {
2171 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2172 return update_status.o_direct();
2173 }
2174
GetUpdateCowOpMergeSize(LockedFile * lock)2175 uint32_t SnapshotManager::GetUpdateCowOpMergeSize(LockedFile* lock) {
2176 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2177 return update_status.cow_op_merge_size();
2178 }
2179
GetUpdateWorkerCount(LockedFile * lock)2180 uint32_t SnapshotManager::GetUpdateWorkerCount(LockedFile* lock) {
2181 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2182 return update_status.num_worker_threads();
2183 }
2184
MarkSnapuserdFromSystem()2185 bool SnapshotManager::MarkSnapuserdFromSystem() {
2186 auto path = GetSnapuserdFromSystemPath();
2187
2188 if (!android::base::WriteStringToFile("1", path)) {
2189 PLOG(ERROR) << "Unable to write to vendor update path: " << path;
2190 return false;
2191 }
2192
2193 unique_fd fd(open(path.c_str(), O_PATH));
2194 if (fd < 0) {
2195 PLOG(ERROR) << "Failed to open file: " << path;
2196 return false;
2197 }
2198
2199 /*
2200 * This function is invoked by first stage init and hence we need to
2201 * explicitly set the correct selinux label for this file as update_engine
2202 * will try to remove this file later on once the snapshot merge is
2203 * complete.
2204 */
2205 if (fsetxattr(fd.get(), XATTR_NAME_SELINUX, kOtaFileContext, strlen(kOtaFileContext) + 1, 0) <
2206 0) {
2207 PLOG(ERROR) << "fsetxattr for the path: " << path << " failed";
2208 }
2209
2210 return true;
2211 }
2212
2213 /*
2214 * Please see b/304829384 for more details.
2215 *
2216 * In Android S, we use dm-snapshot for mounting snapshots and snapshot-merge
2217 * process. If the vendor partition continues to be on Android S, then
2218 * "snapuserd" binary in first stage ramdisk will be from vendor partition.
2219 * Thus, we need to maintain backward compatibility.
2220 *
2221 * Now, We take a two step approach to maintain the backward compatibility:
2222 *
2223 * 1: During OTA installation, we will continue to use "user-space" snapshots
2224 * for OTA installation as both update-engine and snapuserd binary will be from system partition.
2225 * However, during installation, we mark "legacy_snapuserd" in
2226 * SnapshotUpdateStatus file to mark that this is a path to support backward compatibility.
2227 * Thus, this function will return "false" during OTA installation.
2228 *
2229 * 2: Post OTA reboot, there are two key steps:
2230 * a: During first stage init, "init" and "snapuserd" could be from vendor
2231 * partition. This could be from Android S. Thus, the snapshot mount path
2232 * will be based off dm-snapshot.
2233 *
2234 * b: Post selinux transition, "init" and "update-engine" will be "system"
2235 * partition. Now, since the snapshots are mounted off dm-snapshot,
2236 * update-engine interaction with "snapuserd" should work based off
2237 * dm-snapshots.
2238 *
2239 * TL;DR: update-engine will use the "system" snapuserd for installing new
2240 * updates (this is safe as there is no "vendor" snapuserd running during
2241 * installation). Post reboot, update-engine will use the legacy path when
2242 * communicating with "vendor" snapuserd that was started in first-stage
2243 * init. Hence, this function checks:
2244 * i: Are we in post OTA reboot
2245 * ii: Is the Vendor from Android 12
2246 * iii: If both (i) and (ii) are true, then use the dm-snapshot based
2247 * approach.
2248 *
2249 * 3: Post OTA reboot, if the vendor partition was updated from Android 12 to
2250 * any other release post Android 12, then snapuserd binary will be "system"
2251 * partition as post Android 12, init_boot will contain a copy of snapuserd
2252 * binary. Thus, during first stage init, if init is able to communicate to
2253 * daemon, that gives us a signal that the binary is from "system" copy. Hence,
2254 * there is no need to fallback to legacy dm-snapshot. Thus, init will use a
2255 * marker in /metadata to signal that the snapuserd binary from first stage init
2256 * can handle userspace snapshots.
2257 *
2258 */
IsLegacySnapuserdPostReboot()2259 bool SnapshotManager::IsLegacySnapuserdPostReboot() {
2260 auto slot = GetCurrentSlot();
2261 if (slot == Slot::Target) {
2262 /*
2263 If this marker is present, the daemon can handle userspace snapshots.
2264 During post-OTA reboot, this implies that the vendor partition is
2265 Android 13 or higher. If the snapshots were created on an
2266 Android 12 vendor, this means the vendor partition has been updated.
2267 */
2268 if (access(GetSnapuserdFromSystemPath().c_str(), F_OK) == 0) {
2269 is_snapshot_userspace_ = true;
2270 return false;
2271 }
2272 // If the marker isn't present and if the vendor is still in Android 12
2273 if (is_legacy_snapuserd_.has_value() && is_legacy_snapuserd_.value() == true) {
2274 return true;
2275 }
2276 }
2277
2278 return false;
2279 }
2280
UpdateUsesUserSnapshots()2281 bool SnapshotManager::UpdateUsesUserSnapshots() {
2282 // This and the following function is constantly
2283 // invoked during snapshot merge. We want to avoid
2284 // constantly reading from disk. Hence, store this
2285 // value in memory.
2286 //
2287 // Furthermore, this value in the disk is set
2288 // only when OTA is applied and doesn't change
2289 // during merge phase. Hence, once we know that
2290 // the value is read from disk the very first time,
2291 // it is safe to read successive checks from memory.
2292
2293 if (is_snapshot_userspace_.has_value()) {
2294 // Check if legacy snapuserd is running post OTA reboot
2295 if (IsLegacySnapuserdPostReboot()) {
2296 return false;
2297 }
2298 return is_snapshot_userspace_.value();
2299 }
2300
2301 auto lock = LockShared();
2302 if (!lock) return false;
2303
2304 return UpdateUsesUserSnapshots(lock.get());
2305 }
2306
UpdateUsesUserSnapshots(LockedFile * lock)2307 bool SnapshotManager::UpdateUsesUserSnapshots(LockedFile* lock) {
2308 if (!is_snapshot_userspace_.has_value()) {
2309 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2310 is_snapshot_userspace_ = update_status.userspace_snapshots();
2311 is_legacy_snapuserd_ = update_status.legacy_snapuserd();
2312 }
2313
2314 if (IsLegacySnapuserdPostReboot()) {
2315 return false;
2316 }
2317
2318 return is_snapshot_userspace_.value();
2319 }
2320
ListSnapshots(LockedFile * lock,std::vector<std::string> * snapshots,const std::string & suffix)2321 bool SnapshotManager::ListSnapshots(LockedFile* lock, std::vector<std::string>* snapshots,
2322 const std::string& suffix) {
2323 CHECK(lock);
2324
2325 auto dir_path = metadata_dir_ + "/snapshots"s;
2326 std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(dir_path.c_str()), closedir);
2327 if (!dir) {
2328 PLOG(ERROR) << "opendir failed: " << dir_path;
2329 return false;
2330 }
2331
2332 struct dirent* dp;
2333 while ((dp = readdir(dir.get())) != nullptr) {
2334 if (dp->d_type != DT_REG) continue;
2335
2336 std::string name(dp->d_name);
2337 if (!suffix.empty() && !android::base::EndsWith(name, suffix)) {
2338 continue;
2339 }
2340
2341 // Insert system and product partition at the beginning so that
2342 // during snapshot-merge, these partitions are merged first.
2343 if (name == "system_a" || name == "system_b" || name == "product_a" ||
2344 name == "product_b") {
2345 snapshots->insert(snapshots->begin(), std::move(name));
2346 } else {
2347 snapshots->emplace_back(std::move(name));
2348 }
2349 }
2350
2351 return true;
2352 }
2353
IsSnapshotManagerNeeded()2354 bool SnapshotManager::IsSnapshotManagerNeeded() {
2355 if (access(kBootIndicatorPath, F_OK) == 0) {
2356 return true;
2357 }
2358
2359 if (IsScratchOtaMetadataOnSuper()) {
2360 return true;
2361 }
2362
2363 return false;
2364 }
2365
MapTempOtaMetadataPartitionIfNeeded(const std::function<bool (const std::string &)> & init)2366 bool SnapshotManager::MapTempOtaMetadataPartitionIfNeeded(
2367 const std::function<bool(const std::string&)>& init) {
2368 auto device = android::snapshot::GetScratchOtaMetadataPartition();
2369 if (!device.empty()) {
2370 init(device);
2371 if (android::snapshot::MapScratchOtaMetadataPartition(device).empty()) {
2372 return false;
2373 }
2374 }
2375 return true;
2376 }
2377
GetGlobalRollbackIndicatorPath()2378 std::string SnapshotManager::GetGlobalRollbackIndicatorPath() {
2379 return kRollbackIndicatorPath;
2380 }
2381
NeedSnapshotsInFirstStageMount()2382 bool SnapshotManager::NeedSnapshotsInFirstStageMount() {
2383 if (IsSnapshotWithoutSlotSwitch()) {
2384 if (GetCurrentSlot() != Slot::Source) {
2385 LOG(ERROR) << "Snapshots marked to boot without slot switch; but slot is wrong";
2386 return false;
2387 }
2388 return true;
2389 }
2390 // If we fail to read, we'll wind up using CreateLogicalPartitions, which
2391 // will create devices that look like the old slot, except with extra
2392 // content at the end of each device. This will confuse dm-verity, and
2393 // ultimately we'll fail to boot. Why not make it a fatal error and have
2394 // the reason be clearer? Because the indicator file still exists, and
2395 // if this was FATAL, reverting to the old slot would be broken.
2396 auto slot = GetCurrentSlot();
2397
2398 if (slot != Slot::Target) {
2399 if (slot == Slot::Source) {
2400 // Device is rebooting into the original slot, so mark this as a
2401 // rollback.
2402 auto path = GetRollbackIndicatorPath();
2403 if (!android::base::WriteStringToFile("1", path)) {
2404 PLOG(ERROR) << "Unable to write rollback indicator: " << path;
2405 } else {
2406 LOG(INFO) << "Rollback detected, writing rollback indicator to " << path;
2407 if (device_->IsTempMetadata()) {
2408 CleanupScratchOtaMetadataIfPresent();
2409 }
2410 }
2411 }
2412 LOG(INFO) << "Not booting from new slot. Will not mount snapshots.";
2413 return false;
2414 }
2415
2416 // If we can't read the update state, it's unlikely anything else will
2417 // succeed, so this is a fatal error. We'll eventually exhaust boot
2418 // attempts and revert to the old slot.
2419 auto lock = LockShared();
2420 if (!lock) {
2421 LOG(FATAL) << "Could not read update state to determine snapshot status";
2422 return false;
2423 }
2424 switch (ReadUpdateState(lock.get())) {
2425 case UpdateState::Unverified:
2426 case UpdateState::Merging:
2427 case UpdateState::MergeFailed:
2428 return true;
2429 default:
2430 return false;
2431 }
2432 }
2433
CreateLogicalAndSnapshotPartitions(const std::string & super_device,const std::chrono::milliseconds & timeout_ms)2434 bool SnapshotManager::CreateLogicalAndSnapshotPartitions(
2435 const std::string& super_device, const std::chrono::milliseconds& timeout_ms) {
2436 LOG(INFO) << "Creating logical partitions with snapshots as needed";
2437
2438 auto lock = LockExclusive();
2439 if (!lock) return false;
2440
2441 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2442 return MapAllPartitions(lock.get(), super_device, slot, timeout_ms);
2443 }
2444
MapAllPartitions(LockedFile * lock,const std::string & super_device,uint32_t slot,const std::chrono::milliseconds & timeout_ms)2445 bool SnapshotManager::MapAllPartitions(LockedFile* lock, const std::string& super_device,
2446 uint32_t slot, const std::chrono::milliseconds& timeout_ms) {
2447 const auto& opener = device_->GetPartitionOpener();
2448 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
2449 if (!metadata) {
2450 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
2451 return false;
2452 }
2453
2454 if (!EnsureImageManager()) {
2455 return false;
2456 }
2457
2458 for (const auto& partition : metadata->partitions) {
2459 if (GetPartitionGroupName(metadata->groups[partition.group_index]) == kCowGroupName) {
2460 LOG(INFO) << "Skip mapping partition " << GetPartitionName(partition) << " in group "
2461 << kCowGroupName;
2462 continue;
2463 }
2464
2465 if (GetPartitionName(partition) ==
2466 android::base::Basename(android::snapshot::kOtaMetadataMount)) {
2467 LOG(INFO) << "Partition: " << GetPartitionName(partition) << " skipping";
2468 continue;
2469 }
2470
2471 CreateLogicalPartitionParams params = {
2472 .block_device = super_device,
2473 .metadata = metadata.get(),
2474 .partition = &partition,
2475 .timeout_ms = timeout_ms,
2476 .partition_opener = &opener,
2477 };
2478 if (!MapPartitionWithSnapshot(lock, std::move(params), SnapshotContext::Mount, nullptr)) {
2479 return false;
2480 }
2481 }
2482
2483 LOG(INFO) << "Created logical partitions with snapshot.";
2484 return true;
2485 }
2486
GetRemainingTime(const std::chrono::milliseconds & timeout,const std::chrono::time_point<std::chrono::steady_clock> & begin)2487 static std::chrono::milliseconds GetRemainingTime(
2488 const std::chrono::milliseconds& timeout,
2489 const std::chrono::time_point<std::chrono::steady_clock>& begin) {
2490 // If no timeout is specified, execute all commands without specifying any timeout.
2491 if (timeout.count() == 0) return std::chrono::milliseconds(0);
2492 auto passed_time = std::chrono::steady_clock::now() - begin;
2493 auto remaining_time = timeout - duration_cast<std::chrono::milliseconds>(passed_time);
2494 if (remaining_time.count() <= 0) {
2495 LOG(ERROR) << "MapPartitionWithSnapshot has reached timeout " << timeout.count() << "ms ("
2496 << remaining_time.count() << "ms remaining)";
2497 // Return min() instead of remaining_time here because 0 is treated as a special value for
2498 // no timeout, where the rest of the commands will still be executed.
2499 return std::chrono::milliseconds::min();
2500 }
2501 return remaining_time;
2502 }
2503
MapPartitionWithSnapshot(LockedFile * lock,CreateLogicalPartitionParams params,SnapshotContext context,SnapshotPaths * paths)2504 bool SnapshotManager::MapPartitionWithSnapshot(LockedFile* lock,
2505 CreateLogicalPartitionParams params,
2506 SnapshotContext context, SnapshotPaths* paths) {
2507 auto begin = std::chrono::steady_clock::now();
2508
2509 CHECK(lock);
2510
2511 if (params.GetPartitionName() != params.GetDeviceName()) {
2512 LOG(ERROR) << "Mapping snapshot with a different name is unsupported: partition_name = "
2513 << params.GetPartitionName() << ", device_name = " << params.GetDeviceName();
2514 return false;
2515 }
2516
2517 // Fill out fields in CreateLogicalPartitionParams so that we have more information (e.g. by
2518 // reading super partition metadata).
2519 CreateLogicalPartitionParams::OwnedData params_owned_data;
2520 if (!params.InitDefaults(¶ms_owned_data)) {
2521 return false;
2522 }
2523
2524 if (!params.partition->num_extents) {
2525 LOG(INFO) << "Skipping zero-length logical partition: " << params.GetPartitionName();
2526 return true; // leave path empty to indicate that nothing is mapped.
2527 }
2528
2529 // Determine if there is a live snapshot for the SnapshotStatus of the partition; i.e. if the
2530 // partition still has a snapshot that needs to be mapped. If no live snapshot or merge
2531 // completed, live_snapshot_status is set to nullopt.
2532 std::optional<SnapshotStatus> live_snapshot_status;
2533 do {
2534 if (!IsSnapshotWithoutSlotSwitch() &&
2535 !(params.partition->attributes & LP_PARTITION_ATTR_UPDATED)) {
2536 LOG(INFO) << "Detected re-flashing of partition, will skip snapshot: "
2537 << params.GetPartitionName();
2538 break;
2539 }
2540 auto file_path = GetSnapshotStatusFilePath(params.GetPartitionName());
2541 if (access(file_path.c_str(), F_OK) != 0) {
2542 if (errno != ENOENT) {
2543 PLOG(INFO) << "Can't map snapshot for " << params.GetPartitionName()
2544 << ": Can't access " << file_path;
2545 return false;
2546 }
2547 break;
2548 }
2549 live_snapshot_status = std::make_optional<SnapshotStatus>();
2550 if (!ReadSnapshotStatus(lock, params.GetPartitionName(), &*live_snapshot_status)) {
2551 return false;
2552 }
2553 // No live snapshot if merge is completed.
2554 if (live_snapshot_status->state() == SnapshotState::MERGE_COMPLETED) {
2555 live_snapshot_status.reset();
2556 }
2557
2558 if (live_snapshot_status->state() == SnapshotState::NONE ||
2559 live_snapshot_status->cow_partition_size() + live_snapshot_status->cow_file_size() ==
2560 0) {
2561 LOG(WARNING) << "Snapshot status for " << params.GetPartitionName()
2562 << " is invalid, ignoring: state = "
2563 << SnapshotState_Name(live_snapshot_status->state())
2564 << ", cow_partition_size = " << live_snapshot_status->cow_partition_size()
2565 << ", cow_file_size = " << live_snapshot_status->cow_file_size();
2566 live_snapshot_status.reset();
2567 }
2568 } while (0);
2569
2570 if (live_snapshot_status.has_value()) {
2571 // dm-snapshot requires the base device to be writable.
2572 params.force_writable = true;
2573 // Map the base device with a different name to avoid collision.
2574 params.device_name = GetBaseDeviceName(params.GetPartitionName());
2575 }
2576
2577 AutoDeviceList created_devices;
2578
2579 // Create the base device for the snapshot, or if there is no snapshot, the
2580 // device itself. This device consists of the real blocks in the super
2581 // partition that this logical partition occupies.
2582 std::string base_path;
2583 if (!CreateLogicalPartition(params, &base_path)) {
2584 LOG(ERROR) << "Could not create logical partition " << params.GetPartitionName()
2585 << " as device " << params.GetDeviceName();
2586 return false;
2587 }
2588 created_devices.EmplaceBack<AutoUnmapDevice>(&dm_, params.GetDeviceName());
2589
2590 if (paths) {
2591 paths->target_device = base_path;
2592 }
2593
2594 auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2595 if (remaining_time.count() < 0) {
2596 return false;
2597 }
2598
2599 // Wait for the base device to appear
2600 if (!WaitForDevice(base_path, remaining_time)) {
2601 return false;
2602 }
2603
2604 if (!live_snapshot_status.has_value()) {
2605 created_devices.Release();
2606 return true;
2607 }
2608
2609 // We don't have ueventd in first-stage init, so use device major:minor
2610 // strings instead.
2611 std::string base_device;
2612 if (!dm_.GetDeviceString(params.GetDeviceName(), &base_device)) {
2613 LOG(ERROR) << "Could not determine major/minor for: " << params.GetDeviceName();
2614 return false;
2615 }
2616
2617 remaining_time = GetRemainingTime(params.timeout_ms, begin);
2618 if (remaining_time.count() < 0) return false;
2619
2620 std::string cow_name;
2621 CreateLogicalPartitionParams cow_params = params;
2622 cow_params.timeout_ms = remaining_time;
2623 if (!MapCowDevices(lock, cow_params, *live_snapshot_status, &created_devices, &cow_name)) {
2624 return false;
2625 }
2626 std::string cow_device;
2627 if (!GetMappedImageDeviceStringOrPath(cow_name, &cow_device)) {
2628 LOG(ERROR) << "Could not determine major/minor for: " << cow_name;
2629 return false;
2630 }
2631 if (paths) {
2632 paths->cow_device_name = cow_name;
2633 }
2634
2635 remaining_time = GetRemainingTime(params.timeout_ms, begin);
2636 if (remaining_time.count() < 0) return false;
2637
2638 if (context == SnapshotContext::Update && live_snapshot_status->using_snapuserd()) {
2639 // Stop here, we can't run dm-user yet, the COW isn't built.
2640 created_devices.Release();
2641 return true;
2642 }
2643
2644 if (live_snapshot_status->using_snapuserd()) {
2645 // Get the source device (eg the view of the partition from before it was resized).
2646 std::string source_device_path;
2647 if (live_snapshot_status->old_partition_size() > 0) {
2648 if (!MapSourceDevice(lock, params.GetPartitionName(), remaining_time,
2649 &source_device_path)) {
2650 LOG(ERROR) << "Could not map source device for: " << cow_name;
2651 return false;
2652 }
2653
2654 auto source_device = GetSourceDeviceName(params.GetPartitionName());
2655 created_devices.EmplaceBack<AutoUnmapDevice>(&dm_, source_device);
2656 } else {
2657 source_device_path = base_path;
2658 }
2659
2660 if (!WaitForDevice(source_device_path, remaining_time)) {
2661 return false;
2662 }
2663
2664 std::string cow_path;
2665 if (!GetMappedImageDevicePath(cow_name, &cow_path)) {
2666 LOG(ERROR) << "Could not determine path for: " << cow_name;
2667 return false;
2668 }
2669 if (!WaitForDevice(cow_path, remaining_time)) {
2670 return false;
2671 }
2672
2673 auto name = GetDmUserCowName(params.GetPartitionName(), GetSnapshotDriver(lock));
2674
2675 std::string new_cow_device;
2676 if (!MapDmUserCow(lock, name, cow_path, source_device_path, base_path, remaining_time,
2677 &new_cow_device)) {
2678 LOG(ERROR) << "Could not map dm-user device for partition "
2679 << params.GetPartitionName();
2680 return false;
2681 }
2682 created_devices.EmplaceBack<AutoUnmapDevice>(&dm_, name);
2683
2684 cow_device = new_cow_device;
2685 }
2686
2687 // For userspace snapshots, dm-user block device itself will act as a
2688 // snapshot device. There is one subtle difference - MapSnapshot will create
2689 // either snapshot target or snapshot-merge target based on the underlying
2690 // state of the snapshot device. If snapshot-merge target is created, merge
2691 // will immediately start in the kernel.
2692 //
2693 // This is no longer true with respect to userspace snapshots. When dm-user
2694 // block device is created, we just have the snapshots ready but daemon in
2695 // the user-space will not start the merge. We have to explicitly inform the
2696 // daemon to resume the merge. Check ProcessUpdateState() call stack.
2697 if (!UpdateUsesUserSnapshots(lock)) {
2698 remaining_time = GetRemainingTime(params.timeout_ms, begin);
2699 if (remaining_time.count() < 0) return false;
2700
2701 std::string path;
2702 if (!MapSnapshot(lock, params.GetPartitionName(), base_device, cow_device, remaining_time,
2703 &path)) {
2704 LOG(ERROR) << "Could not map snapshot for partition: " << params.GetPartitionName();
2705 return false;
2706 }
2707 // No need to add params.GetPartitionName() to created_devices since it is immediately
2708 // released.
2709
2710 if (paths) {
2711 paths->snapshot_device = path;
2712 }
2713 LOG(INFO) << "Mapped " << params.GetPartitionName() << " as snapshot device at " << path;
2714 } else {
2715 LOG(INFO) << "Mapped " << params.GetPartitionName() << " as snapshot device at "
2716 << cow_device;
2717 }
2718
2719 created_devices.Release();
2720
2721 return true;
2722 }
2723
UnmapPartitionWithSnapshot(LockedFile * lock,const std::string & target_partition_name)2724 bool SnapshotManager::UnmapPartitionWithSnapshot(LockedFile* lock,
2725 const std::string& target_partition_name) {
2726 CHECK(lock);
2727
2728 if (!UnmapSnapshot(lock, target_partition_name)) {
2729 return false;
2730 }
2731
2732 if (!UnmapCowDevices(lock, target_partition_name)) {
2733 return false;
2734 }
2735
2736 auto base_name = GetBaseDeviceName(target_partition_name);
2737 if (!DeleteDeviceIfExists(base_name)) {
2738 LOG(ERROR) << "Cannot delete base device: " << base_name;
2739 return false;
2740 }
2741
2742 auto source_name = GetSourceDeviceName(target_partition_name);
2743 if (!DeleteDeviceIfExists(source_name)) {
2744 LOG(ERROR) << "Cannot delete source device: " << source_name;
2745 return false;
2746 }
2747
2748 LOG(INFO) << "Successfully unmapped snapshot " << target_partition_name;
2749
2750 return true;
2751 }
2752
// Maps the copy-on-write storage for the partition named by |params|.
//
// The COW storage can have two parts, per |snapshot_status|:
//   - a COW image (cow_file_size() > 0), mapped through MapCowImage(), and
//   - a COW partition (cow_partition_size() > 0), mapped as a device-mapper device.
// At least one of the two must be present (enforced with CHECK).
//
// On success |*cow_name| holds the name of the device to use as the COW device: the image
// device name when only an image exists, otherwise the name returned by GetCowName().
// Every device mapped here is appended to |created_devices|.
//
// The time budget is params.timeout_ms measured from entry to this function.
bool SnapshotManager::MapCowDevices(LockedFile* lock, const CreateLogicalPartitionParams& params,
                                    const SnapshotStatus& snapshot_status,
                                    AutoDeviceList* created_devices, std::string* cow_name) {
    CHECK(lock);
    CHECK(snapshot_status.cow_partition_size() + snapshot_status.cow_file_size() > 0);
    auto begin = std::chrono::steady_clock::now();

    std::string partition_name = params.GetPartitionName();
    std::string cow_image_name = GetCowImageDeviceName(partition_name);
    *cow_name = GetCowName(partition_name);

    // Map COW image if necessary.
    if (snapshot_status.cow_file_size() > 0) {
        if (!EnsureImageManager()) return false;
        auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
        if (remaining_time.count() < 0) return false;

        if (!MapCowImage(partition_name, remaining_time).has_value()) {
            LOG(ERROR) << "Could not map cow image for partition: " << partition_name;
            return false;
        }
        created_devices->EmplaceBack<AutoUnmapImage>(images_.get(), cow_image_name);

        // If no COW partition exists, just return the image alone.
        if (snapshot_status.cow_partition_size() == 0) {
            *cow_name = std::move(cow_image_name);
            LOG(INFO) << "Mapped COW image for " << partition_name << " at " << *cow_name;
            return true;
        }
    }

    // Re-evaluate the budget: mapping the image above may have consumed part of it.
    auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
    if (remaining_time.count() < 0) return false;

    // Guaranteed by the CHECK at the top combined with the early return above.
    CHECK(snapshot_status.cow_partition_size() > 0);

    // Create the DmTable for the COW device. It is the DmTable of the COW partition plus
    // COW image device as the last extent.
    CreateLogicalPartitionParams cow_partition_params = params;
    // Identify the COW partition by name only; clear the fields copied from |params|.
    cow_partition_params.partition = nullptr;
    cow_partition_params.partition_name = *cow_name;
    cow_partition_params.device_name.clear();
    DmTable table;
    if (!CreateDmTable(cow_partition_params, &table)) {
        return false;
    }
    // If the COW image exists, append it as the last extent.
    if (snapshot_status.cow_file_size() > 0) {
        std::string cow_image_device;
        if (!GetMappedImageDeviceStringOrPath(cow_image_name, &cow_image_device)) {
            LOG(ERROR) << "Cannot determine major/minor for: " << cow_image_name;
            return false;
        }
        // Sizes are converted from bytes to sectors. The image extent starts right after
        // the COW partition's sectors.
        auto cow_partition_sectors = snapshot_status.cow_partition_size() / kSectorSize;
        auto cow_image_sectors = snapshot_status.cow_file_size() / kSectorSize;
        table.Emplace<DmTargetLinear>(cow_partition_sectors, cow_image_sectors, cow_image_device,
                                      0);
    }

    // We have created the DmTable now. Map it.
    std::string cow_path;
    if (!dm_.CreateDevice(*cow_name, table, &cow_path, remaining_time)) {
        LOG(ERROR) << "Could not create COW device: " << *cow_name;
        return false;
    }
    created_devices->EmplaceBack<AutoUnmapDevice>(&dm_, *cow_name);
    LOG(INFO) << "Mapped COW device for " << params.GetPartitionName() << " at " << cow_path;
    return true;
}
2822
UnmapCowDevices(LockedFile * lock,const std::string & name)2823 bool SnapshotManager::UnmapCowDevices(LockedFile* lock, const std::string& name) {
2824 CHECK(lock);
2825 if (!EnsureImageManager()) return false;
2826
2827 if (UpdateUsesCompression(lock) && !UpdateUsesUserSnapshots(lock)) {
2828 auto dm_user_name = GetDmUserCowName(name, GetSnapshotDriver(lock));
2829 if (!UnmapDmUserDevice(dm_user_name)) {
2830 return false;
2831 }
2832 }
2833
2834 if (!DeleteDeviceIfExists(GetCowName(name), 4000ms)) {
2835 LOG(ERROR) << "Cannot unmap: " << GetCowName(name);
2836 return false;
2837 }
2838
2839 std::string cow_image_name = GetCowImageDeviceName(name);
2840 if (!images_->UnmapImageIfExists(cow_image_name)) {
2841 LOG(ERROR) << "Cannot unmap image " << cow_image_name;
2842 return false;
2843 }
2844 return true;
2845 }
2846
UnmapDmUserDevice(const std::string & dm_user_name)2847 bool SnapshotManager::UnmapDmUserDevice(const std::string& dm_user_name) {
2848 if (dm_.GetState(dm_user_name) == DmDeviceState::INVALID) {
2849 return true;
2850 }
2851
2852 if (!DeleteDeviceIfExists(dm_user_name)) {
2853 LOG(ERROR) << "Cannot unmap " << dm_user_name;
2854 return false;
2855 }
2856
2857 if (EnsureSnapuserdConnected()) {
2858 if (!snapuserd_client_->WaitForDeviceDelete(dm_user_name)) {
2859 LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete";
2860 return false;
2861 }
2862 }
2863
2864 // Ensure the control device is gone so we don't run into ABA problems.
2865 auto control_device = "/dev/dm-user/" + dm_user_name;
2866 if (!android::fs_mgr::WaitForFileDeleted(control_device, 10s)) {
2867 LOG(ERROR) << "Timed out waiting for " << control_device << " to unlink";
2868 return false;
2869 }
2870 return true;
2871 }
2872
UnmapUserspaceSnapshotDevice(LockedFile * lock,const std::string & snapshot_name)2873 bool SnapshotManager::UnmapUserspaceSnapshotDevice(LockedFile* lock,
2874 const std::string& snapshot_name) {
2875 auto dm_user_name = GetDmUserCowName(snapshot_name, GetSnapshotDriver(lock));
2876 if (dm_.GetState(dm_user_name) == DmDeviceState::INVALID) {
2877 return true;
2878 }
2879
2880 CHECK(lock);
2881
2882 SnapshotStatus snapshot_status;
2883
2884 if (!ReadSnapshotStatus(lock, snapshot_name, &snapshot_status)) {
2885 return false;
2886 }
2887 // If the merge is complete, then we switch dm tables which is equivalent
2888 // to unmap; hence, we can't be deleting the device
2889 // as the table would be mounted off partitions and will fail.
2890 if (snapshot_status.state() != SnapshotState::MERGE_COMPLETED) {
2891 if (!DeleteDeviceIfExists(dm_user_name, 4000ms)) {
2892 LOG(ERROR) << "Cannot unmap " << dm_user_name;
2893 return false;
2894 }
2895 }
2896
2897 if (EnsureSnapuserdConnected()) {
2898 if (!snapuserd_client_->WaitForDeviceDelete(dm_user_name)) {
2899 LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete";
2900 return false;
2901 }
2902 }
2903
2904 // Ensure the control device is gone so we don't run into ABA problems.
2905 auto control_device = "/dev/dm-user/" + dm_user_name;
2906 if (!android::fs_mgr::WaitForFileDeleted(control_device, 10s)) {
2907 LOG(ERROR) << "Timed out waiting for " << control_device << " to unlink";
2908 return false;
2909 }
2910 return true;
2911 }
2912
MapAllSnapshots(const std::chrono::milliseconds & timeout_ms)2913 bool SnapshotManager::MapAllSnapshots(const std::chrono::milliseconds& timeout_ms) {
2914 auto lock = LockExclusive();
2915 if (!lock) return false;
2916
2917 auto state = ReadUpdateState(lock.get());
2918 if (state == UpdateState::Unverified) {
2919 if (GetCurrentSlot() == Slot::Target) {
2920 LOG(ERROR) << "Cannot call MapAllSnapshots when booting from the target slot.";
2921 return false;
2922 }
2923 } else if (state != UpdateState::Initiated) {
2924 LOG(ERROR) << "Cannot call MapAllSnapshots from update state: " << state;
2925 return false;
2926 }
2927
2928 std::vector<std::string> snapshots;
2929 if (!ListSnapshots(lock.get(), &snapshots)) {
2930 return false;
2931 }
2932
2933 const auto& opener = device_->GetPartitionOpener();
2934 auto slot_suffix = device_->GetOtherSlotSuffix();
2935 auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
2936 auto super_device = device_->GetSuperDevice(slot_number);
2937 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot_number);
2938 if (!metadata) {
2939 LOG(ERROR) << "MapAllSnapshots could not read dynamic partition metadata for device: "
2940 << super_device;
2941 return false;
2942 }
2943
2944 for (const auto& snapshot : snapshots) {
2945 if (!UnmapPartitionWithSnapshot(lock.get(), snapshot)) {
2946 LOG(ERROR) << "MapAllSnapshots could not unmap snapshot: " << snapshot;
2947 return false;
2948 }
2949
2950 CreateLogicalPartitionParams params = {
2951 .block_device = super_device,
2952 .metadata = metadata.get(),
2953 .partition_name = snapshot,
2954 .timeout_ms = timeout_ms,
2955 .partition_opener = &opener,
2956 };
2957 if (!MapPartitionWithSnapshot(lock.get(), std::move(params), SnapshotContext::Mount,
2958 nullptr)) {
2959 LOG(ERROR) << "MapAllSnapshots failed to map: " << snapshot;
2960 return false;
2961 }
2962 }
2963
2964 LOG(INFO) << "MapAllSnapshots succeeded.";
2965 return true;
2966 }
2967
UnmapAllSnapshots()2968 bool SnapshotManager::UnmapAllSnapshots() {
2969 auto lock = LockExclusive();
2970 if (!lock) return false;
2971
2972 return UnmapAllSnapshots(lock.get());
2973 }
2974
UnmapAllSnapshots(LockedFile * lock)2975 bool SnapshotManager::UnmapAllSnapshots(LockedFile* lock) {
2976 LOG(INFO) << "Lock acquired for " << __FUNCTION__;
2977 std::vector<std::string> snapshots;
2978 if (!ListSnapshots(lock, &snapshots)) {
2979 return false;
2980 }
2981 LOG(INFO) << "Found " << snapshots.size() << " partitions with snapshots";
2982
2983 for (const auto& snapshot : snapshots) {
2984 if (!UnmapPartitionWithSnapshot(lock, snapshot)) {
2985 LOG(ERROR) << "Failed to unmap snapshot: " << snapshot;
2986 return false;
2987 }
2988 }
2989 LOG(INFO) << "Unmapped " << snapshots.size() << " partitions with snapshots";
2990
2991 // Terminate the daemon and release the snapuserd_client_ object.
2992 // If we need to re-connect with the daemon, EnsureSnapuserdConnected()
2993 // will re-create the object and establish the socket connection.
2994 if (snapuserd_client_) {
2995 LOG(INFO) << "Shutdown snapuserd daemon";
2996 snapuserd_client_->DetachSnapuserd();
2997 snapuserd_client_ = nullptr;
2998 }
2999
3000 return true;
3001 }
3002
OpenFile(const std::string & file,int lock_flags)3003 auto SnapshotManager::OpenFile(const std::string& file,
3004 int lock_flags) -> std::unique_ptr<LockedFile> {
3005 const auto start = std::chrono::system_clock::now();
3006 unique_fd fd(open(file.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
3007 if (fd < 0) {
3008 PLOG(ERROR) << "Open failed: " << file;
3009 return nullptr;
3010 }
3011 if (lock_flags != 0 && TEMP_FAILURE_RETRY(flock(fd, lock_flags)) < 0) {
3012 PLOG(ERROR) << "Acquire flock failed: " << file;
3013 return nullptr;
3014 }
3015 // For simplicity, we want to CHECK that lock_mode == LOCK_EX, in some
3016 // calls, so strip extra flags.
3017 int lock_mode = lock_flags & (LOCK_EX | LOCK_SH);
3018 const auto end = std::chrono::system_clock::now();
3019 const auto duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
3020 if (duration_ms >= 1000ms) {
3021 LOG(INFO) << "Taking lock on " << file << " took " << duration_ms.count() << "ms";
3022 }
3023 return std::make_unique<LockedFile>(file, std::move(fd), lock_mode);
3024 }
3025
~LockedFile()3026 SnapshotManager::LockedFile::~LockedFile() {
3027 if (TEMP_FAILURE_RETRY(flock(fd_, LOCK_UN)) < 0) {
3028 PLOG(ERROR) << "Failed to unlock file: " << path_;
3029 }
3030 }
3031
GetStateFilePath() const3032 std::string SnapshotManager::GetStateFilePath() const {
3033 return metadata_dir_ + "/state"s;
3034 }
3035
GetMergeStateFilePath() const3036 std::string SnapshotManager::GetMergeStateFilePath() const {
3037 return metadata_dir_ + "/merge_state"s;
3038 }
3039
GetLockPath() const3040 std::string SnapshotManager::GetLockPath() const {
3041 return metadata_dir_;
3042 }
3043
OpenLock(int lock_flags)3044 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::OpenLock(int lock_flags) {
3045 auto lock_file = GetLockPath();
3046 return OpenFile(lock_file, lock_flags);
3047 }
3048
LockShared()3049 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockShared() {
3050 return OpenLock(LOCK_SH);
3051 }
3052
LockExclusive()3053 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockExclusive() {
3054 return OpenLock(LOCK_EX);
3055 }
3056
UpdateStateFromString(const std::string & contents)3057 static UpdateState UpdateStateFromString(const std::string& contents) {
3058 if (contents.empty() || contents == "none") {
3059 return UpdateState::None;
3060 } else if (contents == "initiated") {
3061 return UpdateState::Initiated;
3062 } else if (contents == "unverified") {
3063 return UpdateState::Unverified;
3064 } else if (contents == "merging") {
3065 return UpdateState::Merging;
3066 } else if (contents == "merge-completed") {
3067 return UpdateState::MergeCompleted;
3068 } else if (contents == "merge-needs-reboot") {
3069 return UpdateState::MergeNeedsReboot;
3070 } else if (contents == "merge-failed") {
3071 return UpdateState::MergeFailed;
3072 } else if (contents == "cancelled") {
3073 return UpdateState::Cancelled;
3074 } else {
3075 LOG(ERROR) << "Unknown merge state in update state file: \"" << contents << "\"";
3076 return UpdateState::None;
3077 }
3078 }
3079
operator <<(std::ostream & os,UpdateState state)3080 std::ostream& operator<<(std::ostream& os, UpdateState state) {
3081 switch (state) {
3082 case UpdateState::None:
3083 return os << "none";
3084 case UpdateState::Initiated:
3085 return os << "initiated";
3086 case UpdateState::Unverified:
3087 return os << "unverified";
3088 case UpdateState::Merging:
3089 return os << "merging";
3090 case UpdateState::MergeCompleted:
3091 return os << "merge-completed";
3092 case UpdateState::MergeNeedsReboot:
3093 return os << "merge-needs-reboot";
3094 case UpdateState::MergeFailed:
3095 return os << "merge-failed";
3096 case UpdateState::Cancelled:
3097 return os << "cancelled";
3098 default:
3099 LOG(ERROR) << "Unknown update state: " << static_cast<uint32_t>(state);
3100 return os;
3101 }
3102 }
3103
operator <<(std::ostream & os,MergePhase phase)3104 std::ostream& operator<<(std::ostream& os, MergePhase phase) {
3105 switch (phase) {
3106 case MergePhase::NO_MERGE:
3107 return os << "none";
3108 case MergePhase::FIRST_PHASE:
3109 return os << "first";
3110 case MergePhase::SECOND_PHASE:
3111 return os << "second";
3112 default:
3113 LOG(ERROR) << "Unknown merge phase: " << static_cast<uint32_t>(phase);
3114 return os << "unknown(" << static_cast<uint32_t>(phase) << ")";
3115 }
3116 }
3117
ReadUpdateState(LockedFile * lock)3118 UpdateState SnapshotManager::ReadUpdateState(LockedFile* lock) {
3119 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock);
3120 return status.state();
3121 }
3122
ReadSnapshotUpdateStatus(LockedFile * lock)3123 SnapshotUpdateStatus SnapshotManager::ReadSnapshotUpdateStatus(LockedFile* lock) {
3124 CHECK(lock);
3125
3126 SnapshotUpdateStatus status = {};
3127 std::string contents;
3128 if (!android::base::ReadFileToString(GetStateFilePath(), &contents)) {
3129 PLOG(ERROR) << "Read state file failed";
3130 status.set_state(UpdateState::None);
3131 return status;
3132 }
3133
3134 if (!status.ParseFromString(contents)) {
3135 LOG(WARNING) << "Unable to parse state file as SnapshotUpdateStatus, using the old format";
3136
3137 // Try to rollback to legacy file to support devices that are
3138 // currently using the old file format.
3139 // TODO(b/147409432)
3140 status.set_state(UpdateStateFromString(contents));
3141 }
3142
3143 return status;
3144 }
3145
WriteUpdateState(LockedFile * lock,UpdateState state,MergeFailureCode failure_code)3146 bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state,
3147 MergeFailureCode failure_code) {
3148 SnapshotUpdateStatus status;
3149 status.set_state(state);
3150
3151 switch (state) {
3152 case UpdateState::MergeFailed:
3153 status.set_merge_failure_code(failure_code);
3154 break;
3155 case UpdateState::Initiated:
3156 status.set_source_build_fingerprint(
3157 android::base::GetProperty("ro.build.fingerprint", ""));
3158 break;
3159 default:
3160 break;
3161 }
3162
3163 // If we're transitioning between two valid states (eg, we're not beginning
3164 // or ending an OTA), then make sure to propagate the compression bit and
3165 // build fingerprint.
3166 if (!(state == UpdateState::Initiated || state == UpdateState::None)) {
3167 SnapshotUpdateStatus old_status = ReadSnapshotUpdateStatus(lock);
3168 status.set_using_snapuserd(old_status.using_snapuserd());
3169 status.set_source_build_fingerprint(old_status.source_build_fingerprint());
3170 status.set_merge_phase(old_status.merge_phase());
3171 status.set_userspace_snapshots(old_status.userspace_snapshots());
3172 status.set_io_uring_enabled(old_status.io_uring_enabled());
3173 status.set_legacy_snapuserd(old_status.legacy_snapuserd());
3174 status.set_o_direct(old_status.o_direct());
3175 status.set_cow_op_merge_size(old_status.cow_op_merge_size());
3176 status.set_num_worker_threads(old_status.num_worker_threads());
3177 }
3178 return WriteSnapshotUpdateStatus(lock, status);
3179 }
3180
// Serializes |status| and atomically writes it to the state file (GetStateFilePath()).
// |lock| must be non-null and held in LOCK_EX mode (both enforced with CHECK).
//
// When built with LIBSNAPSHOT_USE_HAL, the boot control merge status is updated as well.
// Its ordering relative to the file write depends on the target status:
//   - SNAPSHOTTED / MERGING: the merge status is set BEFORE the state file is written.
//   - anything else:         the merge status is set AFTER the state file is written.
//
// Returns false if serialization, the file write, or the merge status update fails.
bool SnapshotManager::WriteSnapshotUpdateStatus(LockedFile* lock,
                                                const SnapshotUpdateStatus& status) {
    CHECK(lock);
    CHECK(lock->lock_mode() == LOCK_EX);

    std::string contents;
    if (!status.SerializeToString(&contents)) {
        LOG(ERROR) << "Unable to serialize SnapshotUpdateStatus.";
        return false;
    }

#ifdef LIBSNAPSHOT_USE_HAL
    // Translate the update state into the merge status to report; stays UNKNOWN for
    // states that are not expected to be written.
    auto merge_status = MergeStatus::UNKNOWN;
    switch (status.state()) {
        // The needs-reboot and completed cases imply that /data and /metadata
        // can be safely wiped, so we don't report a merge status.
        case UpdateState::None:
        case UpdateState::MergeNeedsReboot:
        case UpdateState::MergeCompleted:
        case UpdateState::Initiated:
            merge_status = MergeStatus::NONE;
            break;
        case UpdateState::Unverified:
            merge_status = MergeStatus::SNAPSHOTTED;
            break;
        case UpdateState::Merging:
        case UpdateState::MergeFailed:
            merge_status = MergeStatus::MERGING;
            break;
        default:
            // Note that Cancelled flows to here - it is never written, since
            // it only communicates a transient state to the caller.
            LOG(ERROR) << "Unexpected update status: " << status.state();
            break;
    }

    // For SNAPSHOTTED and MERGING the merge status is updated first; if that fails the
    // state file is left untouched.
    bool set_before_write =
            merge_status == MergeStatus::SNAPSHOTTED || merge_status == MergeStatus::MERGING;
    if (set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
        return false;
    }
#endif

    if (!WriteStringToFileAtomic(contents, GetStateFilePath())) {
        PLOG(ERROR) << "Could not write to state file";
        return false;
    }

#ifdef LIBSNAPSHOT_USE_HAL
    // For all other statuses the merge status is updated only after the file write succeeded.
    if (!set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
        return false;
    }
#endif
    return true;
}
3236
GetSnapshotStatusFilePath(const std::string & name)3237 std::string SnapshotManager::GetSnapshotStatusFilePath(const std::string& name) {
3238 auto file = metadata_dir_ + "/snapshots/"s + name;
3239 return file;
3240 }
3241
ReadSnapshotStatus(LockedFile * lock,const std::string & name,SnapshotStatus * status)3242 bool SnapshotManager::ReadSnapshotStatus(LockedFile* lock, const std::string& name,
3243 SnapshotStatus* status) {
3244 CHECK(lock);
3245 auto path = GetSnapshotStatusFilePath(name);
3246
3247 unique_fd fd(open(path.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
3248 if (fd < 0) {
3249 PLOG(ERROR) << "Open failed: " << path;
3250 return false;
3251 }
3252
3253 if (!status->ParseFromFileDescriptor(fd.get())) {
3254 PLOG(ERROR) << "Unable to parse " << path << " as SnapshotStatus";
3255 return false;
3256 }
3257
3258 if (status->name() != name) {
3259 LOG(WARNING) << "Found snapshot status named " << status->name() << " in " << path;
3260 status->set_name(name);
3261 }
3262
3263 return true;
3264 }
3265
WriteSnapshotStatus(LockedFile * lock,const SnapshotStatus & status)3266 bool SnapshotManager::WriteSnapshotStatus(LockedFile* lock, const SnapshotStatus& status) {
3267 // The caller must take an exclusive lock to modify snapshots.
3268 CHECK(lock);
3269 CHECK(lock->lock_mode() == LOCK_EX);
3270 CHECK(!status.name().empty());
3271
3272 auto path = GetSnapshotStatusFilePath(status.name());
3273
3274 std::string content;
3275 if (!status.SerializeToString(&content)) {
3276 LOG(ERROR) << "Unable to serialize SnapshotStatus for " << status.name();
3277 return false;
3278 }
3279
3280 if (!WriteStringToFileAtomic(content, path)) {
3281 PLOG(ERROR) << "Unable to write SnapshotStatus to " << path;
3282 return false;
3283 }
3284
3285 return true;
3286 }
3287
EnsureImageManager()3288 bool SnapshotManager::EnsureImageManager() {
3289 if (images_) return true;
3290
3291 images_ = device_->OpenImageManager();
3292 if (!images_) {
3293 LOG(ERROR) << "Could not open ImageManager";
3294 return false;
3295 }
3296 return true;
3297 }
3298
EnsureSnapuserdConnected(std::chrono::milliseconds timeout_ms)3299 bool SnapshotManager::EnsureSnapuserdConnected(std::chrono::milliseconds timeout_ms) {
3300 if (snapuserd_client_) {
3301 return true;
3302 }
3303
3304 if (!use_first_stage_snapuserd_ && !EnsureSnapuserdStarted()) {
3305 return false;
3306 }
3307
3308 snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, timeout_ms);
3309 if (!snapuserd_client_) {
3310 LOG(ERROR) << "Unable to connect to snapuserd";
3311 return false;
3312 }
3313 return true;
3314 }
3315
UnmapAndDeleteCowPartition(MetadataBuilder * current_metadata)3316 void SnapshotManager::UnmapAndDeleteCowPartition(MetadataBuilder* current_metadata) {
3317 std::vector<std::string> to_delete;
3318 for (auto* existing_cow_partition : current_metadata->ListPartitionsInGroup(kCowGroupName)) {
3319 if (!DeleteDeviceIfExists(existing_cow_partition->name())) {
3320 LOG(WARNING) << existing_cow_partition->name()
3321 << " cannot be unmapped and its space cannot be reclaimed";
3322 continue;
3323 }
3324 to_delete.push_back(existing_cow_partition->name());
3325 }
3326 for (const auto& name : to_delete) {
3327 current_metadata->RemovePartition(name);
3328 }
3329 }
3330
AddRequiredSpace(Return orig,const std::map<std::string,SnapshotStatus> & all_snapshot_status)3331 static Return AddRequiredSpace(Return orig,
3332 const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
3333 if (orig.error_code() != Return::ErrorCode::NO_SPACE) {
3334 return orig;
3335 }
3336 uint64_t sum = 0;
3337 for (auto&& [name, status] : all_snapshot_status) {
3338 sum += status.cow_file_size();
3339 }
3340 LOG(INFO) << "Calculated needed COW space: " << sum << " bytes";
3341 return Return::NoSpace(sum);
3342 }
3343
CreateUpdateSnapshots(const DeltaArchiveManifest & manifest)3344 Return SnapshotManager::CreateUpdateSnapshots(const DeltaArchiveManifest& manifest) {
3345 auto lock = LockExclusive();
3346 if (!lock) return Return::Error();
3347
3348 auto update_state = ReadUpdateState(lock.get());
3349 if (update_state != UpdateState::Initiated) {
3350 LOG(ERROR) << "Cannot create update snapshots in state " << update_state;
3351 return Return::Error();
3352 }
3353
3354 // TODO(b/134949511): remove this check. Right now, with overlayfs mounted, the scratch
3355 // partition takes up a big chunk of space in super, causing COW images to be created on
3356 // retrofit Virtual A/B devices.
3357 if (device_->IsOverlayfsSetup()) {
3358 LOG(ERROR) << "Cannot create update snapshots with overlayfs setup. Run `adb enable-verity`"
3359 << ", reboot, then try again.";
3360 return Return::Error();
3361 }
3362
3363 const auto& opener = device_->GetPartitionOpener();
3364 auto current_suffix = device_->GetSlotSuffix();
3365 uint32_t current_slot = SlotNumberForSlotSuffix(current_suffix);
3366 auto target_suffix = device_->GetOtherSlotSuffix();
3367 uint32_t target_slot = SlotNumberForSlotSuffix(target_suffix);
3368 auto current_super = device_->GetSuperDevice(current_slot);
3369
3370 auto current_metadata = MetadataBuilder::New(opener, current_super, current_slot);
3371 if (current_metadata == nullptr) {
3372 LOG(ERROR) << "Cannot create metadata builder.";
3373 return Return::Error();
3374 }
3375
3376 auto target_metadata =
3377 MetadataBuilder::NewForUpdate(opener, current_super, current_slot, target_slot);
3378 if (target_metadata == nullptr) {
3379 LOG(ERROR) << "Cannot create target metadata builder.";
3380 return Return::Error();
3381 }
3382
3383 // Delete partitions with target suffix in |current_metadata|. Otherwise,
3384 // partition_cow_creator recognizes these left-over partitions as used space.
3385 for (const auto& group_name : current_metadata->ListGroups()) {
3386 if (android::base::EndsWith(group_name, target_suffix)) {
3387 current_metadata->RemoveGroupAndPartitions(group_name);
3388 }
3389 }
3390
3391 SnapshotMetadataUpdater metadata_updater(target_metadata.get(), target_slot, manifest);
3392 if (!metadata_updater.Update()) {
3393 LOG(ERROR) << "Cannot calculate new metadata.";
3394 return Return::Error();
3395 }
3396
3397 // Delete previous COW partitions in current_metadata so that PartitionCowCreator marks those as
3398 // free regions.
3399 UnmapAndDeleteCowPartition(current_metadata.get());
3400
3401 // Check that all these metadata is not retrofit dynamic partitions. Snapshots on
3402 // devices with retrofit dynamic partitions does not make sense.
3403 // This ensures that current_metadata->GetFreeRegions() uses the same device
3404 // indices as target_metadata (i.e. 0 -> "super").
3405 // This is also assumed in MapCowDevices() call below.
3406 CHECK(current_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME &&
3407 target_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME);
3408
3409 const auto& dap_metadata = manifest.dynamic_partition_metadata();
3410
3411 std::string vabc_disable_reason;
3412 if (!dap_metadata.vabc_enabled()) {
3413 vabc_disable_reason = "not enabled metadata";
3414 } else if (device_->IsRecovery()) {
3415 vabc_disable_reason = "recovery";
3416 } else if (!KernelSupportsCompressedSnapshots()) {
3417 vabc_disable_reason = "kernel missing userspace block device support";
3418 }
3419
3420 // Deduce supported features.
3421 bool userspace_snapshots = CanUseUserspaceSnapshots();
3422 bool legacy_compression = GetLegacyCompressionEnabledProperty();
3423 bool is_legacy_snapuserd = IsVendorFromAndroid12();
3424
3425 if (!vabc_disable_reason.empty()) {
3426 if (userspace_snapshots) {
3427 LOG(INFO) << "Userspace snapshots disabled: " << vabc_disable_reason;
3428 }
3429 if (legacy_compression) {
3430 LOG(INFO) << "Compression disabled: " << vabc_disable_reason;
3431 }
3432 userspace_snapshots = false;
3433 legacy_compression = false;
3434 is_legacy_snapuserd = false;
3435 }
3436
3437 if (legacy_compression || userspace_snapshots) {
3438 if (dap_metadata.cow_version() < kMinCowVersion ||
3439 dap_metadata.cow_version() > kMaxCowVersion) {
3440 LOG(ERROR) << "Manifest cow version is out of bounds (got: "
3441 << dap_metadata.cow_version() << ", min: " << kMinCowVersion
3442 << ", max: " << kMaxCowVersion << ")";
3443 return Return::Error();
3444 }
3445 }
3446
3447 if (!userspace_snapshots && is_legacy_snapuserd && legacy_compression) {
3448 userspace_snapshots = true;
3449 LOG(INFO) << "Vendor from Android 12. Enabling userspace snapshot for OTA install";
3450 }
3451
3452 const bool using_snapuserd = userspace_snapshots || legacy_compression;
3453 if (!using_snapuserd) {
3454 LOG(INFO) << "Using legacy Virtual A/B (dm-snapshot)";
3455 }
3456
3457 std::string compression_algorithm;
3458 uint64_t compression_factor{};
3459 if (using_snapuserd) {
3460 compression_algorithm = dap_metadata.vabc_compression_param();
3461 compression_factor = dap_metadata.compression_factor();
3462 if (compression_algorithm.empty()) {
3463 // Older OTAs don't set an explicit compression type, so default to gz.
3464 compression_algorithm = "gz";
3465 }
3466 LOG(INFO) << "using compression algorithm: " << compression_algorithm
3467 << ", max compressible block size: " << compression_factor;
3468 }
3469 auto read_ahead_size =
3470 android::base::GetUintProperty<uint>("ro.virtual_ab.read_ahead_size", kReadAheadSizeKb);
3471 PartitionCowCreator cow_creator{
3472 .target_metadata = target_metadata.get(),
3473 .target_suffix = target_suffix,
3474 .target_partition = nullptr,
3475 .current_metadata = current_metadata.get(),
3476 .current_suffix = current_suffix,
3477 .update = nullptr,
3478 .extra_extents = {},
3479 .using_snapuserd = using_snapuserd,
3480 .compression_algorithm = compression_algorithm,
3481 .compression_factor = compression_factor,
3482 .read_ahead_size = read_ahead_size,
3483 };
3484
3485 if (dap_metadata.vabc_feature_set().has_threaded()) {
3486 cow_creator.enable_threading = dap_metadata.vabc_feature_set().threaded();
3487 }
3488 if (dap_metadata.vabc_feature_set().has_batch_writes()) {
3489 cow_creator.batched_writes = dap_metadata.vabc_feature_set().batch_writes();
3490 }
3491
3492 // In case of error, automatically delete devices that are created along the way.
3493 // Note that "lock" is destroyed after "created_devices", so it is safe to use |lock| for
3494 // these devices.
3495 AutoDeviceList created_devices;
3496 std::map<std::string, SnapshotStatus> all_snapshot_status;
3497 auto ret = CreateUpdateSnapshotsInternal(lock.get(), manifest, &cow_creator, &created_devices,
3498 &all_snapshot_status);
3499 if (!ret.is_ok()) {
3500 LOG(ERROR) << "CreateUpdateSnapshotsInternal failed: " << ret.string();
3501 return ret;
3502 }
3503
3504 auto exported_target_metadata = target_metadata->Export();
3505 if (exported_target_metadata == nullptr) {
3506 LOG(ERROR) << "Cannot export target metadata";
3507 return Return::Error();
3508 }
3509
3510 ret = InitializeUpdateSnapshots(lock.get(), dap_metadata.cow_version(), target_metadata.get(),
3511 exported_target_metadata.get(), target_suffix,
3512 all_snapshot_status);
3513 if (!ret.is_ok()) return ret;
3514
3515 if (!UpdatePartitionTable(opener, device_->GetSuperDevice(target_slot),
3516 *exported_target_metadata, target_slot)) {
3517 LOG(ERROR) << "Cannot write target metadata";
3518 return Return::Error();
3519 }
3520
3521 // If snapuserd is enabled, we need to retain a copy of the old metadata
3522 // so we can access original blocks in case they are moved around. We do
3523 // not want to rely on the old super metadata slot because we don't
3524 // guarantee its validity after the slot switch is successful.
3525 if (using_snapuserd) {
3526 auto metadata = current_metadata->Export();
3527 if (!metadata) {
3528 LOG(ERROR) << "Could not export current metadata";
3529 return Return::Error();
3530 }
3531
3532 auto path = GetOldPartitionMetadataPath();
3533 if (!android::fs_mgr::WriteToImageFile(path, *metadata.get())) {
3534 LOG(ERROR) << "Cannot write old metadata to " << path;
3535 return Return::Error();
3536 }
3537 }
3538
3539 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
3540 status.set_state(update_state);
3541 status.set_using_snapuserd(using_snapuserd);
3542
3543 if (userspace_snapshots) {
3544 status.set_userspace_snapshots(true);
3545 LOG(INFO) << "Virtual A/B using userspace snapshots";
3546
3547 if (GetIouringEnabledProperty()) {
3548 status.set_io_uring_enabled(true);
3549 LOG(INFO) << "io_uring for snapshots enabled";
3550 }
3551 if (GetODirectEnabledProperty()) {
3552 status.set_o_direct(true);
3553 LOG(INFO) << "o_direct for source image enabled";
3554 }
3555 if (is_legacy_snapuserd) {
3556 status.set_legacy_snapuserd(true);
3557 LOG(INFO) << "Setting legacy_snapuserd to true";
3558 }
3559 status.set_cow_op_merge_size(
3560 android::base::GetUintProperty<uint32_t>("ro.virtual_ab.cow_op_merge_size", 0));
3561 status.set_num_worker_threads(
3562 android::base::GetUintProperty<uint32_t>("ro.virtual_ab.num_worker_threads", 0));
3563
3564 } else if (legacy_compression) {
3565 LOG(INFO) << "Virtual A/B using legacy snapuserd";
3566 } else {
3567 LOG(INFO) << "Virtual A/B using dm-snapshot";
3568 }
3569
3570 is_snapshot_userspace_.emplace(userspace_snapshots);
3571 is_legacy_snapuserd_.emplace(is_legacy_snapuserd);
3572
3573 if (!device()->IsTestDevice() && using_snapuserd) {
3574 // Terminate stale daemon if any
3575 std::unique_ptr<SnapuserdClient> snapuserd_client = std::move(snapuserd_client_);
3576 if (!snapuserd_client) {
3577 snapuserd_client = SnapuserdClient::TryConnect(kSnapuserdSocket, 5s);
3578 }
3579 if (snapuserd_client) {
3580 snapuserd_client->DetachSnapuserd();
3581 snapuserd_client = nullptr;
3582 }
3583 }
3584
3585 if (!WriteSnapshotUpdateStatus(lock.get(), status)) {
3586 LOG(ERROR) << "Unable to write new update state";
3587 return Return::Error();
3588 }
3589
3590 created_devices.Release();
3591 LOG(INFO) << "Successfully created all snapshots for target slot " << target_suffix;
3592
3593 return Return::Ok();
3594 }
3595
CreateUpdateSnapshotsInternal(LockedFile * lock,const DeltaArchiveManifest & manifest,PartitionCowCreator * cow_creator,AutoDeviceList * created_devices,std::map<std::string,SnapshotStatus> * all_snapshot_status)3596 Return SnapshotManager::CreateUpdateSnapshotsInternal(
3597 LockedFile* lock, const DeltaArchiveManifest& manifest, PartitionCowCreator* cow_creator,
3598 AutoDeviceList* created_devices,
3599 std::map<std::string, SnapshotStatus>* all_snapshot_status) {
3600 CHECK(lock);
3601
3602 auto* target_metadata = cow_creator->target_metadata;
3603 const auto& target_suffix = cow_creator->target_suffix;
3604
3605 if (!target_metadata->AddGroup(kCowGroupName, 0)) {
3606 LOG(ERROR) << "Cannot add group " << kCowGroupName;
3607 return Return::Error();
3608 }
3609
3610 std::map<std::string, const PartitionUpdate*> partition_map;
3611 std::map<std::string, std::vector<Extent>> extra_extents_map;
3612 for (const auto& partition_update : manifest.partitions()) {
3613 auto suffixed_name = partition_update.partition_name() + target_suffix;
3614 auto&& [it, inserted] = partition_map.emplace(suffixed_name, &partition_update);
3615 if (!inserted) {
3616 LOG(ERROR) << "Duplicated partition " << partition_update.partition_name()
3617 << " in update manifest.";
3618 return Return::Error();
3619 }
3620
3621 auto& extra_extents = extra_extents_map[suffixed_name];
3622 if (partition_update.has_hash_tree_extent()) {
3623 extra_extents.push_back(partition_update.hash_tree_extent());
3624 }
3625 if (partition_update.has_fec_extent()) {
3626 extra_extents.push_back(partition_update.fec_extent());
3627 }
3628 }
3629
3630 for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
3631 cow_creator->target_partition = target_partition;
3632 cow_creator->update = nullptr;
3633 auto iter = partition_map.find(target_partition->name());
3634 if (iter != partition_map.end()) {
3635 cow_creator->update = iter->second;
3636 } else {
3637 LOG(INFO) << target_partition->name()
3638 << " isn't included in the payload, skipping the cow creation.";
3639 continue;
3640 }
3641
3642 cow_creator->extra_extents.clear();
3643 auto extra_extents_it = extra_extents_map.find(target_partition->name());
3644 if (extra_extents_it != extra_extents_map.end()) {
3645 cow_creator->extra_extents = std::move(extra_extents_it->second);
3646 }
3647
3648 // Compute the device sizes for the partition.
3649 auto cow_creator_ret = cow_creator->Run();
3650 if (!cow_creator_ret.has_value()) {
3651 LOG(ERROR) << "PartitionCowCreator returned no value for " << target_partition->name();
3652 return Return::Error();
3653 }
3654
3655 LOG(INFO) << "For partition " << target_partition->name()
3656 << ", device size = " << cow_creator_ret->snapshot_status.device_size()
3657 << ", snapshot size = " << cow_creator_ret->snapshot_status.snapshot_size()
3658 << ", cow partition size = "
3659 << cow_creator_ret->snapshot_status.cow_partition_size()
3660 << ", cow file size = " << cow_creator_ret->snapshot_status.cow_file_size();
3661
3662 // Delete any existing snapshot before re-creating one.
3663 if (!DeleteSnapshot(lock, target_partition->name())) {
3664 LOG(ERROR) << "Cannot delete existing snapshot before creating a new one for partition "
3665 << target_partition->name();
3666 return Return::Error();
3667 }
3668
3669 // It is possible that the whole partition uses free space in super, and snapshot / COW
3670 // would not be needed. In this case, skip the partition.
3671 bool needs_snapshot = cow_creator_ret->snapshot_status.snapshot_size() > 0;
3672 bool needs_cow = (cow_creator_ret->snapshot_status.cow_partition_size() +
3673 cow_creator_ret->snapshot_status.cow_file_size()) > 0;
3674 CHECK(needs_snapshot == needs_cow);
3675
3676 if (!needs_snapshot) {
3677 LOG(INFO) << "Skip creating snapshot for partition " << target_partition->name()
3678 << "because nothing needs to be snapshotted.";
3679 continue;
3680 }
3681
3682 // Find the original partition size.
3683 auto name = target_partition->name();
3684 auto old_partition_name =
3685 name.substr(0, name.size() - target_suffix.size()) + cow_creator->current_suffix;
3686 auto old_partition = cow_creator->current_metadata->FindPartition(old_partition_name);
3687 if (old_partition) {
3688 cow_creator_ret->snapshot_status.set_old_partition_size(old_partition->size());
3689 }
3690
3691 // Store these device sizes to snapshot status file.
3692 if (!CreateSnapshot(lock, cow_creator, &cow_creator_ret->snapshot_status)) {
3693 return Return::Error();
3694 }
3695 created_devices->EmplaceBack<AutoDeleteSnapshot>(this, lock, target_partition->name());
3696
3697 // Create the COW partition. That is, use any remaining free space in super partition before
3698 // creating the COW images.
3699 if (cow_creator_ret->snapshot_status.cow_partition_size() > 0) {
3700 CHECK(cow_creator_ret->snapshot_status.cow_partition_size() % kSectorSize == 0)
3701 << "cow_partition_size == "
3702 << cow_creator_ret->snapshot_status.cow_partition_size()
3703 << " is not a multiple of sector size " << kSectorSize;
3704 auto cow_partition = target_metadata->AddPartition(GetCowName(target_partition->name()),
3705 kCowGroupName, 0 /* flags */);
3706 if (cow_partition == nullptr) {
3707 return Return::Error();
3708 }
3709
3710 if (!target_metadata->ResizePartition(
3711 cow_partition, cow_creator_ret->snapshot_status.cow_partition_size(),
3712 cow_creator_ret->cow_partition_usable_regions)) {
3713 LOG(ERROR) << "Cannot create COW partition on metadata with size "
3714 << cow_creator_ret->snapshot_status.cow_partition_size();
3715 return Return::Error();
3716 }
3717 // Only the in-memory target_metadata is modified; nothing to clean up if there is an
3718 // error in the future.
3719 }
3720
3721 all_snapshot_status->emplace(target_partition->name(),
3722 std::move(cow_creator_ret->snapshot_status));
3723
3724 LOG(INFO) << "Successfully created snapshot partition for " << target_partition->name();
3725 }
3726
3727 LOG(INFO) << "Allocating CoW images.";
3728
3729 for (auto&& [name, snapshot_status] : *all_snapshot_status) {
3730 // Create the backing COW image if necessary.
3731 if (snapshot_status.cow_file_size() > 0) {
3732 auto ret = CreateCowImage(lock, name);
3733 if (!ret.is_ok()) {
3734 LOG(ERROR) << "CreateCowImage failed: " << ret.string();
3735 return AddRequiredSpace(ret, *all_snapshot_status);
3736 }
3737 }
3738
3739 LOG(INFO) << "Successfully created snapshot for " << name;
3740 }
3741
3742 return Return::Ok();
3743 }
3744
InitializeUpdateSnapshots(LockedFile * lock,uint32_t cow_version,MetadataBuilder * target_metadata,const LpMetadata * exported_target_metadata,const std::string & target_suffix,const std::map<std::string,SnapshotStatus> & all_snapshot_status)3745 Return SnapshotManager::InitializeUpdateSnapshots(
3746 LockedFile* lock, uint32_t cow_version, MetadataBuilder* target_metadata,
3747 const LpMetadata* exported_target_metadata, const std::string& target_suffix,
3748 const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
3749 CHECK(lock);
3750
3751 CreateLogicalPartitionParams cow_params{
3752 .block_device = LP_METADATA_DEFAULT_PARTITION_NAME,
3753 .metadata = exported_target_metadata,
3754 .timeout_ms = std::chrono::milliseconds::max(),
3755 .partition_opener = &device_->GetPartitionOpener(),
3756 };
3757 for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
3758 AutoDeviceList created_devices_for_cow;
3759
3760 if (!UnmapPartitionWithSnapshot(lock, target_partition->name())) {
3761 LOG(ERROR) << "Cannot unmap existing COW devices before re-mapping them for zero-fill: "
3762 << target_partition->name();
3763 return Return::Error();
3764 }
3765
3766 auto it = all_snapshot_status.find(target_partition->name());
3767 if (it == all_snapshot_status.end()) continue;
3768 cow_params.partition_name = target_partition->name();
3769 std::string cow_name;
3770 if (!MapCowDevices(lock, cow_params, it->second, &created_devices_for_cow, &cow_name)) {
3771 return Return::Error();
3772 }
3773
3774 std::string cow_path;
3775 if (!images_->GetMappedImageDevice(cow_name, &cow_path)) {
3776 LOG(ERROR) << "Cannot determine path for " << cow_name;
3777 return Return::Error();
3778 }
3779
3780 if (!android::fs_mgr::WaitForFile(cow_path, 6s)) {
3781 LOG(ERROR) << "Timed out waiting for device to appear: " << cow_path;
3782 return Return::Error();
3783 }
3784
3785 if (it->second.using_snapuserd()) {
3786 unique_fd fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC));
3787 if (fd < 0) {
3788 PLOG(ERROR) << "open " << cow_path << " failed for snapshot "
3789 << cow_params.partition_name;
3790 return Return::Error();
3791 }
3792
3793 CowOptions options;
3794 if (device()->IsTestDevice()) {
3795 options.scratch_space = false;
3796 }
3797 options.compression = it->second.compression_algorithm();
3798 if (cow_version >= 3) {
3799 options.op_count_max = it->second.estimated_ops_buffer_size();
3800 options.max_blocks = {it->second.device_size() / options.block_size};
3801 }
3802
3803 auto writer = CreateCowWriter(cow_version, options, std::move(fd));
3804 if (!writer->Finalize()) {
3805 LOG(ERROR) << "Could not initialize COW device for " << target_partition->name();
3806 return Return::Error();
3807 }
3808 } else {
3809 auto ret = InitializeKernelCow(cow_path);
3810 if (!ret.is_ok()) {
3811 LOG(ERROR) << "Can't zero-fill COW device for " << target_partition->name() << ": "
3812 << cow_path;
3813 return AddRequiredSpace(ret, all_snapshot_status);
3814 }
3815 }
3816 // Let destructor of created_devices_for_cow to unmap the COW devices.
3817 };
3818 return Return::Ok();
3819 }
3820
MapUpdateSnapshot(const CreateLogicalPartitionParams & params,std::string * snapshot_path)3821 bool SnapshotManager::MapUpdateSnapshot(const CreateLogicalPartitionParams& params,
3822 std::string* snapshot_path) {
3823 auto lock = LockShared();
3824 if (!lock) return false;
3825 if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
3826 LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
3827 << params.GetPartitionName();
3828 return false;
3829 }
3830
3831 SnapshotStatus status;
3832 if (!ReadSnapshotStatus(lock.get(), params.GetPartitionName(), &status)) {
3833 return false;
3834 }
3835 if (status.using_snapuserd()) {
3836 LOG(ERROR) << "Cannot use MapUpdateSnapshot with snapuserd";
3837 return false;
3838 }
3839
3840 SnapshotPaths paths;
3841 if (!MapPartitionWithSnapshot(lock.get(), params, SnapshotContext::Update, &paths)) {
3842 return false;
3843 }
3844
3845 if (!paths.snapshot_device.empty()) {
3846 *snapshot_path = paths.snapshot_device;
3847 } else {
3848 *snapshot_path = paths.target_device;
3849 }
3850 DCHECK(!snapshot_path->empty());
3851 return true;
3852 }
3853
OpenSnapshotWriter(const android::fs_mgr::CreateLogicalPartitionParams & params,std::optional<uint64_t> label)3854 std::unique_ptr<ICowWriter> SnapshotManager::OpenSnapshotWriter(
3855 const android::fs_mgr::CreateLogicalPartitionParams& params,
3856 std::optional<uint64_t> label) {
3857 #if defined(LIBSNAPSHOT_NO_COW_WRITE)
3858 (void)params;
3859 (void)label;
3860
3861 LOG(ERROR) << "Snapshots cannot be written in first-stage init or recovery";
3862 return nullptr;
3863 #else
3864 // First unmap any existing mapping.
3865 auto lock = LockShared();
3866 if (!lock) return nullptr;
3867 if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
3868 LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
3869 << params.GetPartitionName();
3870 return nullptr;
3871 }
3872
3873 SnapshotPaths paths;
3874 if (!MapPartitionWithSnapshot(lock.get(), params, SnapshotContext::Update, &paths)) {
3875 return nullptr;
3876 }
3877
3878 SnapshotStatus status;
3879 if (!paths.cow_device_name.empty()) {
3880 if (!ReadSnapshotStatus(lock.get(), params.GetPartitionName(), &status)) {
3881 return nullptr;
3882 }
3883 } else {
3884 // Currently, partition_cow_creator always creates snapshots. The
3885 // reason is that if partition X shrinks while partition Y grows, we
3886 // cannot bindly write to the newly freed extents in X. This would
3887 // make the old slot unusable. So, the entire size of the target
3888 // partition is currently considered snapshottable.
3889 LOG(ERROR) << "No snapshot available for partition " << params.GetPartitionName();
3890 return nullptr;
3891 }
3892
3893 if (!status.using_snapuserd()) {
3894 LOG(ERROR) << "Can only create snapshot writers with userspace or compressed snapshots";
3895 return nullptr;
3896 }
3897
3898 return OpenCompressedSnapshotWriter(lock.get(), status, paths, label);
3899 #endif
3900 }
3901
3902 #if !defined(LIBSNAPSHOT_NO_COW_WRITE)
OpenCompressedSnapshotWriter(LockedFile * lock,const SnapshotStatus & status,const SnapshotPaths & paths,std::optional<uint64_t> label)3903 std::unique_ptr<ICowWriter> SnapshotManager::OpenCompressedSnapshotWriter(
3904 LockedFile* lock, const SnapshotStatus& status, const SnapshotPaths& paths,
3905 std::optional<uint64_t> label) {
3906 CHECK(lock);
3907
3908 CowOptions cow_options;
3909 cow_options.compression = status.compression_algorithm();
3910 cow_options.max_blocks = {status.device_size() / cow_options.block_size};
3911 cow_options.batch_write = status.batched_writes();
3912 cow_options.num_compress_threads = status.enable_threading() ? 2 : 1;
3913 cow_options.op_count_max = status.estimated_ops_buffer_size();
3914 cow_options.compression_factor = status.compression_factor();
3915 // Disable scratch space for vts tests
3916 if (device()->IsTestDevice()) {
3917 cow_options.scratch_space = false;
3918 }
3919
3920 // Currently we don't support partial snapshots, since partition_cow_creator
3921 // never creates this scenario.
3922 CHECK(status.snapshot_size() == status.device_size());
3923
3924 std::string cow_path;
3925 if (!GetMappedImageDevicePath(paths.cow_device_name, &cow_path)) {
3926 LOG(ERROR) << "Could not determine path for " << paths.cow_device_name;
3927 return nullptr;
3928 }
3929
3930 unique_fd cow_fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC));
3931 if (cow_fd < 0) {
3932 PLOG(ERROR) << "OpenCompressedSnapshotWriter: open " << cow_path;
3933 return nullptr;
3934 }
3935
3936 CowHeaderV3 header;
3937 if (!ReadCowHeader(cow_fd, &header)) {
3938 LOG(ERROR) << "OpenCompressedSnapshotWriter: read header failed";
3939 return nullptr;
3940 }
3941
3942 return CreateCowWriter(header.prefix.major_version, cow_options, std::move(cow_fd), label);
3943 }
3944 #endif // !defined(LIBSNAPSHOT_NO_COW_WRITE)
3945
UnmapUpdateSnapshot(const std::string & target_partition_name)3946 bool SnapshotManager::UnmapUpdateSnapshot(const std::string& target_partition_name) {
3947 auto lock = LockShared();
3948 if (!lock) return false;
3949 return UnmapPartitionWithSnapshot(lock.get(), target_partition_name);
3950 }
3951
UnmapAllPartitionsInRecovery()3952 bool SnapshotManager::UnmapAllPartitionsInRecovery() {
3953 auto lock = LockExclusive();
3954 if (!lock) return false;
3955
3956 const auto& opener = device_->GetPartitionOpener();
3957 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3958 auto super_device = device_->GetSuperDevice(slot);
3959 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
3960 if (!metadata) {
3961 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
3962 return false;
3963 }
3964
3965 bool ok = true;
3966 for (const auto& partition : metadata->partitions) {
3967 auto partition_name = GetPartitionName(partition);
3968 ok &= UnmapPartitionWithSnapshot(lock.get(), partition_name);
3969 }
3970 return ok;
3971 }
3972
operator <<(std::ostream & os,SnapshotManager::Slot slot)3973 std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot) {
3974 switch (slot) {
3975 case SnapshotManager::Slot::Unknown:
3976 return os << "unknown";
3977 case SnapshotManager::Slot::Source:
3978 return os << "source";
3979 case SnapshotManager::Slot::Target:
3980 return os << "target";
3981 }
3982 }
3983
Dump(std::ostream & os)3984 bool SnapshotManager::Dump(std::ostream& os) {
3985 // Don't actually lock. Dump() is for debugging purposes only, so it is okay
3986 // if it is racy.
3987 auto file = OpenLock(0 /* lock flag */);
3988 if (!file) return false;
3989
3990 std::stringstream ss;
3991
3992 auto update_status = ReadSnapshotUpdateStatus(file.get());
3993
3994 ss << "Update state: " << update_status.state() << std::endl;
3995 ss << "Using snapuserd: " << update_status.using_snapuserd() << std::endl;
3996 ss << "Using userspace snapshots: " << update_status.userspace_snapshots() << std::endl;
3997 ss << "Using io_uring: " << update_status.io_uring_enabled() << std::endl;
3998 ss << "Using o_direct: " << update_status.o_direct() << std::endl;
3999 ss << "Cow op merge size (0 for uncapped): " << update_status.cow_op_merge_size() << std::endl;
4000 ss << "Worker thread count: " << update_status.num_worker_threads() << std::endl;
4001 ss << "Using XOR compression: " << GetXorCompressionEnabledProperty() << std::endl;
4002 ss << "Current slot: " << device_->GetSlotSuffix() << std::endl;
4003 ss << "Boot indicator: booting from " << GetCurrentSlot() << " slot" << std::endl;
4004 ss << "Rollback indicator: "
4005 << (access(GetRollbackIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
4006 << std::endl;
4007 ss << "Forward merge indicator: "
4008 << (access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
4009 << std::endl;
4010 ss << "Source build fingerprint: " << update_status.source_build_fingerprint() << std::endl;
4011
4012 if (update_status.state() == UpdateState::Merging) {
4013 ss << "Merge completion: ";
4014 if (!EnsureSnapuserdConnected()) {
4015 ss << "N/A";
4016 } else {
4017 ss << snapuserd_client_->GetMergePercent() << "%";
4018 }
4019 ss << std::endl;
4020 ss << "Merge phase: " << update_status.merge_phase() << std::endl;
4021 }
4022
4023 bool ok = true;
4024 std::vector<std::string> snapshots;
4025 if (!ListSnapshots(file.get(), &snapshots)) {
4026 LOG(ERROR) << "Could not list snapshots";
4027 snapshots.clear();
4028 ok = false;
4029 }
4030 for (const auto& name : snapshots) {
4031 ss << "Snapshot: " << name << std::endl;
4032 SnapshotStatus status;
4033 if (!ReadSnapshotStatus(file.get(), name, &status)) {
4034 ok = false;
4035 continue;
4036 }
4037 ss << " state: " << SnapshotState_Name(status.state()) << std::endl;
4038 ss << " device size (bytes): " << status.device_size() << std::endl;
4039 ss << " snapshot size (bytes): " << status.snapshot_size() << std::endl;
4040 ss << " cow partition size (bytes): " << status.cow_partition_size() << std::endl;
4041 ss << " cow file size (bytes): " << status.cow_file_size() << std::endl;
4042 ss << " allocated sectors: " << status.sectors_allocated() << std::endl;
4043 ss << " metadata sectors: " << status.metadata_sectors() << std::endl;
4044 ss << " compression: " << status.compression_algorithm() << std::endl;
4045 ss << " compression factor: " << status.compression_factor() << std::endl;
4046 ss << " merge phase: " << DecideMergePhase(status) << std::endl;
4047 }
4048 os << ss.rdbuf();
4049 return ok;
4050 }
4051
EnsureMetadataMounted()4052 std::unique_ptr<AutoDevice> SnapshotManager::EnsureMetadataMounted() {
4053 if (!device_->IsRecovery()) {
4054 // No need to mount anything in recovery.
4055 LOG(INFO) << "EnsureMetadataMounted does nothing in Android mode.";
4056 return std::unique_ptr<AutoUnmountDevice>(new AutoUnmountDevice());
4057 }
4058 auto ret = AutoUnmountDevice::New(device_->GetMetadataDir());
4059 if (ret == nullptr) return nullptr;
4060
4061 // In rescue mode, it is possible to erase and format metadata, but /metadata/ota is not
4062 // created to execute snapshot updates. Hence, subsequent calls is likely to fail because
4063 // Lock*() fails. By failing early and returning nullptr here, update_engine_sideload can
4064 // treat this case as if /metadata is not mounted.
4065 if (!LockShared()) {
4066 LOG(WARNING) << "/metadata is mounted, but errors occur when acquiring a shared lock. "
4067 "Subsequent calls to SnapshotManager will fail. Unmounting /metadata now.";
4068 return nullptr;
4069 }
4070 return ret;
4071 }
4072
// Called in recovery before a data wipe to decide whether the wipe may proceed
// given the current snapshot update state.
//
// |callback| may be empty; when set, it is invoked from the callback passed to
// ProcessUpdateStateOnDataWipe() while a merge is being processed.
//
// Returns true if the wipe is allowed to continue. Returns false when not in
// recovery, when partitions cannot be mapped for a required merge, when the
// merge ends in MergeFailed, or when the final exclusive lock cannot be taken.
bool SnapshotManager::HandleImminentDataWipe(const std::function<void()>& callback) {
    if (!device_->IsRecovery()) {
        LOG(ERROR) << "Data wipes are only allowed in recovery.";
        return false;
    }

    auto mount = EnsureMetadataMounted();
    if (!mount || !mount->HasDevice()) {
        // We allow the wipe to continue, because if we can't mount /metadata,
        // it is unlikely the device would have booted anyway. If there is no
        // metadata partition, then the device predates Virtual A/B.
        LOG(INFO) << "/metadata not found; allowing wipe.";
        return true;
    }

    // Read the update state under a short-lived exclusive lock; the lock is
    // released at the end of this scope, before any merge work starts.
    UpdateState state;
    {
        auto lock = LockExclusive();
        if (!lock) {
            // This could happen if /metadata mounted but there is no filesystem
            // structure. Weird, but we have to assume there's no OTA pending,
            // and thus we let the wipe proceed.
            LOG(ERROR) << "Unable to determine update state; allowing wipe.";
            return true;
        }

        state = ReadUpdateState(lock.get());
        LOG(INFO) << "Update state before wipe: " << state << "; slot: " << GetCurrentSlot()
                  << "; suffix: " << device_->GetSlotSuffix();
    }

    // Decide from the state whether a merge has to be attempted before wiping.
    bool try_merge = false;
    switch (state) {
        case UpdateState::None:
        case UpdateState::Initiated:
            LOG(INFO) << "Wipe is not impacted by update state; allowing wipe.";
            break;
        case UpdateState::Unverified:
            if (GetCurrentSlot() != Slot::Target) {
                LOG(INFO) << "Wipe is not impacted by rolled back update; allowing wipe";
                break;
            }
            if (!HasForwardMergeIndicator()) {
                auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
                auto other_slot_number = SlotNumberForSlotSuffix(device_->GetOtherSlotSuffix());

                // We're not allowed to forward merge, so forcefully rollback the
                // slot switch.
                LOG(INFO) << "Allowing wipe due to lack of forward merge indicator; reverting to "
                             "old slot since update will be deleted.";
                device_->SetSlotAsUnbootable(slot_number);
                device_->SetActiveBootSlot(other_slot_number);
                break;
            }

            // Forward merge indicator means we have to mount snapshots and try to merge.
            LOG(INFO) << "Forward merge indicator is present.";
            try_merge = true;
            break;
        case UpdateState::Merging:
        case UpdateState::MergeFailed:
            try_merge = true;
            break;
        case UpdateState::MergeNeedsReboot:
        case UpdateState::Cancelled:
            LOG(INFO) << "Unexpected update state in recovery; allowing wipe.";
            break;
        default:
            break;
    }

    if (try_merge) {
        auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
        auto super_path = device_->GetSuperDevice(slot_number);
        if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
            LOG(ERROR) << "Unable to map partitions to complete merge.";
            return false;
        }

        // Adapt the void() callback to the bool() shape that
        // ProcessUpdateStateOnDataWipe() takes; it always reports true.
        auto process_callback = [&]() -> bool {
            if (callback) {
                callback();
            }
            return true;
        };

        // |state| now holds the post-merge state and drives the cleanup below.
        state = ProcessUpdateStateOnDataWipe(process_callback);
        if (state == UpdateState::MergeFailed) {
            return false;
        }

        // Nothing should be depending on partitions now, so unmap them all.
        if (!UnmapAllPartitionsInRecovery()) {
            LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
        }
    }

    if (state != UpdateState::None) {
        auto lock = LockExclusive();
        if (!lock) return false;

        // Zap the update state so the bootloader doesn't think we're still
        // merging. It's okay if this fails, it's informative only at this
        // point.
        WriteUpdateState(lock.get(), UpdateState::None);
    }
    return true;
}
4181
FinishMergeInRecovery()4182 bool SnapshotManager::FinishMergeInRecovery() {
4183 if (!device_->IsRecovery()) {
4184 LOG(ERROR) << "Data wipes are only allowed in recovery.";
4185 return false;
4186 }
4187
4188 auto mount = EnsureMetadataMounted();
4189 if (!mount || !mount->HasDevice()) {
4190 return false;
4191 }
4192
4193 auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
4194 auto super_path = device_->GetSuperDevice(slot_number);
4195 if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
4196 LOG(ERROR) << "Unable to map partitions to complete merge.";
4197 return false;
4198 }
4199
4200 UpdateState state = ProcessUpdateState();
4201 if (state != UpdateState::MergeCompleted) {
4202 LOG(ERROR) << "Merge returned unexpected status: " << state;
4203 return false;
4204 }
4205
4206 // Nothing should be depending on partitions now, so unmap them all.
4207 if (!UnmapAllPartitionsInRecovery()) {
4208 LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
4209 }
4210 return true;
4211 }
4212
ProcessUpdateStateOnDataWipe(const std::function<bool ()> & callback)4213 UpdateState SnapshotManager::ProcessUpdateStateOnDataWipe(const std::function<bool()>& callback) {
4214 while (true) {
4215 UpdateState state = ProcessUpdateState(callback);
4216 LOG(INFO) << "Processed updated state in recovery: " << state;
4217 switch (state) {
4218 case UpdateState::MergeFailed:
4219 LOG(ERROR) << "Unrecoverable merge failure detected.";
4220 return state;
4221 case UpdateState::Unverified: {
4222 // Unverified was already handled earlier, in HandleImminentDataWipe,
4223 // but it will fall through here if a forward merge is required.
4224 //
4225 // If InitiateMerge fails, we early return. If it succeeds, then we
4226 // are guaranteed that the next call to ProcessUpdateState will not
4227 // return Unverified.
4228 if (!InitiateMerge()) {
4229 LOG(ERROR) << "Failed to initiate merge on data wipe.";
4230 return UpdateState::MergeFailed;
4231 }
4232 continue;
4233 }
4234 case UpdateState::MergeNeedsReboot:
4235 // We shouldn't get here, because nothing is depending on
4236 // logical partitions.
4237 LOG(ERROR) << "Unexpected merge-needs-reboot state in recovery.";
4238 return state;
4239 default:
4240 return state;
4241 }
4242 }
4243 }
4244
HasForwardMergeIndicator()4245 bool SnapshotManager::HasForwardMergeIndicator() {
4246 return access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0;
4247 }
4248
EnsureNoOverflowSnapshot(LockedFile * lock)4249 bool SnapshotManager::EnsureNoOverflowSnapshot(LockedFile* lock) {
4250 CHECK(lock);
4251
4252 std::vector<std::string> snapshots;
4253 if (!ListSnapshots(lock, &snapshots)) {
4254 LOG(ERROR) << "Could not list snapshots.";
4255 return false;
4256 }
4257
4258 for (const auto& snapshot : snapshots) {
4259 SnapshotStatus status;
4260 if (!ReadSnapshotStatus(lock, snapshot, &status)) {
4261 return false;
4262 }
4263 if (status.using_snapuserd()) {
4264 continue;
4265 }
4266
4267 std::vector<DeviceMapper::TargetInfo> targets;
4268 if (!dm_.GetTableStatus(snapshot, &targets)) {
4269 LOG(ERROR) << "Could not read snapshot device table: " << snapshot;
4270 return false;
4271 }
4272 if (targets.size() != 1) {
4273 LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << snapshot
4274 << ", size = " << targets.size();
4275 return false;
4276 }
4277 if (targets[0].IsOverflowSnapshot()) {
4278 LOG(ERROR) << "Detected overflow in snapshot " << snapshot
4279 << ", CoW device size computation is wrong!";
4280 return false;
4281 }
4282 }
4283
4284 return true;
4285 }
4286
RecoveryCreateSnapshotDevices()4287 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices() {
4288 if (!device_->IsRecovery()) {
4289 LOG(ERROR) << __func__ << " is only allowed in recovery.";
4290 return CreateResult::NOT_CREATED;
4291 }
4292
4293 auto mount = EnsureMetadataMounted();
4294 if (!mount || !mount->HasDevice()) {
4295 LOG(ERROR) << "Couldn't mount Metadata.";
4296 return CreateResult::NOT_CREATED;
4297 }
4298 return RecoveryCreateSnapshotDevices(mount);
4299 }
4300
RecoveryCreateSnapshotDevices(const std::unique_ptr<AutoDevice> & metadata_device)4301 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices(
4302 const std::unique_ptr<AutoDevice>& metadata_device) {
4303 if (!device_->IsRecovery()) {
4304 LOG(ERROR) << __func__ << " is only allowed in recovery.";
4305 return CreateResult::NOT_CREATED;
4306 }
4307
4308 if (metadata_device == nullptr || !metadata_device->HasDevice()) {
4309 LOG(ERROR) << "Metadata not mounted.";
4310 return CreateResult::NOT_CREATED;
4311 }
4312
4313 auto state_file = GetStateFilePath();
4314 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
4315 LOG(ERROR) << "Couldn't access state file.";
4316 return CreateResult::NOT_CREATED;
4317 }
4318
4319 if (!NeedSnapshotsInFirstStageMount()) {
4320 return CreateResult::NOT_CREATED;
4321 }
4322
4323 auto slot_suffix = device_->GetOtherSlotSuffix();
4324 auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
4325 auto super_path = device_->GetSuperDevice(slot_number);
4326 if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
4327 LOG(ERROR) << "Unable to map partitions.";
4328 return CreateResult::ERROR;
4329 }
4330 return CreateResult::CREATED;
4331 }
4332
UpdateForwardMergeIndicator(bool wipe)4333 bool SnapshotManager::UpdateForwardMergeIndicator(bool wipe) {
4334 auto path = GetForwardMergeIndicatorPath();
4335
4336 if (!wipe) {
4337 LOG(INFO) << "Wipe is not scheduled. Deleting forward merge indicator.";
4338 return RemoveFileIfExists(path);
4339 }
4340
4341 // TODO(b/152094219): Don't forward merge if no CoW file is allocated.
4342
4343 LOG(INFO) << "Wipe will be scheduled. Allowing forward merge of snapshots.";
4344 if (!android::base::WriteStringToFile("1", path)) {
4345 PLOG(ERROR) << "Unable to write forward merge indicator: " << path;
4346 return false;
4347 }
4348
4349 return true;
4350 }
4351
GetSnapshotMergeStatsInstance()4352 ISnapshotMergeStats* SnapshotManager::GetSnapshotMergeStatsInstance() {
4353 return SnapshotMergeStats::GetInstance(*this);
4354 }
4355
4356 // This is only to be used in recovery or normal Android (not first-stage init).
4357 // We don't guarantee dm paths are available in first-stage init, because ueventd
4358 // isn't running yet.
GetMappedImageDevicePath(const std::string & device_name,std::string * device_path)4359 bool SnapshotManager::GetMappedImageDevicePath(const std::string& device_name,
4360 std::string* device_path) {
4361 // Try getting the device string if it is a device mapper device.
4362 if (dm_.GetState(device_name) != DmDeviceState::INVALID) {
4363 return dm_.GetDmDevicePathByName(device_name, device_path);
4364 }
4365
4366 // Otherwise, get path from IImageManager.
4367 return images_->GetMappedImageDevice(device_name, device_path);
4368 }
4369
GetMappedImageDeviceStringOrPath(const std::string & device_name,std::string * device_string_or_mapped_path)4370 bool SnapshotManager::GetMappedImageDeviceStringOrPath(const std::string& device_name,
4371 std::string* device_string_or_mapped_path) {
4372 // Try getting the device string if it is a device mapper device.
4373 if (dm_.GetState(device_name) != DmDeviceState::INVALID) {
4374 return dm_.GetDeviceString(device_name, device_string_or_mapped_path);
4375 }
4376
4377 // Otherwise, get path from IImageManager.
4378 if (!images_->GetMappedImageDevice(device_name, device_string_or_mapped_path)) {
4379 return false;
4380 }
4381
4382 LOG(WARNING) << "Calling GetMappedImageDevice with local image manager; device "
4383 << (device_string_or_mapped_path ? *device_string_or_mapped_path : "(nullptr)")
4384 << "may not be available in first stage init! ";
4385 return true;
4386 }
4387
WaitForDevice(const std::string & device,std::chrono::milliseconds timeout_ms)4388 bool SnapshotManager::WaitForDevice(const std::string& device,
4389 std::chrono::milliseconds timeout_ms) {
4390 if (!android::base::StartsWith(device, "/")) {
4391 return true;
4392 }
4393
4394 // In first-stage init, we rely on init setting a callback which can
4395 // regenerate uevents and populate /dev for us.
4396 if (uevent_regen_callback_) {
4397 if (!uevent_regen_callback_(device)) {
4398 LOG(ERROR) << "Failed to find device after regenerating uevents: " << device;
4399 return false;
4400 }
4401 return true;
4402 }
4403
4404 // Otherwise, the only kind of device we need to wait for is a dm-user
4405 // misc device. Normal calls to DeviceMapper::CreateDevice() guarantee
4406 // the path has been created.
4407 if (!android::base::StartsWith(device, "/dev/dm-user/")) {
4408 return true;
4409 }
4410
4411 if (timeout_ms.count() == 0) {
4412 LOG(ERROR) << "No timeout was specified to wait for device: " << device;
4413 return false;
4414 }
4415 if (!android::fs_mgr::WaitForFile(device, timeout_ms)) {
4416 LOG(ERROR) << "Timed out waiting for device to appear: " << device;
4417 return false;
4418 }
4419 return true;
4420 }
4421
IsSnapuserdRequired()4422 bool SnapshotManager::IsSnapuserdRequired() {
4423 auto lock = LockExclusive();
4424 if (!lock) return false;
4425
4426 auto status = ReadSnapshotUpdateStatus(lock.get());
4427 return status.state() != UpdateState::None && status.using_snapuserd();
4428 }
4429
PrepareSnapuserdArgsForSelinux(std::vector<std::string> * snapuserd_argv)4430 bool SnapshotManager::PrepareSnapuserdArgsForSelinux(std::vector<std::string>* snapuserd_argv) {
4431 return PerformInitTransition(InitTransition::SELINUX_DETACH, snapuserd_argv);
4432 }
4433
DetachFirstStageSnapuserdForSelinux()4434 bool SnapshotManager::DetachFirstStageSnapuserdForSelinux() {
4435 LOG(INFO) << "Detaching first stage snapuserd";
4436
4437 auto lock = LockExclusive();
4438 if (!lock) return false;
4439
4440 std::vector<std::string> snapshots;
4441 if (!ListSnapshots(lock.get(), &snapshots)) {
4442 LOG(ERROR) << "Failed to list snapshots.";
4443 return false;
4444 }
4445
4446 size_t num_cows = 0;
4447 size_t ok_cows = 0;
4448 for (const auto& snapshot : snapshots) {
4449 std::string user_cow_name = GetDmUserCowName(snapshot, GetSnapshotDriver(lock.get()));
4450
4451 if (dm_.GetState(user_cow_name) == DmDeviceState::INVALID) {
4452 continue;
4453 }
4454
4455 DeviceMapper::TargetInfo target;
4456 if (!GetSingleTarget(user_cow_name, TableQuery::Table, &target)) {
4457 continue;
4458 }
4459
4460 auto target_type = DeviceMapper::GetTargetType(target.spec);
4461 if (target_type != "user") {
4462 LOG(ERROR) << "Unexpected target type for " << user_cow_name << ": " << target_type;
4463 continue;
4464 }
4465
4466 num_cows++;
4467 auto misc_name = user_cow_name;
4468
4469 DmTable table;
4470 table.Emplace<DmTargetUser>(0, target.spec.length, misc_name);
4471 if (!dm_.LoadTableAndActivate(user_cow_name, table)) {
4472 LOG(ERROR) << "Unable to swap tables for " << misc_name;
4473 continue;
4474 }
4475
4476 // Wait for ueventd to acknowledge and create the control device node.
4477 std::string control_device = "/dev/dm-user/" + misc_name;
4478 if (!WaitForDevice(control_device, 10s)) {
4479 LOG(ERROR) << "dm-user control device no found: " << misc_name;
4480 continue;
4481 }
4482
4483 ok_cows++;
4484 LOG(INFO) << "control device is ready: " << control_device;
4485 }
4486
4487 if (ok_cows != num_cows) {
4488 LOG(ERROR) << "Could not transition all snapuserd consumers.";
4489 return false;
4490 }
4491
4492 return true;
4493 }
4494
PerformSecondStageInitTransition()4495 bool SnapshotManager::PerformSecondStageInitTransition() {
4496 return PerformInitTransition(InitTransition::SECOND_STAGE);
4497 }
4498
ReadOldPartitionMetadata(LockedFile * lock)4499 const LpMetadata* SnapshotManager::ReadOldPartitionMetadata(LockedFile* lock) {
4500 CHECK(lock);
4501
4502 if (!old_partition_metadata_) {
4503 auto path = GetOldPartitionMetadataPath();
4504 old_partition_metadata_ = android::fs_mgr::ReadFromImageFile(path);
4505 if (!old_partition_metadata_) {
4506 LOG(ERROR) << "Could not read old partition metadata from " << path;
4507 return nullptr;
4508 }
4509 }
4510 return old_partition_metadata_.get();
4511 }
4512
DecideMergePhase(const SnapshotStatus & status)4513 MergePhase SnapshotManager::DecideMergePhase(const SnapshotStatus& status) {
4514 if (status.using_snapuserd() && status.device_size() < status.old_partition_size()) {
4515 return MergePhase::FIRST_PHASE;
4516 }
4517 return MergePhase::SECOND_PHASE;
4518 }
4519
UpdateCowStats(ISnapshotMergeStats * stats)4520 void SnapshotManager::UpdateCowStats(ISnapshotMergeStats* stats) {
4521 auto lock = LockExclusive();
4522 if (!lock) return;
4523
4524 std::vector<std::string> snapshots;
4525 if (!ListSnapshots(lock.get(), &snapshots, GetSnapshotSlotSuffix())) {
4526 LOG(ERROR) << "Could not list snapshots";
4527 return;
4528 }
4529
4530 uint64_t cow_file_size = 0;
4531 uint64_t total_cow_size = 0;
4532 uint64_t estimated_cow_size = 0;
4533 for (const auto& snapshot : snapshots) {
4534 SnapshotStatus status;
4535 if (!ReadSnapshotStatus(lock.get(), snapshot, &status)) {
4536 return;
4537 }
4538
4539 cow_file_size += status.cow_file_size();
4540 total_cow_size += status.cow_file_size() + status.cow_partition_size();
4541 estimated_cow_size += status.estimated_cow_size();
4542 }
4543
4544 stats->report()->set_cow_file_size(cow_file_size);
4545 stats->report()->set_total_cow_size_bytes(total_cow_size);
4546 stats->report()->set_estimated_cow_size_bytes(estimated_cow_size);
4547 }
4548
SetMergeStatsFeatures(ISnapshotMergeStats * stats)4549 void SnapshotManager::SetMergeStatsFeatures(ISnapshotMergeStats* stats) {
4550 auto lock = LockExclusive();
4551 if (!lock) return;
4552
4553 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
4554 stats->report()->set_iouring_used(update_status.io_uring_enabled());
4555 stats->report()->set_userspace_snapshots_used(update_status.userspace_snapshots());
4556 stats->report()->set_xor_compression_used(GetXorCompressionEnabledProperty());
4557 }
4558
DeleteDeviceIfExists(const std::string & name,const std::chrono::milliseconds & timeout_ms)4559 bool SnapshotManager::DeleteDeviceIfExists(const std::string& name,
4560 const std::chrono::milliseconds& timeout_ms) {
4561 auto start = std::chrono::steady_clock::now();
4562 while (true) {
4563 if (dm_.DeleteDeviceIfExists(name)) {
4564 return true;
4565 }
4566 auto now = std::chrono::steady_clock::now();
4567 auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - start);
4568 if (elapsed >= timeout_ms) {
4569 break;
4570 }
4571 std::this_thread::sleep_for(400ms);
4572 }
4573
4574 // Try to diagnose why this failed. First get the actual device path.
4575 std::string full_path;
4576 if (!dm_.GetDmDevicePathByName(name, &full_path)) {
4577 LOG(ERROR) << "Unable to diagnose DM_DEV_REMOVE failure.";
4578 return false;
4579 }
4580
4581 // Check for child dm-devices.
4582 std::string block_name = android::base::Basename(full_path);
4583 std::string sysfs_holders = "/sys/class/block/" + block_name + "/holders";
4584
4585 std::error_code ec;
4586 std::filesystem::directory_iterator dir_iter(sysfs_holders, ec);
4587 if (auto begin = std::filesystem::begin(dir_iter); begin != std::filesystem::end(dir_iter)) {
4588 LOG(ERROR) << "Child device-mapper device still mapped: " << begin->path();
4589 return false;
4590 }
4591
4592 // Check for mounted partitions.
4593 android::fs_mgr::Fstab fstab;
4594 android::fs_mgr::ReadFstabFromFile("/proc/mounts", &fstab);
4595 for (const auto& entry : fstab) {
4596 if (android::base::Basename(entry.blk_device) == block_name) {
4597 LOG(ERROR) << "Partition still mounted: " << entry.mount_point;
4598 return false;
4599 }
4600 }
4601
4602 // Check for detached mounted partitions.
4603 for (const auto& fs : std::filesystem::directory_iterator("/sys/fs", ec)) {
4604 std::string fs_type = android::base::Basename(fs.path().c_str());
4605 if (!(fs_type == "ext4" || fs_type == "f2fs")) {
4606 continue;
4607 }
4608
4609 std::string path = fs.path().c_str() + "/"s + block_name;
4610 if (access(path.c_str(), F_OK) == 0) {
4611 LOG(ERROR) << "Block device was lazily unmounted and is still in-use: " << full_path
4612 << "; possibly open file descriptor or attached loop device.";
4613 return false;
4614 }
4615 }
4616
4617 LOG(ERROR) << "Device-mapper device " << name << "(" << full_path << ")" << " still in use."
4618 << " Probably a file descriptor was leaked or held open, or a loop device is"
4619 << " attached.";
4620 return false;
4621 }
4622
ReadMergeFailureCode()4623 MergeFailureCode SnapshotManager::ReadMergeFailureCode() {
4624 auto lock = LockExclusive();
4625 if (!lock) return MergeFailureCode::AcquireLock;
4626
4627 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
4628 if (status.state() != UpdateState::MergeFailed) {
4629 return MergeFailureCode::Ok;
4630 }
4631 return status.merge_failure_code();
4632 }
4633
ReadSourceBuildFingerprint()4634 std::string SnapshotManager::ReadSourceBuildFingerprint() {
4635 auto lock = LockExclusive();
4636 if (!lock) return {};
4637
4638 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
4639 return status.source_build_fingerprint();
4640 }
4641
IsUserspaceSnapshotUpdateInProgress()4642 bool SnapshotManager::IsUserspaceSnapshotUpdateInProgress() {
4643 // We cannot grab /metadata/ota lock here as this
4644 // is in reboot path. See b/308900853
4645 //
4646 // Check if any of the partitions are mounted
4647 // off dm-user block device. If so, then we are certain
4648 // that OTA update in progress.
4649 auto current_suffix = device_->GetSlotSuffix();
4650 auto& dm = DeviceMapper::Instance();
4651 auto dm_block_devices = dm.FindDmPartitions();
4652 if (dm_block_devices.empty()) {
4653 LOG(ERROR) << "No dm-enabled block device is found.";
4654 return false;
4655 }
4656 for (auto& partition : dm_block_devices) {
4657 std::string partition_name = partition.first + current_suffix;
4658 DeviceMapper::TargetInfo snap_target;
4659 if (!GetSingleTarget(partition_name, TableQuery::Status, &snap_target)) {
4660 return false;
4661 }
4662 auto type = DeviceMapper::GetTargetType(snap_target.spec);
4663 if (type == "user") {
4664 return true;
4665 }
4666 }
4667 return false;
4668 }
4669
BootFromSnapshotsWithoutSlotSwitch()4670 bool SnapshotManager::BootFromSnapshotsWithoutSlotSwitch() {
4671 auto lock = LockExclusive();
4672 if (!lock) return false;
4673
4674 auto contents = device_->GetSlotSuffix();
4675 // This is the indicator which tells first-stage init
4676 // to boot from snapshots even though there was no slot-switch
4677 auto boot_file = GetBootSnapshotsWithoutSlotSwitchPath();
4678 if (!WriteStringToFileAtomic(contents, boot_file)) {
4679 PLOG(ERROR) << "write failed: " << boot_file;
4680 return false;
4681 }
4682
4683 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
4684 update_status.set_state(UpdateState::Initiated);
4685 update_status.set_userspace_snapshots(true);
4686 update_status.set_using_snapuserd(true);
4687 if (!WriteSnapshotUpdateStatus(lock.get(), update_status)) {
4688 return false;
4689 }
4690 return true;
4691 }
4692
PrepareDeviceToBootWithoutSnapshot()4693 bool SnapshotManager::PrepareDeviceToBootWithoutSnapshot() {
4694 auto lock = LockExclusive();
4695 if (!lock) return false;
4696
4697 android::base::RemoveFileIfExists(GetSnapshotBootIndicatorPath());
4698 android::base::RemoveFileIfExists(GetBootSnapshotsWithoutSlotSwitchPath());
4699
4700 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
4701 update_status.set_state(UpdateState::Cancelled);
4702 if (!WriteSnapshotUpdateStatus(lock.get(), update_status)) {
4703 return false;
4704 }
4705 return true;
4706 }
4707
SetReadAheadSize(const std::string & entry_block_device,off64_t size_kb)4708 void SnapshotManager::SetReadAheadSize(const std::string& entry_block_device, off64_t size_kb) {
4709 std::string block_device;
4710 if (!Realpath(entry_block_device, &block_device)) {
4711 PLOG(ERROR) << "Failed to realpath " << entry_block_device;
4712 return;
4713 }
4714
4715 static constexpr std::string_view kDevBlockPrefix("/dev/block/");
4716 if (!android::base::StartsWith(block_device, kDevBlockPrefix)) {
4717 LOG(ERROR) << block_device << " is not a block device";
4718 return;
4719 }
4720
4721 std::string block_name = block_device.substr(kDevBlockPrefix.length());
4722 std::string sys_partition =
4723 android::base::StringPrintf("/sys/class/block/%s/partition", block_name.c_str());
4724 struct stat info;
4725 if (lstat(sys_partition.c_str(), &info) == 0) {
4726 block_name += "/..";
4727 }
4728 std::string sys_ra = android::base::StringPrintf("/sys/class/block/%s/queue/read_ahead_kb",
4729 block_name.c_str());
4730 std::string size = std::to_string(size_kb);
4731 android::base::WriteStringToFile(size, sys_ra.c_str());
4732 }
4733
4734 } // namespace snapshot
4735 } // namespace android
4736