From d2ecdf180bf3dc9a7a526b73c0f0b93f724de304 Mon Sep 17 00:00:00 2001 From: Sarp Misoglu Date: Wed, 5 Mar 2025 19:14:05 +0000 Subject: Don't cancel the entire backup if agent disconnects and log if the agent pipe is broken Normally the bug fix and the extra logging should be in separate CLs. However as explained in the linked bug, there is a race condition between the method throwing the IOException returning and us getting a cancellation callback from ActivityManager. If we get the callback first, the session fails. The extra IPC from the logging in handling the exception means the callback gets a few additional milliseconds to win the race. So with the logging, the race condition is much more likely to happen and the logging should not be checked in without the bug fix. And if one gets rolled back, the other should as well. This is why I made them a single CL. Flag: EXEMPT bugfix Bug: 400669414 Test: manual // PFTBT is notoriously impossible to write unit tests for. Change-Id: Icbcb72a876f927a94675c9837b0206f1a631dff5 --- cmds/bmgr/src/com/android/commands/bmgr/Bmgr.java | 2 ++ core/java/android/app/backup/BackupManagerMonitor.java | 3 +++ .../com/android/server/backup/fullbackup/FullBackupEngine.java | 7 +++++++ .../server/backup/fullbackup/PerformFullTransportBackupTask.java | 8 ++++++++ .../server/backup/utils/BackupManagerMonitorDumpsysUtils.java | 2 ++ 5 files changed, 22 insertions(+) diff --git a/cmds/bmgr/src/com/android/commands/bmgr/Bmgr.java b/cmds/bmgr/src/com/android/commands/bmgr/Bmgr.java index 696bc82a9ffc..ae150aece432 100644 --- a/cmds/bmgr/src/com/android/commands/bmgr/Bmgr.java +++ b/cmds/bmgr/src/com/android/commands/bmgr/Bmgr.java @@ -1309,6 +1309,8 @@ public class Bmgr { return "AGENT_FAILURE_DURING_RESTORE"; case BackupManagerMonitor.LOG_EVENT_ID_FAILED_TO_READ_DATA_FROM_TRANSPORT: return "FAILED_TO_READ_DATA_FROM_TRANSPORT"; + case BackupManagerMonitor.LOG_EVENT_ID_FULL_BACKUP_AGENT_PIPE_BROKEN: + return "LOG_EVENT_ID_FULL_BACKUP_AGENT_PIPE_BROKEN"; default: return "UNKNOWN_ID"; } diff --git a/core/java/android/app/backup/BackupManagerMonitor.java b/core/java/android/app/backup/BackupManagerMonitor.java index e741bc2bf608..19c24cd8a14a 100644 --- a/core/java/android/app/backup/BackupManagerMonitor.java +++ b/core/java/android/app/backup/BackupManagerMonitor.java @@ -297,6 +297,9 @@ public class BackupManagerMonitor { @hide */ public static final int LOG_EVENT_ID_FAILED_TO_READ_DATA_FROM_TRANSPORT = 81; + /** The pipe between the BackupAgent and the framework was broken during full backup. @hide */ + public static final int LOG_EVENT_ID_FULL_BACKUP_AGENT_PIPE_BROKEN = 82; + /** * This method will be called each time something important happens on BackupManager. * diff --git a/services/backup/java/com/android/server/backup/fullbackup/FullBackupEngine.java b/services/backup/java/com/android/server/backup/fullbackup/FullBackupEngine.java index ebb1194c7c4a..b173f76e5f6f 100644 --- a/services/backup/java/com/android/server/backup/fullbackup/FullBackupEngine.java +++ b/services/backup/java/com/android/server/backup/fullbackup/FullBackupEngine.java @@ -25,6 +25,7 @@ import static com.android.server.backup.UserBackupManagerService.SHARED_BACKUP_A import android.annotation.UserIdInt; import android.app.ApplicationThreadConstants; import android.app.IBackupAgent; +import android.app.backup.BackupManagerMonitor; import android.app.backup.BackupTransport; import android.app.backup.FullBackupDataOutput; import android.content.pm.ApplicationInfo; @@ -268,6 +269,12 @@ public class FullBackupEngine { mBackupManagerMonitorEventSender.monitorAgentLoggingResults(mPkg, mAgent); } catch (IOException e) { Slog.e(TAG, "Error backing up " + mPkg.packageName + ": " + e.getMessage()); + // This is likely due to the app process dying. + mBackupManagerMonitorEventSender.monitorEvent( + BackupManagerMonitor.LOG_EVENT_ID_FULL_BACKUP_AGENT_PIPE_BROKEN, + mPkg, + BackupManagerMonitor.LOG_EVENT_CATEGORY_AGENT, + /* extras= */ null); result = BackupTransport.AGENT_ERROR; } finally { try { diff --git a/services/backup/java/com/android/server/backup/fullbackup/PerformFullTransportBackupTask.java b/services/backup/java/com/android/server/backup/fullbackup/PerformFullTransportBackupTask.java index f677c9dbf4d0..48d21c3a222f 100644 --- a/services/backup/java/com/android/server/backup/fullbackup/PerformFullTransportBackupTask.java +++ b/services/backup/java/com/android/server/backup/fullbackup/PerformFullTransportBackupTask.java @@ -294,6 +294,14 @@ public class PerformFullTransportBackupTask extends FullBackupTask implements Ba // SinglePackageBackupPreflight. if (cancellationReason == CancellationReason.TIMEOUT) { Slog.wtf(TAG, "This task cannot time out"); + return; + } + + // We don't cancel the entire operation if a single agent is disconnected unexpectedly. + // SinglePackageBackupRunner and SinglePackageBackupPreflight will receive the same + // callback and fail gracefully. The operation should then continue to the next package. + if (cancellationReason == CancellationReason.AGENT_DISCONNECTED) { + return; } if (mCancelled) { diff --git a/services/backup/java/com/android/server/backup/utils/BackupManagerMonitorDumpsysUtils.java b/services/backup/java/com/android/server/backup/utils/BackupManagerMonitorDumpsysUtils.java index fad59d23a6dc..855c72acd7ca 100644 --- a/services/backup/java/com/android/server/backup/utils/BackupManagerMonitorDumpsysUtils.java +++ b/services/backup/java/com/android/server/backup/utils/BackupManagerMonitorDumpsysUtils.java @@ -389,6 +389,8 @@ public class BackupManagerMonitorDumpsysUtils { "Agent failure during restore"; case BackupManagerMonitor.LOG_EVENT_ID_FAILED_TO_READ_DATA_FROM_TRANSPORT -> "Failed to read data from Transport"; + case BackupManagerMonitor.LOG_EVENT_ID_FULL_BACKUP_AGENT_PIPE_BROKEN -> + "LOG_EVENT_ID_FULL_BACKUP_AGENT_PIPE_BROKEN"; default -> "Unknown log event ID: " + code; }; return id; -- cgit v1.2.3-59-g8ed1b