From f305f4def072363087595a610043852127c399e2 Mon Sep 17 00:00:00 2001 From: Gavin Corkery Date: Wed, 27 Nov 2019 15:46:29 +0000 Subject: Add package failure flags to Package Watchdog This is a prerequisite for adding additional logging of the Watchdog-triggered rollback reason. Add flags which indicate the failure observed (native, crash, ANR, explicit health check). These will be used in the future by RollbackPackageHealthObserver to map the failure type to the (new) set of available logging metrics. Test: atest PackageWatchdogTest Bug: 138782888 Change-Id: I7e7c5e5399011e2761dada2b989a95c2013307e9 --- .../java/com/android/server/PackageWatchdog.java | 31 ++++- .../core/java/com/android/server/am/AppErrors.java | 6 +- .../rollback/RollbackPackageHealthObserver.java | 5 +- .../com/android/server/PackageWatchdogTest.java | 132 ++++++++++++++++----- 4 files changed, 132 insertions(+), 42 deletions(-) diff --git a/services/core/java/com/android/server/PackageWatchdog.java b/services/core/java/com/android/server/PackageWatchdog.java index 12debbff3e90..521b39305d9d 100644 --- a/services/core/java/com/android/server/PackageWatchdog.java +++ b/services/core/java/com/android/server/PackageWatchdog.java @@ -57,6 +57,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; @@ -81,6 +82,22 @@ public class PackageWatchdog { static final String PROPERTY_WATCHDOG_EXPLICIT_HEALTH_CHECK_ENABLED = "watchdog_explicit_health_check_enabled"; + public static final int FAILURE_REASON_UNKNOWN = 0; + public static final int FAILURE_REASON_NATIVE_CRASH = 1; + public static final int FAILURE_REASON_EXPLICIT_HEALTH_CHECK = 2; + public static final int FAILURE_REASON_APP_CRASH = 3; + public static final int FAILURE_REASON_APP_NOT_RESPONDING = 4; + + @IntDef(prefix = { "FAILURE_REASON_" }, value = { + FAILURE_REASON_UNKNOWN, + FAILURE_REASON_NATIVE_CRASH, + FAILURE_REASON_EXPLICIT_HEALTH_CHECK, + FAILURE_REASON_APP_CRASH, + FAILURE_REASON_APP_NOT_RESPONDING + }) + @Retention(RetentionPolicy.SOURCE) + public @interface FailureReasons {} + // Duration to count package failures before it resets to 0 @VisibleForTesting static final int DEFAULT_TRIGGER_FAILURE_DURATION_MS = @@ -305,14 +322,15 @@ public class PackageWatchdog { } /** - * Called when a process fails either due to a crash or ANR. + * Called when a process fails due to a crash, ANR or explicit health check. * *

For each package contained in the process, one registered observer with the least user * impact will be notified for mitigation. * *

This method could be called frequently if there is a severe problem on the device. */ - public void onPackageFailure(List packages) { + public void onPackageFailure(List packages, + @FailureReasons int failureReason) { mLongTaskHandler.post(() -> { synchronized (mLock) { if (mAllObservers.isEmpty()) { @@ -343,7 +361,7 @@ public class PackageWatchdog { // Execute action with least user impact if (currentObserverToNotify != null) { - currentObserverToNotify.execute(versionedPackage); + currentObserverToNotify.execute(versionedPackage, failureReason); } } } @@ -414,7 +432,7 @@ public class PackageWatchdog { * * @return {@code true} if action was executed successfully, {@code false} otherwise */ - boolean execute(VersionedPackage versionedPackage); + boolean execute(VersionedPackage versionedPackage, @FailureReasons int failureReason); // TODO(b/120598832): Ensure uniqueness? /** @@ -659,7 +677,8 @@ public class PackageWatchdog { while (it.hasNext()) { VersionedPackage versionedPkg = it.next().mPackage; Slog.i(TAG, "Explicit health check failed for package " + versionedPkg); - registeredObserver.execute(versionedPkg); + registeredObserver.execute(versionedPkg, + PackageWatchdog.FAILURE_REASON_EXPLICIT_HEALTH_CHECK); } } } @@ -770,7 +789,7 @@ public class PackageWatchdog { final List pkgList = Collections.singletonList(pkg); final long failureCount = getTriggerFailureCount(); for (int i = 0; i < failureCount; i++) { - onPackageFailure(pkgList); + onPackageFailure(pkgList, FAILURE_REASON_EXPLICIT_HEALTH_CHECK); } }); } diff --git a/services/core/java/com/android/server/am/AppErrors.java b/services/core/java/com/android/server/am/AppErrors.java index 51e1718d4c98..83a7341923fa 100644 --- a/services/core/java/com/android/server/am/AppErrors.java +++ b/services/core/java/com/android/server/am/AppErrors.java @@ -446,7 +446,8 @@ class AppErrors { RescueParty.noteAppCrash(mContext, r.uid); } - mPackageWatchdog.onPackageFailure(r.getPackageListWithVersionCode()); + mPackageWatchdog.onPackageFailure(r.getPackageListWithVersionCode(), + PackageWatchdog.FAILURE_REASON_APP_CRASH); } final int relaunchReason = r != null @@ -900,7 +901,8 @@ class AppErrors { } // Notify PackageWatchdog without the lock held if (packageList != null) { - mPackageWatchdog.onPackageFailure(packageList); + mPackageWatchdog.onPackageFailure(packageList, + PackageWatchdog.FAILURE_REASON_APP_NOT_RESPONDING); } } diff --git a/services/core/java/com/android/server/rollback/RollbackPackageHealthObserver.java b/services/core/java/com/android/server/rollback/RollbackPackageHealthObserver.java index 83891f60d4f7..bd29d6a5450a 100644 --- a/services/core/java/com/android/server/rollback/RollbackPackageHealthObserver.java +++ b/services/core/java/com/android/server/rollback/RollbackPackageHealthObserver.java @@ -41,6 +41,7 @@ import android.util.StatsLog; import com.android.internal.R; import com.android.internal.annotations.GuardedBy; import com.android.server.PackageWatchdog; +import com.android.server.PackageWatchdog.FailureReasons; import com.android.server.PackageWatchdog.PackageHealthObserver; import com.android.server.PackageWatchdog.PackageHealthObserverImpact; @@ -106,7 +107,7 @@ public final class RollbackPackageHealthObserver implements PackageHealthObserve } @Override - public boolean execute(VersionedPackage failedPackage) { + public boolean execute(VersionedPackage failedPackage, @FailureReasons int rollbackReason) { RollbackManager rollbackManager = mContext.getSystemService(RollbackManager.class); VersionedPackage moduleMetadataPackage = getModuleMetadataPackage(); RollbackInfo rollback = getAvailableRollback(rollbackManager, failedPackage); @@ -371,7 +372,7 @@ public final class RollbackPackageHealthObserver implements PackageHealthObserve mNumberOfNativeCrashPollsRemaining--; // Check if native watchdog reported a crash if ("1".equals(SystemProperties.get("sys.init.updatable_crashing"))) { - execute(getModuleMetadataPackage()); + execute(getModuleMetadataPackage(), PackageWatchdog.FAILURE_REASON_NATIVE_CRASH); // we stop polling after an attempt to execute rollback, regardless of whether the // attempt succeeds or not } else { diff --git a/tests/PackageWatchdog/src/com/android/server/PackageWatchdogTest.java b/tests/PackageWatchdog/src/com/android/server/PackageWatchdogTest.java index e30878157a26..ef8facec9752 100644 --- a/tests/PackageWatchdog/src/com/android/server/PackageWatchdogTest.java +++ b/tests/PackageWatchdog/src/com/android/server/PackageWatchdogTest.java @@ -118,7 +118,8 @@ public class PackageWatchdogTest { watchdog.startObservingHealth(observer, Arrays.asList(APP_A), SHORT_DURATION); raiseFatalFailureAndDispatch(watchdog, - Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); // The failed packages should be the same as the registered ones to ensure registration is // done successfully @@ -135,7 +136,8 @@ public class PackageWatchdogTest { watchdog.startObservingHealth(observer2, Arrays.asList(APP_A, APP_B), SHORT_DURATION); raiseFatalFailureAndDispatch(watchdog, Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE), - new VersionedPackage(APP_B, VERSION_CODE))); + new VersionedPackage(APP_B, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); // The failed packages should be the same as the registered ones to ensure registration is // done successfully @@ -151,7 +153,8 @@ public class PackageWatchdogTest { watchdog.startObservingHealth(observer, Arrays.asList(APP_A), SHORT_DURATION); watchdog.unregisterHealthObserver(observer); raiseFatalFailureAndDispatch(watchdog, - Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); // We should have no failed packages to ensure unregistration is done successfully assertThat(observer.mHealthCheckFailedPackages).isEmpty(); @@ -167,7 +170,8 @@ public class PackageWatchdogTest { watchdog.startObservingHealth(observer2, Arrays.asList(APP_A), SHORT_DURATION); watchdog.unregisterHealthObserver(observer2); raiseFatalFailureAndDispatch(watchdog, - Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); // observer1 should receive failed packages as intended. assertThat(observer1.mHealthCheckFailedPackages).containsExactly(APP_A); @@ -183,7 +187,8 @@ public class PackageWatchdogTest { watchdog.startObservingHealth(observer, Arrays.asList(APP_A), SHORT_DURATION); moveTimeForwardAndDispatch(SHORT_DURATION); raiseFatalFailureAndDispatch(watchdog, - Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); // We should have no failed packages for the fatal failure is raised after expiration assertThat(observer.mHealthCheckFailedPackages).isEmpty(); @@ -199,7 +204,8 @@ public class PackageWatchdogTest { watchdog.startObservingHealth(observer2, Arrays.asList(APP_A), LONG_DURATION); moveTimeForwardAndDispatch(SHORT_DURATION); raiseFatalFailureAndDispatch(watchdog, - Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); // We should have no failed packages for the fatal failure is raised after expiration assertThat(observer1.mHealthCheckFailedPackages).isEmpty(); @@ -226,7 +232,8 @@ public class PackageWatchdogTest { moveTimeForwardAndDispatch((SHORT_DURATION / 2) + 1); raiseFatalFailureAndDispatch(watchdog, - Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); // Verify that we receive failed packages as expected for APP_A not expired assertThat(observer.mHealthCheckFailedPackages).containsExactly(APP_A); @@ -252,7 +259,8 @@ public class PackageWatchdogTest { watchdog2.registerHealthObserver(observer2); raiseFatalFailureAndDispatch(watchdog2, Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE), - new VersionedPackage(APP_B, VERSION_CODE))); + new VersionedPackage(APP_B, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); // We should receive failed packages as expected to ensure observers are persisted and // resumed correctly @@ -274,7 +282,8 @@ public class PackageWatchdogTest { // Then fail APP_A below the threshold for (int i = 0; i < watchdog.getTriggerFailureCount() - 1; i++) { - watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); } // Run handler so package failures are dispatched to observers @@ -301,7 +310,8 @@ public class PackageWatchdogTest { // Then fail APP_C (not observed) above the threshold raiseFatalFailureAndDispatch(watchdog, - Arrays.asList(new VersionedPackage(APP_C, VERSION_CODE))); + Arrays.asList(new VersionedPackage(APP_C, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); // Verify that observers are not notified assertThat(observer1.mHealthCheckFailedPackages).isEmpty(); @@ -331,7 +341,8 @@ public class PackageWatchdogTest { // Then fail APP_A (different version) above the threshold raiseFatalFailureAndDispatch(watchdog, - Arrays.asList(new VersionedPackage(APP_A, differentVersionCode))); + Arrays.asList(new VersionedPackage(APP_A, differentVersionCode)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); // Verify that observers are not notified assertThat(observer.mHealthCheckFailedPackages).isEmpty(); @@ -368,7 +379,8 @@ public class PackageWatchdogTest { Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE), new VersionedPackage(APP_B, VERSION_CODE), new VersionedPackage(APP_C, VERSION_CODE), - new VersionedPackage(APP_D, VERSION_CODE))); + new VersionedPackage(APP_D, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); // Verify least impact observers are notifed of package failures List observerNonePackages = observerNone.mMitigatedPackages; @@ -411,7 +423,8 @@ public class PackageWatchdogTest { // Then fail APP_A above the threshold raiseFatalFailureAndDispatch(watchdog, - Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); // Verify only observerFirst is notifed assertThat(observerFirst.mMitigatedPackages).containsExactly(APP_A); @@ -424,7 +437,8 @@ public class PackageWatchdogTest { // Then fail APP_A again above the threshold raiseFatalFailureAndDispatch(watchdog, - Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); // Verify only observerSecond is notifed cos it has least impact assertThat(observerSecond.mMitigatedPackages).containsExactly(APP_A); @@ -437,7 +451,8 @@ public class PackageWatchdogTest { // Then fail APP_A again above the threshold raiseFatalFailureAndDispatch(watchdog, - Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); // Verify only observerFirst is notifed cos it has the only action assertThat(observerFirst.mMitigatedPackages).containsExactly(APP_A); @@ -450,7 +465,8 @@ public class PackageWatchdogTest { // Then fail APP_A again above the threshold raiseFatalFailureAndDispatch(watchdog, - Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); // Verify no observer is notified cos no actions left assertThat(observerFirst.mMitigatedPackages).isEmpty(); @@ -474,7 +490,8 @@ public class PackageWatchdogTest { // Then fail APP_A above the threshold raiseFatalFailureAndDispatch(watchdog, - Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); // Verify only one observer is notifed assertThat(observer1.mMitigatedPackages).containsExactly(APP_A); @@ -746,13 +763,15 @@ public class PackageWatchdogTest { watchdog.startObservingHealth(observer, Arrays.asList(APP_A), SHORT_DURATION); // Fail APP_A below the threshold which should not trigger package failures for (int i = 0; i < PackageWatchdog.DEFAULT_TRIGGER_FAILURE_COUNT - 1; i++) { - watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); } mTestLooper.dispatchAll(); assertThat(observer.mHealthCheckFailedPackages).isEmpty(); // One more to trigger the package failure - watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); mTestLooper.dispatchAll(); assertThat(observer.mHealthCheckFailedPackages).containsExactly(APP_A); } @@ -773,20 +792,24 @@ public class PackageWatchdogTest { TestObserver observer = new TestObserver(OBSERVER_NAME_1); watchdog.startObservingHealth(observer, Arrays.asList(APP_A, APP_B), Long.MAX_VALUE); - watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); mTestLooper.dispatchAll(); moveTimeForwardAndDispatch(PackageWatchdog.DEFAULT_TRIGGER_FAILURE_DURATION_MS + 1); - watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); mTestLooper.dispatchAll(); // We shouldn't receive APP_A since the interval of 2 failures is greater than // DEFAULT_TRIGGER_FAILURE_DURATION_MS. assertThat(observer.mHealthCheckFailedPackages).isEmpty(); - watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_B, VERSION_CODE))); + watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_B, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); mTestLooper.dispatchAll(); moveTimeForwardAndDispatch(PackageWatchdog.DEFAULT_TRIGGER_FAILURE_DURATION_MS - 1); - watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_B, VERSION_CODE))); + watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_B, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); mTestLooper.dispatchAll(); // We should receive APP_B since the interval of 2 failures is less than @@ -809,7 +832,8 @@ public class PackageWatchdogTest { // small timeouts. moveTimeForwardAndDispatch(PackageWatchdog.DEFAULT_OBSERVING_DURATION_MS - 100); raiseFatalFailureAndDispatch(watchdog, - Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); // We should receive APP_A since the observer hasn't expired assertThat(observer.mHealthCheckFailedPackages).containsExactly(APP_A); @@ -827,7 +851,8 @@ public class PackageWatchdogTest { watchdog.startObservingHealth(observer, Arrays.asList(APP_A), -1); moveTimeForwardAndDispatch(PackageWatchdog.DEFAULT_OBSERVING_DURATION_MS + 1); raiseFatalFailureAndDispatch(watchdog, - Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); // We should receive nothing since the observer has expired assertThat(observer.mHealthCheckFailedPackages).isEmpty(); @@ -850,22 +875,59 @@ public class PackageWatchdogTest { watchdog.startObservingHealth(observer, Arrays.asList(APP_A), Long.MAX_VALUE); // Raise 2 failures at t=0 and t=900 respectively - watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); mTestLooper.dispatchAll(); moveTimeForwardAndDispatch(900); - watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); mTestLooper.dispatchAll(); // Raise 2 failures at t=1100 moveTimeForwardAndDispatch(200); - watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); - watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE))); + watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); + watchdog.onPackageFailure(Arrays.asList(new VersionedPackage(APP_A, VERSION_CODE)), + PackageWatchdog.FAILURE_REASON_UNKNOWN); mTestLooper.dispatchAll(); // We should receive APP_A since there are 3 failures within 1000ms window assertThat(observer.mHealthCheckFailedPackages).containsExactly(APP_A); } + /** Test that observers execute correctly for different failure reasons */ + @Test + public void testFailureReasons() { + PackageWatchdog watchdog = createWatchdog(); + TestObserver observer1 = new TestObserver(OBSERVER_NAME_1); + TestObserver observer2 = new TestObserver(OBSERVER_NAME_2); + TestObserver observer3 = new TestObserver(OBSERVER_NAME_3); + TestObserver observer4 = new TestObserver(OBSERVER_NAME_4); + + watchdog.startObservingHealth(observer1, Arrays.asList(APP_A), SHORT_DURATION); + watchdog.startObservingHealth(observer2, Arrays.asList(APP_B), SHORT_DURATION); + watchdog.startObservingHealth(observer3, Arrays.asList(APP_C), SHORT_DURATION); + watchdog.startObservingHealth(observer4, Arrays.asList(APP_D), SHORT_DURATION); + + raiseFatalFailureAndDispatch(watchdog, Arrays.asList(new VersionedPackage(APP_A, + VERSION_CODE)), PackageWatchdog.FAILURE_REASON_NATIVE_CRASH); + raiseFatalFailureAndDispatch(watchdog, Arrays.asList(new VersionedPackage(APP_B, + VERSION_CODE)), PackageWatchdog.FAILURE_REASON_EXPLICIT_HEALTH_CHECK); + raiseFatalFailureAndDispatch(watchdog, Arrays.asList(new VersionedPackage(APP_C, + VERSION_CODE)), PackageWatchdog.FAILURE_REASON_APP_CRASH); + raiseFatalFailureAndDispatch(watchdog, Arrays.asList(new VersionedPackage(APP_D, + VERSION_CODE)), PackageWatchdog.FAILURE_REASON_APP_NOT_RESPONDING); + + assertThat(observer1.getLastFailureReason()).isEqualTo( + PackageWatchdog.FAILURE_REASON_NATIVE_CRASH); + assertThat(observer2.getLastFailureReason()).isEqualTo( + PackageWatchdog.FAILURE_REASON_EXPLICIT_HEALTH_CHECK); + assertThat(observer3.getLastFailureReason()).isEqualTo( + PackageWatchdog.FAILURE_REASON_APP_CRASH); + assertThat(observer4.getLastFailureReason()).isEqualTo( + PackageWatchdog.FAILURE_REASON_APP_NOT_RESPONDING); + } + private void adoptShellPermissions(String... permissions) { InstrumentationRegistry .getInstrumentation() @@ -900,9 +962,9 @@ public class PackageWatchdogTest { /** Trigger package failures above the threshold. */ private void raiseFatalFailureAndDispatch(PackageWatchdog watchdog, - List packages) { + List packages, int failureReason) { for (int i = 0; i < watchdog.getTriggerFailureCount(); i++) { - watchdog.onPackageFailure(packages); + watchdog.onPackageFailure(packages, failureReason); } mTestLooper.dispatchAll(); } @@ -936,6 +998,7 @@ public class PackageWatchdogTest { private static class TestObserver implements PackageHealthObserver { private final String mName; private int mImpact; + private int mLastFailureReason; final List mHealthCheckFailedPackages = new ArrayList<>(); final List mMitigatedPackages = new ArrayList<>(); @@ -954,14 +1017,19 @@ public class PackageWatchdogTest { return mImpact; } - public boolean execute(VersionedPackage versionedPackage) { + public boolean execute(VersionedPackage versionedPackage, int failureReason) { mMitigatedPackages.add(versionedPackage.getPackageName()); + mLastFailureReason = failureReason; return true; } public String getName() { return mName; } + + public int getLastFailureReason() { + return mLastFailureReason; + } } private static class TestController extends ExplicitHealthCheckController { -- cgit v1.2.3-59-g8ed1b From dd1dabaef7dfae54b20f225ee407f182e86411ac Mon Sep 17 00:00:00 2001 From: Gavin Corkery Date: Wed, 27 Nov 2019 19:11:13 +0000 Subject: Log watchdog-initiated rollback reason To help with monitoring Mainline releases, log the reason for a watchdog-initiated rollback. This may be due to native crashes, app crashes, ANRs or explicit health check failures. Add a mapping from PackageWatchdog failure reason to the new metrics. Bug: 138782888 Test: atest PackageWatchdogTest Test: atest StatsdHostTestCases Change-Id: Ia3e73d955508297004591eac762555665c557b8a --- apex/statsd/aidl/android/os/IStatsManager.aidl | 2 +- cmds/statsd/src/StatsService.cpp | 8 +++- cmds/statsd/src/StatsService.h | 4 +- cmds/statsd/src/atoms.proto | 13 ++++++ core/java/android/util/StatsLog.java | 6 ++- .../rollback/RollbackPackageHealthObserver.java | 52 ++++++++++++++++++---- 6 files changed, 70 insertions(+), 15 deletions(-) diff --git a/apex/statsd/aidl/android/os/IStatsManager.aidl b/apex/statsd/aidl/android/os/IStatsManager.aidl index 5ebb9f2e4e90..cc62f07a3750 100644 --- a/apex/statsd/aidl/android/os/IStatsManager.aidl +++ b/apex/statsd/aidl/android/os/IStatsManager.aidl @@ -240,7 +240,7 @@ interface IStatsManager { * Logs an event for watchdog rollbacks. */ oneway void sendWatchdogRollbackOccurredAtom(in int rollbackType, in String packageName, - in long packageVersionCode); + in long packageVersionCode, in int rollbackReason, in String failingPackageName); /** * Returns the most recently registered experiment IDs. diff --git a/cmds/statsd/src/StatsService.cpp b/cmds/statsd/src/StatsService.cpp index d10a661c6d83..3c5ad4231133 100644 --- a/cmds/statsd/src/StatsService.cpp +++ b/cmds/statsd/src/StatsService.cpp @@ -1417,7 +1417,10 @@ Status StatsService::sendBinaryPushStateChangedAtom(const android::String16& tra Status StatsService::sendWatchdogRollbackOccurredAtom(const int32_t rollbackTypeIn, const android::String16& packageNameIn, - const int64_t packageVersionCodeIn) { + const int64_t packageVersionCodeIn, + const int32_t rollbackReasonIn, + const android::String16& + failingPackageNameIn) { // Note: We skip the usage stats op check here since we do not have a package name. // This is ok since we are overloading the usage_stats permission. // This method only sends data, it does not receive it. @@ -1439,7 +1442,8 @@ Status StatsService::sendWatchdogRollbackOccurredAtom(const int32_t rollbackType } android::util::stats_write(android::util::WATCHDOG_ROLLBACK_OCCURRED, - rollbackTypeIn, String8(packageNameIn).string(), packageVersionCodeIn); + rollbackTypeIn, String8(packageNameIn).string(), packageVersionCodeIn, + rollbackReasonIn, String8(failingPackageNameIn).string()); // Fast return to save disk read. if (rollbackTypeIn != android::util::WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_TYPE__ROLLBACK_SUCCESS diff --git a/cmds/statsd/src/StatsService.h b/cmds/statsd/src/StatsService.h index 8c98e7b96936..50b1014f4e8a 100644 --- a/cmds/statsd/src/StatsService.h +++ b/cmds/statsd/src/StatsService.h @@ -214,7 +214,9 @@ public: virtual Status sendWatchdogRollbackOccurredAtom( const int32_t rollbackTypeIn, const android::String16& packageNameIn, - const int64_t packageVersionCodeIn) override; + const int64_t packageVersionCodeIn, + const int32_t rollbackReasonIn, + const android::String16& failingPackageNameIn) override; /** * Binder call to get registered experiment IDs. diff --git a/cmds/statsd/src/atoms.proto b/cmds/statsd/src/atoms.proto index 2efb78943812..2d7bf593fda2 100644 --- a/cmds/statsd/src/atoms.proto +++ b/cmds/statsd/src/atoms.proto @@ -1742,6 +1742,19 @@ message WatchdogRollbackOccurred { optional string package_name = 2; optional int32 package_version_code = 3; + + enum RollbackReasonType { + REASON_UNKNOWN = 0; + REASON_NATIVE_CRASH = 1; + REASON_EXPLICIT_HEALTH_CHECK = 2; + REASON_APP_CRASH = 3; + REASON_APP_NOT_RESPONDING = 4; + } + optional RollbackReasonType rollback_reason = 4; + + // Set by RollbackPackageHealthObserver to be the package that is failing when a rollback + // is initiated. Empty if the package is unknown. + optional string failing_package_name = 5; } /** diff --git a/core/java/android/util/StatsLog.java b/core/java/android/util/StatsLog.java index 64e15cfb7948..8cb5b05df685 100644 --- a/core/java/android/util/StatsLog.java +++ b/core/java/android/util/StatsLog.java @@ -179,6 +179,8 @@ public final class StatsLog extends StatsLogInternal { * @param rollbackType state of the rollback. * @param packageName package name being rolled back. * @param packageVersionCode version of the package being rolled back. + * @param rollbackReason reason the package is being rolled back. + * @param failingPackageName the package name causing the failure. * * @return True if the log request was sent to statsd. * @@ -186,7 +188,7 @@ public final class StatsLog extends StatsLogInternal { */ @RequiresPermission(allOf = {DUMP, PACKAGE_USAGE_STATS}) public static boolean logWatchdogRollbackOccurred(int rollbackType, String packageName, - long packageVersionCode) { + long packageVersionCode, int rollbackReason, String failingPackageName) { synchronized (sLogLock) { try { IStatsManager service = getIStatsManagerLocked(); @@ -198,7 +200,7 @@ public final class StatsLog extends StatsLogInternal { } service.sendWatchdogRollbackOccurredAtom(rollbackType, packageName, - packageVersionCode); + packageVersionCode, rollbackReason, failingPackageName); return true; } catch (RemoteException e) { sService = null; diff --git a/services/core/java/com/android/server/rollback/RollbackPackageHealthObserver.java b/services/core/java/com/android/server/rollback/RollbackPackageHealthObserver.java index bd29d6a5450a..a62616623cb5 100644 --- a/services/core/java/com/android/server/rollback/RollbackPackageHealthObserver.java +++ b/services/core/java/com/android/server/rollback/RollbackPackageHealthObserver.java @@ -16,6 +16,13 @@ package com.android.server.rollback; +import static android.util.StatsLog.WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_REASON__REASON_APP_CRASH; +import static android.util.StatsLog.WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_REASON__REASON_APP_NOT_RESPONDING; +import static android.util.StatsLog.WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_REASON__REASON_EXPLICIT_HEALTH_CHECK; +import static android.util.StatsLog.WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_REASON__REASON_NATIVE_CRASH; +import static android.util.StatsLog.WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_REASON__REASON_UNKNOWN; + +import android.annotation.NonNull; import android.annotation.Nullable; import android.content.BroadcastReceiver; import android.content.Context; @@ -111,6 +118,7 @@ public final class RollbackPackageHealthObserver implements PackageHealthObserve RollbackManager rollbackManager = mContext.getSystemService(RollbackManager.class); VersionedPackage moduleMetadataPackage = getModuleMetadataPackage(); RollbackInfo rollback = getAvailableRollback(rollbackManager, failedPackage); + int reasonToLog = mapFailureReasonToMetric(rollbackReason); if (rollback == null) { Slog.w(TAG, "Expected rollback but no valid rollback found for package: [ " @@ -120,7 +128,8 @@ public final class RollbackPackageHealthObserver implements PackageHealthObserve } logEvent(moduleMetadataPackage, - StatsLog.WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_TYPE__ROLLBACK_INITIATE); + StatsLog.WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_TYPE__ROLLBACK_INITIATE, + reasonToLog, failedPackage.getPackageName()); LocalIntentReceiver rollbackReceiver = new LocalIntentReceiver((Intent result) -> { int status = result.getIntExtra(RollbackManager.EXTRA_STATUS, RollbackManager.STATUS_FAILURE); @@ -137,11 +146,13 @@ public final class RollbackPackageHealthObserver implements PackageHealthObserve moduleMetadataPackage); } else { logEvent(moduleMetadataPackage, - StatsLog.WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_TYPE__ROLLBACK_SUCCESS); + StatsLog.WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_TYPE__ROLLBACK_SUCCESS, + reasonToLog, failedPackage.getPackageName()); } } else { logEvent(moduleMetadataPackage, - StatsLog.WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_TYPE__ROLLBACK_FAILURE); + StatsLog.WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_TYPE__ROLLBACK_FAILURE, + reasonToLog, failedPackage.getPackageName()); } }); @@ -220,12 +231,14 @@ public final class RollbackPackageHealthObserver implements PackageHealthObserve } if (sessionInfo.isStagedSessionApplied()) { logEvent(oldModuleMetadataPackage, - StatsLog.WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_TYPE__ROLLBACK_SUCCESS); + StatsLog.WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_TYPE__ROLLBACK_SUCCESS, + WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_REASON__REASON_UNKNOWN, ""); } else if (sessionInfo.isStagedSessionReady()) { // TODO: What do for staged session ready but not applied } else { logEvent(oldModuleMetadataPackage, - StatsLog.WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_TYPE__ROLLBACK_FAILURE); + StatsLog.WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_TYPE__ROLLBACK_FAILURE, + WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_REASON__REASON_UNKNOWN, ""); } } @@ -304,12 +317,16 @@ public final class RollbackPackageHealthObserver implements PackageHealthObserve saveLastStagedRollbackId(rollbackId); logEvent(moduleMetadataPackage, StatsLog - .WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_TYPE__ROLLBACK_BOOT_TRIGGERED); + .WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_TYPE__ROLLBACK_BOOT_TRIGGERED, + WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_REASON__REASON_UNKNOWN, + ""); mContext.getSystemService(PowerManager.class).reboot("Rollback staged install"); } else if (sessionInfo.isStagedSessionFailed() && markStagedSessionHandled(rollbackId)) { logEvent(moduleMetadataPackage, - StatsLog.WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_TYPE__ROLLBACK_FAILURE); + StatsLog.WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_TYPE__ROLLBACK_FAILURE, + WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_REASON__REASON_UNKNOWN, + ""); mContext.unregisterReceiver(listener); } } @@ -356,11 +373,12 @@ public final class RollbackPackageHealthObserver implements PackageHealthObserve return rollbackId; } - private static void logEvent(@Nullable VersionedPackage moduleMetadataPackage, int type) { + private static void logEvent(@Nullable VersionedPackage moduleMetadataPackage, int type, + int rollbackReason, @NonNull String failingPackageName) { Slog.i(TAG, "Watchdog event occurred of type: " + type); if (moduleMetadataPackage != null) { StatsLog.logWatchdogRollbackOccurred(type, moduleMetadataPackage.getPackageName(), - moduleMetadataPackage.getVersionCode()); + moduleMetadataPackage.getVersionCode(), rollbackReason, failingPackageName); } } @@ -393,4 +411,20 @@ public final class RollbackPackageHealthObserver implements PackageHealthObserve + "and mitigate native crashes"); mHandler.post(()->checkAndMitigateNativeCrashes()); } + + private int mapFailureReasonToMetric(@FailureReasons int failureReason) { + switch (failureReason) { + case PackageWatchdog.FAILURE_REASON_NATIVE_CRASH: + return WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_REASON__REASON_NATIVE_CRASH; + case PackageWatchdog.FAILURE_REASON_EXPLICIT_HEALTH_CHECK: + return WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_REASON__REASON_EXPLICIT_HEALTH_CHECK; + case PackageWatchdog.FAILURE_REASON_APP_CRASH: + return WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_REASON__REASON_APP_CRASH; + case PackageWatchdog.FAILURE_REASON_APP_NOT_RESPONDING: + return WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_REASON__REASON_APP_NOT_RESPONDING; + default: + return WATCHDOG_ROLLBACK_OCCURRED__ROLLBACK_REASON__REASON_UNKNOWN; + } + } + } -- cgit v1.2.3-59-g8ed1b