diff options
4 files changed, 601 insertions, 6 deletions
diff --git a/services/core/java/com/android/server/PackageWatchdog.java b/services/core/java/com/android/server/PackageWatchdog.java new file mode 100644 index 000000000000..06dc9188d1ee --- /dev/null +++ b/services/core/java/com/android/server/PackageWatchdog.java @@ -0,0 +1,572 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.server; + +import android.content.Context; +import android.os.Environment; +import android.os.Handler; +import android.os.HandlerThread; +import android.os.Looper; +import android.os.Message; +import android.os.Process; +import android.os.SystemClock; +import android.text.TextUtils; +import android.util.ArrayMap; +import android.util.AtomicFile; +import android.util.Log; +import android.util.Slog; +import android.util.Xml; + +import com.android.internal.annotations.GuardedBy; +import com.android.internal.util.FastXmlSerializer; +import com.android.internal.util.XmlUtils; + +import libcore.io.IoUtils; + +import org.xmlpull.v1.XmlPullParser; +import org.xmlpull.v1.XmlPullParserException; +import org.xmlpull.v1.XmlSerializer; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import 
java.util.concurrent.TimeUnit; + + /** + * Monitors the health of packages on the system and notifies interested observers when packages + * fail. All registered observers will be notified until an observer takes a mitigation action. + */ + public class PackageWatchdog { + private static final String TAG = "PackageWatchdog"; + // Duration to count package failures before it resets to 0 + private static final int TRIGGER_DURATION_MS = 60000; + // Number of package failures within the duration above before we notify observers + private static final int TRIGGER_FAILURE_COUNT = 5; + // Version written to the root tag of the persisted XML + private static final int DB_VERSION = 1; + // Tag and attribute names of the persisted XML + private static final String TAG_PACKAGE_WATCHDOG = "package-watchdog"; + private static final String TAG_PACKAGE = "package"; + private static final String TAG_OBSERVER = "observer"; + private static final String ATTR_VERSION = "version"; + private static final String ATTR_NAME = "name"; + private static final String ATTR_DURATION = "duration"; + // Message handled by IoHandler, saves mAllObservers to file + private static final int MESSAGE_SAVE_FILE = 1; + + // Singleton instance, created on the first call to getInstance + private static PackageWatchdog sPackageWatchdog; + + // Guards mRegisteredObservers and mAllObservers + private final Object mLock = new Object(); + // System server context + private final Context mContext; + // Handler to run package cleanup runnables + private final Handler mTimerHandler; + // Background priority thread backing mIoHandler + private final HandlerThread mIoThread = new HandlerThread("package_watchdog_io", + Process.THREAD_PRIORITY_BACKGROUND); + // Handler to save state to file on mIoThread + private final Handler mIoHandler; + // Maps observer names to package observers that have been registered since the last boot + @GuardedBy("mLock") + final Map<String, PackageHealthObserver> mRegisteredObservers = new ArrayMap<>(); + // Maps observer names to internal observers (registered or not), loaded from file or + // added by startObservingHealth + @GuardedBy("mLock") + final Map<String, ObserverInternal> mAllObservers = new ArrayMap<>(); + // /data/system/ directory + private final File mSystemDir = new File(Environment.getDataDirectory(), "system"); + // File containing the XML data of monitored packages + private final
AtomicFile mPolicyFile = + new AtomicFile(new File(mSystemDir, "package-watchdog.xml")); + // Runnable to prune monitored packages that have expired + private final Runnable mPackageCleanup; + // SystemClock#uptimeMillis at which mPackageCleanup was last scheduled. + // 0 if mPackageCleanup not running. + private long mUptimeAtLastRescheduleMs; + // Duration in milliseconds a package cleanup was last scheduled for. + // 0 if mPackageCleanup not running. + private long mDurationAtLastReschedule; + + /** + * Starts the IO thread and loads the observers persisted to file. The cleanup timer is bound + * to the looper of the thread calling this constructor. + */ + private PackageWatchdog(Context context) { + mContext = context; + mTimerHandler = new Handler(Looper.myLooper()); + mIoThread.start(); + mIoHandler = new IoHandler(mIoThread.getLooper()); + mPackageCleanup = this::rescheduleCleanup; + loadFromFile(); + } + + /** + * Creates or gets singleton instance of PackageWatchdog. + * + * <p>{@code context} is only used by the call that creates the instance. + */ + public static synchronized PackageWatchdog getInstance(Context context) { + if (sPackageWatchdog == null) { + sPackageWatchdog = new PackageWatchdog(context); + } + return sPackageWatchdog; + } + + /** + * Registers {@code observer} to listen for package failures. + * + * <p>Observers are expected to call this on boot. It does not specify any packages but + * it will resume observing any packages requested from a previous boot. + */ + public void registerHealthObserver(PackageHealthObserver observer) { + synchronized (mLock) { + mRegisteredObservers.put(observer.getName(), observer); + if (mDurationAtLastReschedule == 0) { + // Nothing running, schedule + rescheduleCleanup(); + } + } + } + + /** + * Starts observing the health of the {@code packageNames} for {@code observer} and notifies + * {@code observer} of any package failures within the monitoring duration. + * + * <p>If {@code observer} is already monitoring a package in {@code packageNames}, + * the monitoring window of that package will be reset to {@code hours}.
+ * + * <p>Time already elapsed on the running cleanup timer is charged to the packages monitored + * before this call, so it is not deducted from the windows started here. + * + * @param observer observer to notify of failures, it is also registered by this call + * @param packageNames names of the packages to monitor + * @param hours monitoring window in hours + * @throws IllegalArgumentException if {@code packageNames} is empty + * or {@code hours} is less than 1 + */ + public void startObservingHealth(PackageHealthObserver observer, List<String> packageNames, + int hours) { + if (packageNames.isEmpty() || hours < 1) { + throw new IllegalArgumentException("Observation not started, no packages specified" + + " or invalid hours"); + } + long durationMs = TimeUnit.HOURS.toMillis(hours); + List<MonitoredPackage> packages = new ArrayList<>(); + for (String packageName : packageNames) { + packages.add(new MonitoredPackage(packageName, durationMs)); + } + synchronized (mLock) { + if (mUptimeAtLastRescheduleMs != 0) { + // Stored durations are relative to the last reschedule. Bring them up to date + // before adding packages, otherwise the next prune would deduct time that + // elapsed before the new monitoring windows started + long uptimeMs = SystemClock.uptimeMillis(); + long elapsedMs = uptimeMs - mUptimeAtLastRescheduleMs; + pruneObservers(elapsedMs); + // The pending cleanup keeps its deadline, only the reference point moves + mDurationAtLastReschedule -= elapsedMs; + mUptimeAtLastRescheduleMs = uptimeMs; + } + ObserverInternal oldObserver = mAllObservers.get(observer.getName()); + if (oldObserver == null) { + Slog.d(TAG, observer.getName() + " started monitoring health of packages " + + packageNames); + mAllObservers.put(observer.getName(), + new ObserverInternal(observer.getName(), packages)); + } else { + Slog.d(TAG, observer.getName() + " added the following packages to monitor " + + packageNames); + oldObserver.updatePackages(packages); + } + } + registerHealthObserver(observer); + // Always reschedule because we may need to expire packages + // earlier than we are already scheduled for + rescheduleCleanup(); + sendIoMessage(MESSAGE_SAVE_FILE); + } + + /** + * Unregisters {@code observer} from listening to package failure. + * Additionally, this stops observing any packages that may have previously been observed + * even from a previous boot. + */ + public void unregisterHealthObserver(PackageHealthObserver observer) { + synchronized (mLock) { + mAllObservers.remove(observer.getName()); + mRegisteredObservers.remove(observer.getName()); + } + sendIoMessage(MESSAGE_SAVE_FILE); + } + + // TODO(zezeozue:) Accept current versionCodes of failing packages? + /** + * Called when a process fails either due to a crash or ANR.
+ * + * <p>All registered observers for the packages contained in the process will be notified, in + * the iteration order of the internal observer map, until an observer signifies that it has + * taken action and other observers should not be notified. + * + * <p>This method could be called frequently if there is a severe problem on the device. + * + * @param packages names of the packages contained in the failing process + */ + public void onPackageFailure(String[] packages) { + synchronized (mLock) { + if (mRegisteredObservers.isEmpty()) { + return; + } + for (String packageName : packages) { + for (ObserverInternal observer : mAllObservers.values()) { + if (observer.onPackageFailure(packageName)) { + PackageHealthObserver activeObserver = + mRegisteredObservers.get(observer.mName); + if (activeObserver != null + && activeObserver.onHealthCheckFailed(packageName)) { + // Observer has handled, do not notify other observers + break; + } + } + } + } + } + } + + // TODO(zezeozue): Optimize write? Maybe only write a separate smaller file? + // This currently adds about 7ms extra to shutdown thread + /** + * Writes the package information to file during shutdown. + * + * <p>The save runs synchronously on the calling thread. + */ + public void writeNow() { + synchronized (mLock) { + if (mAllObservers.isEmpty()) { + return; + } + pruneObservers(SystemClock.uptimeMillis() - mUptimeAtLastRescheduleMs); + } + // pruneObservers queues MESSAGE_SAVE_FILE, drop it (and any older request) only after + // pruning so that the IO thread does not write the file again behind the save below + mIoHandler.removeMessages(MESSAGE_SAVE_FILE); + saveToFile(); + Slog.i(TAG, "Last write to update package durations"); + } + + /** Register instances of this interface to receive notifications on package failure. */ + public interface PackageHealthObserver { + /** + * Called when health check fails for the {@code packageName}. + * @return {@code true} if action was taken and other observers should not be notified of + * this failure, {@code false} otherwise. + */ + boolean onHealthCheckFailed(String packageName); + + // TODO(zezeozue): Ensure uniqueness? + /** + * Identifier for the observer, should not change across device updates otherwise the + * watchdog may drop observing packages with the old name.
+ */ + String getName(); + } + + /** + * Reschedules handler to prune expired packages from observers. + * + * <p>Packages are pruned and the timer is (re)armed on the first schedule, when the + * previously scheduled cleanup is due, or when a package expires earlier than the pending + * cleanup. Otherwise the pending cleanup is left untouched. + */ + private void rescheduleCleanup() { + synchronized (mLock) { + long nextDurationToScheduleMs = getEarliestPackageExpiryLocked(); + if (nextDurationToScheduleMs == Long.MAX_VALUE) { + Slog.i(TAG, "No monitored packages, ending package cleanup"); + mDurationAtLastReschedule = 0; + mUptimeAtLastRescheduleMs = 0; + return; + } + long uptimeMs = SystemClock.uptimeMillis(); + // 0 if mPackageCleanup not running + long elapsedDurationMs = mUptimeAtLastRescheduleMs == 0 + ? 0 : uptimeMs - mUptimeAtLastRescheduleMs; + // Not positive if mPackageCleanup is not running or if the scheduled cleanup is due, + // which is the case when this method runs as the timer callback + long remainingDurationMs = mDurationAtLastReschedule - elapsedDurationMs; + + if (mUptimeAtLastRescheduleMs == 0 + || remainingDurationMs <= 0 + || nextDurationToScheduleMs < remainingDurationMs) { + // First schedule, a due cleanup or an earlier reschedule + pruneObservers(elapsedDurationMs); + mTimerHandler.removeCallbacks(mPackageCleanup); + mTimerHandler.postDelayed(mPackageCleanup, nextDurationToScheduleMs); + mDurationAtLastReschedule = nextDurationToScheduleMs; + mUptimeAtLastRescheduleMs = uptimeMs; + } + } + } + + /** + * Returns the shortest monitoring duration left among all monitored packages. + * Callers are expected to hold mLock. + * + * @return the shortest duration in milliseconds, or Long#MAX_VALUE if there are no + * observed packages. + */ + private long getEarliestPackageExpiryLocked() { + long shortestDurationMs = Long.MAX_VALUE; + for (ObserverInternal observer : mAllObservers.values()) { + for (MonitoredPackage p : observer.mPackages.values()) { + if (p.mDurationMs < shortestDurationMs) { + shortestDurationMs = p.mDurationMs; + } + } + } + Slog.v(TAG, "Earliest package time is " + shortestDurationMs); + return shortestDurationMs; + } + + /** + * Removes {@code elapsedMs} milliseconds from all durations on monitored packages. + * Discards expired packages and discards observers without any packages.
+ * + * <p>Does nothing when {@code elapsedMs} is 0, otherwise a save to file is queued. + */ + private void pruneObservers(long elapsedMs) { + if (elapsedMs == 0) { + return; + } + synchronized (mLock) { + Slog.d(TAG, "Removing expired packages after " + elapsedMs + "ms"); + Iterator<ObserverInternal> it = mAllObservers.values().iterator(); + while (it.hasNext()) { + ObserverInternal observer = it.next(); + if (!observer.updateMonitoringDurations(elapsedMs)) { + Slog.i(TAG, "Discarding observer " + observer.mName + ". All packages expired"); + it.remove(); + } + } + } + // Persist the reduced durations + sendIoMessage(MESSAGE_SAVE_FILE); + } + + /** + * Loads mAllObservers from file. + * + * <p>Note that this is <b>not</b> thread safe and should only be called + * from the constructor. + */ + private void loadFromFile() { + InputStream infile = null; + mAllObservers.clear(); + try { + infile = mPolicyFile.openRead(); + final XmlPullParser parser = Xml.newPullParser(); + parser.setInput(infile, StandardCharsets.UTF_8.name()); + XmlUtils.beginDocument(parser, TAG_PACKAGE_WATCHDOG); + int outerDepth = parser.getDepth(); + // Every element below the root is handed to ObserverInternal#read + while (XmlUtils.nextElementWithin(parser, outerDepth)) { + ObserverInternal observer = ObserverInternal.read(parser); + if (observer != null) { + mAllObservers.put(observer.mName, observer); + } + } + } catch (FileNotFoundException e) { + // Nothing to monitor + } catch (IOException e) { + Log.wtf(TAG, "Unable to read monitored packages", e); + } catch (NumberFormatException e) { + Log.wtf(TAG, "Unable to parse monitored package windows", e); + } catch (XmlPullParserException e) { + Log.wtf(TAG, "Unable to parse monitored packages", e); + } finally { + IoUtils.closeQuietly(infile); + } + } + + /** + * Persists mAllObservers to file and ignores threshold information. + * + * <p>Note that this is <b>not</b> thread safe and should only be called on the + * single threaded IoHandler (writeNow also calls it directly on its caller's thread).
+ * + * @return {@code true} if the file was written, {@code false} if the write failed and was + * abandoned + */ + private boolean saveToFile() { + FileOutputStream stream; + try { + stream = mPolicyFile.startWrite(); + } catch (IOException e) { + Slog.w(TAG, "Cannot update monitored packages", e); + return false; + } + + try { + XmlSerializer out = new FastXmlSerializer(); + out.setOutput(stream, StandardCharsets.UTF_8.name()); + out.startDocument(null, true); + out.startTag(null, TAG_PACKAGE_WATCHDOG); + out.attribute(null, ATTR_VERSION, Integer.toString(DB_VERSION)); + // mAllObservers is guarded by mLock and is modified from other threads + synchronized (mLock) { + for (ObserverInternal observer : mAllObservers.values()) { + if (!observer.write(out)) { + // Do not commit a file containing a partially written observer + throw new IOException("Cannot save observer " + observer.mName); + } + } + } + out.endTag(null, TAG_PACKAGE_WATCHDOG); + out.endDocument(); + mPolicyFile.finishWrite(stream); + return true; + } catch (IOException e) { + Slog.w(TAG, "Failed to save monitored packages, restoring backup", e); + mPolicyFile.failWrite(stream); + return false; + } finally { + IoUtils.closeQuietly(stream); + } + } + + /** Queues {@code what} on the IO handler unless the same message is already pending. */ + private void sendIoMessage(int what) { + if (!mIoHandler.hasMessages(what)) { + Message m = Message.obtain(mIoHandler, what); + mIoHandler.sendMessage(m); + } + } + + /** + * Represents an observer monitoring a set of packages along with the failure thresholds for + * each package. + */ + static class ObserverInternal { + public final String mName; + public final ArrayMap<String, MonitoredPackage> mPackages; + + ObserverInternal(String name, List<MonitoredPackage> packages) { + mName = name; + mPackages = new ArrayMap<>(); + updatePackages(packages); + } + + /** + * Writes important details to file. Doesn't persist any package failure thresholds. + * + * <p>Note that this method is <b>not</b> thread safe. It should only be called from + * #saveToFile which runs on a single threaded handler.
+ * + * @return {@code true} if the observer was written, {@code false} if writing failed + */ + public boolean write(XmlSerializer out) { + try { + out.startTag(null, TAG_OBSERVER); + out.attribute(null, ATTR_NAME, mName); + for (int i = 0; i < mPackages.size(); i++) { + MonitoredPackage p = mPackages.valueAt(i); + out.startTag(null, TAG_PACKAGE); + out.attribute(null, ATTR_NAME, p.mName); + out.attribute(null, ATTR_DURATION, String.valueOf(p.mDurationMs)); + out.endTag(null, TAG_PACKAGE); + } + out.endTag(null, TAG_OBSERVER); + return true; + } catch (IOException e) { + Slog.w(TAG, "Cannot save observer", e); + return false; + } + } + + /** + * Starts monitoring {@code packages}. A package that is already monitored under the same + * name is replaced. + */ + public void updatePackages(List<MonitoredPackage> packages) { + // Lock on the map owned by this instance, a String may be shared with unrelated code + synchronized (mPackages) { + for (MonitoredPackage p : packages) { + mPackages.put(p.mName, p); + } + } + } + + /** + * Reduces the monitoring durations of all packages observed by this observer by + * {@code elapsedMs}. If the remaining duration of a package is not greater than 0, the + * package is removed from observation. + * + * @return {@code true} if there are still packages to be observed, {@code false} otherwise + */ + public boolean updateMonitoringDurations(long elapsedMs) { + synchronized (mPackages) { + Iterator<MonitoredPackage> it = mPackages.values().iterator(); + while (it.hasNext()) { + MonitoredPackage p = it.next(); + long newDuration = p.mDurationMs - elapsedMs; + if (newDuration > 0) { + p.mDurationMs = newDuration; + } else { + it.remove(); + } + } + return !mPackages.isEmpty(); + } + } + + /** + * Increments failure counts of {@code packageName}. + * @return {@code true} if {@code packageName} is monitored and its failure threshold is + * reached, {@code false} otherwise + */ + public boolean onPackageFailure(String packageName) { + synchronized (mPackages) { + MonitoredPackage p = mPackages.get(packageName); + if (p != null) { + return p.onFailure(); + } + return false; + } + } + + /** + * Returns one ObserverInternal from the {@code parser} and advances its state. + * + * <p>Note that this method is <b>not</b> thread safe.
It should only be called from + * #loadFromFile which in turn is only called on construction of the + * singleton PackageWatchdog. + * + * @return the observer read, or {@code null} if the current element is not a named + * observer with at least one named package or if reading its packages fails + * @throws NumberFormatException if a package duration is not a valid long + */ + public static ObserverInternal read(XmlPullParser parser) { + if (!TAG_OBSERVER.equals(parser.getName())) { + // Not an observer element, there is no name to keep its packages under + return null; + } + String observerName = parser.getAttributeValue(null, ATTR_NAME); + if (TextUtils.isEmpty(observerName)) { + return null; + } + List<MonitoredPackage> packages = new ArrayList<>(); + int innerDepth = parser.getDepth(); + try { + while (XmlUtils.nextElementWithin(parser, innerDepth)) { + if (TAG_PACKAGE.equals(parser.getName())) { + String packageName = parser.getAttributeValue(null, ATTR_NAME); + long duration = Long.parseLong( + parser.getAttributeValue(null, ATTR_DURATION)); + if (!TextUtils.isEmpty(packageName)) { + packages.add(new MonitoredPackage(packageName, duration)); + } + } + } + } catch (IOException | XmlPullParserException e) { + Slog.w(TAG, "Unable to read observer " + observerName, e); + return null; + } + if (packages.isEmpty()) { + return null; + } + return new ObserverInternal(observerName, packages); + } + } + + /** Represents a package along with the time it should be monitored for. */ + static class MonitoredPackage { + public final String mName; + // System uptime duration to monitor package + public long mDurationMs; + // System uptime of first package failure + private long mUptimeStartMs; + // Number of failures since mUptimeStartMs + private int mFailures; + + MonitoredPackage(String name, long durationMs) { + mName = name; + mDurationMs = durationMs; + } + + /** + * Increment package failures or resets failure count depending on the last package failure.
+ * + * @return {@code true} if the failure count has reached {@code TRIGGER_FAILURE_COUNT}, + * {@code false} otherwise + */ + public synchronized boolean onFailure() { + final long now = SystemClock.uptimeMillis(); + final long duration = now - mUptimeStartMs; + if (duration > TRIGGER_DURATION_MS) { + // TODO(zezeozue): Resetting to 1 is not correct + // because there may be more than 1 failure in the last trigger window from now + // This is the RescueParty impl, will leave for now + mFailures = 1; + mUptimeStartMs = now; + } else { + mFailures++; + } + return mFailures >= TRIGGER_FAILURE_COUNT; + } + } + + /** Handler that saves the observers to file on the looper it is created with. */ + private class IoHandler extends Handler { + IoHandler(Looper looper) { + super(looper); + } + + @Override + public void handleMessage(Message msg) { + switch (msg.what) { + case MESSAGE_SAVE_FILE: + saveToFile(); + break; + } + } + } +} diff --git a/services/core/java/com/android/server/am/ActivityManagerService.java b/services/core/java/com/android/server/am/ActivityManagerService.java index 1a5dd90b918a..16c236ef83ad 100644 --- a/services/core/java/com/android/server/am/ActivityManagerService.java +++ b/services/core/java/com/android/server/am/ActivityManagerService.java @@ -336,6 +336,7 @@ import com.android.server.IoThread; import com.android.server.LocalServices; import com.android.server.LockGuard; import com.android.server.NetworkManagementInternal; +import com.android.server.PackageWatchdog; import com.android.server.RescueParty; import com.android.server.ServiceThread; import com.android.server.SystemConfig; @@ -587,6 +588,7 @@ public class ActivityManagerService extends IActivityManager.Stub public final PendingIntentController mPendingIntentController; final AppErrors mAppErrors; + final PackageWatchdog mPackageWatchdog; /** * Indicates the maximum time spent waiting for the network rules to get updated.
@@ -2209,6 +2211,7 @@ public class ActivityManagerService extends IActivityManager.Stub mContext = mInjector.getContext(); mUiContext = null; mAppErrors = null; + mPackageWatchdog = null; mActiveUids = new ActiveUids(this, false /* postChangesToAtm */); mAppOpsService = mInjector.getAppOpsService(null /* file */, null /* handler */); mBatteryStatsService = null; @@ -2275,7 +2278,8 @@ public class ActivityManagerService extends IActivityManager.Stub mServices = new ActiveServices(this); mProviderMap = new ProviderMap(this); - mAppErrors = new AppErrors(mUiContext, this); + mPackageWatchdog = PackageWatchdog.getInstance(mUiContext); + mAppErrors = new AppErrors(mUiContext, this, mPackageWatchdog); mActiveUids = new ActiveUids(this, true /* postChangesToAtm */); final File systemDir = SystemServiceManager.ensureSystemDir(); diff --git a/services/core/java/com/android/server/am/AppErrors.java b/services/core/java/com/android/server/am/AppErrors.java index 1c1daffceafe..a634b577f506 100644 --- a/services/core/java/com/android/server/am/AppErrors.java +++ b/services/core/java/com/android/server/am/AppErrors.java @@ -53,6 +53,7 @@ import android.util.proto.ProtoOutputStream; import com.android.internal.app.ProcessMap; import com.android.internal.logging.MetricsLogger; import com.android.internal.logging.nano.MetricsProto; +import com.android.server.PackageWatchdog; import com.android.server.RescueParty; import com.android.server.wm.WindowProcessController; @@ -69,6 +70,7 @@ class AppErrors { private final ActivityManagerService mService; private final Context mContext; + private final PackageWatchdog mPackageWatchdog; private ArraySet<String> mAppsNotReportingCrashes; @@ -93,10 +95,11 @@ class AppErrors { private final ProcessMap<BadProcessInfo> mBadProcesses = new ProcessMap<>(); - AppErrors(Context context, ActivityManagerService service) { + AppErrors(Context context, ActivityManagerService service, PackageWatchdog watchdog) { context.assertRuntimeOverlayThemable(); 
mService = service; mContext = context; + mPackageWatchdog = watchdog; } void writeToProto(ProtoOutputStream proto, long fieldId, String dumpPackage) { @@ -400,10 +403,16 @@ class AppErrors { longMsg = shortMsg; } - // If a persistent app is stuck in a crash loop, the device isn't very - // usable, so we want to consider sending out a rescue party. - if (r != null && r.isPersistent()) { - RescueParty.notePersistentAppCrash(mContext, r.uid); + if (r != null) { + if (r.isPersistent()) { + // If a persistent app is stuck in a crash loop, the device isn't very + // usable, so we want to consider sending out a rescue party. + RescueParty.notePersistentAppCrash(mContext, r.uid); + } else { + // If a non-persistent app is stuck in crash loop, we want to inform + // the package watchdog, maybe an update or experiment can be rolled back. + mPackageWatchdog.onPackageFailure(r.getPackageList()); + } } final int relaunchReason = r != null @@ -821,6 +830,7 @@ class AppErrors { void handleShowAnrUi(Message msg) { Dialog dialogToShow = null; + String[] packageList = null; synchronized (mService) { AppNotRespondingDialog.Data data = (AppNotRespondingDialog.Data) msg.obj; final ProcessRecord proc = data.proc; @@ -828,6 +838,9 @@ class AppErrors { Slog.e(TAG, "handleShowAnrUi: proc is null"); return; } + if (!proc.isPersistent()) { + packageList = proc.getPackageList(); + } if (proc.anrDialog != null) { Slog.e(TAG, "App already has anr dialog: " + proc); MetricsLogger.action(mContext, MetricsProto.MetricsEvent.ACTION_APP_ANR, @@ -851,6 +864,10 @@ class AppErrors { if (dialogToShow != null) { dialogToShow.show(); } + // Notify PackageWatchdog without the lock held + if (packageList != null) { + mPackageWatchdog.onPackageFailure(packageList); + } } /** diff --git a/services/core/java/com/android/server/pm/PackageManagerService.java b/services/core/java/com/android/server/pm/PackageManagerService.java index a5d5e8c73fb0..136c7c91749e 100644 --- 
a/services/core/java/com/android/server/pm/PackageManagerService.java +++ b/services/core/java/com/android/server/pm/PackageManagerService.java @@ -296,6 +296,7 @@ import com.android.server.EventLogTags; import com.android.server.FgThread; import com.android.server.LocalServices; import com.android.server.LockGuard; +import com.android.server.PackageWatchdog; import com.android.server.ServiceThread; import com.android.server.SystemConfig; import com.android.server.SystemServerInitThreadPool; @@ -9492,6 +9493,7 @@ public class PackageManagerService extends IPackageManager.Stub mPackageUsage.writeNow(mPackages); mCompilerStats.writeNow(); mDexManager.writePackageDexUsageNow(); + PackageWatchdog.getInstance(mContext).writeNow(); // This is the last chance to write out pending restriction settings synchronized (mPackages) { |