From 6b1afebdaca6c27d49a243c4283e5e2e4924de8c Mon Sep 17 00:00:00 2001 From: Dianne Hackborn Date: Tue, 31 Aug 2010 15:40:21 -0700 Subject: Improve debug output when an ANR happens. - Collect data at better times. - Collect per-thread CPU usage as soon as possible after the ANR, and print in log. - Based on new per-thread CPU usage, limit the number of processes we collect stacks from to not include inactive not interesting procs. - Improve the way ProcessStats compute and reports its data. Change-Id: I12b17fb47d593d175be69bb792c1f57179bf4fdf --- services/java/com/android/server/ProcessStats.java | 258 ++++++++++++++------- services/java/com/android/server/Watchdog.java | 16 +- .../android/server/am/ActivityManagerService.java | 97 ++++++-- 3 files changed, 260 insertions(+), 111 deletions(-) diff --git a/services/java/com/android/server/ProcessStats.java b/services/java/com/android/server/ProcessStats.java index a02c4e78b15f..0c8c2fd40d00 100644 --- a/services/java/com/android/server/ProcessStats.java +++ b/services/java/com/android/server/ProcessStats.java @@ -39,7 +39,7 @@ public class ProcessStats { private static final int[] PROCESS_STATS_FORMAT = new int[] { PROC_SPACE_TERM, - PROC_SPACE_TERM, + PROC_SPACE_TERM|PROC_PARENS, PROC_SPACE_TERM, PROC_SPACE_TERM, PROC_SPACE_TERM, @@ -75,16 +75,21 @@ public class ProcessStats { PROC_SPACE_TERM, PROC_SPACE_TERM, PROC_SPACE_TERM, + PROC_SPACE_TERM|PROC_OUT_LONG, // 9: minor faults PROC_SPACE_TERM, - PROC_SPACE_TERM, - PROC_SPACE_TERM, + PROC_SPACE_TERM|PROC_OUT_LONG, // 11: major faults PROC_SPACE_TERM, PROC_SPACE_TERM|PROC_OUT_LONG, // 13: utime PROC_SPACE_TERM|PROC_OUT_LONG // 14: stime }; - private final String[] mProcessFullStatsStringData = new String[3]; - private final long[] mProcessFullStatsData = new long[3]; + static final int PROCESS_FULL_STAT_MINOR_FAULTS = 1; + static final int PROCESS_FULL_STAT_MAJOR_FAULTS = 2; + static final int PROCESS_FULL_STAT_UTIME = 3; + static final int PROCESS_FULL_STAT_STIME = 4; + + private final String[] mProcessFullStatsStringData = new String[5]; + private final long[] mProcessFullStatsData = new long[5]; private static final int[] SYSTEM_CPU_FORMAT = new int[] { PROC_SPACE_TERM|PROC_COMBINE, @@ -116,6 +121,9 @@ public class ProcessStats { private long mCurrentSampleTime; private long mLastSampleTime; + private long mCurrentSampleRealTime; + private long mLastSampleRealTime; + private long mBaseUserTime; private long mBaseSystemTime; private long mBaseIoWaitTime; @@ -167,6 +175,9 @@ public class ProcessStats { public String name; int nameWidth; + public long base_uptime; + public long rel_uptime; + public long base_utime; public long base_stime; public int rel_utime; @@ -178,6 +189,7 @@ public class ProcessStats { public int rel_majfaults; public boolean active; + public boolean working; public boolean added; public boolean removed; @@ -211,8 +223,7 @@ public class ProcessStats { private final static Comparator sLoadComparator = new Comparator() { public final int - compare(Stats sta, Stats stb) - { + compare(Stats sta, Stats stb) { int ta = sta.rel_utime + sta.rel_stime; int tb = stb.rel_utime + stb.rel_stime; if (ta != tb) { @@ -241,31 +252,17 @@ public class ProcessStats { } public void init() { + if (DEBUG) Slog.v(TAG, "Init: " + this); mFirst = true; update(); } public void update() { + if (DEBUG) Slog.v(TAG, "Update: " + this); mLastSampleTime = mCurrentSampleTime; mCurrentSampleTime = SystemClock.uptimeMillis(); - - final float[] loadAverages = mLoadAverageData; - if (Process.readProcFile("/proc/loadavg", LOAD_AVERAGE_FORMAT, - null, null, loadAverages)) { - float load1 = loadAverages[0]; - float load5 = loadAverages[1]; - float load15 = loadAverages[2]; - if (load1 != mLoad1 || load5 != mLoad5 || load15 != mLoad15) { - mLoad1 = load1; - mLoad5 = load5; - mLoad15 = load15; - onLoadChanged(load1, load5, load15); - } - } - - mCurPids = collectStats("/proc", -1, mFirst, mCurPids, - mProcStats, mWorkingProcs); - mFirst = false; + mLastSampleRealTime = mCurrentSampleRealTime; + mCurrentSampleRealTime = SystemClock.elapsedRealtime(); final long[] sysCpu = mSystemCpuData; if (Process.readProcFile("/proc/stat", SYSTEM_CPU_FORMAT, @@ -288,7 +285,7 @@ public class ProcessStats { mRelSoftIrqTime = (int)(softirqtime - mBaseSoftIrqTime); mRelIdleTime = (int)(idletime - mBaseIdleTime); - if (false) { + if (DEBUG) { Slog.i("Load", "Total U:" + sysCpu[0] + " N:" + sysCpu[1] + " S:" + sysCpu[2] + " I:" + sysCpu[3] + " W:" + sysCpu[4] + " Q:" + sysCpu[5] @@ -305,16 +302,32 @@ public class ProcessStats { mBaseIdleTime = idletime; } + mCurPids = collectStats("/proc", -1, mFirst, mCurPids, mProcStats); + + final float[] loadAverages = mLoadAverageData; + if (Process.readProcFile("/proc/loadavg", LOAD_AVERAGE_FORMAT, + null, null, loadAverages)) { + float load1 = loadAverages[0]; + float load5 = loadAverages[1]; + float load15 = loadAverages[2]; + if (load1 != mLoad1 || load5 != mLoad5 || load15 != mLoad15) { + mLoad1 = load1; + mLoad5 = load5; + mLoad15 = load15; + onLoadChanged(load1, load5, load15); + } + } + + if (DEBUG) Slog.i(TAG, "*** TIME TO COLLECT STATS: " + + (SystemClock.uptimeMillis()-mCurrentSampleTime)); + mWorkingProcsSorted = false; mFirst = false; } private int[] collectStats(String statsFile, int parentPid, boolean first, - int[] curPids, ArrayList allProcs, - ArrayList workingProcs) { + int[] curPids, ArrayList allProcs) { - workingProcs.clear(); - int[] pids = Process.getPids(statsFile, curPids); int NP = (pids == null) ? 0 : pids.length; int NS = allProcs.size(); @@ -330,8 +343,13 @@ public class ProcessStats { if (st != null && st.pid == pid) { // Update an existing process... st.added = false; + st.working = false; curStatsIndex++; - if (localLOGV) Slog.v(TAG, "Existing pid " + pid + ": " + st); + if (DEBUG) Slog.v(TAG, "Existing " + + (parentPid < 0 ? "process" : "thread") + + " pid " + pid + ": " + st); + + final long uptime = SystemClock.uptimeMillis(); final long[] procStats = mProcessStatsData; if (!Process.readProcFile(st.statFile.toString(), @@ -363,11 +381,18 @@ public class ProcessStats { getName(st, st.cmdlineFile); if (st.threadStats != null) { mCurThreadPids = collectStats(st.threadsDir, pid, false, - mCurThreadPids, st.threadStats, - st.workingThreads); + mCurThreadPids, st.threadStats); } } + if (DEBUG) Slog.v("Load", "Stats changed " + st.name + " pid=" + st.pid + + " utime=" + utime + "-" + st.base_utime + + " stime=" + stime + "-" + st.base_stime + + " minfaults=" + minfaults + "-" + st.base_minfaults + + " majfaults=" + majfaults + "-" + st.base_majfaults); + + st.rel_uptime = uptime - st.base_uptime; + st.base_uptime = uptime; st.rel_utime = (int)(utime - st.base_utime); st.rel_stime = (int)(stime - st.base_stime); st.base_utime = utime; @@ -376,10 +401,7 @@ public class ProcessStats { st.rel_majfaults = (int)(majfaults - st.base_majfaults); st.base_minfaults = minfaults; st.base_majfaults = majfaults; - //Slog.i("Load", "Stats changed " + name + " pid=" + st.pid - // + " name=" + st.name + " utime=" + utime - // + " stime=" + stime); - workingProcs.add(st); + st.working = true; continue; } @@ -389,18 +411,21 @@ public class ProcessStats { allProcs.add(curStatsIndex, st); curStatsIndex++; NS++; - if (localLOGV) Slog.v(TAG, "New pid " + pid + ": " + st); + if (DEBUG) Slog.v(TAG, "New " + + (parentPid < 0 ? "process" : "thread") + + " pid " + pid + ": " + st); final String[] procStatsString = mProcessFullStatsStringData; final long[] procStats = mProcessFullStatsData; + st.base_uptime = SystemClock.uptimeMillis(); if (Process.readProcFile(st.statFile.toString(), PROCESS_FULL_STATS_FORMAT, procStatsString, procStats, null)) { - st.baseName = parentPid < 0 - ? procStatsString[0] : Integer.toString(pid); - st.base_utime = 0; //procStats[1]; - st.base_stime = 0; //procStats[2]; - st.base_minfaults = st.base_majfaults = 0; + st.baseName = procStatsString[0]; + st.base_minfaults = procStats[PROCESS_FULL_STAT_MINOR_FAULTS]; + st.base_majfaults = procStats[PROCESS_FULL_STAT_MAJOR_FAULTS]; + st.base_utime = procStats[PROCESS_FULL_STAT_UTIME]; + st.base_stime = procStats[PROCESS_FULL_STAT_STIME]; } else { st.baseName = ""; st.base_utime = st.base_stime = 0; @@ -409,24 +434,26 @@ public class ProcessStats { if (parentPid < 0) { getName(st, st.cmdlineFile); - } else { - st.name = st.baseName; - st.nameWidth = onMeasureProcessName(st.name); if (st.threadStats != null) { mCurThreadPids = collectStats(st.threadsDir, pid, true, - mCurThreadPids, st.threadStats, - st.workingThreads); + mCurThreadPids, st.threadStats); } + } else { + st.name = st.baseName; + st.nameWidth = onMeasureProcessName(st.name); } + + if (DEBUG) Slog.v("Load", "Stats added " + st.name + " pid=" + st.pid + + " utime=" + st.base_utime + " stime=" + st.base_stime + + " minfaults=" + st.base_minfaults + " majfaults=" + st.base_majfaults); - //Slog.i("Load", "New process: " + st.pid + " " + st.name); st.rel_utime = 0; st.rel_stime = 0; st.rel_minfaults = 0; st.rel_majfaults = 0; st.added = true; if (!first) { - workingProcs.add(st); + st.working = true; } continue; } @@ -437,10 +464,12 @@ public class ProcessStats { st.rel_minfaults = 0; st.rel_majfaults = 0; st.removed = true; - workingProcs.add(st); + st.working = true; allProcs.remove(curStatsIndex); NS--; - if (localLOGV) Slog.v(TAG, "Removed pid " + st.pid + ": " + st); + if (DEBUG) Slog.v(TAG, "Removed " + + (parentPid < 0 ? "process" : "thread") + + " pid " + pid + ": " + st); // Decrement the loop counter so that we process the current pid // again the next time through the loop. i--; @@ -455,7 +484,7 @@ public class ProcessStats { st.rel_minfaults = 0; st.rel_majfaults = 0; st.removed = true; - workingProcs.add(st); + st.working = true; allProcs.remove(curStatsIndex); NS--; if (localLOGV) Slog.v(TAG, "Removed pid " + st.pid + ": " + st); @@ -569,11 +598,34 @@ public class ProcessStats { / (mRelUserTime+mRelSystemTime+mRelIrqTime+mRelIdleTime); } - final public int countWorkingStats() { + final void buildWorkingProcs() { if (!mWorkingProcsSorted) { + mWorkingProcs.clear(); + final int N = mProcStats.size(); + for (int i=0; i 1) { + stats.workingThreads.clear(); + final int M = stats.threadStats.size(); + for (int j=0; j mLastSampleTime) { + pw.print(now-mLastSampleTime); + pw.print("ms to "); + pw.print(now-mCurrentSampleTime); + pw.print("ms ago"); + } else { + pw.print(mLastSampleTime-now); + pw.print("ms to "); + pw.print(mCurrentSampleTime-now); + pw.print("ms later"); + } + + long sampleTime = mCurrentSampleTime - mLastSampleTime; + long sampleRealTime = mCurrentSampleRealTime - mLastSampleRealTime; + long percAwake = (sampleTime*100) / sampleRealTime; + if (percAwake != 100) { + pw.print(" with "); + pw.print(percAwake); + pw.print("% awake"); + } + pw.println(":"); final int totalTime = mRelUserTime + mRelSystemTime + mRelIoWaitTime + mRelIrqTime + mRelSoftIrqTime + mRelIdleTime; + if (DEBUG) Slog.i(TAG, "totalTime " + totalTime + " over sample time " + + (mCurrentSampleTime-mLastSampleTime)); + int N = mWorkingProcs.size(); for (int i=0; i= 0) { + pw.print(pid); + pw.print("/"); + } pw.print(label); pw.print(": "); - if (totalTime == 0) totalTime = 1; - pw.print(((user+system+iowait+irq+softIrq)*100)/totalTime); - pw.print("% = "); - pw.print((user*100)/totalTime); + printRatio(pw, user, totalTime); pw.print("% user + "); - pw.print((system*100)/totalTime); + printRatio(pw, system, totalTime); pw.print("% kernel"); if (iowait > 0) { pw.print(" + "); - pw.print((iowait*100)/totalTime); + printRatio(pw, iowait, totalTime); pw.print("% iowait"); } if (irq > 0) { pw.print(" + "); - pw.print((irq*100)/totalTime); + printRatio(pw, irq, totalTime); pw.print("% irq"); } if (softIrq > 0) { pw.print(" + "); - pw.print((softIrq*100)/totalTime); + printRatio(pw, softIrq, totalTime); pw.print("% softirq"); } if (minFaults > 0 || majFaults > 0) { @@ -696,8 +787,8 @@ public class ProcessStats { } private void getName(Stats st, String cmdlineFile) { - String newName = st.baseName; - if (st.baseName == null || st.baseName.equals("app_process")) { + String newName = st.name; + if (st.name == null || st.name.equals("app_process")) { String cmdName = readFile(cmdlineFile, '\0'); if (cmdName != null && cmdName.length() > 1) { newName = cmdName; @@ -706,6 +797,9 @@ public class ProcessStats { newName = newName.substring(i+1); } } + if (newName == null) { + newName = st.baseName; + } } if (st.name == null || !newName.equals(st.name)) { st.name = newName; diff --git a/services/java/com/android/server/Watchdog.java b/services/java/com/android/server/Watchdog.java index a7420938445d..1b885f50108b 100644 --- a/services/java/com/android/server/Watchdog.java +++ b/services/java/com/android/server/Watchdog.java @@ -39,9 +39,6 @@ import android.util.Log; import android.util.Slog; import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; import java.util.ArrayList; import java.util.Calendar; @@ -113,8 +110,6 @@ public class Watchdog extends Thread { public void handleMessage(Message msg) { switch (msg.what) { case MONITOR: { - long now = SystemClock.uptimeMillis(); - // See if we should force a reboot. int rebootInterval = mReqRebootInterval >= 0 ? mReqRebootInterval : Settings.Secure.getInt( @@ -418,9 +413,9 @@ public class Watchdog extends Thread { if (!waitedHalf) { // We've waited half the deadlock-detection interval. Pull a stack // trace and wait another half. - ArrayList pids = new ArrayList(); + ArrayList pids = new ArrayList(); pids.add(Process.myPid()); - File stack = ActivityManagerService.dumpStackTraces(true, pids); + ActivityManagerService.dumpStackTraces(true, pids, null, null); waitedHalf = true; continue; } @@ -430,15 +425,16 @@ public class Watchdog extends Thread { // First collect stack traces from all threads of the system process. // Then kill this process so that the system will restart. - String name = (mCurrentMonitor != null) ? mCurrentMonitor.getClass().getName() : "null"; + String name = (mCurrentMonitor != null) ? + mCurrentMonitor.getClass().getName() : "null"; EventLog.writeEvent(EventLogTags.WATCHDOG, name); - ArrayList pids = new ArrayList(); + ArrayList pids = new ArrayList(); pids.add(Process.myPid()); if (mPhonePid > 0) pids.add(mPhonePid); // Pass !waitedHalf so that just in case we somehow wind up here without having // dumped the halfway stacks, we properly re-initialize the trace file. - File stack = ActivityManagerService.dumpStackTraces(!waitedHalf, pids); + File stack = ActivityManagerService.dumpStackTraces(!waitedHalf, pids, null, null); // Give some extra time to make sure the stack traces get written. // The system's been hanging for a minute, another second or two won't hurt much. diff --git a/services/java/com/android/server/am/ActivityManagerService.java b/services/java/com/android/server/am/ActivityManagerService.java index 5d5e862f8eb8..30dc5ea84ec3 100644 --- a/services/java/com/android/server/am/ActivityManagerService.java +++ b/services/java/com/android/server/am/ActivityManagerService.java @@ -1355,7 +1355,9 @@ public final class ActivityManagerService extends ActivityManagerNative implemen @Override protected void dump(FileDescriptor fd, PrintWriter pw, String[] args) { synchronized (mActivityManagerService.mProcessStatsThread) { - pw.print(mActivityManagerService.mProcessStats.printCurrentState()); + pw.print(mActivityManagerService.mProcessStats.printCurrentLoad()); + pw.print(mActivityManagerService.mProcessStats.printCurrentState( + SystemClock.uptimeMillis())); } } } @@ -2644,10 +2646,12 @@ public final class ActivityManagerService extends ActivityManagerNative implemen * @param clearTraces causes the dump file to be erased prior to the new * traces being written, if true; when false, the new traces will be * appended to any existing file content. - * @param pids of dalvik VM processes to dump stack traces for + * @param firstPids of dalvik VM processes to dump stack traces for first + * @param lastPids of dalvik VM processes to dump stack traces for last * @return file containing stack traces, or null if no dump file is configured */ - public static File dumpStackTraces(boolean clearTraces, ArrayList pids) { + public static File dumpStackTraces(boolean clearTraces, ArrayList firstPids, + ProcessStats processStats, SparseArray lastPids) { String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null); if (tracesPath == null || tracesPath.length() == 0) { return null; @@ -2675,25 +2679,69 @@ public final class ActivityManagerService extends ActivityManagerNative implemen try { observer.startWatching(); - int num = pids.size(); - for (int i = 0; i < num; i++) { - synchronized (observer) { - Process.sendSignal(pids.get(i), Process.SIGNAL_QUIT); - observer.wait(200); // Wait for write-close, give up after 200msec + + // First collect all of the stacks of the most important pids. + try { + int num = firstPids.size(); + for (int i = 0; i < num; i++) { + synchronized (observer) { + Process.sendSignal(firstPids.get(i), Process.SIGNAL_QUIT); + observer.wait(200); // Wait for write-close, give up after 200msec + } } + } catch (InterruptedException e) { + Log.wtf(TAG, e); } - } catch (InterruptedException e) { - Log.wtf(TAG, e); + + // Next measure CPU usage. + if (processStats != null) { + processStats.init(); + System.gc(); + processStats.update(); + try { + synchronized (processStats) { + processStats.wait(500); // measure over 1/2 second. + } + } catch (InterruptedException e) { + } + processStats.update(); + + // We'll take the stack crawls of just the top apps using CPU. + final int N = processStats.countWorkingStats(); + int numProcs = 0; + for (int i=0; i= 0) { + numProcs++; + try { + synchronized (observer) { + Process.sendSignal(stats.pid, Process.SIGNAL_QUIT); + observer.wait(200); // Wait for write-close, give up after 200msec + } + } catch (InterruptedException e) { + Log.wtf(TAG, e); + } + + } + } + } + + return tracesFile; + } finally { observer.stopWatching(); } - - return tracesFile; } final void appNotResponding(ProcessRecord app, ActivityRecord activity, ActivityRecord parent, final String annotation) { - ArrayList pids = new ArrayList(20); + ArrayList firstPids = new ArrayList(5); + SparseArray lastPids = new SparseArray(20); + + long anrTime = SystemClock.uptimeMillis(); + if (MONITOR_CPU_USAGE) { + updateCpuStatsNow(); + } synchronized (this) { // PowerManager.reboot() can block for a long time, so ignore ANRs while shutting down. @@ -2717,24 +2765,32 @@ public final class ActivityManagerService extends ActivityManagerNative implemen annotation); // Dump thread traces as quickly as we can, starting with "interesting" processes. - pids.add(app.pid); + firstPids.add(app.pid); int parentPid = app.pid; if (parent != null && parent.app != null && parent.app.pid > 0) parentPid = parent.app.pid; - if (parentPid != app.pid) pids.add(parentPid); + if (parentPid != app.pid) firstPids.add(parentPid); - if (MY_PID != app.pid && MY_PID != parentPid) pids.add(MY_PID); + if (MY_PID != app.pid && MY_PID != parentPid) firstPids.add(MY_PID); for (int i = mLruProcesses.size() - 1; i >= 0; i--) { ProcessRecord r = mLruProcesses.get(i); if (r != null && r.thread != null) { int pid = r.pid; - if (pid > 0 && pid != app.pid && pid != parentPid && pid != MY_PID) pids.add(pid); + if (pid > 0 && pid != app.pid && pid != parentPid && pid != MY_PID) { + if (r.persistent) { + firstPids.add(pid); + } else { + lastPids.put(pid, Boolean.TRUE); + } + } } } } - File tracesFile = dumpStackTraces(true, pids); + final ProcessStats processStats = new ProcessStats(true); + + File tracesFile = dumpStackTraces(true, firstPids, processStats, lastPids); // Log the ANR to the main log. StringBuilder info = mStringBuilder; @@ -2755,11 +2811,14 @@ public final class ActivityManagerService extends ActivityManagerNative implemen if (MONITOR_CPU_USAGE) { updateCpuStatsNow(); synchronized (mProcessStatsThread) { - cpuInfo = mProcessStats.printCurrentState(); + cpuInfo = mProcessStats.printCurrentState(anrTime); } + info.append(processStats.printCurrentLoad()); info.append(cpuInfo); } + info.append(processStats.printCurrentState(anrTime)); + Slog.e(TAG, info.toString()); if (tracesFile == null) { // There is no trace file, so dump (only) the alleged culprit's threads to the log -- cgit v1.2.3-59-g8ed1b