diff options
| author | 2017-10-13 15:42:32 -0700 | |
|---|---|---|
| committer | 2017-10-17 00:14:32 -0700 | |
| commit | 4d1ad408736f28229c3f625de6873e3862adf6bf (patch) | |
| tree | 00030b8ed16556a45978b4ccdc28c2b1e1b2fe42 | |
| parent | 0d2d6b6a863cfd64a6f1d3c1a33eed198d7612a2 (diff) | |
Add crash recovery logic to HardwarePropertiesManagerService
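The HardwarePropertiesManagerService-JNI communicates with the thermal
HAL to expose temperature and other thermal data to Java services.
Previously, if the thermal HAL died, there was no recovery mechanism in
place: the cached HAL handle was never refreshed. This change adds that
recovery mechanism by registering a HIDL death recipient that clears the
handle and reacquires the service, and by retrying the lookup at the
start of each native call.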
Bug: 67769672
Test:
1) Rebooted the device multiple times and checked that VrCore is able
to get the thermal information correctly.
2) Entered and exited VR mode multiple times while deliberately killing
the thermal HAL, and verified that thermal temperatures are still
reported correctly. Log excerpt after killing the HAL:
10-16 10:05:19.605 1099 1600 E HardwarePropertiesManagerService-JNI:
ThermalHAL just died
...
10-16 10:05:29.761 3459 3459 D ThermalWarningManager: THERMAL:
ThermalInfo: [current temp=33.0, VR throttling temp (soft exit)=52.0, VR
hard exit temp=54.0, VR warning temp=51.0, last throttling warning wall
time=n/a, last soft 'exit VR' flow wall time=n/a, last hard 'exit VR'
flow wall time=n/a]
Change-Id: I55d8eae31526e1bd1a232afea5bd02cb0afca142
Signed-off-by: Karthik Ravi Shankar <karthikrs@google.com>
| -rw-r--r-- | services/core/jni/com_android_server_HardwarePropertiesManagerService.cpp | 71 |
1 files changed, 58 insertions, 13 deletions
diff --git a/services/core/jni/com_android_server_HardwarePropertiesManagerService.cpp b/services/core/jni/com_android_server_HardwarePropertiesManagerService.cpp index 5f67ac1dfe8f..ed79352bba21 100644 --- a/services/core/jni/com_android_server_HardwarePropertiesManagerService.cpp +++ b/services/core/jni/com_android_server_HardwarePropertiesManagerService.cpp @@ -30,6 +30,8 @@ namespace android { +using android::hidl::base::V1_0::IBase; +using hardware::hidl_death_recipient; using hardware::hidl_vec; using hardware::thermal::V1_0::CoolingDevice; using hardware::thermal::V1_0::CpuUsage; @@ -58,7 +60,22 @@ static struct { jfloat gUndefinedTemperature; -static sp<IThermal> gThermalModule; +static void getThermalHalLocked(); +static std::mutex gThermalHalMutex; +static sp<IThermal> gThermalHal = nullptr; + +// struct ThermalHalDeathRecipient; +struct ThermalHalDeathRecipient : virtual public hidl_death_recipient { + // hidl_death_recipient interface + virtual void serviceDied(uint64_t cookie, const wp<IBase>& who) override { + std::lock_guard<std::mutex> lock(gThermalHalMutex); + ALOGE("ThermalHAL just died"); + gThermalHal = nullptr; + getThermalHalLocked(); + } +}; + +sp<ThermalHalDeathRecipient> gThermalHalDeathRecipient = nullptr; // ---------------------------------------------------------------------------- @@ -66,25 +83,50 @@ float finalizeTemperature(float temperature) { return isnan(temperature) ? gUndefinedTemperature : temperature; } -static void nativeInit(JNIEnv* env, jobject obj) { - // TODO(b/31632518) - if (gThermalModule == nullptr) { - gThermalModule = IThermal::getService(); +// The caller must be holding gThermalHalMutex. 
+static void getThermalHalLocked() { + if (gThermalHal != nullptr) { + return; } - if (gThermalModule == nullptr) { + gThermalHal = IThermal::getService(); + + if (gThermalHal == nullptr) { ALOGE("Unable to get Thermal service."); + } else { + if (gThermalHalDeathRecipient == nullptr) { + gThermalHalDeathRecipient = new ThermalHalDeathRecipient(); + } + hardware::Return<bool> linked = gThermalHal->linkToDeath( + gThermalHalDeathRecipient, 0x451F /* cookie */); + if (!linked.isOk()) { + ALOGE("Transaction error in linking to ThermalHAL death: %s", + linked.description().c_str()); + gThermalHal = nullptr; + } else if (!linked) { + ALOGW("Unable to link to ThermalHal death notifications"); + gThermalHal = nullptr; + } else { + ALOGD("Link to death notification successful"); + } } } +static void nativeInit(JNIEnv* env, jobject obj) { + std::lock_guard<std::mutex> lock(gThermalHalMutex); + getThermalHalLocked(); +} + static jfloatArray nativeGetFanSpeeds(JNIEnv *env, jclass /* clazz */) { - if (gThermalModule == nullptr) { + std::lock_guard<std::mutex> lock(gThermalHalMutex); + getThermalHalLocked(); + if (gThermalHal == nullptr) { ALOGE("Couldn't get fan speeds because of HAL error."); return env->NewFloatArray(0); } hidl_vec<CoolingDevice> list; - Return<void> ret = gThermalModule->getCoolingDevices( + Return<void> ret = gThermalHal->getCoolingDevices( [&list](ThermalStatus status, hidl_vec<CoolingDevice> devices) { if (status.code == ThermalStatusCode::SUCCESS) { list = std::move(devices); @@ -109,12 +151,14 @@ static jfloatArray nativeGetFanSpeeds(JNIEnv *env, jclass /* clazz */) { static jfloatArray nativeGetDeviceTemperatures(JNIEnv *env, jclass /* clazz */, int type, int source) { - if (gThermalModule == nullptr) { + std::lock_guard<std::mutex> lock(gThermalHalMutex); + getThermalHalLocked(); + if (gThermalHal == nullptr) { ALOGE("Couldn't get device temperatures because of HAL error."); return env->NewFloatArray(0); } hidl_vec<Temperature> list; - Return<void> 
ret = gThermalModule->getTemperatures( + Return<void> ret = gThermalHal->getTemperatures( [&list](ThermalStatus status, hidl_vec<Temperature> temperatures) { if (status.code == ThermalStatusCode::SUCCESS) { list = std::move(temperatures); @@ -154,12 +198,14 @@ static jfloatArray nativeGetDeviceTemperatures(JNIEnv *env, jclass /* clazz */, } static jobjectArray nativeGetCpuUsages(JNIEnv *env, jclass /* clazz */) { - if (gThermalModule == nullptr || !gCpuUsageInfoClassInfo.initMethod) { + std::lock_guard<std::mutex> lock(gThermalHalMutex); + getThermalHalLocked(); + if (gThermalHal == nullptr || !gCpuUsageInfoClassInfo.initMethod) { ALOGE("Couldn't get CPU usages because of HAL error."); return env->NewObjectArray(0, gCpuUsageInfoClassInfo.clazz, nullptr); } hidl_vec<CpuUsage> list; - Return<void> ret = gThermalModule->getCpuUsages( + Return<void> ret = gThermalHal->getCpuUsages( [&list](ThermalStatus status, hidl_vec<CpuUsage> cpuUsages) { if (status.code == ThermalStatusCode::SUCCESS) { list = std::move(cpuUsages); @@ -202,7 +248,6 @@ static const JNINativeMethod gHardwarePropertiesManagerServiceMethods[] = { }; int register_android_server_HardwarePropertiesManagerService(JNIEnv* env) { - gThermalModule = nullptr; int res = jniRegisterNativeMethods(env, "com/android/server/HardwarePropertiesManagerService", gHardwarePropertiesManagerServiceMethods, NELEM(gHardwarePropertiesManagerServiceMethods)); |