summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Karthik Ravi Shankar <karthikrs@google.com> 2017-10-13 15:42:32 -0700
committer Karthik Ravi Shankar <karthikrs@google.com> 2017-10-17 00:14:32 -0700
commit4d1ad408736f28229c3f625de6873e3862adf6bf (patch)
tree00030b8ed16556a45978b4ccdc28c2b1e1b2fe42
parent0d2d6b6a863cfd64a6f1d3c1a33eed198d7612a2 (diff)
Add crash recovery logic to HardwarePropertiesManagerService
The HardwarePropertiesManagerService-JNI communicates with the thermal HAL to expose temperature and other thermal data to Java services. When Thermal HAL died, there was no recovery mechanism in place. This change adds that recovery mechanism. Bug: 67769672 Test: 1) Rebooted the device multiple times and checked that VrCore is able to get the thermal information correctly. 2) Enter and exit VR mode multiple times killing thermalHal by design and ensure that we're getting thermal temperatures correctly. 10-16 10:05:19.605 1099 1600 E HardwarePropertiesManagerService-JNI: ThermalHAL just died ... 10-16 10:05:29.761 3459 3459 D ThermalWarningManager: THERMAL: ThermalInfo: [current temp=33.0, VR throttling temp (soft exit)=52.0, VR hard exit temp=54.0, VR warning temp=51.0, last throttling warning wall time=n/a, last soft 'exit VR' flow wall time=n/a, last hard 'exit VR' flow wall time=n/a] Change-Id: I55d8eae31526e1bd1a232afea5bd02cb0afca142 Signed-off-by: Karthik Ravi Shankar <karthikrs@google.com>
-rw-r--r--services/core/jni/com_android_server_HardwarePropertiesManagerService.cpp71
1 files changed, 58 insertions, 13 deletions
diff --git a/services/core/jni/com_android_server_HardwarePropertiesManagerService.cpp b/services/core/jni/com_android_server_HardwarePropertiesManagerService.cpp
index 5f67ac1dfe8f..ed79352bba21 100644
--- a/services/core/jni/com_android_server_HardwarePropertiesManagerService.cpp
+++ b/services/core/jni/com_android_server_HardwarePropertiesManagerService.cpp
@@ -30,6 +30,8 @@
namespace android {
+using android::hidl::base::V1_0::IBase;
+using hardware::hidl_death_recipient;
using hardware::hidl_vec;
using hardware::thermal::V1_0::CoolingDevice;
using hardware::thermal::V1_0::CpuUsage;
@@ -58,7 +60,22 @@ static struct {
jfloat gUndefinedTemperature;
-static sp<IThermal> gThermalModule;
+static void getThermalHalLocked();
+static std::mutex gThermalHalMutex;
+static sp<IThermal> gThermalHal = nullptr;
+
+// struct ThermalHalDeathRecipient;
+struct ThermalHalDeathRecipient : virtual public hidl_death_recipient {
+ // hidl_death_recipient interface
+ virtual void serviceDied(uint64_t cookie, const wp<IBase>& who) override {
+ std::lock_guard<std::mutex> lock(gThermalHalMutex);
+ ALOGE("ThermalHAL just died");
+ gThermalHal = nullptr;
+ getThermalHalLocked();
+ }
+};
+
+sp<ThermalHalDeathRecipient> gThermalHalDeathRecipient = nullptr;
// ----------------------------------------------------------------------------
@@ -66,25 +83,50 @@ float finalizeTemperature(float temperature) {
return isnan(temperature) ? gUndefinedTemperature : temperature;
}
-static void nativeInit(JNIEnv* env, jobject obj) {
- // TODO(b/31632518)
- if (gThermalModule == nullptr) {
- gThermalModule = IThermal::getService();
+// The caller must be holding gThermalHalMutex.
+static void getThermalHalLocked() {
+ if (gThermalHal != nullptr) {
+ return;
}
- if (gThermalModule == nullptr) {
+ gThermalHal = IThermal::getService();
+
+ if (gThermalHal == nullptr) {
ALOGE("Unable to get Thermal service.");
+ } else {
+ if (gThermalHalDeathRecipient == nullptr) {
+ gThermalHalDeathRecipient = new ThermalHalDeathRecipient();
+ }
+ hardware::Return<bool> linked = gThermalHal->linkToDeath(
+ gThermalHalDeathRecipient, 0x451F /* cookie */);
+ if (!linked.isOk()) {
+ ALOGE("Transaction error in linking to ThermalHAL death: %s",
+ linked.description().c_str());
+ gThermalHal = nullptr;
+ } else if (!linked) {
+ ALOGW("Unable to link to ThermalHal death notifications");
+ gThermalHal = nullptr;
+ } else {
+ ALOGD("Link to death notification successful");
+ }
}
}
+static void nativeInit(JNIEnv* env, jobject obj) {
+ std::lock_guard<std::mutex> lock(gThermalHalMutex);
+ getThermalHalLocked();
+}
+
static jfloatArray nativeGetFanSpeeds(JNIEnv *env, jclass /* clazz */) {
- if (gThermalModule == nullptr) {
+ std::lock_guard<std::mutex> lock(gThermalHalMutex);
+ getThermalHalLocked();
+ if (gThermalHal == nullptr) {
ALOGE("Couldn't get fan speeds because of HAL error.");
return env->NewFloatArray(0);
}
hidl_vec<CoolingDevice> list;
- Return<void> ret = gThermalModule->getCoolingDevices(
+ Return<void> ret = gThermalHal->getCoolingDevices(
[&list](ThermalStatus status, hidl_vec<CoolingDevice> devices) {
if (status.code == ThermalStatusCode::SUCCESS) {
list = std::move(devices);
@@ -109,12 +151,14 @@ static jfloatArray nativeGetFanSpeeds(JNIEnv *env, jclass /* clazz */) {
static jfloatArray nativeGetDeviceTemperatures(JNIEnv *env, jclass /* clazz */, int type,
int source) {
- if (gThermalModule == nullptr) {
+ std::lock_guard<std::mutex> lock(gThermalHalMutex);
+ getThermalHalLocked();
+ if (gThermalHal == nullptr) {
ALOGE("Couldn't get device temperatures because of HAL error.");
return env->NewFloatArray(0);
}
hidl_vec<Temperature> list;
- Return<void> ret = gThermalModule->getTemperatures(
+ Return<void> ret = gThermalHal->getTemperatures(
[&list](ThermalStatus status, hidl_vec<Temperature> temperatures) {
if (status.code == ThermalStatusCode::SUCCESS) {
list = std::move(temperatures);
@@ -154,12 +198,14 @@ static jfloatArray nativeGetDeviceTemperatures(JNIEnv *env, jclass /* clazz */,
}
static jobjectArray nativeGetCpuUsages(JNIEnv *env, jclass /* clazz */) {
- if (gThermalModule == nullptr || !gCpuUsageInfoClassInfo.initMethod) {
+ std::lock_guard<std::mutex> lock(gThermalHalMutex);
+ getThermalHalLocked();
+ if (gThermalHal == nullptr || !gCpuUsageInfoClassInfo.initMethod) {
ALOGE("Couldn't get CPU usages because of HAL error.");
return env->NewObjectArray(0, gCpuUsageInfoClassInfo.clazz, nullptr);
}
hidl_vec<CpuUsage> list;
- Return<void> ret = gThermalModule->getCpuUsages(
+ Return<void> ret = gThermalHal->getCpuUsages(
[&list](ThermalStatus status, hidl_vec<CpuUsage> cpuUsages) {
if (status.code == ThermalStatusCode::SUCCESS) {
list = std::move(cpuUsages);
@@ -202,7 +248,6 @@ static const JNINativeMethod gHardwarePropertiesManagerServiceMethods[] = {
};
int register_android_server_HardwarePropertiesManagerService(JNIEnv* env) {
- gThermalModule = nullptr;
int res = jniRegisterNativeMethods(env, "com/android/server/HardwarePropertiesManagerService",
gHardwarePropertiesManagerServiceMethods,
NELEM(gHardwarePropertiesManagerServiceMethods));