/* blob: 8be50ffe3a1390a7c68234db0090a8e4cee2c5a6 [file] [log] [blame] */
/****************************************************************************
*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All rights reserved
*
****************************************************************************/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/version.h>
#include <linux/kmod.h>
#include <linux/notifier.h>
#include "scsc_mx_impl.h"
#include "miframman.h"
#include "mifmboxman.h"
#include "mxman.h"
#include "srvman.h"
#include "mxmgmt_transport.h"
#include "mxlog.h"
#include "mxlogger.h"
#include "fw_panic_record.h"
#include "panicmon.h"
#include "mxproc.h"
#include "mxsyserr.h"
#include "scsc/scsc_log_collector.h"
#include <scsc/scsc_release.h>
#include <scsc/scsc_mx.h>
#include <scsc/scsc_logring.h>
/* If limits below are exceeded, a service level reset will be raised to level 7 */
#define SYSERR_RESET_HISTORY_SIZE (4)
/* Minimum time between system error service resets (ms) */
#define SYSERR_RESET_MIN_INTERVAL (300000)
/* No more then SYSERR_RESET_HISTORY_SIZE system error service resets in this period (ms)*/
#define SYSERR_RESET_MONITOR_PERIOD (3600000)
/* Time stamps of last service resets in jiffies */
static unsigned long syserr_reset_history[SYSERR_RESET_HISTORY_SIZE] = {0};
static int syserr_reset_history_index;
static uint syserr_reset_min_interval = SYSERR_RESET_MIN_INTERVAL;
module_param(syserr_reset_min_interval, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(syserr_reset_min_interval, "Minimum time between system error service resets (ms)");
static uint syserr_reset_monitor_period = SYSERR_RESET_MONITOR_PERIOD;
module_param(syserr_reset_monitor_period, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(syserr_reset_monitor_period, "No more then 4 system error service resets in this period (ms)");
/**
 * mx_syserr_init() - Log the system error reset limits currently in force.
 *
 * Emits one MXMAN info line with the values of the syserr_reset_min_interval
 * and syserr_reset_monitor_period module parameters (both in ms).
 */
void mx_syserr_init(void)
{
	/* Both parameters are 'uint', so they are printed with %u; %lu would
	 * make the formatter fetch an unsigned long for each of them.
	 */
	SCSC_TAG_INFO(MXMAN, "MM_SYSERR_INIT: syserr_reset_min_interval %u syserr_reset_monitor_period %u\n",
		      syserr_reset_min_interval, syserr_reset_monitor_period);
}
void mx_syserr_handler(struct mxman *mxman, const void *message)
{
const struct mx_syserr_msg *msg = (const struct mx_syserr_msg *)message;
struct srvman *srvman;
struct mx_syserr_decode decode;
srvman = scsc_mx_get_srvman(mxman->mx);
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND len: %u, ts: 0x%08X, tf: 0x%08X, str: 0x%x, code: 0x%08x, p0: 0x%x, p1: 0x%x\n",
msg->syserr.length,
msg->syserr.slow_clock,
msg->syserr.fast_clock,
msg->syserr.string_index,
msg->syserr.syserr_code,
msg->syserr.param[0],
msg->syserr.param[1]);
decode.subsys = (u8) ((msg->syserr.syserr_code >> SYSERR_SUB_SYSTEM_POSN) & SYSERR_SUB_SYSTEM_MASK);
decode.level = (u8) ((msg->syserr.syserr_code >> SYSERR_LEVEL_POSN) & SYSERR_LEVEL_MASK);
decode.type = (u8) ((msg->syserr.syserr_code >> SYSERR_TYPE_POSN) & SYSERR_TYPE_MASK);
decode.subcode = (u16) ((msg->syserr.syserr_code >> SYSERR_SUB_CODE_POSN) & SYSERR_SUB_CODE_MASK);
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND Subsys %d, Level %d, Type %d, Subcode 0x%04x\n",
decode.subsys, decode.level, decode.type, decode.subcode);
/* Level 1 just gets logged without bothering anyone else */
if (decode.level == MX_SYSERR_LEVEL_1) {
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x log only\n",
msg->syserr.syserr_code);
return;
}
/* Ignore if panic reset in progress */
if (srvman_in_error_safe(srvman) || (mxman->mxman_state == MXMAN_STATE_FAILED)) {
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x ignored (reset in progess)\n",
msg->syserr.syserr_code);
return;
}
/* Ignore any system errors for the same sub-system if recovery is in progress */
if ((mxman->syserr_recovery_in_progress) && (mxman->last_syserr.subsys == decode.subsys)) {
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x ignored (recovery in progess)\n",
msg->syserr.syserr_code);
return;
}
/* Let affected sevices escalate if needed - this also checks if only one sub-system is running
* and handles race conditions with shutting down service
*/
decode.level = srvman_notify_sub_system(srvman, &decode);
if (decode.level >= MX_SYSERR_LEVEL_5) {
unsigned long now = jiffies;
int i;
/* We use 0 as a NULL timestamp so avoid this */
now = (now) ? now : 1;
if ((decode.level >= MX_SYSERR_LEVEL_7) || (mxman->syserr_recovery_in_progress)) {
/* If full reset has been requested or a service restart is needed and one is
* already in progress, trigger a full reset
*/
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x triggered full reset\n",
msg->syserr.syserr_code);
mxman_fail(mxman, SCSC_PANIC_CODE_HOST << 15, __func__);
return;
}
/* last_syserr_recovery_time is always zero-ed before we restart the chip */
if (mxman->last_syserr_recovery_time) {
/* Have we had a too recent system error service reset
* Chance of false positive here is low enough to be acceptable
*/
if ((syserr_reset_min_interval) && (time_in_range(now, mxman->last_syserr_recovery_time,
mxman->last_syserr_recovery_time + msecs_to_jiffies(syserr_reset_min_interval)))) {
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x triggered full reset (less than %dms after last)\n",
msg->syserr.syserr_code, syserr_reset_min_interval);
mxman_fail(mxman, SCSC_PANIC_CODE_HOST << 15, __func__);
return;
} else if (syserr_reset_monitor_period) {
/* Have we had too many system error service resets in one period? */
/* This will be the case if all our stored history was in this period */
bool out_of_danger_period_found = false;
for (i = 0; (i < SYSERR_RESET_HISTORY_SIZE) && (!out_of_danger_period_found); i++)
out_of_danger_period_found = ((!syserr_reset_history[i]) ||
(!time_in_range(now, syserr_reset_history[i],
syserr_reset_history[i] + msecs_to_jiffies(syserr_reset_monitor_period))));
if (!out_of_danger_period_found) {
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x triggered full reset (too many within %dms)\n",
msg->syserr.syserr_code, syserr_reset_monitor_period);
mxman_fail(mxman, SCSC_PANIC_CODE_HOST << 15, __func__);
return;
}
}
} else
/* First syserr service reset since chip was (re)started - zap history */
for (i = 0; i < SYSERR_RESET_HISTORY_SIZE; i++)
syserr_reset_history[i] = 0;
/* Otherwise trigger recovery of the affected subservices */
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x triggered service recovery\n",
msg->syserr.syserr_code);
syserr_reset_history[syserr_reset_history_index++ % SYSERR_RESET_HISTORY_SIZE] = now;
mxman->last_syserr_recovery_time = now;
mxman_syserr(mxman, &decode);
}
#ifdef CONFIG_SCSC_WLBTD
#if IS_ENABLED(CONFIG_SCSC_LOG_COLLECTION)
/* Trigger sable log collection */
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x requested log collection\n", msg->syserr.syserr_code);
scsc_log_collector_schedule_collection(SCSC_LOG_SYS_ERR, decode.subcode);
#endif
#endif
}