/*
 * Machine check exception handling.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>

#include <asm/machdep.h>
#include <asm/mce.h>

static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

static void machine_check_process_queued_event(struct irq_work *work);
static struct irq_work mce_event_process_work = {
	.func = machine_check_process_queued_event,
};

static void mce_set_error_info(struct machine_check_event *mce,
			       struct mce_error_info *mce_err)
{
	mce->error_type = mce_err->error_type;
	switch (mce_err->error_type) {
	case MCE_ERROR_TYPE_UE:
		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
		break;
	case MCE_ERROR_TYPE_SLB:
		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
		break;
	case MCE_ERROR_TYPE_ERAT:
		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
		break;
	case MCE_ERROR_TYPE_TLB:
		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
		break;
	case MCE_ERROR_TYPE_USER:
		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
		break;
	case MCE_ERROR_TYPE_RA:
		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
		break;
	case MCE_ERROR_TYPE_LINK:
		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
		break;
	case MCE_ERROR_TYPE_UNKNOWN:
	default:
		break;
	}
}
78
79/*
80 * Decode and save high level MCE information into per cpu buffer which
81 * is an array of machine_check_event structure.
82 */
83void save_mce_event(struct pt_regs *regs, long handled,
84 struct mce_error_info *mce_err,
Mahesh Salgaonkar55672ec2013-12-16 10:46:24 +053085 uint64_t nip, uint64_t addr)
Mahesh Salgaonkar36df96f2013-10-30 20:05:40 +053086{
Daniel Axtensffb2d782015-05-12 13:23:59 +100087 int index = __this_cpu_inc_return(mce_nest_count) - 1;
Christoph Lameter69111ba2014-10-21 15:23:25 -050088 struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
Mahesh Salgaonkar36df96f2013-10-30 20:05:40 +053089
90 /*
91 * Return if we don't have enough space to log mce event.
92 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
93 * the check below will stop buffer overrun.
94 */
95 if (index >= MAX_MC_EVT)
96 return;
97
98 /* Populate generic machine check info */
99 mce->version = MCE_V1;
Mahesh Salgaonkar55672ec2013-12-16 10:46:24 +0530100 mce->srr0 = nip;
Mahesh Salgaonkar36df96f2013-10-30 20:05:40 +0530101 mce->srr1 = regs->msr;
102 mce->gpr3 = regs->gpr[3];
103 mce->in_use = 1;
104
Mahesh Salgaonkarc74dd882016-08-09 10:39:13 +0530105 /* Mark it recovered if we have handled it and MSR(RI=1). */
106 if (handled && (regs->msr & MSR_RI))
Mahesh Salgaonkar36df96f2013-10-30 20:05:40 +0530107 mce->disposition = MCE_DISPOSITION_RECOVERED;
108 else
109 mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
Nicholas Pigginc1bbf382017-02-28 12:00:47 +1000110
111 mce->initiator = mce_err->initiator;
112 mce->severity = mce_err->severity;
Mahesh Salgaonkar36df96f2013-10-30 20:05:40 +0530113
Mahesh Salgaonkar36df96f2013-10-30 20:05:40 +0530114 /*
115 * Populate the mce error_type and type-specific error_type.
116 */
117 mce_set_error_info(mce, mce_err);
118
119 if (!addr)
120 return;
121
122 if (mce->error_type == MCE_ERROR_TYPE_TLB) {
123 mce->u.tlb_error.effective_address_provided = true;
124 mce->u.tlb_error.effective_address = addr;
125 } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
126 mce->u.slb_error.effective_address_provided = true;
127 mce->u.slb_error.effective_address = addr;
128 } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
129 mce->u.erat_error.effective_address_provided = true;
130 mce->u.erat_error.effective_address = addr;
Nicholas Piggin7b9f71f92017-02-28 12:00:48 +1000131 } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
132 mce->u.user_error.effective_address_provided = true;
133 mce->u.user_error.effective_address = addr;
134 } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
135 mce->u.ra_error.effective_address_provided = true;
136 mce->u.ra_error.effective_address = addr;
137 } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
138 mce->u.link_error.effective_address_provided = true;
139 mce->u.link_error.effective_address = addr;
Mahesh Salgaonkar36df96f2013-10-30 20:05:40 +0530140 } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
141 mce->u.ue_error.effective_address_provided = true;
142 mce->u.ue_error.effective_address = addr;
143 }
144 return;
145}
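
/*
 * Usage sketch (illustrative only, not part of this file): a CPU-family
 * machine check handler typically decodes the error into an
 * mce_error_info and then records the event. decode_mce() below is a
 * hypothetical stand-in for that platform-specific decode step.
 *
 *	struct mce_error_info mce_err = { 0 };
 *	uint64_t addr = 0;
 *	long handled = decode_mce(regs, &mce_err, &addr);
 *
 *	save_mce_event(regs, handled, &mce_err, regs->nip, addr);
 */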

/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		0 <= do not release the mce event. Caller will invoke
 *		     release_mce_event() once the event has been consumed.
 *		1 <= release the slot.
 *
 *	return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by platform-specific machine check
 * handler routines and in KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
	int index = __this_cpu_read(mce_nest_count) - 1;
	struct machine_check_event *mc_evt;
	int ret = 0;

	/* Sanity check */
	if (index < 0)
		return ret;

	/* Check if we have MCE info to process. */
	if (index < MAX_MC_EVT) {
		mc_evt = this_cpu_ptr(&mce_event[index]);
		/* Copy the event structure and release the original. */
		if (mce)
			*mce = *mc_evt;
		if (release)
			mc_evt->in_use = 0;
		ret = 1;
	}
	/* Decrement the count to free the slot. */
	if (release)
		__this_cpu_dec(mce_nest_count);

	return ret;
}

void release_mce_event(void)
{
	get_mce_event(NULL, true);
}
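
/*
 * Consumption sketch (illustrative only): peek at the newest event
 * without freeing its slot, act on it, then release the slot. The
 * MCE_EVENT_RELEASE/MCE_EVENT_DONTRELEASE names are the bool aliases
 * defined in asm/mce.h.
 *
 *	struct machine_check_event evt;
 *
 *	if (get_mce_event(&evt, MCE_EVENT_DONTRELEASE)) {
 *		// ... inspect evt.error_type / evt.disposition ...
 *		release_mce_event();
 *	}
 */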

/*
 * Queue up the MCE event so that it can be handled later.
 */
void machine_check_queue_event(void)
{
	int index;
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return;

	index = __this_cpu_inc_return(mce_queue_count) - 1;
	/* If the queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_queue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));

	/* Queue irq work to process this event later. */
	irq_work_queue(&mce_event_process_work);
}
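
/*
 * Design note: machine check events are captured in contexts where
 * printing and most kernel services are unsafe (see machine_check_early()
 * below), so this path only copies the event into the per-cpu queue and
 * defers all reporting to machine_check_process_queued_event() via
 * irq_work.
 */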

/*
 * Process pending MCE events from the MCE event queue. This is the
 * irq_work callback queued by machine_check_queue_event().
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
	int index;

	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

	/*
	 * For now just print it to console.
	 * TODO: log this error event to FSP or nvram.
	 */
	while (__this_cpu_read(mce_queue_count) > 0) {
		index = __this_cpu_read(mce_queue_count) - 1;
		machine_check_print_event_info(
				this_cpu_ptr(&mce_event_queue[index]), false);
		__this_cpu_dec(mce_queue_count);
	}
}

void machine_check_print_event_info(struct machine_check_event *evt,
				    bool user_mode)
{
	const char *level, *sevstr, *subtype;
	static const char *mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};
	static const char *mc_slb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_erat_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_tlb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_user_types[] = {
		"Indeterminate",
		"tlbie(l) invalid",
	};
	static const char *mc_ra_types[] = {
		"Indeterminate",
		"Instruction fetch (bad)",
		"Instruction fetch (foreign)",
		"Page table walk ifetch (bad)",
		"Page table walk ifetch (foreign)",
		"Load (bad)",
		"Store (bad)",
		"Page table walk Load/Store (bad)",
		"Page table walk Load/Store (foreign)",
		"Load/Store (foreign)",
	};
	static const char *mc_link_types[] = {
		"Indeterminate",
		"Instruction fetch (timeout)",
		"Page table walk ifetch (timeout)",
		"Load (timeout)",
		"Store (timeout)",
		"Page table walk Load/Store (timeout)",
	};

	/* Print things out */
	if (evt->version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt->version);
		return;
	}
	switch (evt->severity) {
	case MCE_SEV_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case MCE_SEV_WARNING:
		level = KERN_WARNING;
		sevstr = "";
		break;
	case MCE_SEV_ERROR_SYNC:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case MCE_SEV_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
	       "Recovered" : "Not recovered");

	if (user_mode) {
		printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level,
		       evt->srr0, current->pid, current->comm);
	} else {
		printk("%s NIP [%016llx]: %pS\n", level, evt->srr0,
		       (void *)evt->srr0);
	}

	printk("%s Initiator: %s\n", level,
	       evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		subtype = evt->u.ue_error.ue_error_type <
			ARRAY_SIZE(mc_ue_types) ?
			mc_ue_types[evt->u.ue_error.ue_error_type]
			: "Unknown";
		printk("%s Error type: UE [%s]\n", level, subtype);
		if (evt->u.ue_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.ue_error.effective_address);
		if (evt->u.ue_error.physical_address_provided)
			printk("%s Physical address: %016llx\n",
			       level, evt->u.ue_error.physical_address);
		break;
	case MCE_ERROR_TYPE_SLB:
		subtype = evt->u.slb_error.slb_error_type <
			ARRAY_SIZE(mc_slb_types) ?
			mc_slb_types[evt->u.slb_error.slb_error_type]
			: "Unknown";
		printk("%s Error type: SLB [%s]\n", level, subtype);
		if (evt->u.slb_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.slb_error.effective_address);
		break;
	case MCE_ERROR_TYPE_ERAT:
		subtype = evt->u.erat_error.erat_error_type <
			ARRAY_SIZE(mc_erat_types) ?
			mc_erat_types[evt->u.erat_error.erat_error_type]
			: "Unknown";
		printk("%s Error type: ERAT [%s]\n", level, subtype);
		if (evt->u.erat_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.erat_error.effective_address);
		break;
	case MCE_ERROR_TYPE_TLB:
		subtype = evt->u.tlb_error.tlb_error_type <
			ARRAY_SIZE(mc_tlb_types) ?
			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
			: "Unknown";
		printk("%s Error type: TLB [%s]\n", level, subtype);
		if (evt->u.tlb_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.tlb_error.effective_address);
		break;
	case MCE_ERROR_TYPE_USER:
		subtype = evt->u.user_error.user_error_type <
			ARRAY_SIZE(mc_user_types) ?
			mc_user_types[evt->u.user_error.user_error_type]
			: "Unknown";
		printk("%s Error type: User [%s]\n", level, subtype);
		if (evt->u.user_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.user_error.effective_address);
		break;
	case MCE_ERROR_TYPE_RA:
		subtype = evt->u.ra_error.ra_error_type <
			ARRAY_SIZE(mc_ra_types) ?
			mc_ra_types[evt->u.ra_error.ra_error_type]
			: "Unknown";
		printk("%s Error type: Real address [%s]\n", level, subtype);
		if (evt->u.ra_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.ra_error.effective_address);
		break;
	case MCE_ERROR_TYPE_LINK:
		subtype = evt->u.link_error.link_error_type <
			ARRAY_SIZE(mc_link_types) ?
			mc_link_types[evt->u.link_error.link_error_type]
			: "Unknown";
		printk("%s Error type: Link [%s]\n", level, subtype);
		if (evt->u.link_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.link_error.effective_address);
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		printk("%s Error type: Unknown\n", level);
		break;
	}
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);

uint64_t get_mce_fault_addr(struct machine_check_event *evt)
{
	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		if (evt->u.ue_error.effective_address_provided)
			return evt->u.ue_error.effective_address;
		break;
	case MCE_ERROR_TYPE_SLB:
		if (evt->u.slb_error.effective_address_provided)
			return evt->u.slb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_ERAT:
		if (evt->u.erat_error.effective_address_provided)
			return evt->u.erat_error.effective_address;
		break;
	case MCE_ERROR_TYPE_TLB:
		if (evt->u.tlb_error.effective_address_provided)
			return evt->u.tlb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_USER:
		if (evt->u.user_error.effective_address_provided)
			return evt->u.user_error.effective_address;
		break;
	case MCE_ERROR_TYPE_RA:
		if (evt->u.ra_error.effective_address_provided)
			return evt->u.ra_error.effective_address;
		break;
	case MCE_ERROR_TYPE_LINK:
		if (evt->u.link_error.effective_address_provided)
			return evt->u.link_error.effective_address;
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		break;
	}
	return 0;
}
EXPORT_SYMBOL(get_mce_fault_addr);
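
/*
 * Usage sketch (illustrative only): a caller holding an event can pull
 * out the faulting effective address uniformly, without caring which
 * error type provided it; 0 means no address was recorded.
 * handle_bad_addr() is hypothetical.
 *
 *	struct machine_check_event evt;
 *
 *	if (get_mce_event(&evt, MCE_EVENT_RELEASE)) {
 *		uint64_t addr = get_mce_fault_addr(&evt);
 *
 *		if (addr)
 *			handle_bad_addr(addr);	// hypothetical recovery hook
 *	}
 */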

/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain SRR0 and SRR1.
 */
long machine_check_early(struct pt_regs *regs)
{
	long handled = 0;

	__this_cpu_inc(irq_stat.mce_exceptions);

	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
		handled = cur_cpu_spec->machine_check_early(regs);
	return handled;
}
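
/*
 * Note on the real-mode constraint above: with translation off, anything
 * that might fault or take locks (printk, allocation) is off limits,
 * which is why machine_check_early() only bumps a per-cpu counter and
 * dispatches to the CPU-specific early handler; reporting is deferred
 * via machine_check_queue_event().
 */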

long hmi_exception_realmode(struct pt_regs *regs)
{
	__this_cpu_inc(irq_stat.hmi_exceptions);

	wait_for_subcore_guest_exit();

	if (ppc_md.hmi_exception_early)
		ppc_md.hmi_exception_early(regs);

	wait_for_tb_resync();

	return 0;
}