/*
 * Hibernate support specific for ARM64
 *
 * Derived from work on ARM hibernation support by:
 *
 * Ubuntu project, hibernation support for mach-dove
 * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu)
 * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.)
 * https://lkml.org/lkml/2010/6/18/4
 * https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html
 * https://patchwork.kernel.org/patch/96442/
 *
 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
 *
 * License terms: GNU General Public License (GPL) version 2
 */
#define pr_fmt(x) "hibernate: " x
#include <linux/kvm_host.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/pm.h>
#include <linux/sched.h>
#include <linux/suspend.h>
#include <linux/utsname.h>
#include <linux/version.h>

#include <asm/barrier.h>
#include <asm/cacheflush.h>
#include <asm/irqflags.h>
#include <asm/memory.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/pgtable-hwdef.h>
#include <asm/sections.h>
#include <asm/suspend.h>
#include <asm/virt.h>

/*
 * Hibernate core relies on this value being 0 on resume, and marks it
 * __nosavedata assuming it will keep the resume kernel's '0' value. This
 * doesn't happen with KASLR, so the resume path below sets in_suspend = 0
 * explicitly.
 *
 * defined as "__visible int in_suspend __nosavedata" in
 * kernel/power/hibernate.c
 */
extern int in_suspend;

/* Find a symbol's alias in the linear map */
#define LMADDR(x)	phys_to_virt(virt_to_phys(x))

/*
 * Do we need to reset el2? True if we booted at el2 but run the kernel at
 * el1 (i.e. no VHE); el2 must then be re-configured on resume via the
 * hyp-stub vectors.
 */
#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode())

/*
 * Start/end of the hibernate exit code; this must be copied to a 'safe'
 * location in memory and executed from there.
 */
extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];

/* temporary el2 vectors in the __hibernate_exit_text section. */
extern char hibernate_el2_vectors[];

/* hyp-stub vectors, used to restore el2 during resume from hibernate. */
extern char __hyp_stub_vectors[];

/*
 * Values that may not change over hibernate/resume. We put the build number
 * and date in here so that we guarantee not to resume with a different
 * kernel.
 */
struct arch_hibernate_hdr_invariants {
	char		uts_version[__NEW_UTS_LEN + 1];
};

/* These values need to be known across a hibernate/restore. */
static struct arch_hibernate_hdr {
	struct arch_hibernate_hdr_invariants invariants;

	/* These are needed to find the relocated kernel if built with kaslr */
	phys_addr_t	ttbr1_el1;
	void		(*reenter_kernel)(void);

	/*
	 * We need to know where the __hyp_stub_vectors are after restore to
	 * re-configure el2.
	 */
	phys_addr_t	__hyp_stub_vectors;
} resume_hdr;

static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i)
{
	memset(i, 0, sizeof(*i));
	memcpy(i->uts_version, init_utsname()->version, sizeof(i->uts_version));
}

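/*
 * Pages in the .nosave section must keep the boot kernel's contents across
 * resume, so tell the hibernate core not to save or restore them.
 */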
int pfn_is_nosave(unsigned long pfn)
{
	unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
	unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);

	return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
}

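/*
 * Nonboot CPUs are already offline by the time we get here; the CPU
 * context is captured by __cpu_suspend_enter() in swsusp_arch_suspend(),
 * so these hooks only need to sanity-check that a single CPU is online.
 */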
void notrace save_processor_state(void)
{
	WARN_ON(num_online_cpus() != 1);
}

void notrace restore_processor_state(void)
{
}

int arch_hibernation_header_save(void *addr, unsigned int max_size)
{
	struct arch_hibernate_hdr *hdr = addr;

	if (max_size < sizeof(*hdr))
		return -EOVERFLOW;

	arch_hdr_invariants(&hdr->invariants);
	hdr->ttbr1_el1 = virt_to_phys(swapper_pg_dir);
	hdr->reenter_kernel = _cpu_resume;

	/* We can't use __hyp_get_vectors() because kvm may still be loaded */
	if (el2_reset_needed())
		hdr->__hyp_stub_vectors = virt_to_phys(__hyp_stub_vectors);
	else
		hdr->__hyp_stub_vectors = 0;

	return 0;
}
EXPORT_SYMBOL(arch_hibernation_header_save);

int arch_hibernation_header_restore(void *addr)
{
	struct arch_hibernate_hdr_invariants invariants;
	struct arch_hibernate_hdr *hdr = addr;

	arch_hdr_invariants(&invariants);
	if (memcmp(&hdr->invariants, &invariants, sizeof(invariants))) {
		pr_crit("Hibernate image not generated by this kernel!\n");
		return -EINVAL;
	}

	resume_hdr = *hdr;

	return 0;
}
EXPORT_SYMBOL(arch_hibernation_header_restore);

/*
 * Copies length bytes, starting at src_start, into a new page, performs
 * cache maintenance, then maps the page at the specified low address as
 * executable.
 *
 * This is used by hibernate to copy the code it needs to execute when
 * overwriting the kernel text. This function generates a new set of page
 * tables, which it loads into ttbr0.
 *
 * Length is provided as we probably only want 4K of data, even on a 64K
 * page system.
 */
static int create_safe_exec_page(void *src_start, size_t length,
				 unsigned long dst_addr,
				 phys_addr_t *phys_dst_addr,
				 void *(*allocator)(gfp_t mask),
				 gfp_t mask)
{
	int rc = 0;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long dst = (unsigned long)allocator(mask);

	if (!dst) {
		rc = -ENOMEM;
		goto out;
	}

	memcpy((void *)dst, src_start, length);
	flush_icache_range(dst, dst + length);

	pgd = pgd_offset_raw(allocator(mask), dst_addr);
	if (pgd_none(*pgd)) {
		pud = allocator(mask);
		if (!pud) {
			rc = -ENOMEM;
			goto out;
		}
		pgd_populate(&init_mm, pgd, pud);
	}

	pud = pud_offset(pgd, dst_addr);
	if (pud_none(*pud)) {
		pmd = allocator(mask);
		if (!pmd) {
			rc = -ENOMEM;
			goto out;
		}
		pud_populate(&init_mm, pud, pmd);
	}

	pmd = pmd_offset(pud, dst_addr);
	if (pmd_none(*pmd)) {
		pte = allocator(mask);
		if (!pte) {
			rc = -ENOMEM;
			goto out;
		}
		pmd_populate_kernel(&init_mm, pmd, pte);
	}

	pte = pte_offset_kernel(pmd, dst_addr);
	set_pte(pte, __pte(virt_to_phys((void *)dst) |
			 pgprot_val(PAGE_KERNEL_EXEC)));

	/* Load our new page tables */
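	/*
	 * The first isb orders the ttbr0_el1 write before the TLB
	 * maintenance; tlbi vmalle1is + dsb ish then discard any stale
	 * stage-1 entries so the new tables cannot alias old translations.
	 */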
	asm volatile("msr ttbr0_el1, %0;"
		     "isb;"
		     "tlbi vmalle1is;"
		     "dsb ish;"
		     "isb" : : "r"(virt_to_phys(pgd)));

	*phys_dst_addr = virt_to_phys((void *)dst);

out:
	return rc;
}

int swsusp_arch_suspend(void)
{
	int ret = 0;
	unsigned long flags;
	struct sleep_stack_data state;

	local_dbg_save(flags);

	if (__cpu_suspend_enter(&state)) {
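		/* CPU context saved: this is the suspend path, snapshot memory. */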
		ret = swsusp_save();
	} else {
		/* Clean kernel to PoC for secondary core startup */
		__flush_dcache_area(LMADDR(KERNEL_START), KERNEL_END - KERNEL_START);

		/*
		 * Tell the hibernation core that we've just restored
		 * the memory
		 */
		in_suspend = 0;

		__cpu_suspend_exit();
	}

	local_dbg_restore(flags);

	return ret;
}

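/*
 * The page-table copy below is built top-down: at each level a 'safe' page
 * is allocated for the destination table, and the read-only attribute is
 * cleared from the copied entries so that resume can write over read-only
 * kernel mappings through them.
 */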
static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start,
		    unsigned long end)
{
	pte_t *src_pte;
	pte_t *dst_pte;
	unsigned long addr = start;

	dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC);
	if (!dst_pte)
		return -ENOMEM;
	pmd_populate_kernel(&init_mm, dst_pmd, dst_pte);
	dst_pte = pte_offset_kernel(dst_pmd, start);

	src_pte = pte_offset_kernel(src_pmd, start);
	do {
		if (!pte_none(*src_pte))
			/*
			 * Resume will overwrite areas that may be marked
			 * read only (code, rodata). Clear the RDONLY bit from
			 * the temporary mappings we use during restore.
			 */
			set_pte(dst_pte, __pte(pte_val(*src_pte) & ~PTE_RDONLY));
	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);

	return 0;
}

static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start,
		    unsigned long end)
{
	pmd_t *src_pmd;
	pmd_t *dst_pmd;
	unsigned long next;
	unsigned long addr = start;

	if (pud_none(*dst_pud)) {
		dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
		if (!dst_pmd)
			return -ENOMEM;
		pud_populate(&init_mm, dst_pud, dst_pmd);
	}
	dst_pmd = pmd_offset(dst_pud, start);

	src_pmd = pmd_offset(src_pud, start);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*src_pmd))
			continue;
		if (pmd_table(*src_pmd)) {
			if (copy_pte(dst_pmd, src_pmd, addr, next))
				return -ENOMEM;
		} else {
			set_pmd(dst_pmd,
				__pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY));
		}
	} while (dst_pmd++, src_pmd++, addr = next, addr != end);

	return 0;
}

static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start,
		    unsigned long end)
{
	pud_t *dst_pud;
	pud_t *src_pud;
	unsigned long next;
	unsigned long addr = start;

	if (pgd_none(*dst_pgd)) {
		dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC);
		if (!dst_pud)
			return -ENOMEM;
		pgd_populate(&init_mm, dst_pgd, dst_pud);
	}
	dst_pud = pud_offset(dst_pgd, start);

	src_pud = pud_offset(src_pgd, start);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none(*src_pud))
			continue;
		if (pud_table(*src_pud)) {
			if (copy_pmd(dst_pud, src_pud, addr, next))
				return -ENOMEM;
		} else {
			set_pud(dst_pud,
				__pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY));
		}
	} while (dst_pud++, src_pud++, addr = next, addr != end);

	return 0;
}

static int copy_page_tables(pgd_t *dst_pgd, unsigned long start,
			    unsigned long end)
{
	unsigned long next;
	unsigned long addr = start;
	pgd_t *src_pgd = pgd_offset_k(start);

	dst_pgd = pgd_offset_raw(dst_pgd, start);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none(*src_pgd))
			continue;
		if (copy_pud(dst_pgd, src_pgd, addr, next))
			return -ENOMEM;
	} while (dst_pgd++, src_pgd++, addr = next, addr != end);

	return 0;
}

/*
 * Set up, then resume from the hibernate image using swsusp_arch_suspend_exit().
 *
 * Memory allocated by get_safe_page() will be dealt with by the hibernate
 * code; we don't need to free it here.
 */
int swsusp_arch_resume(void)
{
	int rc = 0;
	void *zero_page;
	size_t exit_size;
	pgd_t *tmp_pg_dir;
	void *lm_restore_pblist;
	phys_addr_t phys_hibernate_exit;
	void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
					  void *, phys_addr_t, phys_addr_t);

	/*
	 * Locate the exit code in the bottom-but-one page, so that *NULL
	 * still has disastrous effects.
	 */
	hibernate_exit = (void *)PAGE_SIZE;
	exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
	/*
	 * Copy swsusp_arch_suspend_exit() to a safe page. This will generate
	 * a new set of ttbr0 page tables and load them.
	 */
	rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
				   (unsigned long)hibernate_exit,
				   &phys_hibernate_exit,
				   (void *)get_safe_page, GFP_ATOMIC);
	if (rc) {
		pr_err("Failed to create safe executable page for hibernate_exit code.\n");
		goto out;
	}

	/*
	 * The hibernate exit text contains a set of el2 vectors that will
	 * be executed at el2 with the mmu off in order to reload the
	 * hyp-stub.
	 */
	__flush_dcache_area(hibernate_exit, exit_size);

	/*
	 * Restoring the memory image will overwrite the ttbr1 page tables.
	 * Create a second copy of just the linear map, and use this when
	 * restoring.
	 */
	tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
	if (!tmp_pg_dir) {
		pr_err("Failed to allocate memory for temporary page tables.\n");
		rc = -ENOMEM;
		goto out;
	}
	rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0);
	if (rc)
		goto out;

	/*
	 * Since we only copied the linear map, we need to find restore_pblist's
	 * linear map address.
	 */
	lm_restore_pblist = LMADDR(restore_pblist);

	/*
	 * KASLR will cause the el2 vectors to be in a different location in
	 * the resumed kernel. Load hibernate's temporary copy into el2.
	 *
	 * We can skip this step if we booted at EL1, or are running with VHE.
	 */
	if (el2_reset_needed()) {
		phys_addr_t el2_vectors = phys_hibernate_exit;	/* base */
		el2_vectors += hibernate_el2_vectors -
			       __hibernate_exit_text_start;	/* offset */

		__hyp_set_vectors(el2_vectors);
	}

	/*
	 * We need a zero page that is zero before & after resume in order to
	 * break-before-make on the ttbr1 page tables.
	 */
	zero_page = (void *)get_safe_page(GFP_ATOMIC);
	if (!zero_page) {
		pr_err("Failed to allocate zero page.\n");
		rc = -ENOMEM;
		goto out;
	}

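	/*
	 * hibernate_exit() runs from the safe page: it copies the image's
	 * restore_pblist pages back into place, installs the image kernel's
	 * ttbr1, and branches to its cpu_resume() via reenter_kernel.
	 */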
	hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
		       resume_hdr.reenter_kernel, lm_restore_pblist,
		       resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));

out:
	return rc;
}

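/*
 * Resume from hibernate is driven from the cpu that firmware boots the
 * system on (logical cpu 0), so the image must be created on cpu 0 too.
 * Refuse to hibernate if the boot cpu has been hotplugged out.
 */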
static int check_boot_cpu_online_pm_callback(struct notifier_block *nb,
					     unsigned long action, void *ptr)
{
	if (action == PM_HIBERNATION_PREPARE &&
	    cpumask_first(cpu_online_mask) != 0) {
		pr_warn("CPU0 is offline.\n");
		return notifier_from_errno(-ENODEV);
	}

	return NOTIFY_OK;
}

static int __init check_boot_cpu_online_init(void)
{
	/*
	 * Set this pm_notifier callback with a lower priority than
	 * cpu_hotplug_pm_callback, so that cpu_hotplug_pm_callback will be
	 * called earlier to disable cpu hotplug before the cpu online check.
	 */
	pm_notifier(check_boot_cpu_online_pm_callback, -INT_MAX);

	return 0;
}
core_initcall(check_boot_cpu_online_init);