/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ | |
#include <linux/linkage.h> | |
#include <asm/dwarf2.h> | |
/*
 * copy_page_c - copy 4096 bytes with one string-move instruction.
 *
 * In:    %rdi = destination, %rsi = source
 * Clobb: %rcx (counts down to 0), %rsi and %rdi (advanced by the copy)
 *
 * NOTE(review): rep movsq only walks upwards when the direction flag is
 * clear; this routine relies on the caller for that - confirm.
 */
	ALIGN
copy_page_c:
	CFI_STARTPROC
	movl	$4096/8, %ecx		/* number of 8-byte words to move */
	rep movsq
	ret
	CFI_ENDPROC
ENDPROC(copy_page_c)
/*
 * copy_page - copy 4096 bytes, one 64-byte line per loop iteration.
 *
 * In:    %rdi = destination, %rsi = source
 * Out:   nothing; %rsi and %rdi are left 4096 bytes past their input
 * Clobb: %rax, %rcx, %rdx, %r8-%r11, flags
 *        %rbx and %r12 are also used for data, but are saved on the
 *        stack on entry and reloaded before returning.
 *
 * Don't use streaming (non-temporal) stores: it is better when the
 * target ends up in the cache.
 *
 * Could vary the prefetch distance based on SMP/UP.
 *
 * The copy is split in two loops: the first one prefetches five lines
 * ahead of the line being read, the second one copies the last five
 * lines without prefetching, so the prefetch never reaches past the
 * 4096 source bytes.
 *
 * .Lcopy_page_end must stay right after the final ret: the
 * .altinstructions record at the end of this file uses it to compute
 * the length of this function.
 */
ENTRY(copy_page)
	CFI_STARTPROC
	/* Two stack slots: only %rbx and %r12 need to be preserved. */
	subq	$2*8, %rsp
	CFI_ADJUST_CFA_OFFSET 2*8
	movq	%rbx, (%rsp)
	CFI_REL_OFFSET rbx, 0
	movq	%r12, 1*8(%rsp)
	CFI_REL_OFFSET r12, 1*8

	/* %ecx = number of lines copied with prefetch */
	movl	$(4096/64)-5, %ecx
	.p2align 4
.Loop64:
	/*
	 * Decrement first; the mov, lea and prefetch instructions below
	 * leave the flags alone, so ZF is still valid at the jnz.
	 */
	decl	%ecx

	movq	  (%rsi), %rax
	movq	 8(%rsi), %rbx
	movq	16(%rsi), %rdx
	movq	24(%rsi), %r8
	movq	32(%rsi), %r9
	movq	40(%rsi), %r10
	movq	48(%rsi), %r11
	movq	56(%rsi), %r12

	prefetcht0 5*64(%rsi)		/* five lines ahead of this one */

	movq	%rax,   (%rdi)
	movq	%rbx,  8(%rdi)
	movq	%rdx, 16(%rdi)
	movq	%r8,  24(%rdi)
	movq	%r9,  32(%rdi)
	movq	%r10, 40(%rdi)
	movq	%r11, 48(%rdi)
	movq	%r12, 56(%rdi)

	/* lea instead of add: advances the pointers without touching ZF */
	leaq	64(%rsi), %rsi
	leaq	64(%rdi), %rdi

	jnz	.Loop64

	/* Last five lines: same copy, no prefetch. */
	movl	$5, %ecx
	.p2align 4
.Loop2:
	decl	%ecx

	movq	  (%rsi), %rax
	movq	 8(%rsi), %rbx
	movq	16(%rsi), %rdx
	movq	24(%rsi), %r8
	movq	32(%rsi), %r9
	movq	40(%rsi), %r10
	movq	48(%rsi), %r11
	movq	56(%rsi), %r12

	movq	%rax,   (%rdi)
	movq	%rbx,  8(%rdi)
	movq	%rdx, 16(%rdi)
	movq	%r8,  24(%rdi)
	movq	%r9,  32(%rdi)
	movq	%r10, 40(%rdi)
	movq	%r11, 48(%rdi)
	movq	%r12, 56(%rdi)

	leaq	64(%rdi), %rdi
	leaq	64(%rsi), %rsi

	jnz	.Loop2

	/* Reload the preserved registers and release the two slots. */
	movq	(%rsp), %rbx
	CFI_RESTORE rbx
	movq	1*8(%rsp), %r12
	CFI_RESTORE r12
	addq	$2*8, %rsp
	CFI_ADJUST_CFA_OFFSET -2*8
	ret
.Lcopy_page_end:
	CFI_ENDPROC
ENDPROC(copy_page)
/* Some CPUs run faster using the string copy instructions.
   It is also a lot simpler. Use this when possible. */
#include <asm/cpufeature.h> | |
/*
 * Replacement bytes: a two-byte short jump whose displacement is
 * computed as if the jump were located at copy_page and targets
 * copy_page_c.
 */
	.section .altinstr_replacement,"ax"
.Lcopy_page_jmp:
	.byte 0xeb					/* jmp <disp8> */
	.byte (copy_page_c - copy_page) - (.Lcopy_page_jmp_end - .Lcopy_page_jmp)	/* offset */
.Lcopy_page_jmp_end:
	.previous

/*
 * Record tying the replacement to copy_page.
 * NOTE(review): the field order presumably follows the kernel's
 * alternative-instruction record layout - confirm against its header.
 */
	.section .altinstructions,"a"
	.align 8
	.quad copy_page					/* address of copy_page */
	.quad .Lcopy_page_jmp				/* address of the jump above */
	.word X86_FEATURE_REP_GOOD			/* CPU feature bit */
	.byte .Lcopy_page_end - copy_page		/* size of copy_page in bytes */
	.byte .Lcopy_page_jmp_end - .Lcopy_page_jmp	/* size of the jump in bytes */
	.previous