| /* memset.S: optimised assembly memset |
| * |
| * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved. |
| * Written by David Howells (dhowells@redhat.com) |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU General Public License |
| * as published by the Free Software Foundation; either version |
| * 2 of the License, or (at your option) any later version. |
| */ |
| |
| |
| .text |
| .p2align 4 |
| |
| ############################################################################### |
| # |
| # void *memset(void *p, char ch, size_t count) |
| # |
| # - fill count bytes starting at p with the low 8 bits of ch |
| # |
| # - In: GR8 = p, GR9 = ch, GR10 = count |
| # - Out: GR8 is never written here, so p is still in it on return |
| # - May write: GR4-GR7, GR9, GR12, GR13, ICC0, ICC1, ICC3, CC5, CC7 |
| # |
| # - Shape: optional 1/2/4-byte stores until the address is 8-byte aligned, |
| # then a loop of 64-byte chunks, then 32/16/8/4/2/1-byte remnants. |
| # every stage is conditionally executed and returns as soon as the |
| # remaining count (GR5) reaches zero |
| # |
| # - NOTE: must not use any stack. exception detection performs function return |
| # to caller's fixup routine, aborting the remainder of the set |
| # GR4, GR7, GR8, and GR11 must be managed |
| # |
| # - Fault contract: at every store GR4 + GR7 is the address being written, |
| # which is what __memset_user_error_handler relies on. stores that index |
| # with GR0 therefore need GR7 = 0 |
| # |
| ############################################################################### |
| .globl memset,__memset_end |
| .type memset,@function |
| memset: |
| orcc.p gr10,gr0,gr5,icc3 ; GR5 = count |
| andi gr9,#0xff,gr9 |
| or.p gr8,gr0,gr4 ; GR4 = address |
| beqlr icc3,#0 |
| |
| # conditionally write a byte to 2b-align the address |
| setlos.p #1,gr6 |
| andicc gr4,#1,gr0,icc0 |
| # GR7 = 0 before the first store: the three alignment stores below index |
| # with GR0, and the fault handler adds GR7 to GR4 to find the faulting |
| # address, so a stale caller value in GR7 would corrupt its result |
| setlos.p #0,gr7 |
| ckne icc0,cc7 |
| cstb.p gr9,@(gr4,gr0) ,cc7,#1 |
| csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3 |
| cadd.p gr4,gr6,gr4 ,cc7,#1 |
| beqlr icc3,#0 |
| |
| # conditionally write a halfword (2 bytes) to 4b-align the address |
| # - CC7 = address bit 1 set, CC5 = at least 2 bytes left; both must hold |
| andicc.p gr4,#2,gr0,icc0 |
| subicc gr5,#2,gr0,icc1 |
| setlos.p #2,gr6 |
| ckne icc0,cc7 |
| slli.p gr9,#8,gr12 ; need to double up the pattern |
| cknc icc1,cc5 |
| or.p gr9,gr12,gr12 |
| andcr cc7,cc5,cc7 |
| |
| csth.p gr12,@(gr4,gr0) ,cc7,#1 |
| csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3 |
| cadd.p gr4,gr6,gr4 ,cc7,#1 |
| beqlr icc3,#0 |
| |
| # conditionally write a word (4 bytes) to 8b-align the address |
| # - CC7 = address bit 2 set, CC5 = at least 4 bytes left; both must hold |
| andicc.p gr4,#4,gr0,icc0 |
| subicc gr5,#4,gr0,icc1 |
| setlos.p #4,gr6 |
| ckne icc0,cc7 |
| slli.p gr12,#16,gr13 ; need to quadruple-up the pattern |
| cknc icc1,cc5 |
| or.p gr13,gr12,gr12 |
| andcr cc7,cc5,cc7 |
| |
| cst.p gr12,@(gr4,gr0) ,cc7,#1 |
| csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3 |
| cadd.p gr4,gr6,gr4 ,cc7,#1 |
| beqlr icc3,#0 |
| |
| # GR13 = GR12 so that the double-word stores of GR12 below write the |
| # pattern in all 8 bytes |
| or.p gr12,gr12,gr13 ; need to octuple-up the pattern |
| |
| # the address is now 8b-aligned - loop around writing 64-byte chunks |
| # - GR7 = 8 is the index/step of every update-form store |
| # - GR4 is pulled back by 8 so that GR4 + GR7 is the first byte to write |
| # - ICC0 holds the result of (remaining - 64); the chunk is only written |
| # when that did not borrow |
| setlos #8,gr7 |
| subi.p gr4,#8,gr4 ; store with update index does weird stuff |
| setlos #64,gr6 |
| |
| subicc gr5,#64,gr0,icc0 |
| 0: cknc icc0,cc7 |
| cstdu gr12,@(gr4,gr7) ,cc7,#1 |
| cstdu gr12,@(gr4,gr7) ,cc7,#1 |
| cstdu gr12,@(gr4,gr7) ,cc7,#1 |
| cstdu gr12,@(gr4,gr7) ,cc7,#1 |
| cstdu gr12,@(gr4,gr7) ,cc7,#1 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| subicc gr5,#64,gr0,icc0 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| beqlr icc3,#0 |
| bnc icc0,#2,0b |
| |
| # now do 32-byte remnant |
| # - GR6 is reloaded with 16 and ICC0 re-evaluated part way through so the |
| # 16-byte stage below can start without further set-up |
| subicc.p gr5,#32,gr0,icc0 |
| setlos #32,gr6 |
| cknc icc0,cc7 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| setlos #16,gr6 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| subicc gr5,#16,gr0,icc0 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| beqlr icc3,#0 |
| |
| # now do 16-byte remnant |
| cknc icc0,cc7 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| beqlr icc3,#0 |
| |
| # now do 8-byte remnant |
| # - GR7 (still 8) doubles as the amount to subtract from the count |
| subicc gr5,#8,gr0,icc1 |
| cknc icc1,cc7 |
| cstdu.p gr12,@(gr4,gr7) ,cc7,#1 |
| csubcc gr5,gr7,gr5 ,cc7,#1 ; also set ICC3 |
| setlos.p #4,gr7 |
| beqlr icc3,#0 |
| |
| # now do 4-byte remnant |
| # - the step shrinks from 8 to 4, so GR4 is advanced by the difference to |
| # keep GR4 + GR7 on the next unwritten byte (assuming the update-form |
| # stores leave GR4 at the address they wrote) |
| subicc gr5,#4,gr0,icc0 |
| addi.p gr4,#4,gr4 |
| cknc icc0,cc7 |
| cstu.p gr12,@(gr4,gr7) ,cc7,#1 |
| csubcc gr5,gr7,gr5 ,cc7,#1 ; also set ICC3 |
| subicc.p gr5,#2,gr0,icc1 |
| beqlr icc3,#0 |
| |
| # now do 2-byte remnant |
| # - step shrinks from 4 to 2, GR4 advanced by the difference as above |
| setlos #2,gr7 |
| addi.p gr4,#2,gr4 |
| cknc icc1,cc7 |
| csthu.p gr12,@(gr4,gr7) ,cc7,#1 |
| csubcc gr5,gr7,gr5 ,cc7,#1 ; also set ICC3 |
| subicc.p gr5,#1,gr0,icc0 |
| beqlr icc3,#0 |
| |
| # now do 1-byte remnant |
| # - the final store indexes with GR0, so GR7 is zeroed and GR4 advanced |
| # by the old step to keep GR4 + GR7 on the byte being written |
| setlos #0,gr7 |
| addi.p gr4,#2,gr4 |
| cknc icc0,cc7 |
| cstb.p gr12,@(gr4,gr0) ,cc7,#1 |
| bralr |
| __memset_end: |
| |
| .size memset, __memset_end-memset |
| |
| ############################################################################### |
| # |
| # clear memory in userspace |
| # - return the number of bytes that could not be cleared (0 on complete success) |
| # |
| # long __memset_user(void *p, size_t count) |
| # |
| # - In: GR8 = p, GR9 = count |
| # - Out: GR8 = 0 when memset returns normally, otherwise the value computed |
| # by __memset_user_error_handler below |
| # - GR11 holds this routine's own return address for both exits |
| # |
| ############################################################################### |
| .globl __memset_user, __memset_user_error_lr, __memset_user_error_handler |
| .type __memset_user,@function |
| __memset_user: |
| # keep our return address in GR11; no stack is used |
| movsg lr,gr11 |
| |
| # abuse memset to do the dirty work |
| # - move count into GR10, where memset reads it, and use a fill value of 0 |
| or.p gr9,gr9,gr10 |
| setlos #0,gr9 |
| call memset |
| __memset_user_error_lr: |
| # memset came back normally: return 0 to the address saved in GR11 |
| # NOTE(review): this label sits at the return address of the call above and |
| # is presumably what the fault fixup lookup matches LR against - the lookup |
| # itself is not in this file, confirm there |
| jmpl.p @(gr11,gr0) |
| setlos #0,gr8 |
| |
| # deal with any exception generated by memset |
| # GR4 - memset's address tracking pointer |
| # GR7 - memset's step value (index register for store insns) |
| # GR8 - memset's original start address |
| # GR10 - memset's original count |
| # |
| # result = (GR8 + GR10) - (GR4 + GR7) |
| # NOTE(review): relies on memset keeping GR4 + GR7 equal to the address it |
| # is storing to at every store that can fault - confirm against memset |
| __memset_user_error_handler: |
| add.p gr4,gr7,gr4 |
| add gr8,gr10,gr8 |
| jmpl.p @(gr11,gr0) |
| sub gr8,gr4,gr8 ; we return the amount left uncleared |
| |
| .size __memset_user, .-__memset_user |