| /* |
| * Copyright (C) 2014 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "asm_support_x86.S" |
| |
/* Symbol name under which the routine in this file is defined. */
| #define MEMCMP __memcmp16 |
| |
| /* int32_t memcmp16_compare(const uint16_t* s0, const uint16_t* s1, size_t count); */ |
| |
/* L(name) pastes to .Lname (the .L prefix keeps the label out of the exported
   symbols). The guard lets an earlier definition of L take precedence. */
| #ifndef L |
| # define L(label) .L##label |
| #endif |
| |
/* Unwind bookkeeping for a 4-byte push of REG: adjust the CFA offset by 4 and
   record REG at offset 0. CFI_ADJUST_CFA_OFFSET and CFI_REL_OFFSET are not
   defined in this file -- presumably they come from asm_support_x86.S. */
| #define CFI_PUSH(REG) \ |
| CFI_ADJUST_CFA_OFFSET(4); \ |
| CFI_REL_OFFSET(REG, 0) |
| |
/* Counterpart of CFI_PUSH for a 4-byte pop: adjust the CFA offset by -4 and mark
   REG as restored (CFI_RESTORE is likewise defined outside this file). */
| #define CFI_POP(REG) \ |
| CFI_ADJUST_CFA_OFFSET(-4); \ |
| CFI_RESTORE(REG) |
| |
/* pushl/popl of a register followed by the matching CFI bookkeeping. */
| #define PUSH(REG) pushl REG; CFI_PUSH (REG) |
| #define POP(REG) popl REG; CFI_POP (REG) |
| |
/* Offsets from %esp of the three arguments, valid at function entry before
   anything is pushed. PARMS = 4 skips the first stack slot (presumably the
   return address). BLK1 and BLK2 are the two buffers, LEN the unit count. */
| #define PARMS 4 |
| #define BLK1 PARMS |
| #define BLK2 BLK1+4 |
| #define LEN BLK2+4 |
/* RETURN_END pops %edi, %esi, %ebx and returns. RETURN does the same and then
   emits CFI_RESTORE_STATE; CFI_REMEMBER_STATE (defined outside this file), so
   the code that follows a RETURN is described by the previously remembered CFI
   state rather than by the state left by the three pops. */
| #define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret |
| #define RETURN RETURN_END; CFI_RESTORE_STATE; CFI_REMEMBER_STATE |
| |
/*
 * MEMCMP(s0, s1, count): compare two arrays of count 16-bit units.
 * The arguments are read from the stack at BLK1/BLK2/LEN(%esp). The result is
 * returned in %eax: 0 when the length is zero or every unit matches, otherwise
 * the zero-extended unit from s0 minus the zero-extended unit from s1 at the
 * mismatch that was located.
 */
| DEFINE_FUNCTION MEMCMP |
/* ecx = count, in 16-bit units. */
| movl LEN(%esp), %ecx |
| |
/* Double it to get a byte length (the top bit of count is shifted out).
   ZF is set when the byte length is 0. */
| shl $1, %ecx |
| jz L(zero) |
| |
/* eax = s0, edx = s1. The second load sits between cmp and jae; mov leaves the
   flags alone, so jae still tests "byte length >= 48". */
| movl BLK1(%esp), %eax |
| cmp $48, %ecx |
| movl BLK2(%esp), %edx |
| jae L(48bytesormore) |
| |
/* Short input: save %ebx (the tail code uses it as scratch) and turn both
   pointers into end pointers; the tail reads at negative offsets from them. */
| PUSH (%ebx) |
| add %ecx, %edx |
| add %ecx, %eax |
| jmp L(less48bytes) |
| |
/* CFI bookkeeping only: the code below is reached with nothing pushed. */
| CFI_POP (%ebx) |
| |
| .p2align 4 |
| L(zero): |
/* Nothing to compare: return 0. */
| xor %eax, %eax |
| ret |
| |
| .p2align 4 |
| L(48bytesormore): |
/* Vector path for byte lengths >= 48. On entry eax = s0, edx = s1,
   ecx = byte length. %ebx, %esi and %edi are saved, and the resulting CFI
   state is remembered so later sections can restore it.
   NOTE(review): the dispatch at the bottom only distinguishes even offsets,
   so this path appears to assume 2-byte-aligned inputs -- confirm. */
| PUSH (%ebx) |
| PUSH (%esi) |
| PUSH (%edi) |
| CFI_REMEMBER_STATE |
/* Compare the first 16 bytes with unaligned loads; edi = s0, esi = s1.
   edx = per-byte equality mask (0xffff when all 16 bytes match). */
| movdqu (%eax), %xmm3 |
| movdqu (%edx), %xmm0 |
| movl %eax, %edi |
| movl %edx, %esi |
| pcmpeqb %xmm0, %xmm3 |
| pmovmskb %xmm3, %edx |
| lea 16(%edi), %edi |
| |
/* edx -= 0xffff is zero iff the 16 bytes match. The lea between sub and jnz
   does not modify the flags. On a mismatch, L(less16bytes) picks the unit
   using negative offsets from the already advanced edi/esi. */
| sub $0xffff, %edx |
| lea 16(%esi), %esi |
| jnz L(less16bytes) |
/* Round edi down to a 16-byte boundary. Move esi back by the same amount and
   add it to ecx, so the bytes that get compared a second time are counted. */
| mov %edi, %edx |
| and $0xf, %edx |
| xor %edx, %edi |
| sub %edx, %esi |
| add %edx, %ecx |
/* edx = offset of esi within its 16-byte line. Zero means both pointers are
   aligned. Otherwise esi is rounded down too and edx is the byte shift the
   shr_N code applies with palignr. */
| mov %esi, %edx |
| and $0xf, %edx |
| jz L(shr_0) |
| xor %edx, %esi |
| |
/* edx is non-zero here: the jz above handled zero and the xor leaves edx
   unchanged, so no test for 0 is needed. Any value not matched below ends up
   in L(shr_14). */
| cmp $2, %edx |
| je L(shr_2) |
| cmp $4, %edx |
| je L(shr_4) |
| cmp $6, %edx |
| je L(shr_6) |
| cmp $8, %edx |
| je L(shr_8) |
| cmp $10, %edx |
| je L(shr_10) |
| cmp $12, %edx |
| je L(shr_12) |
| jmp L(shr_14) |
| |
| .p2align 4 |
| L(shr_0): |
/* Both edi and esi are 16-byte aligned. With ecx >= 80 use the loop below;
   otherwise compare one 32-byte pair here. */
| cmp $80, %ecx |
| jae L(shr_0_gobble) |
/* ecx = bytes left beyond the 32 compared below. eax = 0 is the shift that
   L(exit) adds back to esi (none in this case). */
| lea -48(%ecx), %ecx |
| xor %eax, %eax |
/* xmm1 / xmm2 = byte-equality of the first / second 16 bytes. xmm1 is kept
   separate because L(exit) inspects it. */
| movaps (%esi), %xmm1 |
| pcmpeqb (%edi), %xmm1 |
| movaps 16(%esi), %xmm2 |
| pcmpeqb 16(%edi), %xmm2 |
| pand %xmm1, %xmm2 |
| pmovmskb %xmm2, %edx |
| add $32, %edi |
| add $32, %esi |
/* edx = combined mask - 0xffff: zero iff all 32 bytes match. */
| sub $0xffff, %edx |
| jnz L(exit) |
| |
/* All equal: build end pointers in eax/edx for the remaining ecx bytes, drop
   the saved %edi/%esi (only %ebx stays pushed) and let the tail finish. */
| lea (%ecx, %edi,1), %eax |
| lea (%ecx, %esi,1), %edx |
| POP (%edi) |
| POP (%esi) |
| jmp L(less48bytes) |
| |
/* CFI only (assuming these macros map to .cfi_restore_state and
   .cfi_remember_state): return to the state remembered with %ebx, %esi and
   %edi pushed, and keep a copy for the next section. */
| CFI_RESTORE_STATE |
| CFI_REMEMBER_STATE |
| .p2align 4 |
| L(shr_0_gobble): |
/* Prime the loop: ecx -= 48 + 32, eax = 0 (shift for L(exit)), and xmm0/xmm2 =
   byte-equality of the first 32-byte pair. */
| lea -48(%ecx), %ecx |
| movdqa (%esi), %xmm0 |
| xor %eax, %eax |
| pcmpeqb (%edi), %xmm0 |
| sub $32, %ecx |
| movdqa 16(%esi), %xmm2 |
| pcmpeqb 16(%edi), %xmm2 |
| L(shr_0_gobble_loop): |
/* Statement order matters here. sub sets CF when ecx runs below zero, and
   sbb then computes edx = mask - 0xffff - CF, which is zero only when the
   current pair matches and there was no borrow. The SSE instructions and lea
   in between do not modify the flags, so the jz at the bottom tests the sbb.
   xmm1 keeps the first-half result of the current pair for L(exit), while
   xmm0/xmm2 are reloaded with the next pair. */
| pand %xmm0, %xmm2 |
| sub $32, %ecx |
| pmovmskb %xmm2, %edx |
| movdqa %xmm0, %xmm1 |
| movdqa 32(%esi), %xmm0 |
| movdqa 48(%esi), %xmm2 |
| sbb $0xffff, %edx |
| pcmpeqb 32(%edi), %xmm0 |
| pcmpeqb 48(%edi), %xmm2 |
| lea 32(%edi), %edi |
| lea 32(%esi), %esi |
| jz L(shr_0_gobble_loop) |
| |
/* Loop left. If ecx went negative, the exit was (at least) due to the borrow:
   undo the extra 1 taken by sbb and give the 32 bytes back to ecx. */
| pand %xmm0, %xmm2 |
| cmp $0, %ecx |
| jge L(shr_0_gobble_loop_next) |
| inc %edx |
| add $32, %ecx |
| L(shr_0_gobble_loop_next): |
/* A non-zero edx now means a real mismatch in the pair just examined. */
| test %edx, %edx |
| jnz L(exit) |
| |
/* Otherwise check the pair that was already loaded into xmm0/xmm2. */
| pmovmskb %xmm2, %edx |
| movdqa %xmm0, %xmm1 |
| lea 32(%edi), %edi |
| lea 32(%esi), %esi |
| sub $0xffff, %edx |
| jnz L(exit) |
/* All equal: hand the remaining ecx bytes to the tail, as in L(shr_0). */
| lea (%ecx, %edi,1), %eax |
| lea (%ecx, %esi,1), %edx |
| POP (%edi) |
| POP (%esi) |
| jmp L(less48bytes) |
| |
/* CFI only (assuming these macros map to .cfi_restore_state and
   .cfi_remember_state): return to the state remembered with %ebx, %esi and
   %edi pushed, and keep a copy for the next section. */
| CFI_RESTORE_STATE |
| CFI_REMEMBER_STATE |
| .p2align 4 |
| L(shr_2): |
/* Reached with edx = 2: edi is 16-byte aligned and the s1 data starts 2 bytes
   past the rounded-down esi. lea and mov do not modify the flags, so jae tests
   cmp $80. ecx becomes the bytes left beyond the first 32; eax keeps the shift
   for L(exit). */
| cmp $80, %ecx |
| lea -48(%ecx), %ecx |
| mov %edx, %eax |
| jae L(shr_2_gobble) |
| |
/* xmm1 = the 16 s1 bytes at esi+2, assembled from two aligned loads with
   palignr, then compared byte-wise with the 16 bytes at edi. xmm2 keeps the
   upper load for reuse. */
| movdqa 16(%esi), %xmm1 |
| movdqa %xmm1, %xmm2 |
| palignr $2,(%esi), %xmm1 |
| pcmpeqb (%edi), %xmm1 |
| |
/* Same for the following 16 bytes. */
| movdqa 32(%esi), %xmm3 |
| palignr $2,%xmm2, %xmm3 |
| pcmpeqb 16(%edi), %xmm3 |
| |
/* edx = combined equality mask - 0xffff: zero iff all 32 bytes match. */
| pand %xmm1, %xmm3 |
| pmovmskb %xmm3, %edx |
| lea 32(%edi), %edi |
| lea 32(%esi), %esi |
| sub $0xffff, %edx |
| jnz L(exit) |
/* All equal: eax/edx = end pointers for the remaining ecx bytes (esi gets its
   2-byte shift back); drop the saved %edi/%esi and let the tail finish. */
| lea (%ecx, %edi,1), %eax |
| lea 2(%ecx, %esi,1), %edx |
| POP (%edi) |
| POP (%esi) |
| jmp L(less48bytes) |
| |
/* CFI only: same restore/remember pair as at the top of this section. */
| CFI_RESTORE_STATE |
| CFI_REMEMBER_STATE |
| .p2align 4 |
| L(shr_2_gobble): |
/* Prime the loop with the byte-equality of the first 32-byte pair. */
| sub $32, %ecx |
| movdqa 16(%esi), %xmm0 |
| palignr $2,(%esi), %xmm0 |
| pcmpeqb (%edi), %xmm0 |
| |
| movdqa 32(%esi), %xmm3 |
| palignr $2,16(%esi), %xmm3 |
| pcmpeqb 16(%edi), %xmm3 |
| |
| L(shr_2_gobble_loop): |
/* Statement order matters: sub sets CF on borrow and sbb folds it into
   edx = mask - 0xffff - CF. The SSE instructions and lea in between do not
   modify the flags, so the jz below tests the sbb. xmm1 keeps the first-half
   result of the current pair for L(exit) while the next pair is prepared. */
| pand %xmm0, %xmm3 |
| sub $32, %ecx |
| pmovmskb %xmm3, %edx |
| movdqa %xmm0, %xmm1 |
| |
| movdqa 64(%esi), %xmm3 |
| palignr $2,48(%esi), %xmm3 |
| sbb $0xffff, %edx |
| movdqa 48(%esi), %xmm0 |
| palignr $2,32(%esi), %xmm0 |
| pcmpeqb 32(%edi), %xmm0 |
| lea 32(%esi), %esi |
| pcmpeqb 48(%edi), %xmm3 |
| |
| lea 32(%edi), %edi |
| jz L(shr_2_gobble_loop) |
| pand %xmm0, %xmm3 |
| |
/* If ecx went negative, undo the borrow taken by sbb and give the 32 bytes
   back to ecx. */
| cmp $0, %ecx |
| jge L(shr_2_gobble_next) |
| inc %edx |
| add $32, %ecx |
| L(shr_2_gobble_next): |
/* A non-zero edx now means a real mismatch in the pair just examined. */
| test %edx, %edx |
| jnz L(exit) |
| |
/* Otherwise check the pair that was already prepared in xmm0/xmm3. */
| pmovmskb %xmm3, %edx |
| movdqa %xmm0, %xmm1 |
| lea 32(%edi), %edi |
| lea 32(%esi), %esi |
| sub $0xffff, %edx |
| jnz L(exit) |
| |
/* All equal: hand the remaining ecx bytes to the tail (esi + 2-byte shift). */
| lea (%ecx, %edi,1), %eax |
| lea 2(%ecx, %esi,1), %edx |
| POP (%edi) |
| POP (%esi) |
| jmp L(less48bytes) |
| |
/* CFI only (assuming these macros map to .cfi_restore_state and
   .cfi_remember_state): return to the state remembered with %ebx, %esi and
   %edi pushed, and keep a copy for the next section. */
| CFI_RESTORE_STATE |
| CFI_REMEMBER_STATE |
| .p2align 4 |
| L(shr_4): |
/* Reached with edx = 4: edi is 16-byte aligned and the s1 data starts 4 bytes
   past the rounded-down esi. lea and mov do not modify the flags, so jae tests
   cmp $80. ecx becomes the bytes left beyond the first 32; eax keeps the shift
   for L(exit). */
| cmp $80, %ecx |
| lea -48(%ecx), %ecx |
| mov %edx, %eax |
| jae L(shr_4_gobble) |
| |
/* xmm1 = the 16 s1 bytes at esi+4, assembled from two aligned loads with
   palignr, then compared byte-wise with the 16 bytes at edi. xmm2 keeps the
   upper load for reuse. */
| movdqa 16(%esi), %xmm1 |
| movdqa %xmm1, %xmm2 |
| palignr $4,(%esi), %xmm1 |
| pcmpeqb (%edi), %xmm1 |
| |
/* Same for the following 16 bytes. */
| movdqa 32(%esi), %xmm3 |
| palignr $4,%xmm2, %xmm3 |
| pcmpeqb 16(%edi), %xmm3 |
| |
/* edx = combined equality mask - 0xffff: zero iff all 32 bytes match. */
| pand %xmm1, %xmm3 |
| pmovmskb %xmm3, %edx |
| lea 32(%edi), %edi |
| lea 32(%esi), %esi |
| sub $0xffff, %edx |
| jnz L(exit) |
/* All equal: eax/edx = end pointers for the remaining ecx bytes (esi gets its
   4-byte shift back); drop the saved %edi/%esi and let the tail finish. */
| lea (%ecx, %edi,1), %eax |
| lea 4(%ecx, %esi,1), %edx |
| POP (%edi) |
| POP (%esi) |
| jmp L(less48bytes) |
| |
/* CFI only: same restore/remember pair as at the top of this section. */
| CFI_RESTORE_STATE |
| CFI_REMEMBER_STATE |
| .p2align 4 |
| L(shr_4_gobble): |
/* Prime the loop with the byte-equality of the first 32-byte pair. */
| sub $32, %ecx |
| movdqa 16(%esi), %xmm0 |
| palignr $4,(%esi), %xmm0 |
| pcmpeqb (%edi), %xmm0 |
| |
| movdqa 32(%esi), %xmm3 |
| palignr $4,16(%esi), %xmm3 |
| pcmpeqb 16(%edi), %xmm3 |
| |
| L(shr_4_gobble_loop): |
/* Statement order matters: sub sets CF on borrow and sbb folds it into
   edx = mask - 0xffff - CF. The SSE instructions and lea in between do not
   modify the flags, so the jz below tests the sbb. xmm1 keeps the first-half
   result of the current pair for L(exit) while the next pair is prepared. */
| pand %xmm0, %xmm3 |
| sub $32, %ecx |
| pmovmskb %xmm3, %edx |
| movdqa %xmm0, %xmm1 |
| |
| movdqa 64(%esi), %xmm3 |
| palignr $4,48(%esi), %xmm3 |
| sbb $0xffff, %edx |
| movdqa 48(%esi), %xmm0 |
| palignr $4,32(%esi), %xmm0 |
| pcmpeqb 32(%edi), %xmm0 |
| lea 32(%esi), %esi |
| pcmpeqb 48(%edi), %xmm3 |
| |
| lea 32(%edi), %edi |
| jz L(shr_4_gobble_loop) |
| pand %xmm0, %xmm3 |
| |
/* If ecx went negative, undo the borrow taken by sbb and give the 32 bytes
   back to ecx. */
| cmp $0, %ecx |
| jge L(shr_4_gobble_next) |
| inc %edx |
| add $32, %ecx |
| L(shr_4_gobble_next): |
/* A non-zero edx now means a real mismatch in the pair just examined. */
| test %edx, %edx |
| jnz L(exit) |
| |
/* Otherwise check the pair that was already prepared in xmm0/xmm3. */
| pmovmskb %xmm3, %edx |
| movdqa %xmm0, %xmm1 |
| lea 32(%edi), %edi |
| lea 32(%esi), %esi |
| sub $0xffff, %edx |
| jnz L(exit) |
| |
/* All equal: hand the remaining ecx bytes to the tail (esi + 4-byte shift). */
| lea (%ecx, %edi,1), %eax |
| lea 4(%ecx, %esi,1), %edx |
| POP (%edi) |
| POP (%esi) |
| jmp L(less48bytes) |
| |
/* CFI only (assuming these macros map to .cfi_restore_state and
   .cfi_remember_state): return to the state remembered with %ebx, %esi and
   %edi pushed, and keep a copy for the next section. */
| CFI_RESTORE_STATE |
| CFI_REMEMBER_STATE |
| .p2align 4 |
| L(shr_6): |
/* Reached with edx = 6: edi is 16-byte aligned and the s1 data starts 6 bytes
   past the rounded-down esi. lea and mov do not modify the flags, so jae tests
   cmp $80. ecx becomes the bytes left beyond the first 32; eax keeps the shift
   for L(exit). */
| cmp $80, %ecx |
| lea -48(%ecx), %ecx |
| mov %edx, %eax |
| jae L(shr_6_gobble) |
| |
/* xmm1 = the 16 s1 bytes at esi+6, assembled from two aligned loads with
   palignr, then compared byte-wise with the 16 bytes at edi. xmm2 keeps the
   upper load for reuse. */
| movdqa 16(%esi), %xmm1 |
| movdqa %xmm1, %xmm2 |
| palignr $6,(%esi), %xmm1 |
| pcmpeqb (%edi), %xmm1 |
| |
/* Same for the following 16 bytes. */
| movdqa 32(%esi), %xmm3 |
| palignr $6,%xmm2, %xmm3 |
| pcmpeqb 16(%edi), %xmm3 |
| |
/* edx = combined equality mask - 0xffff: zero iff all 32 bytes match. */
| pand %xmm1, %xmm3 |
| pmovmskb %xmm3, %edx |
| lea 32(%edi), %edi |
| lea 32(%esi), %esi |
| sub $0xffff, %edx |
| jnz L(exit) |
/* All equal: eax/edx = end pointers for the remaining ecx bytes (esi gets its
   6-byte shift back); drop the saved %edi/%esi and let the tail finish. */
| lea (%ecx, %edi,1), %eax |
| lea 6(%ecx, %esi,1), %edx |
| POP (%edi) |
| POP (%esi) |
| jmp L(less48bytes) |
| |
/* CFI only: same restore/remember pair as at the top of this section. */
| CFI_RESTORE_STATE |
| CFI_REMEMBER_STATE |
| .p2align 4 |
| L(shr_6_gobble): |
/* Prime the loop with the byte-equality of the first 32-byte pair. */
| sub $32, %ecx |
| movdqa 16(%esi), %xmm0 |
| palignr $6,(%esi), %xmm0 |
| pcmpeqb (%edi), %xmm0 |
| |
| movdqa 32(%esi), %xmm3 |
| palignr $6,16(%esi), %xmm3 |
| pcmpeqb 16(%edi), %xmm3 |
| |
| L(shr_6_gobble_loop): |
/* Statement order matters: sub sets CF on borrow and sbb folds it into
   edx = mask - 0xffff - CF. The SSE instructions and lea in between do not
   modify the flags, so the jz below tests the sbb. xmm1 keeps the first-half
   result of the current pair for L(exit) while the next pair is prepared. */
| pand %xmm0, %xmm3 |
| sub $32, %ecx |
| pmovmskb %xmm3, %edx |
| movdqa %xmm0, %xmm1 |
| |
| movdqa 64(%esi), %xmm3 |
| palignr $6,48(%esi), %xmm3 |
| sbb $0xffff, %edx |
| movdqa 48(%esi), %xmm0 |
| palignr $6,32(%esi), %xmm0 |
| pcmpeqb 32(%edi), %xmm0 |
| lea 32(%esi), %esi |
| pcmpeqb 48(%edi), %xmm3 |
| |
| lea 32(%edi), %edi |
| jz L(shr_6_gobble_loop) |
| pand %xmm0, %xmm3 |
| |
/* If ecx went negative, undo the borrow taken by sbb and give the 32 bytes
   back to ecx. */
| cmp $0, %ecx |
| jge L(shr_6_gobble_next) |
| inc %edx |
| add $32, %ecx |
| L(shr_6_gobble_next): |
/* A non-zero edx now means a real mismatch in the pair just examined. */
| test %edx, %edx |
| jnz L(exit) |
| |
/* Otherwise check the pair that was already prepared in xmm0/xmm3. */
| pmovmskb %xmm3, %edx |
| movdqa %xmm0, %xmm1 |
| lea 32(%edi), %edi |
| lea 32(%esi), %esi |
| sub $0xffff, %edx |
| jnz L(exit) |
| |
/* All equal: hand the remaining ecx bytes to the tail (esi + 6-byte shift). */
| lea (%ecx, %edi,1), %eax |
| lea 6(%ecx, %esi,1), %edx |
| POP (%edi) |
| POP (%esi) |
| jmp L(less48bytes) |
| |
/* CFI only (assuming these macros map to .cfi_restore_state and
   .cfi_remember_state): return to the state remembered with %ebx, %esi and
   %edi pushed, and keep a copy for the next section. */
| CFI_RESTORE_STATE |
| CFI_REMEMBER_STATE |
| .p2align 4 |
| L(shr_8): |
/* Reached with edx = 8: edi is 16-byte aligned and the s1 data starts 8 bytes
   past the rounded-down esi. lea and mov do not modify the flags, so jae tests
   cmp $80. ecx becomes the bytes left beyond the first 32; eax keeps the shift
   for L(exit). */
| cmp $80, %ecx |
| lea -48(%ecx), %ecx |
| mov %edx, %eax |
| jae L(shr_8_gobble) |
| |
/* xmm1 = the 16 s1 bytes at esi+8, assembled from two aligned loads with
   palignr, then compared byte-wise with the 16 bytes at edi. xmm2 keeps the
   upper load for reuse. */
| movdqa 16(%esi), %xmm1 |
| movdqa %xmm1, %xmm2 |
| palignr $8,(%esi), %xmm1 |
| pcmpeqb (%edi), %xmm1 |
| |
/* Same for the following 16 bytes. */
| movdqa 32(%esi), %xmm3 |
| palignr $8,%xmm2, %xmm3 |
| pcmpeqb 16(%edi), %xmm3 |
| |
/* edx = combined equality mask - 0xffff: zero iff all 32 bytes match. */
| pand %xmm1, %xmm3 |
| pmovmskb %xmm3, %edx |
| lea 32(%edi), %edi |
| lea 32(%esi), %esi |
| sub $0xffff, %edx |
| jnz L(exit) |
/* All equal: eax/edx = end pointers for the remaining ecx bytes (esi gets its
   8-byte shift back); drop the saved %edi/%esi and let the tail finish. */
| lea (%ecx, %edi,1), %eax |
| lea 8(%ecx, %esi,1), %edx |
| POP (%edi) |
| POP (%esi) |
| jmp L(less48bytes) |
| |
/* CFI only: same restore/remember pair as at the top of this section. */
| CFI_RESTORE_STATE |
| CFI_REMEMBER_STATE |
| .p2align 4 |
| L(shr_8_gobble): |
/* Prime the loop with the byte-equality of the first 32-byte pair. */
| sub $32, %ecx |
| movdqa 16(%esi), %xmm0 |
| palignr $8,(%esi), %xmm0 |
| pcmpeqb (%edi), %xmm0 |
| |
| movdqa 32(%esi), %xmm3 |
| palignr $8,16(%esi), %xmm3 |
| pcmpeqb 16(%edi), %xmm3 |
| |
| L(shr_8_gobble_loop): |
/* Statement order matters: sub sets CF on borrow and sbb folds it into
   edx = mask - 0xffff - CF. The SSE instructions and lea in between do not
   modify the flags, so the jz below tests the sbb. xmm1 keeps the first-half
   result of the current pair for L(exit) while the next pair is prepared. */
| pand %xmm0, %xmm3 |
| sub $32, %ecx |
| pmovmskb %xmm3, %edx |
| movdqa %xmm0, %xmm1 |
| |
| movdqa 64(%esi), %xmm3 |
| palignr $8,48(%esi), %xmm3 |
| sbb $0xffff, %edx |
| movdqa 48(%esi), %xmm0 |
| palignr $8,32(%esi), %xmm0 |
| pcmpeqb 32(%edi), %xmm0 |
| lea 32(%esi), %esi |
| pcmpeqb 48(%edi), %xmm3 |
| |
| lea 32(%edi), %edi |
| jz L(shr_8_gobble_loop) |
| pand %xmm0, %xmm3 |
| |
/* If ecx went negative, undo the borrow taken by sbb and give the 32 bytes
   back to ecx. */
| cmp $0, %ecx |
| jge L(shr_8_gobble_next) |
| inc %edx |
| add $32, %ecx |
| L(shr_8_gobble_next): |
/* A non-zero edx now means a real mismatch in the pair just examined. */
| test %edx, %edx |
| jnz L(exit) |
| |
/* Otherwise check the pair that was already prepared in xmm0/xmm3. */
| pmovmskb %xmm3, %edx |
| movdqa %xmm0, %xmm1 |
| lea 32(%edi), %edi |
| lea 32(%esi), %esi |
| sub $0xffff, %edx |
| jnz L(exit) |
| |
/* All equal: hand the remaining ecx bytes to the tail (esi + 8-byte shift). */
| lea (%ecx, %edi,1), %eax |
| lea 8(%ecx, %esi,1), %edx |
| POP (%edi) |
| POP (%esi) |
| jmp L(less48bytes) |
| |
/* CFI only (assuming these macros map to .cfi_restore_state and
   .cfi_remember_state): return to the state remembered with %ebx, %esi and
   %edi pushed, and keep a copy for the next section. */
| CFI_RESTORE_STATE |
| CFI_REMEMBER_STATE |
| .p2align 4 |
| L(shr_10): |
/* Reached with edx = 10: edi is 16-byte aligned and the s1 data starts 10
   bytes past the rounded-down esi. lea and mov do not modify the flags, so jae
   tests cmp $80. ecx becomes the bytes left beyond the first 32; eax keeps the
   shift for L(exit). */
| cmp $80, %ecx |
| lea -48(%ecx), %ecx |
| mov %edx, %eax |
| jae L(shr_10_gobble) |
| |
/* xmm1 = the 16 s1 bytes at esi+10, assembled from two aligned loads with
   palignr, then compared byte-wise with the 16 bytes at edi. xmm2 keeps the
   upper load for reuse. */
| movdqa 16(%esi), %xmm1 |
| movdqa %xmm1, %xmm2 |
| palignr $10, (%esi), %xmm1 |
| pcmpeqb (%edi), %xmm1 |
| |
/* Same for the following 16 bytes. */
| movdqa 32(%esi), %xmm3 |
| palignr $10,%xmm2, %xmm3 |
| pcmpeqb 16(%edi), %xmm3 |
| |
/* edx = combined equality mask - 0xffff: zero iff all 32 bytes match. */
| pand %xmm1, %xmm3 |
| pmovmskb %xmm3, %edx |
| lea 32(%edi), %edi |
| lea 32(%esi), %esi |
| sub $0xffff, %edx |
| jnz L(exit) |
/* All equal: eax/edx = end pointers for the remaining ecx bytes (esi gets its
   10-byte shift back); drop the saved %edi/%esi and let the tail finish. */
| lea (%ecx, %edi,1), %eax |
| lea 10(%ecx, %esi,1), %edx |
| POP (%edi) |
| POP (%esi) |
| jmp L(less48bytes) |
| |
/* CFI only: same restore/remember pair as at the top of this section. */
| CFI_RESTORE_STATE |
| CFI_REMEMBER_STATE |
| .p2align 4 |
| L(shr_10_gobble): |
/* Prime the loop with the byte-equality of the first 32-byte pair. */
| sub $32, %ecx |
| movdqa 16(%esi), %xmm0 |
| palignr $10, (%esi), %xmm0 |
| pcmpeqb (%edi), %xmm0 |
| |
| movdqa 32(%esi), %xmm3 |
| palignr $10, 16(%esi), %xmm3 |
| pcmpeqb 16(%edi), %xmm3 |
| |
| L(shr_10_gobble_loop): |
/* Statement order matters: sub sets CF on borrow and sbb folds it into
   edx = mask - 0xffff - CF. The SSE instructions and lea in between do not
   modify the flags, so the jz below tests the sbb. xmm1 keeps the first-half
   result of the current pair for L(exit) while the next pair is prepared. */
| pand %xmm0, %xmm3 |
| sub $32, %ecx |
| pmovmskb %xmm3, %edx |
| movdqa %xmm0, %xmm1 |
| |
| movdqa 64(%esi), %xmm3 |
| palignr $10,48(%esi), %xmm3 |
| sbb $0xffff, %edx |
| movdqa 48(%esi), %xmm0 |
| palignr $10,32(%esi), %xmm0 |
| pcmpeqb 32(%edi), %xmm0 |
| lea 32(%esi), %esi |
| pcmpeqb 48(%edi), %xmm3 |
| |
| lea 32(%edi), %edi |
| jz L(shr_10_gobble_loop) |
| pand %xmm0, %xmm3 |
| |
/* If ecx went negative, undo the borrow taken by sbb and give the 32 bytes
   back to ecx. */
| cmp $0, %ecx |
| jge L(shr_10_gobble_next) |
| inc %edx |
| add $32, %ecx |
| L(shr_10_gobble_next): |
/* A non-zero edx now means a real mismatch in the pair just examined. */
| test %edx, %edx |
| jnz L(exit) |
| |
/* Otherwise check the pair that was already prepared in xmm0/xmm3. */
| pmovmskb %xmm3, %edx |
| movdqa %xmm0, %xmm1 |
| lea 32(%edi), %edi |
| lea 32(%esi), %esi |
| sub $0xffff, %edx |
| jnz L(exit) |
| |
/* All equal: hand the remaining ecx bytes to the tail (esi + 10-byte shift). */
| lea (%ecx, %edi,1), %eax |
| lea 10(%ecx, %esi,1), %edx |
| POP (%edi) |
| POP (%esi) |
| jmp L(less48bytes) |
| |
/* CFI only (assuming these macros map to .cfi_restore_state and
   .cfi_remember_state): return to the state remembered with %ebx, %esi and
   %edi pushed, and keep a copy for the next section. */
| CFI_RESTORE_STATE |
| CFI_REMEMBER_STATE |
| .p2align 4 |
| L(shr_12): |
/* Reached with edx = 12: edi is 16-byte aligned and the s1 data starts 12
   bytes past the rounded-down esi. lea and mov do not modify the flags, so jae
   tests cmp $80. ecx becomes the bytes left beyond the first 32; eax keeps the
   shift for L(exit). */
| cmp $80, %ecx |
| lea -48(%ecx), %ecx |
| mov %edx, %eax |
| jae L(shr_12_gobble) |
| |
/* xmm1 = the 16 s1 bytes at esi+12, assembled from two aligned loads with
   palignr, then compared byte-wise with the 16 bytes at edi. xmm2 keeps the
   upper load for reuse. */
| movdqa 16(%esi), %xmm1 |
| movdqa %xmm1, %xmm2 |
| palignr $12, (%esi), %xmm1 |
| pcmpeqb (%edi), %xmm1 |
| |
/* Same for the following 16 bytes. */
| movdqa 32(%esi), %xmm3 |
| palignr $12, %xmm2, %xmm3 |
| pcmpeqb 16(%edi), %xmm3 |
| |
/* edx = combined equality mask - 0xffff: zero iff all 32 bytes match. */
| pand %xmm1, %xmm3 |
| pmovmskb %xmm3, %edx |
| lea 32(%edi), %edi |
| lea 32(%esi), %esi |
| sub $0xffff, %edx |
| jnz L(exit) |
/* All equal: eax/edx = end pointers for the remaining ecx bytes (esi gets its
   12-byte shift back); drop the saved %edi/%esi and let the tail finish. */
| lea (%ecx, %edi,1), %eax |
| lea 12(%ecx, %esi,1), %edx |
| POP (%edi) |
| POP (%esi) |
| jmp L(less48bytes) |
| |
/* CFI only: same restore/remember pair as at the top of this section. */
| CFI_RESTORE_STATE |
| CFI_REMEMBER_STATE |
| .p2align 4 |
| L(shr_12_gobble): |
/* Prime the loop with the byte-equality of the first 32-byte pair. */
| sub $32, %ecx |
| movdqa 16(%esi), %xmm0 |
| palignr $12, (%esi), %xmm0 |
| pcmpeqb (%edi), %xmm0 |
| |
| movdqa 32(%esi), %xmm3 |
| palignr $12, 16(%esi), %xmm3 |
| pcmpeqb 16(%edi), %xmm3 |
| |
| L(shr_12_gobble_loop): |
/* Statement order matters: sub sets CF on borrow and sbb folds it into
   edx = mask - 0xffff - CF. The SSE instructions and lea in between do not
   modify the flags, so the jz below tests the sbb. xmm1 keeps the first-half
   result of the current pair for L(exit) while the next pair is prepared. */
| pand %xmm0, %xmm3 |
| sub $32, %ecx |
| pmovmskb %xmm3, %edx |
| movdqa %xmm0, %xmm1 |
| |
| movdqa 64(%esi), %xmm3 |
| palignr $12,48(%esi), %xmm3 |
| sbb $0xffff, %edx |
| movdqa 48(%esi), %xmm0 |
| palignr $12,32(%esi), %xmm0 |
| pcmpeqb 32(%edi), %xmm0 |
| lea 32(%esi), %esi |
| pcmpeqb 48(%edi), %xmm3 |
| |
| lea 32(%edi), %edi |
| jz L(shr_12_gobble_loop) |
| pand %xmm0, %xmm3 |
| |
/* If ecx went negative, undo the borrow taken by sbb and give the 32 bytes
   back to ecx. */
| cmp $0, %ecx |
| jge L(shr_12_gobble_next) |
| inc %edx |
| add $32, %ecx |
| L(shr_12_gobble_next): |
/* A non-zero edx now means a real mismatch in the pair just examined. */
| test %edx, %edx |
| jnz L(exit) |
| |
/* Otherwise check the pair that was already prepared in xmm0/xmm3. */
| pmovmskb %xmm3, %edx |
| movdqa %xmm0, %xmm1 |
| lea 32(%edi), %edi |
| lea 32(%esi), %esi |
| sub $0xffff, %edx |
| jnz L(exit) |
| |
/* All equal: hand the remaining ecx bytes to the tail (esi + 12-byte shift). */
| lea (%ecx, %edi,1), %eax |
| lea 12(%ecx, %esi,1), %edx |
| POP (%edi) |
| POP (%esi) |
| jmp L(less48bytes) |
| |
/* CFI only (assuming these macros map to .cfi_restore_state and
   .cfi_remember_state): return to the state remembered with %ebx, %esi and
   %edi pushed, and keep a copy for the next section. */
| CFI_RESTORE_STATE |
| CFI_REMEMBER_STATE |
| .p2align 4 |
| L(shr_14): |
/* Reached by the final jmp of the offset dispatch, i.e. for every offset not
   matched earlier; the code below shifts by 14, so edx is expected to be 14.
   edi is 16-byte aligned and esi has been rounded down. lea and mov do not
   modify the flags, so jae tests cmp $80. ecx becomes the bytes left beyond
   the first 32; eax keeps the shift (edx) for L(exit). */
| cmp $80, %ecx |
| lea -48(%ecx), %ecx |
| mov %edx, %eax |
| jae L(shr_14_gobble) |
| |
/* xmm1 = the 16 s1 bytes at esi+14, assembled from two aligned loads with
   palignr, then compared byte-wise with the 16 bytes at edi. xmm2 keeps the
   upper load for reuse. */
| movdqa 16(%esi), %xmm1 |
| movdqa %xmm1, %xmm2 |
| palignr $14, (%esi), %xmm1 |
| pcmpeqb (%edi), %xmm1 |
| |
/* Same for the following 16 bytes. */
| movdqa 32(%esi), %xmm3 |
| palignr $14, %xmm2, %xmm3 |
| pcmpeqb 16(%edi), %xmm3 |
| |
/* edx = combined equality mask - 0xffff: zero iff all 32 bytes match. */
| pand %xmm1, %xmm3 |
| pmovmskb %xmm3, %edx |
| lea 32(%edi), %edi |
| lea 32(%esi), %esi |
| sub $0xffff, %edx |
| jnz L(exit) |
/* All equal: eax/edx = end pointers for the remaining ecx bytes (esi gets its
   14-byte shift back); drop the saved %edi/%esi and let the tail finish. */
| lea (%ecx, %edi,1), %eax |
| lea 14(%ecx, %esi,1), %edx |
| POP (%edi) |
| POP (%esi) |
| jmp L(less48bytes) |
| |
/* CFI only: same restore/remember pair as at the top of this section. */
| CFI_RESTORE_STATE |
| CFI_REMEMBER_STATE |
| .p2align 4 |
| L(shr_14_gobble): |
/* Prime the loop with the byte-equality of the first 32-byte pair. */
| sub $32, %ecx |
| movdqa 16(%esi), %xmm0 |
| palignr $14, (%esi), %xmm0 |
| pcmpeqb (%edi), %xmm0 |
| |
| movdqa 32(%esi), %xmm3 |
| palignr $14, 16(%esi), %xmm3 |
| pcmpeqb 16(%edi), %xmm3 |
| |
| L(shr_14_gobble_loop): |
/* Statement order matters: sub sets CF on borrow and sbb folds it into
   edx = mask - 0xffff - CF. The SSE instructions and lea in between do not
   modify the flags, so the jz below tests the sbb. xmm1 keeps the first-half
   result of the current pair for L(exit) while the next pair is prepared. */
| pand %xmm0, %xmm3 |
| sub $32, %ecx |
| pmovmskb %xmm3, %edx |
| movdqa %xmm0, %xmm1 |
| |
| movdqa 64(%esi), %xmm3 |
| palignr $14,48(%esi), %xmm3 |
| sbb $0xffff, %edx |
| movdqa 48(%esi), %xmm0 |
| palignr $14,32(%esi), %xmm0 |
| pcmpeqb 32(%edi), %xmm0 |
| lea 32(%esi), %esi |
| pcmpeqb 48(%edi), %xmm3 |
| |
| lea 32(%edi), %edi |
| jz L(shr_14_gobble_loop) |
| pand %xmm0, %xmm3 |
| |
/* If ecx went negative, undo the borrow taken by sbb and give the 32 bytes
   back to ecx. */
| cmp $0, %ecx |
| jge L(shr_14_gobble_next) |
| inc %edx |
| add $32, %ecx |
| L(shr_14_gobble_next): |
/* A non-zero edx now means a real mismatch in the pair just examined. */
| test %edx, %edx |
| jnz L(exit) |
| |
/* Otherwise check the pair that was already prepared in xmm0/xmm3. */
| pmovmskb %xmm3, %edx |
| movdqa %xmm0, %xmm1 |
| lea 32(%edi), %edi |
| lea 32(%esi), %esi |
| sub $0xffff, %edx |
| jnz L(exit) |
| |
/* All equal: hand the remaining ecx bytes to the tail (esi + 14-byte shift). */
| lea (%ecx, %edi,1), %eax |
| lea 14(%ecx, %esi,1), %edx |
| POP (%edi) |
| POP (%esi) |
| jmp L(less48bytes) |
| |
/* CFI only (assuming these macros map to .cfi_restore_state and
   .cfi_remember_state): return to the state remembered with %ebx, %esi and
   %edi pushed, and keep a copy. */
| CFI_RESTORE_STATE |
| CFI_REMEMBER_STATE |
| .p2align 4 |
| L(exit): |
/* A 32-byte pair contained a mismatch. On entry:
     xmm1    = byte-equality of the first 16 bytes of the pair
     edx     = (equality mask of the whole pair) - 0xffff, non-zero
     edi/esi = just past the pair
     eax     = shift by which esi was rounded down (0 in the shr_0 paths)
   ebx = first-half mask - 0xffff. Zero means the first half matches, so the
   mismatch is in the second half and edx already describes it. Otherwise step
   both pointers back to the end of the first half and use ebx instead. */
| pmovmskb %xmm1, %ebx |
| sub $0xffff, %ebx |
| jz L(first16bytes) |
| lea -16(%esi), %esi |
| lea -16(%edi), %edi |
| mov %ebx, %edx |
| |
| L(first16bytes): |
/* Give esi its shift back so it addresses the real s1 bytes again. */
| add %eax, %esi |
| L(less16bytes): |
/* edi/esi point 16 bytes past the start of the mismatching chunk, and
   edx = mask - 0xffff. In its low 16 bits that equals mask + 1, whose lowest
   set bit sits at the index of the first differing byte. The tests below
   narrow that bit down to one 16-bit unit: dl covers bytes 0-7, dh bytes 8-15. */
| test %dl, %dl |
| jz L(next_four_words) |
| test $15, %dl |
| jz L(second_two_words) |
| test $3, %dl |
| jz L(second_word) |
/* Unit 0: return zero-extended s0 unit minus zero-extended s1 unit. */
| movzwl -16(%edi), %eax |
| movzwl -16(%esi), %ebx |
| subl %ebx, %eax |
| RETURN |
| |
| .p2align 4 |
| L(second_word): |
/* Unit 1. */
| movzwl -14(%edi), %eax |
| movzwl -14(%esi), %ebx |
| subl %ebx, %eax |
| RETURN |
| |
| .p2align 4 |
| L(second_two_words): |
/* Bits 0-3 are clear here, so test $63 decides between unit 2 and unit 3. */
| test $63, %dl |
| jz L(fourth_word) |
| movzwl -12(%edi), %eax |
| movzwl -12(%esi), %ebx |
| subl %ebx, %eax |
| RETURN |
| |
| .p2align 4 |
| L(fourth_word): |
/* Unit 3. */
| movzwl -10(%edi), %eax |
| movzwl -10(%esi), %ebx |
| subl %ebx, %eax |
| RETURN |
| |
| .p2align 4 |
| L(next_four_words): |
/* dl is zero: repeat the same narrowing on dh for units 4-7. */
| test $15, %dh |
| jz L(fourth_two_words) |
| test $3, %dh |
| jz L(sixth_word) |
| movzwl -8(%edi), %eax |
| movzwl -8(%esi), %ebx |
| subl %ebx, %eax |
| RETURN |
| |
| .p2align 4 |
| L(sixth_word): |
/* Unit 5. */
| movzwl -6(%edi), %eax |
| movzwl -6(%esi), %ebx |
| subl %ebx, %eax |
| RETURN |
| |
| .p2align 4 |
| L(fourth_two_words): |
/* Bits 0-3 of dh are clear here, so test $63 decides between unit 6 and 7. */
| test $63, %dh |
| jz L(eighth_word) |
| movzwl -4(%edi), %eax |
| movzwl -4(%esi), %ebx |
| subl %ebx, %eax |
| RETURN |
| |
| .p2align 4 |
| L(eighth_word): |
/* Last 16-bit unit of the mismatching 16-byte chunk: return the zero-extended
   unit at -2(%edi) minus the zero-extended unit at -2(%esi). */
| movzwl -2(%edi), %eax |
| movzwl -2(%esi), %ebx |
| subl %ebx, %eax |
/* RETURN_END, not RETURN: this is the last return of the section that runs
   with %ebx, %esi and %edi pushed. The code after it is entered with only
   %ebx on the stack, so the CFI state must stay as the three pops leave it.
   The CFI_PUSH (%ebx) below then describes exactly one saved register.
   RETURN would first restore the remembered three-register state and leave
   the unwind info for the tail code 12 bytes off (assuming the CFI_* macros
   map to the corresponding .cfi_* directives). The emitted instructions are
   the same either way. */
| RETURN_END |
| |
| |
| CFI_PUSH (%ebx) |
| |
/* Size dispatch for the scalar tail. It is entered at L(less48bytes) with
   ecx = bytes still to compare, eax/edx = pointers just past the end of
   s0/s1, and %ebx saved on the stack. Each group of four sizes tests three
   exact values and sends everything else to the fourth label. */
| .p2align 4 |
| L(more8bytes): |
/* 8 <= ecx: sizes 8..14 are handled here, larger ones further down. */
| cmp $16, %ecx |
| jae L(more16bytes) |
| cmp $8, %ecx |
| je L(8bytes) |
| cmp $10, %ecx |
| je L(10bytes) |
| cmp $12, %ecx |
| je L(12bytes) |
| jmp L(14bytes) |
| |
| .p2align 4 |
| L(more16bytes): |
/* 16 <= ecx: sizes 16..22. */
| cmp $24, %ecx |
| jae L(more24bytes) |
| cmp $16, %ecx |
| je L(16bytes) |
| cmp $18, %ecx |
| je L(18bytes) |
| cmp $20, %ecx |
| je L(20bytes) |
| jmp L(22bytes) |
| |
| .p2align 4 |
| L(more24bytes): |
/* 24 <= ecx: sizes 24..30. */
| cmp $32, %ecx |
| jae L(more32bytes) |
| cmp $24, %ecx |
| je L(24bytes) |
| cmp $26, %ecx |
| je L(26bytes) |
| cmp $28, %ecx |
| je L(28bytes) |
| jmp L(30bytes) |
| |
| .p2align 4 |
| L(more32bytes): |
/* 32 <= ecx: sizes 32..38. */
| cmp $40, %ecx |
| jae L(more40bytes) |
| cmp $32, %ecx |
| je L(32bytes) |
| cmp $34, %ecx |
| je L(34bytes) |
| cmp $36, %ecx |
| je L(36bytes) |
| jmp L(38bytes) |
| |
| .p2align 4 |
| L(less48bytes): |
/* Entry point of the dispatch. Below 8, only 2 and 4 are tested explicitly;
   any other value, including a remaining length of 0, lands on L(6bytes). */
| cmp $8, %ecx |
| jae L(more8bytes) |
| cmp $2, %ecx |
| je L(2bytes) |
| cmp $4, %ecx |
| je L(4bytes) |
| jmp L(6bytes) |
| |
| .p2align 4 |
| L(more40bytes): |
/* 40 <= ecx: 40, 42 and 44 are tested; everything else goes to L(46bytes). */
| cmp $40, %ecx |
| je L(40bytes) |
| cmp $42, %ecx |
| je L(42bytes) |
| cmp $44, %ecx |
| je L(44bytes) |
| jmp L(46bytes) |
| |
/* Scalar tail: a fall-through ladder. L(Nbytes) compares the 16-bit unit that
   lies N bytes before the end pointers in eax (s0) and edx (s1), then drops
   into the next lower label. ecx receives the difference of the zero-extended
   units (s0 minus s1); the first non-zero difference leaves through
   L(memcmp16_exit). ebx is scratch here; its saved value is on the stack. */
| .p2align 4 |
| L(46bytes): |
| movzwl -46(%eax), %ecx |
| movzwl -46(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(44bytes): |
| movzwl -44(%eax), %ecx |
| movzwl -44(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(42bytes): |
| movzwl -42(%eax), %ecx |
| movzwl -42(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(40bytes): |
| movzwl -40(%eax), %ecx |
| movzwl -40(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(38bytes): |
| movzwl -38(%eax), %ecx |
| movzwl -38(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(36bytes): |
| movzwl -36(%eax), %ecx |
| movzwl -36(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(34bytes): |
| movzwl -34(%eax), %ecx |
| movzwl -34(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(32bytes): |
| movzwl -32(%eax), %ecx |
| movzwl -32(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(30bytes): |
| movzwl -30(%eax), %ecx |
| movzwl -30(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(28bytes): |
| movzwl -28(%eax), %ecx |
| movzwl -28(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(26bytes): |
| movzwl -26(%eax), %ecx |
| movzwl -26(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(24bytes): |
| movzwl -24(%eax), %ecx |
| movzwl -24(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(22bytes): |
| movzwl -22(%eax), %ecx |
| movzwl -22(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(20bytes): |
| movzwl -20(%eax), %ecx |
| movzwl -20(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(18bytes): |
| movzwl -18(%eax), %ecx |
| movzwl -18(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(16bytes): |
| movzwl -16(%eax), %ecx |
| movzwl -16(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(14bytes): |
| movzwl -14(%eax), %ecx |
| movzwl -14(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(12bytes): |
| movzwl -12(%eax), %ecx |
| movzwl -12(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(10bytes): |
| movzwl -10(%eax), %ecx |
| movzwl -10(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(8bytes): |
| movzwl -8(%eax), %ecx |
| movzwl -8(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(6bytes): |
| movzwl -6(%eax), %ecx |
| movzwl -6(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(4bytes): |
| movzwl -4(%eax), %ecx |
| movzwl -4(%edx), %ebx |
| subl %ebx, %ecx |
| jne L(memcmp16_exit) |
| L(2bytes): |
/* Final unit: the difference is computed straight into eax, which is the
   return value whether or not it is zero. */
| movzwl -2(%eax), %eax |
| movzwl -2(%edx), %ebx |
| subl %ebx, %eax |
| POP (%ebx) |
| ret |
/* CFI bookkeeping only: L(memcmp16_exit) is entered with %ebx still pushed. */
| CFI_PUSH (%ebx) |
| |
| .p2align 4 |
| L(memcmp16_exit): |
/* Early exit from the ladder: restore %ebx and return the difference in ecx. */
| POP (%ebx) |
| mov %ecx, %eax |
| ret |
| END_FUNCTION MEMCMP |