Adhemerval Zanella | bb3c109 | 2019-08-26 14:44:36 -0300 | [diff] [blame] | 1 | /* |
| 2 | * memchr - scan memory for a character |
| 3 | * |
Szabolcs Nagy | 1eb5d7c | 2023-01-24 13:24:09 +0000 | [diff] [blame^] | 4 | * Copyright (c) 2010-2022, Arm Limited. |
Szabolcs Nagy | 189dfef | 2022-02-10 10:32:35 +0000 | [diff] [blame] | 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception |
Adhemerval Zanella | f658cca | 2019-08-05 15:36:40 -0300 | [diff] [blame] | 6 | */ |
| 7 | |
| 8 | /* |
| 9 | Written by Dave Gilbert <david.gilbert@linaro.org> |
| 10 | |
| 11 | This __memchr_arm routine is optimised on a Cortex-A9 and should work on |
| 12 | all ARMv7 processors. It has a fast path for short sizes, and has |
| 13 | an optimised path for large data sets; the worst case is finding the |
| 14 | match early in a large data set. |
| 15 | |
| 16 | */ |
| 17 | |
| 18 | @ 2011-02-07 david.gilbert@linaro.org |
| 19 | @ Extracted from local git a5b438d861 |
| 20 | @ 2011-07-14 david.gilbert@linaro.org |
| 21 | @ Import endianness fix from local git ea786f1b |
| 22 | @ 2011-12-07 david.gilbert@linaro.org |
| 23 | @ Removed unneeded cbz from align loop |
| 24 | |
@ Assembler setup: unified syntax, target architecture and Thumb encoding.
	.syntax unified
#if __ARM_ARCH >= 8 && __ARM_ARCH_PROFILE == 'M'
	/* keep config inherited from -march= */
#else
	.arch armv7-a
#endif

@ CHARTSTMASK(c) is a single-bit mask that falls inside byte number c of a
@ word loaded from memory, where c = 0 is the byte at the lowest address.
@ It is meant for words whose bytes are each 00 or ff, so testing any one
@ bit of a byte is enough to tell which of the two it is.  The bit position
@ is picked per endianness so that c always counts bytes in memory order.
@ Both the argument and the whole expansion are parenthesised so the macro
@ stays correct for expression arguments and inside larger expressions.
#ifdef __ARMEB__
#define CHARTSTMASK(c) (1 << (31 - ((c) * 8)))
#else
#define CHARTSTMASK(c) (1 << ((c) * 8))
#endif
	.thumb
#include "asmdefs.h"
Adhemerval Zanella | f658cca | 2019-08-05 15:36:40 -0300 | [diff] [blame] | 40 | |
Victor Do Nascimento | 8bf2238 | 2022-06-22 15:07:31 +0100 | [diff] [blame] | 41 | |
Adhemerval Zanella | f658cca | 2019-08-05 15:36:40 -0300 | [diff] [blame] | 42 | @ --------------------------------------------------------------------------- |
@ __memchr_arm - scan memory for the first occurrence of a byte.
@
@ In:    r0 = start of memory to scan
@        r1 = character to look for (only the low 8 bits are used)
@        r2 = length in bytes
@ Out:   r0 = pointer to the first matching byte, or NULL if not found
@ Uses:  r3 as scratch; r4-r7 are pushed and popped around the double-word
@        loop.  prologue/epilogue are macros that are not defined in this
@        file (presumably provided by asmdefs.h).
@
@ Numeric local labels:
@    5      byte loop that advances r0 to an 8-byte boundary
@   10/15   setup and body of the 8-bytes-per-iteration loop
@   20/21   byte loop for short calls and for the tail after the fast loop
@   40      exit, not found
@   50      exit, found by one of the byte loops
@   60/61   exit, found by the double-word loop
	.thumb_func
	.align 2
	.p2align 4,,15
	.global __memchr_arm
	.type __memchr_arm,%function
	.fnstart
	.cfi_startproc
__memchr_arm:
	prologue
	and	r1, r1, #0xff		@ Keep only the low byte of the character argument

	cmp	r2, #16			@ Short buffers go straight to the byte loop
	blo	20f			@ Unsigned compare: the length is a byte count, so
					@ values with the top bit set must not look negative

	tst	r0, #7			@ Already 8-byte aligned?  Then skip the align loop
	beq	10f

	@ Work up to an aligned point, one byte at a time
5:
	ldrb	r3, [r0], #1
	subs	r2, r2, #1
	cmp	r3, r1
	beq	50f			@ Match: r0 is one past it, 50 steps back
	tst	r0, #7
	bne	5b			@ Not aligned yet, do the next byte

10:
	@ r0 is 8-byte aligned.  At most 7 of the original 16 or more bytes were
	@ consumed above, so at least one whole double word remains.
	push	{r4, r5, r6, r7}
	.cfi_adjust_cfa_offset 16
	.cfi_rel_offset 4, 0
	.cfi_rel_offset 5, 4
	.cfi_rel_offset 6, 8
	.cfi_rel_offset 7, 12
	orr	r1, r1, r1, lsl #8	@ Replicate the target byte into all four bytes of r1
	orr	r1, r1, r1, lsl #16
	bic	r4, r2, #7		@ r4 = byte count rounded down to whole double words
	mvns	r7, #0			@ r7 = all ones
	movs	r3, #0			@ r3 = all zeroes

15:
	ldmia	r0!, {r5, r6}		@ Load 8 bytes and advance r0 past them
	subs	r4, r4, #8		@ Sets Z for the bne at the bottom of the loop;
					@ nothing in between updates the N/Z/C/V flags
	eor	r5, r5, r1		@ r5, r6 now hold 00 in every byte that matched
	eor	r6, r6, r1
	uadd8	r5, r5, r7		@ Adding ff per byte sets GE for every byte that was not 00
	sel	r5, r3, r7		@ r5 bytes: 00 where no match, ff where match - NOTE INVERSION
	uadd8	r6, r6, r7		@ Same test for the second word
	sel	r6, r5, r7		@ Chained: r6 bytes are ff where the second word matched,
					@ otherwise they carry over the r5 byte, so r6 is nonzero
					@ if either word contained a match
	cbnz	r6, 60f
	bne	15b			@ (Flags from the subs above) more double words to do

	pop	{r4, r5, r6, r7}
	.cfi_restore 7
	.cfi_restore 6
	.cfi_restore 5
	.cfi_restore 4
	.cfi_adjust_cfa_offset -16
	and	r1, r1, #0xff		@ Back to a single character after the expansion above
	and	r2, r2, #7		@ Bytes left over once the double words are done

20:
	cbz	r2, 40f			@ Zero length, or nothing left: not found

21:	@ Tail after the aligned section, or the whole of a short call
	ldrb	r3, [r0], #1
	subs	r2, r2, #1
	eor	r3, r3, r1		@ r3 = 0 on a match; eor leaves the flags from subs intact
	cbz	r3, 50f
	bne	21b			@ On the r2 flags: loop while bytes remain

40:
	.cfi_remember_state
	movs	r0, #0			@ Not found
	epilogue

50:
	.cfi_restore_state
	.cfi_remember_state
	subs	r0, r0, #1		@ Found: r0 was post-incremented past the match
	epilogue

60:	@ The fast loop found a hit - now track down exactly which byte it was.
	@ r0 points to the start of the double word after the one that was tested.
	@ r5 has the 00/ff pattern for the first word, r6 has the chained value.
	.cfi_restore_state		@ Standard post-prologue state
	.cfi_adjust_cfa_offset 16
	.cfi_rel_offset 4, 0
	.cfi_rel_offset 5, 4
	.cfi_rel_offset 6, 8
	.cfi_rel_offset 7, 12
	cmp	r5, #0
	itte	eq
	moveq	r5, r6			@ No hit in the first word, so it is in the second
	subeq	r0, r0, #3		@ Points to 2nd byte of 2nd word
	subne	r0, r0, #7		@ or 2nd byte of 1st word

	@ r0 currently points to the 2nd byte of the word containing the hit,
	@ and r5 holds the 00/ff pattern of that word.  From here on r0 is kept
	@ one past the candidate byte; label 61 steps back by one.
	tst	r5, # CHARTSTMASK(0)	@ 1st character
	bne	61f
	adds	r0, r0, #1
	tst	r5, # CHARTSTMASK(1)	@ 2nd character
	ittt	eq
	addeq	r0, r0, #1
	tsteq	r5, # (3<<15)		@ Bits 15 and 16 straddle the 2nd and 3rd characters;
					@ the 2nd is known to be 00 here, so in either
					@ endianness this tests the 3rd character only
	@ If it is not the 3rd it must be the last one
	addeq	r0, r0, #1

61:
	pop	{r4, r5, r6, r7}
	.cfi_restore 7
	.cfi_restore 6
	.cfi_restore 5
	.cfi_restore 4
	.cfi_adjust_cfa_offset -16
	subs	r0, r0, #1		@ Step back from one-past to the matching byte
	epilogue
	.cfi_endproc
	.cantunwind
	.fnend

	.size __memchr_arm, . - __memchr_arm