blob: 823d6013eb35faafd406da04f7e687974443354a [file] [log] [blame]
Adhemerval Zanellabb3c1092019-08-26 14:44:36 -03001/*
2 * memchr - scan memory for a character
3 *
Szabolcs Nagy1eb5d7c2023-01-24 13:24:09 +00004 * Copyright (c) 2010-2022, Arm Limited.
Szabolcs Nagy189dfef2022-02-10 10:32:35 +00005 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
Adhemerval Zanellaf658cca2019-08-05 15:36:40 -03006 */
7
8/*
9 Written by Dave Gilbert <david.gilbert@linaro.org>
10
11 This __memchr_arm routine is optimised on a Cortex-A9 and should work on
12 all ARMv7 processors. It has a fast past for short sizes, and has
13 an optimised path for large data sets; the worst case is finding the
14 match early in a large data set.
15
16 */
17
18@ 2011-02-07 david.gilbert@linaro.org
19@ Extracted from local git a5b438d861
20@ 2011-07-14 david.gilbert@linaro.org
21@ Import endianness fix from local git ea786f1b
22@ 2011-12-07 david.gilbert@linaro.org
23@ Removed unneeded cbz from align loop
24
25 .syntax unified
Victor Do Nascimento3f5c5bc2022-08-03 11:48:38 +010026#if __ARM_ARCH >= 8 && __ARM_ARCH_PROFILE == 'M'
Victor Do Nascimento8bf22382022-06-22 15:07:31 +010027 /* keep config inherited from -march= */
28#else
Adhemerval Zanellaf658cca2019-08-05 15:36:40 -030029 .arch armv7-a
Victor Do Nascimento8bf22382022-06-22 15:07:31 +010030#endif
Adhemerval Zanellaf658cca2019-08-05 15:36:40 -030031
32@ this lets us check a flag in a 00/ff byte easily in either endianness
33#ifdef __ARMEB__
34#define CHARTSTMASK(c) 1<<(31-(c*8))
35#else
36#define CHARTSTMASK(c) 1<<(c*8)
37#endif
Adhemerval Zanellaf658cca2019-08-05 15:36:40 -030038 .thumb
Szabolcs Nagy67a49ee2022-11-17 11:13:22 +000039#include "asmdefs.h"
Adhemerval Zanellaf658cca2019-08-05 15:36:40 -030040
Victor Do Nascimento8bf22382022-06-22 15:07:31 +010041
Adhemerval Zanellaf658cca2019-08-05 15:36:40 -030042@ ---------------------------------------------------------------------------
43 .thumb_func
44 .align 2
45 .p2align 4,,15
46 .global __memchr_arm
47 .type __memchr_arm,%function
Victor Do Nascimento8bf22382022-06-22 15:07:31 +010048 .fnstart
49 .cfi_startproc
Adhemerval Zanellaf658cca2019-08-05 15:36:40 -030050__memchr_arm:
51 @ r0 = start of memory to scan
52 @ r1 = character to look for
53 @ r2 = length
54 @ returns r0 = pointer to character or NULL if not found
Victor Do Nascimento5c726152022-08-22 12:44:49 +010055 prologue
Adhemerval Zanellaf658cca2019-08-05 15:36:40 -030056 and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char
57
58 cmp r2,#16 @ If it's short don't bother with anything clever
59 blt 20f
60
61 tst r0, #7 @ If it's already aligned skip the next bit
62 beq 10f
63
64 @ Work up to an aligned point
655:
66 ldrb r3, [r0],#1
67 subs r2, r2, #1
68 cmp r3, r1
69 beq 50f @ If it matches exit found
70 tst r0, #7
71 bne 5b @ If not aligned yet then do next byte
72
7310:
74 @ At this point, we are aligned, we know we have at least 8 bytes to work with
75 push {r4,r5,r6,r7}
Victor Do Nascimento8bf22382022-06-22 15:07:31 +010076 .cfi_adjust_cfa_offset 16
Victor Do Nascimento7780a642022-10-21 17:21:11 +010077 .cfi_rel_offset 4, 0
78 .cfi_rel_offset 5, 4
79 .cfi_rel_offset 6, 8
80 .cfi_rel_offset 7, 12
Adhemerval Zanellaf658cca2019-08-05 15:36:40 -030081 orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes
82 orr r1, r1, r1, lsl #16
83 bic r4, r2, #7 @ Number of double words to work with
84 mvns r7, #0 @ all F's
85 movs r3, #0
86
8715:
88 ldmia r0!,{r5,r6}
89 subs r4, r4, #8
90 eor r5,r5, r1 @ Get it so that r5,r6 have 00's where the bytes match the target
91 eor r6,r6, r1
92 uadd8 r5, r5, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
93 sel r5, r3, r7 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
94 uadd8 r6, r6, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
95 sel r6, r5, r7 @ chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
96 cbnz r6, 60f
97 bne 15b @ (Flags from the subs above) If not run out of bytes then go around again
98
99 pop {r4,r5,r6,r7}
Victor Do Nascimento8bf22382022-06-22 15:07:31 +0100100 .cfi_restore 7
101 .cfi_restore 6
102 .cfi_restore 5
103 .cfi_restore 4
104 .cfi_adjust_cfa_offset -16
Adhemerval Zanellaf658cca2019-08-05 15:36:40 -0300105 and r1,r1,#0xff @ Get r1 back to a single character from the expansion above
106 and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done
107
10820:
109 cbz r2, 40f @ 0 length or hit the end already then not found
110
11121: @ Post aligned section, or just a short call
112 ldrb r3,[r0],#1
113 subs r2,r2,#1
114 eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub
115 cbz r3, 50f
116 bne 21b @ on r2 flags
117
11840:
Victor Do Nascimento7780a642022-10-21 17:21:11 +0100119 .cfi_remember_state
Adhemerval Zanellaf658cca2019-08-05 15:36:40 -0300120 movs r0,#0 @ not found
Victor Do Nascimento5c726152022-08-22 12:44:49 +0100121 epilogue
Adhemerval Zanellaf658cca2019-08-05 15:36:40 -0300122
12350:
Victor Do Nascimento7780a642022-10-21 17:21:11 +0100124 .cfi_restore_state
125 .cfi_remember_state
Adhemerval Zanellaf658cca2019-08-05 15:36:40 -0300126 subs r0,r0,#1 @ found
Victor Do Nascimento5c726152022-08-22 12:44:49 +0100127 epilogue
Adhemerval Zanellaf658cca2019-08-05 15:36:40 -0300128
12960: @ We're here because the fast path found a hit - now we have to track down exactly which word it was
130 @ r0 points to the start of the double word after the one that was tested
131 @ r5 has the 00/ff pattern for the first word, r6 has the chained value
Victor Do Nascimento7d205b82022-12-07 14:54:18 +0000132 .cfi_restore_state @ Standard post-prologue state
133 .cfi_adjust_cfa_offset 16
134 .cfi_rel_offset 4, 0
135 .cfi_rel_offset 5, 4
136 .cfi_rel_offset 6, 8
137 .cfi_rel_offset 7, 12
Adhemerval Zanellaf658cca2019-08-05 15:36:40 -0300138 cmp r5, #0
139 itte eq
140 moveq r5, r6 @ the end is in the 2nd word
141 subeq r0,r0,#3 @ Points to 2nd byte of 2nd word
142 subne r0,r0,#7 @ or 2nd byte of 1st word
143
144 @ r0 currently points to the 3rd byte of the word containing the hit
145 tst r5, # CHARTSTMASK(0) @ 1st character
146 bne 61f
147 adds r0,r0,#1
148 tst r5, # CHARTSTMASK(1) @ 2nd character
149 ittt eq
150 addeq r0,r0,#1
151 tsteq r5, # (3<<15) @ 2nd & 3rd character
152 @ If not the 3rd must be the last one
153 addeq r0,r0,#1
154
15561:
Victor Do Nascimento7780a642022-10-21 17:21:11 +0100156 pop {r4,r5,r6,r7}
157 .cfi_restore 7
158 .cfi_restore 6
159 .cfi_restore 5
160 .cfi_restore 4
161 .cfi_adjust_cfa_offset -16
Victor Do Nascimento97761492022-07-04 15:27:25 +0100162 subs r0,r0,#1
Victor Do Nascimento7780a642022-10-21 17:21:11 +0100163 epilogue
Victor Do Nascimento8bf22382022-06-22 15:07:31 +0100164 .cfi_endproc
Victor Do Nascimentoffb64612022-08-22 12:39:57 +0100165 .cantunwind
Victor Do Nascimento8bf22382022-06-22 15:07:31 +0100166 .fnend
Adhemerval Zanellaf658cca2019-08-05 15:36:40 -0300167
168 .size __memchr_arm, . - __memchr_arm