blob: 77235797f7c54fe5af374120f76362148b11ce0f [file] [log] [blame]
/*
* strlen - calculate the length of a string.
*
* Copyright (c) 2020-2022, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
*
* ARMv8-a, AArch64, Advanced SIMD.
* MTE compatible.
*/
#include "asmdefs.h"
/* Register aliases used by __strlen_aarch64_mte. */
/* srcin and result both name x0: the length overwrites the string pointer
   once the pointer is no longer needed. */
#define srcin x0
#define result x0
/* src: 16-byte-aligned address used as the base for chunk loads. */
#define src x1
/* synd: general-register copy of the vector NUL-detection result. */
#define synd x2
/* tmp: scratch for the bit count in the final length calculation. */
#define tmp x3
/* shift: bit offset used to drop syndrome bits ahead of the string start. */
#define shift x4
/* data (q0) and vdata (v0) are two views of the same vector register. */
#define data q0
#define vdata v0
/* vhas_nul: per-byte result of comparing the current chunk against zero. */
#define vhas_nul v1
/* vend (v2) receives the reduced comparison result; dend (d2) is the low
   64 bits of that same register, used when moving it to synd. */
#define vend v2
#define dend d2
/* Core algorithm:
Process the string in 16-byte aligned chunks. Compute a 64-bit mask with
four bits per byte using the shrn instruction. A count trailing zeros then
identifies the first zero byte.

In:  x0 (srcin)  = address of the NUL-terminated string.
Out: x0 (result) = number of bytes before the first NUL.
Scratch: x1-x4 (src, synd, tmp, shift) and v0-v2.

Every load is of a whole 16-byte chunk at a 16-byte-aligned address, so
bytes before srcin and after the terminator may be read, but only from
within an aligned chunk that also contains string bytes. */
/* ENTRY/END and PTR_ARG are macros from asmdefs.h and are not visible here;
   PTR_ARG (0) presumably normalises pointer argument 0 - TODO confirm. */
ENTRY (__strlen_aarch64_mte)
PTR_ARG (0)
/* First chunk: round srcin down to a 16-byte boundary and load from there. */
bic src, srcin, 15
ld1 {vdata.16b}, [src]
/* Each byte of vhas_nul becomes 0xff where the data byte is zero, else 0. */
cmeq vhas_nul.16b, vdata.16b, 0
/* shift = srcin * 4. The register shift below uses it modulo 64, so it
   acts as (srcin & 15) * 4: four mask bits per byte preceding srcin. */
lsl shift, srcin, 2
shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
fmov synd, dend
/* Drop the nibbles of bytes that lie before the start of the string.
   ld1 with a .16b arrangement keeps bytes in memory order for either
   endianness, so this path needs no __AARCH64EB__ conditional. */
lsr synd, synd, shift
/* No NUL at or after srcin in the first chunk: scan the following chunks. */
cbz synd, L(loop)
/* rbit + clz counts trailing zero bits; dividing by 4 converts the bit
   position into a byte index, which is the length. */
rbit synd, synd
clz result, synd
lsr result, result, 2
ret
.p2align 5
/* Main loop, unrolled twice. Only a yes/no answer is needed per chunk, so
   umaxp folds the 128-bit compare result into 64 bits; the exact position
   is worked out in L(loop_end). */
L(loop):
/* First half: examine the chunk at src + 16 without updating src. */
ldr data, [src, 16]
cmeq vhas_nul.16b, vdata.16b, 0
umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
fmov synd, dend
cbnz synd, L(loop_end)
/* Second half: pre-index advances src by 32 and loads from the new src. */
ldr data, [src, 32]!
cmeq vhas_nul.16b, vdata.16b, 0
umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
fmov synd, dend
cbz synd, L(loop)
/* The NUL is in the chunk at src; step back so that, as on the other exit
   from the loop, the chunk holding the NUL is at src + 16. */
sub src, src, 16
/* On entry: vhas_nul holds the compare result for the chunk at src + 16,
   and that chunk contains at least one NUL. */
L(loop_end):
shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
sub result, src, srcin
fmov synd, dend
/* The chunk was loaded with ldr q: on little-endian the first byte maps to
   the lowest nibble, so reverse the bits before clz; on big-endian the
   reversal is skipped and clz is applied directly. */
#ifndef __AARCH64EB__
rbit synd, synd
#endif
/* result = (src + 16 - srcin) + index of the first NUL within the chunk. */
add result, result, 16
clz tmp, synd
add result, result, tmp, lsr 2
ret
END (__strlen_aarch64_mte)