Remove memcmp and memset from bundled musl and wasi

Yefeng Li authored on 2025-08-25 18:07:07 +01:00; committed by Alex Rønne Petersen
parent 42eb1329b1
commit aae5560712
10 changed files with 0 additions and 537 deletions

musl/src/string/aarch64/memset.S (deleted)

@@ -1,115 +0,0 @@
/*
* memset - fill memory with a constant byte
*
* Copyright (c) 2012-2020, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/* Assumptions:
*
* ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
*
*/
#define dstin x0
#define val x1
#define valw w1
#define count x2
#define dst x3
#define dstend x4
#define zva_val x5
.global memset
.type memset,%function
memset:
dup v0.16B, valw
add dstend, dstin, count
cmp count, 96
b.hi .Lset_long
cmp count, 16
b.hs .Lset_medium
mov val, v0.D[0]
/* Set 0..15 bytes. */
tbz count, 3, 1f
str val, [dstin]
str val, [dstend, -8]
ret
nop
1: tbz count, 2, 2f
str valw, [dstin]
str valw, [dstend, -4]
ret
2: cbz count, 3f
strb valw, [dstin]
tbz count, 1, 3f
strh valw, [dstend, -2]
3: ret
/* Set 17..96 bytes. */
.Lset_medium:
str q0, [dstin]
tbnz count, 6, .Lset96
str q0, [dstend, -16]
tbz count, 5, 1f
str q0, [dstin, 16]
str q0, [dstend, -32]
1: ret
.p2align 4
/* Set 64..96 bytes. Write 64 bytes from the start and
32 bytes from the end. */
.Lset96:
str q0, [dstin, 16]
stp q0, q0, [dstin, 32]
stp q0, q0, [dstend, -32]
ret
.p2align 4
.Lset_long:
and valw, valw, 255
bic dst, dstin, 15
str q0, [dstin]
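/* Use the DC ZVA cache-zeroing loop only for zero fills of at least 160 bytes. */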
cmp count, 160
ccmp valw, 0, 0, hs
b.ne .Lno_zva
#ifndef SKIP_ZVA_CHECK
mrs zva_val, dczid_el0
and zva_val, zva_val, 31
cmp zva_val, 4 /* ZVA size is 64 bytes. */
b.ne .Lno_zva
#endif
str q0, [dst, 16]
stp q0, q0, [dst, 32]
bic dst, dst, 63
sub count, dstend, dst /* Count is now 64 too large. */
sub count, count, 128 /* Adjust count and bias for loop. */
.p2align 4
.Lzva_loop:
add dst, dst, 64
dc zva, dst
subs count, count, 64
b.hi .Lzva_loop
stp q0, q0, [dstend, -64]
stp q0, q0, [dstend, -32]
ret
.Lno_zva:
sub count, dstend, dst /* Count is 16 too large. */
sub dst, dst, 16 /* Dst is biased by -32. */
sub count, count, 64 + 16 /* Adjust count and bias for loop. */
.Lno_zva_loop:
stp q0, q0, [dst, 32]
stp q0, q0, [dst, 64]!
subs count, count, 64
b.hi .Lno_zva_loop
stp q0, q0, [dstend, -64]
stp q0, q0, [dstend, -32]
ret
.size memset,.-memset

musl/src/string/arm/__aeabi_memset.s (deleted)

@@ -1,31 +0,0 @@
.syntax unified
.global __aeabi_memclr8
.global __aeabi_memclr4
.global __aeabi_memclr
.global __aeabi_memset8
.global __aeabi_memset4
.global __aeabi_memset
.type __aeabi_memclr8,%function
.type __aeabi_memclr4,%function
.type __aeabi_memclr,%function
.type __aeabi_memset8,%function
.type __aeabi_memset4,%function
.type __aeabi_memset,%function
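/* AEABI helpers: r0 = dest, r1 = byte count; the memset entries take the fill value in r2. */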
__aeabi_memclr8:
__aeabi_memclr4:
__aeabi_memclr:
movs r2, #0
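/* The memclr entries fall through into memset with the fill value forced to zero. */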
__aeabi_memset8:
__aeabi_memset4:
__aeabi_memset:
cmp r1, #0
beq 2f
adds r1, r0, r1
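/* r1 now holds the end address; store one byte at a time until r0 reaches it. */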
1: strb r2, [r0]
adds r0, r0, #1
cmp r1, r0
bne 1b
2: bx lr

musl/src/string/i386/memset.s (deleted)

@@ -1,76 +0,0 @@
.global memset
.type memset,@function
memset:
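/* cdecl arguments: 4(%esp) = dest, 8(%esp) = fill byte, 12(%esp) = count; dest is returned in %eax. */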
mov 12(%esp),%ecx
cmp $62,%ecx
ja 2f
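/* Short fills (up to 62 bytes): overlapping stores from both ends, widening from 1 to 2 to 4 bytes. */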
mov 8(%esp),%dl
mov 4(%esp),%eax
test %ecx,%ecx
jz 1f
mov %dl,%dh
mov %dl,(%eax)
mov %dl,-1(%eax,%ecx)
cmp $2,%ecx
jbe 1f
mov %dx,1(%eax)
mov %dx,(-1-2)(%eax,%ecx)
cmp $6,%ecx
jbe 1f
shl $16,%edx
mov 8(%esp),%dl
mov 8(%esp),%dh
mov %edx,(1+2)(%eax)
mov %edx,(-1-2-4)(%eax,%ecx)
cmp $14,%ecx
jbe 1f
mov %edx,(1+2+4)(%eax)
mov %edx,(1+2+4+4)(%eax)
mov %edx,(-1-2-4-8)(%eax,%ecx)
mov %edx,(-1-2-4-4)(%eax,%ecx)
cmp $30,%ecx
jbe 1f
mov %edx,(1+2+4+8)(%eax)
mov %edx,(1+2+4+8+4)(%eax)
mov %edx,(1+2+4+8+8)(%eax)
mov %edx,(1+2+4+8+12)(%eax)
mov %edx,(-1-2-4-8-16)(%eax,%ecx)
mov %edx,(-1-2-4-8-12)(%eax,%ecx)
mov %edx,(-1-2-4-8-8)(%eax,%ecx)
mov %edx,(-1-2-4-8-4)(%eax,%ecx)
1: ret
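/* Long fills: replicate the byte across %eax, align the destination, then use rep stosl. */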
2: movzbl 8(%esp),%eax
mov %edi,12(%esp)
imul $0x1010101,%eax
mov 4(%esp),%edi
test $15,%edi
mov %eax,-4(%edi,%ecx)
jnz 2f
1: shr $2, %ecx
rep
stosl
mov 4(%esp),%eax
mov 12(%esp),%edi
ret
2: xor %edx,%edx
sub %edi,%edx
and $15,%edx
mov %eax,(%edi)
mov %eax,4(%edi)
mov %eax,8(%edi)
mov %eax,12(%edi)
sub %edx,%ecx
add %edx,%edi
jmp 1b

musl/src/string/memcmp.c (deleted)

@@ -1,8 +0,0 @@
#include <string.h>
int memcmp(const void *vl, const void *vr, size_t n)
{
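/* Scan while the bytes match; on the first mismatch return the difference of the two bytes, or 0 if all n bytes compared equal. */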
const unsigned char *l=vl, *r=vr;
for (; n && *l == *r; n--, l++, r++);
return n ? *l-*r : 0;
}

musl/src/string/memset.c (deleted)

@@ -1,90 +0,0 @@
#include <string.h>
#include <stdint.h>
void *memset(void *dest, int c, size_t n)
{
unsigned char *s = dest;
size_t k;
/* Fill head and tail with minimal branching. Each
* conditional ensures that all the subsequently used
* offsets are well-defined and in the dest region. */
if (!n) return dest;
s[0] = c;
s[n-1] = c;
if (n <= 2) return dest;
s[1] = c;
s[2] = c;
s[n-2] = c;
s[n-3] = c;
if (n <= 6) return dest;
s[3] = c;
s[n-4] = c;
if (n <= 8) return dest;
/* Advance pointer to align it at a 4-byte boundary,
* and truncate n to a multiple of 4. The previous code
* already took care of any head/tail that get cut off
* by the alignment. */
k = -(uintptr_t)s & 3;
s += k;
n -= k;
n &= -4;
#ifdef __GNUC__
typedef uint32_t __attribute__((__may_alias__)) u32;
typedef uint64_t __attribute__((__may_alias__)) u64;
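/* ((u32)-1)/255 equals 0x01010101, so multiplying by the fill byte replicates it into all four byte lanes of c32. */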
u32 c32 = ((u32)-1)/255 * (unsigned char)c;
/* In preparation to copy 32 bytes at a time, aligned on
* an 8-byte boundary, fill head/tail up to 28 bytes each.
* As in the initial byte-based head/tail fill, each
* conditional below ensures that the subsequent offsets
* are valid (e.g. !(n<=24) implies n>=28). */
*(u32 *)(s+0) = c32;
*(u32 *)(s+n-4) = c32;
if (n <= 8) return dest;
*(u32 *)(s+4) = c32;
*(u32 *)(s+8) = c32;
*(u32 *)(s+n-12) = c32;
*(u32 *)(s+n-8) = c32;
if (n <= 24) return dest;
*(u32 *)(s+12) = c32;
*(u32 *)(s+16) = c32;
*(u32 *)(s+20) = c32;
*(u32 *)(s+24) = c32;
*(u32 *)(s+n-28) = c32;
*(u32 *)(s+n-24) = c32;
*(u32 *)(s+n-20) = c32;
*(u32 *)(s+n-16) = c32;
/* Align to a multiple of 8 so we can fill 64 bits at a time,
* and avoid writing the same bytes twice as much as is
* practical without introducing additional branching. */
k = 24 + ((uintptr_t)s & 4);
s += k;
n -= k;
/* If this loop is reached, 28 tail bytes have already been
* filled, so any remainder when n drops below 32 can be
* safely ignored. */
u64 c64 = c32 | ((u64)c32 << 32);
for (; n >= 32; n-=32, s+=32) {
*(u64 *)(s+0) = c64;
*(u64 *)(s+8) = c64;
*(u64 *)(s+16) = c64;
*(u64 *)(s+24) = c64;
}
#else
/* Pure C fallback with no aliasing violations. */
for (; n; n--, s++) *s = c;
#endif
return dest;
}

musl/src/string/x86_64/memset.s (deleted)

@@ -1,72 +0,0 @@
.global memset
.type memset,@function
memset:
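/* System V AMD64 arguments: %rdi = dest, %esi = fill byte, %rdx = count. */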
movzbq %sil,%rax
mov $0x101010101010101,%r8
imul %r8,%rax
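/* %rax now contains the fill byte replicated into all eight byte lanes. */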
cmp $126,%rdx
ja 2f
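/* Short fills (up to 126 bytes): overlapping stores from both ends, widening from 1 to 2, 4, and 8 bytes. */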
test %edx,%edx
jz 1f
mov %sil,(%rdi)
mov %sil,-1(%rdi,%rdx)
cmp $2,%edx
jbe 1f
mov %ax,1(%rdi)
mov %ax,(-1-2)(%rdi,%rdx)
cmp $6,%edx
jbe 1f
mov %eax,(1+2)(%rdi)
mov %eax,(-1-2-4)(%rdi,%rdx)
cmp $14,%edx
jbe 1f
mov %rax,(1+2+4)(%rdi)
mov %rax,(-1-2-4-8)(%rdi,%rdx)
cmp $30,%edx
jbe 1f
mov %rax,(1+2+4+8)(%rdi)
mov %rax,(1+2+4+8+8)(%rdi)
mov %rax,(-1-2-4-8-16)(%rdi,%rdx)
mov %rax,(-1-2-4-8-8)(%rdi,%rdx)
cmp $62,%edx
jbe 1f
mov %rax,(1+2+4+8+16)(%rdi)
mov %rax,(1+2+4+8+16+8)(%rdi)
mov %rax,(1+2+4+8+16+16)(%rdi)
mov %rax,(1+2+4+8+16+24)(%rdi)
mov %rax,(-1-2-4-8-16-32)(%rdi,%rdx)
mov %rax,(-1-2-4-8-16-24)(%rdi,%rdx)
mov %rax,(-1-2-4-8-16-16)(%rdi,%rdx)
mov %rax,(-1-2-4-8-16-8)(%rdi,%rdx)
1: mov %rdi,%rax
ret
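/* Long fills: cover the last 8 bytes, align %rdi to 16 bytes if needed, then use rep stosq. */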
2: test $15,%edi
mov %rdi,%r8
mov %rax,-8(%rdi,%rdx)
mov %rdx,%rcx
jnz 2f
1: shr $3,%rcx
rep
stosq
mov %r8,%rax
ret
2: xor %edx,%edx
sub %edi,%edx
and $15,%edx
mov %rax,(%rdi)
mov %rax,8(%rdi)
sub %rdx,%rcx
add %rdx,%rdi
jmp 1b

wasi/libc-top-half/musl/src/string/memcmp.c (deleted)

@@ -1,43 +0,0 @@
#include <string.h>
#ifdef __wasm_simd128__
#include <wasm_simd128.h>
#endif
int memcmp(const void *vl, const void *vr, size_t n)
{
#if defined(__wasm_simd128__) && defined(__wasilibc_simd_string)
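// The vector path is compiled in only when the wasm SIMD target feature and the wasi-libc __wasilibc_simd_string option are both enabled.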
if (n >= sizeof(v128_t)) {
// memcmp is allowed to read up to n bytes from each object.
// Find the first different character in the objects.
// Unaligned loads handle the case where the objects
// have mismatching alignments.
const v128_t *v1 = (v128_t *)vl;
const v128_t *v2 = (v128_t *)vr;
while (n) {
const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(v1), wasm_v128_load(v2));
// Bitmask is slow on AArch64, all_true is much faster.
if (!wasm_i8x16_all_true(cmp)) {
// Find the offset of the first zero bit (little-endian).
size_t ctz = __builtin_ctz(~wasm_i8x16_bitmask(cmp));
const unsigned char *u1 = (unsigned char *)v1 + ctz;
const unsigned char *u2 = (unsigned char *)v2 + ctz;
// This may help the compiler if the function is inlined.
__builtin_assume(*u1 - *u2 != 0);
return *u1 - *u2;
}
// This makes n a multiple of sizeof(v128_t)
// for every iteration except the first.
size_t align = (n - 1) % sizeof(v128_t) + 1;
v1 = (v128_t *)((char *)v1 + align);
v2 = (v128_t *)((char *)v2 + align);
n -= align;
}
return 0;
}
#endif
const unsigned char *l=vl, *r=vr;
for (; n && *l == *r; n--, l++, r++);
return n ? *l-*r : 0;
}

wasi/libc-top-half/musl/src/string/memset.c (deleted)

@@ -1,94 +0,0 @@
#include <string.h>
#include <stdint.h>
void *memset(void *dest, int c, size_t n)
{
#if defined(__wasm_bulk_memory__)
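/* With the bulk-memory feature enabled, large fills are delegated to __builtin_memset, which the compiler can lower to a single memory.fill instruction; BULK_MEMORY_THRESHOLD is supplied by the wasi-libc build. */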
if (n > BULK_MEMORY_THRESHOLD)
return __builtin_memset(dest, c, n);
#endif
unsigned char *s = dest;
size_t k;
/* Fill head and tail with minimal branching. Each
* conditional ensures that all the subsequently used
* offsets are well-defined and in the dest region. */
if (!n) return dest;
s[0] = c;
s[n-1] = c;
if (n <= 2) return dest;
s[1] = c;
s[2] = c;
s[n-2] = c;
s[n-3] = c;
if (n <= 6) return dest;
s[3] = c;
s[n-4] = c;
if (n <= 8) return dest;
/* Advance pointer to align it at a 4-byte boundary,
* and truncate n to a multiple of 4. The previous code
* already took care of any head/tail that get cut off
* by the alignment. */
k = -(uintptr_t)s & 3;
s += k;
n -= k;
n &= -4;
#ifdef __GNUC__
typedef uint32_t __attribute__((__may_alias__)) u32;
typedef uint64_t __attribute__((__may_alias__)) u64;
u32 c32 = ((u32)-1)/255 * (unsigned char)c;
/* In preparation to copy 32 bytes at a time, aligned on
* an 8-byte boundary, fill head/tail up to 28 bytes each.
* As in the initial byte-based head/tail fill, each
* conditional below ensures that the subsequent offsets
* are valid (e.g. !(n<=24) implies n>=28). */
*(u32 *)(s+0) = c32;
*(u32 *)(s+n-4) = c32;
if (n <= 8) return dest;
*(u32 *)(s+4) = c32;
*(u32 *)(s+8) = c32;
*(u32 *)(s+n-12) = c32;
*(u32 *)(s+n-8) = c32;
if (n <= 24) return dest;
*(u32 *)(s+12) = c32;
*(u32 *)(s+16) = c32;
*(u32 *)(s+20) = c32;
*(u32 *)(s+24) = c32;
*(u32 *)(s+n-28) = c32;
*(u32 *)(s+n-24) = c32;
*(u32 *)(s+n-20) = c32;
*(u32 *)(s+n-16) = c32;
/* Align to a multiple of 8 so we can fill 64 bits at a time,
* and avoid writing the same bytes twice as much as is
* practical without introducing additional branching. */
k = 24 + ((uintptr_t)s & 4);
s += k;
n -= k;
/* If this loop is reached, 28 tail bytes have already been
* filled, so any remainder when n drops below 32 can be
* safely ignored. */
u64 c64 = c32 | ((u64)c32 << 32);
for (; n >= 32; n-=32, s+=32) {
*(u64 *)(s+0) = c64;
*(u64 *)(s+8) = c64;
*(u64 *)(s+16) = c64;
*(u64 *)(s+24) = c64;
}
#else
/* Pure C fallback with no aliasing violations. */
for (; n; n--, s++) *s = c;
#endif
return dest;
}


@@ -1786,20 +1786,15 @@ const src_files = [_][]const u8{
"musl/src/stdlib/strtol.c",
"musl/src/stdlib/wcstod.c",
"musl/src/stdlib/wcstol.c",
"musl/src/string/aarch64/memset.S",
"musl/src/string/arm/__aeabi_memset.s",
"musl/src/string/bcmp.c",
"musl/src/string/bcopy.c",
"musl/src/string/explicit_bzero.c",
"musl/src/string/i386/memset.s",
"musl/src/string/index.c",
"musl/src/string/memccpy.c",
"musl/src/string/memchr.c",
"musl/src/string/memcmp.c",
"musl/src/string/memmem.c",
"musl/src/string/mempcpy.c",
"musl/src/string/memrchr.c",
"musl/src/string/memset.c",
"musl/src/string/rindex.c",
"musl/src/string/stpcpy.c",
"musl/src/string/stpncpy.c",
@@ -1855,7 +1850,6 @@ const src_files = [_][]const u8{
"musl/src/string/wmemcpy.c",
"musl/src/string/wmemmove.c",
"musl/src/string/wmemset.c",
"musl/src/string/x86_64/memset.s",
"musl/src/temp/mkdtemp.c",
"musl/src/temp/mkostemp.c",
"musl/src/temp/mkostemps.c",


@@ -1221,9 +1221,7 @@ const libc_top_half_src_files = [_][]const u8{
"wasi/libc-top-half/musl/src/stdlib/wcstod.c",
"wasi/libc-top-half/musl/src/stdlib/wcstol.c",
"wasi/libc-top-half/musl/src/string/memchr.c",
"wasi/libc-top-half/musl/src/string/memcmp.c",
"wasi/libc-top-half/musl/src/string/memrchr.c",
"wasi/libc-top-half/musl/src/string/memset.c",
"wasi/libc-top-half/musl/src/string/strchrnul.c",
"wasi/libc-top-half/musl/src/thread/pthread_attr_get.c",
"wasi/libc-top-half/musl/src/thread/pthread_attr_setguardsize.c",