From a8173a11e5d61d524a1ce64768536b4e1da9b4ac Mon Sep 17 00:00:00 2001 From: Tristan Ross Date: Thu, 23 Jan 2025 09:00:40 -0800 Subject: [PATCH 1/5] [libc][wchar] implement wcslen Add internal helper, which may be reusable when implementing wmemchr, wcspbrk, wcsrchr, wcsstr. Link: #121183 Link: #124027 Co-authored-by: Nick Desaulniers --- libc/config/gpu/amdgpu/entrypoints.txt | 1 + libc/config/gpu/nvptx/entrypoints.txt | 1 + libc/config/linux/aarch64/entrypoints.txt | 1 + libc/config/linux/riscv/entrypoints.txt | 1 + libc/config/linux/x86_64/entrypoints.txt | 3 ++- libc/include/wchar.yaml | 6 +++++ libc/src/wchar/CMakeLists.txt | 17 +++++++++++++ libc/src/wchar/wcslen.cpp | 23 ++++++++++++++++++ libc/src/wchar/wcslen.h | 22 +++++++++++++++++ libc/src/wchar/wide_string_utils.h | 29 +++++++++++++++++++++++ libc/test/src/wchar/CMakeLists.txt | 12 ++++++++++ libc/test/src/wchar/wcslen_test.cpp | 26 ++++++++++++++++++++ 12 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 libc/src/wchar/wcslen.cpp create mode 100644 libc/src/wchar/wcslen.h create mode 100644 libc/src/wchar/wide_string_utils.h create mode 100644 libc/test/src/wchar/wcslen_test.cpp diff --git a/libc/config/gpu/amdgpu/entrypoints.txt b/libc/config/gpu/amdgpu/entrypoints.txt index 7a1982808dfeb..756b2cdc7496e 100644 --- a/libc/config/gpu/amdgpu/entrypoints.txt +++ b/libc/config/gpu/amdgpu/entrypoints.txt @@ -261,6 +261,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.time.nanosleep # wchar.h entrypoints + libc.src.wchar.wcslen libc.src.wchar.wctob # locale.h entrypoints diff --git a/libc/config/gpu/nvptx/entrypoints.txt b/libc/config/gpu/nvptx/entrypoints.txt index 059dc9b20d6dd..6b25dae158cc9 100644 --- a/libc/config/gpu/nvptx/entrypoints.txt +++ b/libc/config/gpu/nvptx/entrypoints.txt @@ -261,6 +261,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.time.nanosleep # wchar.h entrypoints + libc.src.wchar.wcslen libc.src.wchar.wctob # locale.h entrypoints diff --git 
a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index f5ba341411768..8bf47fa952cd9 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -350,6 +350,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.unistd.write # wchar.h entrypoints + libc.src.wchar.wcslen libc.src.wchar.wctob # sys/uio.h entrypoints diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index 49a8d61b93802..f9ab28c2598d5 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -346,6 +346,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.unistd.write # wchar.h entrypoints + libc.src.wchar.wcslen libc.src.wchar.wctob ) diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 0c1ae9561a7e6..3db9a911c59fe 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -349,8 +349,9 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.unistd.write # wchar.h entrypoints - libc.src.wchar.wctob libc.src.wchar.btowc + libc.src.wchar.wcslen + libc.src.wchar.wctob # sys/uio.h entrypoints libc.src.sys.uio.writev diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml index 27a5926b57455..5bbf8064c713c 100644 --- a/libc/include/wchar.yaml +++ b/libc/include/wchar.yaml @@ -9,6 +9,12 @@ types: enums: [] objects: [] functions: + - name: wcslen + standards: + - stdc + return_type: size_t + arguments: + - type: const wchar_t * - name: wctob standards: - stdc diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index d4c98ea527a8f..930c20ca67398 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -1,3 +1,20 @@ +add_header_library( + wide_string_utils + HDRS + wide_string_utils.h +) + +add_entrypoint_object( + wcslen + SRCS + wcslen.cpp + HDRS + wcslen.h + DEPENDS + .wide_string_utils + libc.hdr.types.size_t + 
libc.hdr.types.wchar_t +) add_entrypoint_object( wctob diff --git a/libc/src/wchar/wcslen.cpp b/libc/src/wchar/wcslen.cpp new file mode 100644 index 0000000000000..7d0d8cdf872e8 --- /dev/null +++ b/libc/src/wchar/wcslen.cpp @@ -0,0 +1,23 @@ +//===-- Implementation of wcslen ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/wcslen.h" + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" +#include "src/wchar/wide_string_utils.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(size_t, wcslen, (const wchar_t *src)) { + return internal::wide_string_length(src); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wcslen.h b/libc/src/wchar/wcslen.h new file mode 100644 index 0000000000000..7c022533e9b49 --- /dev/null +++ b/libc/src/wchar/wcslen.h @@ -0,0 +1,22 @@ +//===-- Implementation header for wcslen ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_WCSLEN_H +#define LLVM_LIBC_SRC_WCHAR_WCSLEN_H + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +size_t wcslen (const wchar_t *src); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_WCSLEN_H diff --git a/libc/src/wchar/wide_string_utils.h b/libc/src/wchar/wide_string_utils.h new file mode 100644 index 0000000000000..dba01d2885694 --- /dev/null +++ b/libc/src/wchar/wide_string_utils.h @@ -0,0 +1,29 @@ +//===-- Wide String utils -------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_WIDE_STRING_UTILS_H +#define LLVM_LIBC_SRC_WCHAR_WIDE_STRING_UTILS_H + +#include "src/__support/macros/config.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +LIBC_INLINE size_t wide_string_length(const wchar_t *src) { + const wchar_t *cpy = src; + while (*cpy) + ++cpy; + return cpy - src; +} + +} // namespace internal +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_WIDE_STRING_UTILS_H diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt index 3cc404b9c86fc..d41e328fc9d90 100644 --- a/libc/test/src/wchar/CMakeLists.txt +++ b/libc/test/src/wchar/CMakeLists.txt @@ -1,5 +1,17 @@ add_custom_target(libc_wchar_unittests) +add_libc_test( + wcslen_test + SUITE + libc_wchar_unittests + SRCS + wcslen_test.cpp + DEPENDS + libc.hdr.types.size_t + 
libc.hdr.types.wchar_t + libc.src.wchar.wcslen +) + add_libc_test( btowc_test SUITE diff --git a/libc/test/src/wchar/wcslen_test.cpp b/libc/test/src/wchar/wcslen_test.cpp new file mode 100644 index 0000000000000..fe975cea592f6 --- /dev/null +++ b/libc/test/src/wchar/wcslen_test.cpp @@ -0,0 +1,26 @@ +//===-- Unittests for wcslen ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/wcslen.h" +#include "hdr/types/wchar_t.h" +#include "hdr/types/size_t.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcWCSLenTest, EmptyString) { + const wchar_t *empty = L""; + + size_t result = LIBC_NAMESPACE::wcslen(empty); + ASSERT_EQ(size_t{0}, result); +} + +TEST(LlvmLibcWCSLenTest, AnyString) { + const wchar_t *any = L"Hello World!"; + + size_t result = LIBC_NAMESPACE::wcslen(any); + ASSERT_EQ(size_t{12}, result); +} From 6f386abea8bc1dfdaf6c3dde985de364410efb1c Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 23 Jan 2025 09:27:33 -0800 Subject: [PATCH 2/5] remove wide_string_utils, reuse string_length_byte_read (renamed) --- libc/src/string/string_utils.h | 5 +++-- libc/src/wchar/CMakeLists.txt | 8 +------- libc/src/wchar/wcslen.cpp | 4 ++-- libc/src/wchar/wide_string_utils.h | 29 ----------------------------- 4 files changed, 6 insertions(+), 40 deletions(-) delete mode 100644 libc/src/wchar/wide_string_utils.h diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index fc617bd18e8f6..ad47fc9d706cc 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -79,7 +79,8 @@ LIBC_INLINE size_t string_length_wide_read(const char *src) { return char_ptr - src; } -LIBC_INLINE size_t string_length_byte_read(const char 
*src) { +template <typename T> +LIBC_INLINE size_t string_length_trivial(const T *src) { size_t length; for (length = 0; *src; ++src, ++length) ; @@ -96,7 +97,7 @@ LIBC_INLINE size_t string_length(const char *src) { // string a block at a time. return string_length_wide_read<unsigned int>(src); #else - return string_length_byte_read(src); + return string_length_trivial(src); #endif } diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index 930c20ca67398..703db75b5b194 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -1,9 +1,3 @@ -add_header_library( - wide_string_utils - HDRS - wide_string_utils.h -) - add_entrypoint_object( wcslen SRCS @@ -11,9 +5,9 @@ add_entrypoint_object( HDRS wcslen.h DEPENDS - .wide_string_utils libc.hdr.types.size_t libc.hdr.types.wchar_t + libc.src.string.string_utils ) add_entrypoint_object( diff --git a/libc/src/wchar/wcslen.cpp b/libc/src/wchar/wcslen.cpp index 7d0d8cdf872e8..97ab65454a730 100644 --- a/libc/src/wchar/wcslen.cpp +++ b/libc/src/wchar/wcslen.cpp @@ -12,12 +12,12 @@ #include "hdr/types/wchar_t.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" -#include "src/wchar/wide_string_utils.h" +#include "src/string/string_utils.h" // string_length_trivial namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(size_t, wcslen, (const wchar_t *src)) { - return internal::wide_string_length(src); + return internal::string_length_trivial(src); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wide_string_utils.h b/libc/src/wchar/wide_string_utils.h deleted file mode 100644 index dba01d2885694..0000000000000 --- a/libc/src/wchar/wide_string_utils.h +++ /dev/null @@ -1,29 +0,0 @@ -//===-- Wide String utils -------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_WCHAR_WIDE_STRING_UTILS_H -#define LLVM_LIBC_SRC_WCHAR_WIDE_STRING_UTILS_H - -#include "src/__support/macros/config.h" -#include "hdr/types/size_t.h" -#include "hdr/types/wchar_t.h" - -namespace LIBC_NAMESPACE_DECL { -namespace internal { - -LIBC_INLINE size_t wide_string_length(const wchar_t *src) { - const wchar_t *cpy = src; - while (*cpy) - ++cpy; - return cpy - src; -} - -} // namespace internal -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_WCHAR_WIDE_STRING_UTILS_H From b0b7b9affafa8377855e4815f8b512a28761077c Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 23 Jan 2025 09:32:40 -0800 Subject: [PATCH 3/5] make tests shorter --- libc/test/src/wchar/wcslen_test.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/libc/test/src/wchar/wcslen_test.cpp b/libc/test/src/wchar/wcslen_test.cpp index fe975cea592f6..1610562d188f7 100644 --- a/libc/test/src/wchar/wcslen_test.cpp +++ b/libc/test/src/wchar/wcslen_test.cpp @@ -12,15 +12,9 @@ #include "test/UnitTest/Test.h" TEST(LlvmLibcWCSLenTest, EmptyString) { - const wchar_t *empty = L""; - - size_t result = LIBC_NAMESPACE::wcslen(empty); - ASSERT_EQ(size_t{0}, result); + ASSERT_EQ(size_t{0}, LIBC_NAMESPACE::wcslen(L"")); } TEST(LlvmLibcWCSLenTest, AnyString) { - const wchar_t *any = L"Hello World!"; - - size_t result = LIBC_NAMESPACE::wcslen(any); - ASSERT_EQ(size_t{12}, result); + ASSERT_EQ(size_t{12}, LIBC_NAMESPACE::wcslen(L"Hello World!")); } From 4f34e6452da349c47c99807f6ded57733502e0bd Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 23 Jan 2025 09:41:49 -0800 Subject: [PATCH 4/5] just use string_length --- libc/src/string/CMakeLists.txt | 4 +++- libc/src/string/string_utils.h | 22 ++++++++++------------ libc/src/wchar/wcslen.cpp | 2 +- 3 files changed, 14 insertions(+), 14 
deletions(-) diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt index e3faa543e630c..2c607bf8ea895 100644 --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -17,9 +17,11 @@ add_header_library( DEPENDS .memory_utils.inline_bzero .memory_utils.inline_memcpy + libc.hdr.types.size_t libc.include.stdlib - libc.src.__support.common libc.src.__support.CPP.bitset + libc.src.__support.CPP.type_traits + libc.src.__support.common ${string_config_options} ) diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index ad47fc9d706cc..d4bb72c7daadd 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -14,12 +14,13 @@ #ifndef LLVM_LIBC_SRC_STRING_STRING_UTILS_H #define LLVM_LIBC_SRC_STRING_STRING_UTILS_H +#include "hdr/types/size_t.h" #include "src/__support/CPP/bitset.h" +#include "src/__support/CPP/type_traits.h" // cpp::is_same_v #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/string/memory_utils/inline_bzero.h" #include "src/string/memory_utils/inline_memcpy.h" -#include <stddef.h> // For size_t namespace LIBC_NAMESPACE_DECL { namespace internal { @@ -79,25 +80,22 @@ LIBC_INLINE size_t string_length_wide_read(const char *src) { return char_ptr - src; } -template <typename T> -LIBC_INLINE size_t string_length_trivial(const T *src) { - size_t length; - for (length = 0; *src; ++src, ++length) - ; - return length; -} - // Returns the length of a string, denoted by the first occurrence // of a null terminator. -LIBC_INLINE size_t string_length(const char *src) { +template <typename T> +LIBC_INLINE size_t string_length(const T *src) { #ifdef LIBC_COPT_STRING_UNSAFE_WIDE_READ // Unsigned int is the default size for most processors, and on x86-64 it // performs better than larger sizes when the src pointer can't be assumed to // be aligned to a word boundary, so it's the size we use for reading the // string a block at a time.
- return string_length_wide_read<unsigned int>(src); + if constexpr (cpp::is_same_v<T, char>) + return string_length_wide_read<unsigned int>(src); #else - return string_length_trivial(src); + size_t length; + for (length = 0; *src; ++src, ++length) + ; + return length; #endif } diff --git a/libc/src/wchar/wcslen.cpp b/libc/src/wchar/wcslen.cpp index 97ab65454a730..5889e27f9729e 100644 --- a/libc/src/wchar/wcslen.cpp +++ b/libc/src/wchar/wcslen.cpp @@ -17,7 +17,7 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(size_t, wcslen, (const wchar_t *src)) { - return internal::string_length_trivial(src); + return internal::string_length(src); } } // namespace LIBC_NAMESPACE_DECL From 6c7f5fdd4c1f9dedf355f3ab7887ce20d0ad118c Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 23 Jan 2025 09:42:38 -0800 Subject: [PATCH 5/5] format --- libc/src/string/string_utils.h | 3 +-- libc/src/wchar/wcslen.h | 2 +- libc/test/src/wchar/wcslen_test.cpp | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index d4bb72c7daadd..583d35014d398 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -82,8 +82,7 @@ LIBC_INLINE size_t string_length_wide_read(const char *src) { // Returns the length of a string, denoted by the first occurrence // of a null terminator.
-template <typename T> -LIBC_INLINE size_t string_length(const T *src) { +template <typename T> LIBC_INLINE size_t string_length(const T *src) { #ifdef LIBC_COPT_STRING_UNSAFE_WIDE_READ // Unsigned int is the default size for most processors, and on x86-64 it // performs better than larger sizes when the src pointer can't be assumed to diff --git a/libc/src/wchar/wcslen.h b/libc/src/wchar/wcslen.h index 7c022533e9b49..8b2e7f50b007e 100644 --- a/libc/src/wchar/wcslen.h +++ b/libc/src/wchar/wcslen.h @@ -15,7 +15,7 @@ namespace LIBC_NAMESPACE_DECL { -size_t wcslen (const wchar_t *src); +size_t wcslen(const wchar_t *src); } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/src/wchar/wcslen_test.cpp b/libc/test/src/wchar/wcslen_test.cpp index 1610562d188f7..9cf446564c07e 100644 --- a/libc/test/src/wchar/wcslen_test.cpp +++ b/libc/test/src/wchar/wcslen_test.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "src/wchar/wcslen.h" -#include "hdr/types/wchar_t.h" #include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/wchar/wcslen.h" #include "test/UnitTest/Test.h" TEST(LlvmLibcWCSLenTest, EmptyString) {