Skip to content

[libc][wchar] implement wcslen #124150

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libc/config/gpu/amdgpu/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.time.nanosleep

# wchar.h entrypoints
libc.src.wchar.wcslen
libc.src.wchar.wctob

# locale.h entrypoints
Expand Down
1 change: 1 addition & 0 deletions libc/config/gpu/nvptx/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.time.nanosleep

# wchar.h entrypoints
libc.src.wchar.wcslen
libc.src.wchar.wctob

# locale.h entrypoints
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/aarch64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.unistd.write

# wchar.h entrypoints
libc.src.wchar.wcslen
libc.src.wchar.wctob

# sys/uio.h entrypoints
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/riscv/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.unistd.write

# wchar.h entrypoints
libc.src.wchar.wcslen
libc.src.wchar.wctob
)

Expand Down
3 changes: 2 additions & 1 deletion libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -349,8 +349,9 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.unistd.write

# wchar.h entrypoints
libc.src.wchar.wctob
libc.src.wchar.btowc
libc.src.wchar.wcslen
libc.src.wchar.wctob

# sys/uio.h entrypoints
libc.src.sys.uio.writev
Expand Down
6 changes: 6 additions & 0 deletions libc/include/wchar.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ types:
enums: []
objects: []
functions:
- name: wcslen
standards:
- stdc
return_type: size_t
arguments:
- type: const wchar_t *
- name: wctob
standards:
- stdc
Expand Down
4 changes: 3 additions & 1 deletion libc/src/string/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ add_header_library(
DEPENDS
.memory_utils.inline_bzero
.memory_utils.inline_memcpy
libc.hdr.types.size_t
libc.include.stdlib
libc.src.__support.common
libc.src.__support.CPP.bitset
libc.src.__support.CPP.type_traits
libc.src.__support.common
${string_config_options}
)

Expand Down
20 changes: 9 additions & 11 deletions libc/src/string/string_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@
#ifndef LLVM_LIBC_SRC_STRING_STRING_UTILS_H
#define LLVM_LIBC_SRC_STRING_STRING_UTILS_H

#include "hdr/types/size_t.h"
#include "src/__support/CPP/bitset.h"
#include "src/__support/CPP/type_traits.h" // cpp::is_same_v
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
#include "src/string/memory_utils/inline_bzero.h"
#include "src/string/memory_utils/inline_memcpy.h"
#include <stddef.h> // For size_t

namespace LIBC_NAMESPACE_DECL {
namespace internal {
Expand Down Expand Up @@ -79,24 +80,21 @@ LIBC_INLINE size_t string_length_wide_read(const char *src) {
return char_ptr - src;
}

LIBC_INLINE size_t string_length_byte_read(const char *src) {
size_t length;
for (length = 0; *src; ++src, ++length)
;
return length;
}

// Returns the length of a string, denoted by the first occurrence
// of a null terminator.
LIBC_INLINE size_t string_length(const char *src) {
template <typename T> LIBC_INLINE size_t string_length(const T *src) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's what I was imagining:

template <typename T> LIBC_INLINE size_t string_length(const T *src) {
  size_t length;
  for (length = 0; *src; ++src, ++length)
    ;
  return length;
}

template <char> LIBC_INLINE size_t string_length(const char *src){
#ifdef LIBC_COPT_STRING_UNSAFE_WIDE_READ
  // Unsigned int is the default size for most processors, and on x86-64 it
  // performs better than larger sizes when the src pointer can't be assumed to
  // be aligned to a word boundary, so it's the size we use for reading the
  // string a block at a time.
  return string_length_wide_read<unsigned int>(src);
#else
  size_t length;
  for (length = 0; *src; ++src, ++length)
    ;
  return length;
#endif
}

The reason is to avoid issues around passing incorrect types to string_length_wide_read, I think if constexpr is evaluated late enough in the process you might get warnings about it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The duplication between the two versions (for loop) isn't as DRY as I prefer; I don't think the above approach can be simplified to have the char specialized version of string_length call the non-specialized version.


The reason is to avoid issues around passing incorrect types to string_length_wide_read

The parameter of string_length_wide_read isn't the template variable; the if constexpr statement as written compiles without warning. https://godbolt.org/z/ajYhWbddd


Now that I am pulling in cpp::type_traits, I could add some SFINAE to string_length so that it can only be called with char* and wchar_t*, if you'd like?

I don't feel strongly about any of the above, but please let me know what you prefer. I'll wait to land this until then.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at it written out, it's probably overcomplicating things to add the template specialization just to avoid the if constexpr. If it compiles without warnings, it's probably fine.

I don't think we really need to add the SFINAE to limit the template. In practice I expect it'll be used for char, wchar_t, and maybe unsigned char, but it will behave as expected for any other integer type. Adding the check seems like unnecessary complexity for little practical benefit.

#ifdef LIBC_COPT_STRING_UNSAFE_WIDE_READ
// Unsigned int is the default size for most processors, and on x86-64 it
// performs better than larger sizes when the src pointer can't be assumed to
// be aligned to a word boundary, so it's the size we use for reading the
// string a block at a time.
return string_length_wide_read<unsigned int>(src);
if constexpr (cpp::is_same_v<T, char>)
return string_length_wide_read<unsigned int>(src);
#else
return string_length_byte_read(src);
size_t length;
for (length = 0; *src; ++src, ++length)
;
return length;
Comment on lines +94 to +97
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would it make sense to keep string_length_byte_read separate and template that instead of templating the whole string_length function?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see a reason why we wouldn't want internal::string_length to work on wide char strings and normal strings.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, reading back through the comments I see you already tried this. In the case would it make sense to have a generic template for string_length that just calls a templated string_length_byte_read, then a specialization for char with the wide read variant?

#endif
}

Expand Down
11 changes: 11 additions & 0 deletions libc/src/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
add_entrypoint_object(
wcslen
SRCS
wcslen.cpp
HDRS
wcslen.h
DEPENDS
libc.hdr.types.size_t
libc.hdr.types.wchar_t
libc.src.string.string_utils
)

add_entrypoint_object(
wctob
Expand Down
23 changes: 23 additions & 0 deletions libc/src/wchar/wcslen.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//===-- Implementation of wcslen ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/wchar/wcslen.h"

#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
#include "src/string/string_utils.h" // string_length_trivial

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(size_t, wcslen, (const wchar_t *src)) {
return internal::string_length(src);
}

} // namespace LIBC_NAMESPACE_DECL
22 changes: 22 additions & 0 deletions libc/src/wchar/wcslen.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===-- Implementation header for wcslen ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_WCHAR_WCSLEN_H
#define LLVM_LIBC_SRC_WCHAR_WCSLEN_H

#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

size_t wcslen(const wchar_t *src);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_WCHAR_WCSLEN_H
12 changes: 12 additions & 0 deletions libc/test/src/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
add_custom_target(libc_wchar_unittests)

add_libc_test(
wcslen_test
SUITE
libc_wchar_unittests
SRCS
wcslen_test.cpp
DEPENDS
libc.hdr.types.size_t
libc.hdr.types.wchar_t
libc.src.wchar.wcslen
)

add_libc_test(
btowc_test
SUITE
Expand Down
20 changes: 20 additions & 0 deletions libc/test/src/wchar/wcslen_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//===-- Unittests for wcslen ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/wchar/wcslen.h"
#include "test/UnitTest/Test.h"

TEST(LlvmLibcWCSLenTest, EmptyString) {
ASSERT_EQ(size_t{0}, LIBC_NAMESPACE::wcslen(L""));
}

TEST(LlvmLibcWCSLenTest, AnyString) {
ASSERT_EQ(size_t{12}, LIBC_NAMESPACE::wcslen(L"Hello World!"));
}
Loading