Skip to content

[libc] add wmemchr, wcslen, wcschr, wcsrchr, wcspbrk, wcsstr #121183

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions libc/config/gpu/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,12 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.time.nanosleep

# wchar.h entrypoints
libc.src.wchar.wcsstr
libc.src.wchar.wcspbrk
libc.src.wchar.wcsrchr
libc.src.wchar.wcschr
libc.src.wchar.wcslen
libc.src.wchar.wmemchr
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sort alphabetically, here and below

libc.src.wchar.wctob

# locale.h entrypoints
Expand Down
6 changes: 6 additions & 0 deletions libc/config/linux/aarch64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,12 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.unistd.write

# wchar.h entrypoints
libc.src.wchar.wcsstr
libc.src.wchar.wcspbrk
libc.src.wchar.wcsrchr
libc.src.wchar.wcschr
libc.src.wchar.wcslen
libc.src.wchar.wmemchr
libc.src.wchar.wctob
)

Expand Down
6 changes: 6 additions & 0 deletions libc/config/linux/riscv/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,12 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.unistd.write

# wchar.h entrypoints
libc.src.wchar.wcsstr
libc.src.wchar.wcspbrk
libc.src.wchar.wcsrchr
libc.src.wchar.wcschr
libc.src.wchar.wcslen
libc.src.wchar.wmemchr
libc.src.wchar.wctob
)

Expand Down
6 changes: 6 additions & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,12 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.unistd.write

# wchar.h entrypoints
libc.src.wchar.wcsstr
libc.src.wchar.wcspbrk
libc.src.wchar.wcsrchr
libc.src.wchar.wcschr
libc.src.wchar.wcslen
libc.src.wchar.wmemchr
libc.src.wchar.wctob
libc.src.wchar.btowc
)
Expand Down
42 changes: 42 additions & 0 deletions libc/hdrgen/yaml/wchar.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,45 @@ functions:
return_type: int
arguments:
- type: wint_t
# wmemchr: listed under the C standard (stdc). Takes
# (const wchar_t *, wchar_t, size_t) and returns const wchar_t *.
# NOTE(review): ISO C spells the return type as plain `wchar_t *`; confirm the
# const-qualified form is what the generated public header should expose.
- name: wmemchr
standards:
- stdc
return_type: const wchar_t *
arguments:
- type: const wchar_t *
- type: wchar_t
- type: size_t
# wcslen: listed under the C standard (stdc). Takes a single const wchar_t *
# argument and returns size_t.
- name: wcslen
standards:
- stdc
return_type: size_t
arguments:
- type: const wchar_t *
# wcschr: listed under the C standard (stdc). Takes (const wchar_t *, wchar_t)
# and returns const wchar_t *, matching the declaration in src/wchar/wcschr.h.
# NOTE(review): ISO C spells the return type as plain `wchar_t *`; confirm the
# const-qualified form is what the generated public header should expose.
- name: wcschr
standards:
- stdc
return_type: const wchar_t *
arguments:
- type: const wchar_t *
- type: wchar_t
# wcsrchr: listed under the C standard (stdc). Takes (const wchar_t *, wchar_t)
# and returns const wchar_t *, matching the declaration in src/wchar/wcsrchr.h.
# NOTE(review): ISO C spells the return type as plain `wchar_t *`; confirm the
# const-qualified form is what the generated public header should expose.
- name: wcsrchr
standards:
- stdc
return_type: const wchar_t *
arguments:
- type: const wchar_t *
- type: wchar_t
# wcspbrk: listed under the C standard (stdc). Takes two const wchar_t *
# arguments and returns const wchar_t *, matching the declaration in
# src/wchar/wcspbrk.h.
# NOTE(review): ISO C spells the return type as plain `wchar_t *`; confirm the
# const-qualified form is what the generated public header should expose.
- name: wcspbrk
standards:
- stdc
return_type: const wchar_t *
arguments:
- type: const wchar_t *
- type: const wchar_t *
# wcsstr: listed under the C standard (stdc). Takes two const wchar_t *
# arguments and returns const wchar_t *.
# NOTE(review): ISO C spells the return type as plain `wchar_t *`; confirm the
# const-qualified form is what the generated public header should expose.
- name: wcsstr
standards:
- stdc
return_type: const wchar_t *
arguments:
- type: const wchar_t *
- type: const wchar_t *
69 changes: 69 additions & 0 deletions libc/src/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,72 @@ add_entrypoint_object(
libc.hdr.wchar_macros
libc.src.__support.wctype_utils
)

# Entrypoint object for wmemchr, built from wmemchr.cpp with header wmemchr.h.
# Depends on the size_t and wchar_t type headers and on wctype_utils.
add_entrypoint_object(
wmemchr
SRCS
wmemchr.cpp
HDRS
wmemchr.h
DEPENDS
libc.hdr.types.size_t
libc.hdr.types.wchar_t
libc.src.__support.wctype_utils
)

# Entrypoint object for wcslen, built from wcslen.cpp with header wcslen.h.
# Depends on the size_t and wchar_t type headers and on wctype_utils.
# NOTE(review): wcslen.cpp includes only its own header, common.h and
# macros/config.h, so the wctype_utils dependency may be unnecessary — confirm.
add_entrypoint_object(
wcslen
SRCS
wcslen.cpp
HDRS
wcslen.h
DEPENDS
libc.hdr.types.size_t
libc.hdr.types.wchar_t
libc.src.__support.wctype_utils
)

# Entrypoint object for wcschr, built from wcschr.cpp with header wcschr.h.
# Depends on the sibling entrypoints .wcslen and .wmemchr in this directory
# and on the wchar_t type header.
add_entrypoint_object(
wcschr
SRCS
wcschr.cpp
HDRS
wcschr.h
DEPENDS
.wcslen
.wmemchr
libc.hdr.types.wchar_t
)

# Entrypoint object for wcsrchr, built from wcsrchr.cpp with header wcsrchr.h.
# Depends on the sibling entrypoint .wcslen and on the wchar_t type header.
add_entrypoint_object(
wcsrchr
SRCS
wcsrchr.cpp
HDRS
wcsrchr.h
DEPENDS
.wcslen
libc.hdr.types.wchar_t
)

# Entrypoint object for wcspbrk, built from wcspbrk.cpp with header wcspbrk.h.
# Depends on the sibling entrypoint .wcslen and on the wchar_t type header.
add_entrypoint_object(
wcspbrk
SRCS
wcspbrk.cpp
HDRS
wcspbrk.h
DEPENDS
.wcslen
libc.hdr.types.wchar_t
)

# Entrypoint object for wcsstr, built from wcsstr.cpp with header wcsstr.h.
# Depends on the sibling entrypoint .wcslen and on the wchar_t type header.
add_entrypoint_object(
wcsstr
SRCS
wcsstr.cpp
HDRS
wcsstr.h
DEPENDS
.wcslen
libc.hdr.types.wchar_t
)
21 changes: 21 additions & 0 deletions libc/src/wchar/wcschr.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//===-- Implementation of wcschr ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/wchar/wcschr.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
#include "wcslen.h"
#include "wmemchr.h"

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(const wchar_t *, wcschr, (const wchar_t *s, wchar_t c)) {
return wmemchr(s, c, wcslen(s));
}

} // namespace LIBC_NAMESPACE_DECL
21 changes: 21 additions & 0 deletions libc/src/wchar/wcschr.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//===-- Implementation header for wcschr ------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_WCHAR_WCSCHR_H
#define LLVM_LIBC_SRC_WCHAR_WCSCHR_H

#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Declaration of the wcschr entrypoint; the definition lives in wcschr.cpp.
// Takes a null-terminated wide string `s` and a wide character `c`, and is
// intended to return a pointer to the first occurrence of `c` in `s`, or
// nullptr when there is none.
const wchar_t *wcschr(const wchar_t *s, wchar_t c);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_WCHAR_WCSCHR_H
22 changes: 22 additions & 0 deletions libc/src/wchar/wcslen.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===-- Implementation of wcslen ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/wchar/wcslen.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Returns the number of wide characters in the null-terminated wide string
// `s`, not counting the terminating null wide character.
//
// wchar_t is typically 2 bytes on Windows and 4 bytes on Unix-like targets;
// indexing by element means each step reads one whole wide character, so the
// plain loop below is correct for either width.
//
// TODO: hoist this loop into an internal utility templated on the character
// type, so the narrow and wide length routines can share it and so other
// wide-string entrypoints have something to call that is not an entrypoint.
LLVM_LIBC_FUNCTION(size_t, wcslen, (const wchar_t *s)) {
  size_t length = 0;
  // Test before incrementing. `while (s[length++])` also bumps the counter on
  // the iteration that reads the terminator, reporting one element too many.
  while (s[length] != L'\0')
    ++length;
  return length;
}

} // namespace LIBC_NAMESPACE_DECL
22 changes: 22 additions & 0 deletions libc/src/wchar/wcslen.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===-- Implementation header for wcslen ------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_WCHAR_WCSLEN_H
#define LLVM_LIBC_SRC_WCHAR_WCSLEN_H

#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Declaration of the wcslen entrypoint; the definition lives in wcslen.cpp.
// Takes a null-terminated wide string `s` and is intended to return the number
// of wide characters that precede its terminating null wide character.
size_t wcslen(const wchar_t *s);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_WCHAR_WCSLEN_H
37 changes: 37 additions & 0 deletions libc/src/wchar/wcspbrk.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//===-- Implementation of wcspbrk -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/wchar/wcspbrk.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
#include "wcslen.h"

namespace LIBC_NAMESPACE_DECL {

// Returns a pointer to the first wide character in `wcs` that also appears in
// `accept`, or nullptr when `wcs` contains none of the characters in `accept`
// (including when either string is empty). Both arguments must be
// null-terminated wide strings; the terminators themselves never match.
//
// Both strings are walked directly rather than measured with wcslen:
// entrypoints are meant to be independent of each other, because a user may
// configure the library without wcslen. Shared logic belongs in an internal
// utility header that each entrypoint can call.
LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk,
                   (const wchar_t *wcs, const wchar_t *accept)) {
  for (; *wcs != L'\0'; ++wcs) {
    // A position matches as soon as it equals ANY character of `accept`.
    for (const wchar_t *candidate = accept; *candidate != L'\0'; ++candidate) {
      if (*wcs == *candidate)
        return wcs;
    }
  }
  return nullptr;
}

} // namespace LIBC_NAMESPACE_DECL
21 changes: 21 additions & 0 deletions libc/src/wchar/wcspbrk.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//===-- Implementation header for wcspbrk -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_WCHAR_WCSPBRK_H
#define LLVM_LIBC_SRC_WCHAR_WCSPBRK_H

#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Declaration of the wcspbrk entrypoint; the definition lives in wcspbrk.cpp.
// Takes two null-terminated wide strings and is intended to return a pointer
// to the first wide character of `wcs` that occurs in `accept`, or nullptr
// when there is none.
const wchar_t *wcspbrk(const wchar_t *wcs, const wchar_t *accept);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_WCHAR_WCSPBRK_H
26 changes: 26 additions & 0 deletions libc/src/wchar/wcsrchr.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
//===-- Implementation of wcsrchr -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/wchar/wcsrchr.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
#include "wcslen.h"

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(const wchar_t *, wcsrchr, (const wchar_t *s, wchar_t c)) {
size_t length = wcslen(s);
for (size_t i = 0; i < length; i++) {
if (s[length - i] == c)
return &s[length - i];
}
return nullptr;
}

} // namespace LIBC_NAMESPACE_DECL
21 changes: 21 additions & 0 deletions libc/src/wchar/wcsrchr.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//===-- Implementation header for wcsrchr -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_WCHAR_WCSRCHR_H
#define LLVM_LIBC_SRC_WCHAR_WCSRCHR_H

#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Declaration of the wcsrchr entrypoint; the definition lives in wcsrchr.cpp.
// Takes a null-terminated wide string `s` and a wide character `c`, and is
// intended to return a pointer to the last occurrence of `c` in `s`, or
// nullptr when there is none.
const wchar_t *wcsrchr(const wchar_t *s, wchar_t c);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_WCHAR_WCSRCHR_H
Loading
Loading