Skip to content

[libc] Implemented wcrtomb internal function and public libc function #144596

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jun 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1247,6 +1247,7 @@ if(LLVM_LIBC_FULL_BUILD)

# wchar.h entrypoints
libc.src.wchar.mbrtowc
libc.src.wchar.wcrtomb
)
endif()

Expand Down
8 changes: 8 additions & 0 deletions libc/include/wchar.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,14 @@ functions:
- type: wchar_t *__restrict
- type: const wchar_t *__restrict
- type: size_t
# wcrtomb: size_t wcrtomb(char *__restrict, wchar_t, mbstate_t *__restrict)
- name: wcrtomb
standards:
- stdc
return_type: size_t
arguments:
- type: char *__restrict
- type: wchar_t
- type: mbstate_t *__restrict
- name: wcscpy
standards:
- stdc
Expand Down
16 changes: 16 additions & 0 deletions libc/src/__support/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,22 @@ add_object_library(
.mbstate
)

# Internal, ErrorOr-returning implementation of wcrtomb shared with the public
# entrypoint. Every header that wcrtomb.cpp / wcrtomb.h include directly is
# listed so the target does not rely on transitive dependencies.
add_object_library(
  wcrtomb
  HDRS
    wcrtomb.h
  SRCS
    wcrtomb.cpp
  DEPENDS
    libc.hdr.types.char32_t
    libc.hdr.types.size_t
    libc.hdr.types.wchar_t
    libc.src.__support.error_or
    libc.src.__support.common
    libc.src.__support.libc_assert
    libc.src.__support.macros.config
    .character_converter
    .mbstate
)

add_object_library(
mbrtowc
HDRS
Expand Down
49 changes: 49 additions & 0 deletions libc/src/__support/wchar/wcrtomb.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
//===-- Implementation of wcrtomb -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/__support/wchar/wcrtomb.h"
#include "src/__support/error_or.h"
#include "src/__support/wchar/character_converter.h"
#include "src/__support/wchar/mbstate.h"

#include "hdr/types/char32_t.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/libc_assert.h"

namespace LIBC_NAMESPACE_DECL {
namespace internal {

// Converts the wide character `wc` to its UTF-8 byte sequence and stores the
// bytes in `s`, using `ps` as the conversion state handed to the
// CharacterConverter.
//
// Returns the number of bytes written to `s`. Returns Error(-1) when `s` is
// null, or Error(status) with the non-zero status reported by
// CharacterConverter::push when the wide character is rejected.
ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
                        mbstate *__restrict ps) {
  // The wide character is reinterpreted as a char32_t below.
  static_assert(sizeof(wchar_t) == 4);

  CharacterConverter cr(ps);

  if (s == nullptr)
    return Error(-1);

  if (int status = cr.push(static_cast<char32_t>(wc)); status != 0)
    return Error(status);

  // Drain the converter one UTF-8 byte at a time into the output buffer.
  size_t written = 0;
  for (; !cr.isEmpty(); ++written) {
    // Popping can never fail as long as the push above succeeded.
    auto byte = cr.pop_utf8();
    LIBC_ASSERT(byte.has_value());

    s[written] = byte.value();
  }
  return written;
}

} // namespace internal
} // namespace LIBC_NAMESPACE_DECL
26 changes: 26 additions & 0 deletions libc/src/__support/wchar/wcrtomb.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
//===-- Implementation header for wcrtomb ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
#define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H

#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/error_or.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbstate.h"

namespace LIBC_NAMESPACE_DECL {
namespace internal {

// Internal wcrtomb: converts the wide character `wc` into bytes stored at `s`,
// using `ps` as the conversion state. On success the result holds the number
// of bytes written; otherwise it holds an error value.
ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc, mbstate *__restrict ps);

} // namespace internal
} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
14 changes: 14 additions & 0 deletions libc/src/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,20 @@ add_entrypoint_object(
libc.src.__support.wctype_utils
)

# Public wcrtomb entrypoint. DEPENDS lists every header the source and header
# include directly (size_t, common and macros.config were previously only
# reachable transitively).
add_entrypoint_object(
  wcrtomb
  SRCS
    wcrtomb.cpp
  HDRS
    wcrtomb.h
  DEPENDS
    libc.hdr.types.wchar_t
    libc.hdr.types.mbstate_t
    libc.hdr.types.size_t
    libc.src.__support.common
    libc.src.__support.libc_errno
    libc.src.__support.macros.config
    libc.src.__support.wchar.wcrtomb
    libc.src.__support.wchar.mbstate
)

add_entrypoint_object(
mbrtowc
SRCS
Expand Down
45 changes: 45 additions & 0 deletions libc/src/wchar/wcrtomb.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
//===-- Implementation of wcrtomb -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/wchar/wcrtomb.h"

#include "hdr/types/mbstate_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbstate.h"
#include "src/__support/wchar/wcrtomb.h"

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(size_t, wcrtomb,
(char *__restrict s, wchar_t wc, mbstate_t *__restrict ps)) {
static internal::mbstate internal_mbstate;

// when s is nullptr, this is equivalent to wcrtomb(buf, L'\0', ps)
char buf[sizeof(wchar_t) / sizeof(char)];
if (s == nullptr) {
s = buf;
wc = L'\0';
}

auto result = internal::wcrtomb(
s, wc,
ps == nullptr ? &internal_mbstate
: reinterpret_cast<internal::mbstate *>(ps));

if (!result.has_value()) {
libc_errno = EILSEQ;
return -1;
}

return result.value();
}

} // namespace LIBC_NAMESPACE_DECL
23 changes: 23 additions & 0 deletions libc/src/wchar/wcrtomb.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//===-- Implementation header for wcrtomb -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_WCHAR_WCRTOMB_H
#define LLVM_LIBC_SRC_WCHAR_WCRTOMB_H

#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Converts the wide character `wc` to bytes stored at `s` and returns the
// number of bytes written. A null `s` is treated as a conversion of L'\0' into
// an internal buffer; a null `ps` selects an internal static conversion state.
// On failure returns -1 converted to size_t and sets errno to EILSEQ.
size_t wcrtomb(char *__restrict s, wchar_t wc, mbstate_t *__restrict ps);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_WCHAR_WCRTOMB_H
14 changes: 14 additions & 0 deletions libc/test/src/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,20 @@ add_libc_test(
libc.src.wchar.wctob
)

# Unit tests for the public wcrtomb entrypoint (wcrtomb_test.cpp).
add_libc_test(
wcrtomb_test
SUITE
libc_wchar_unittests
SRCS
wcrtomb_test.cpp
DEPENDS
libc.src.wchar.wcrtomb
libc.src.string.memset
libc.hdr.types.wchar_t
libc.hdr.types.mbstate_t
libc.src.__support.libc_errno
)

add_libc_test(
wmemset_test
SUITE
Expand Down
93 changes: 93 additions & 0 deletions libc/test/src/wchar/wcrtomb_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
//===-- Unittests for wcrtomb --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "hdr/types/mbstate_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/libc_errno.h"
#include "src/string/memset.h"
#include "src/wchar/wcrtomb.h"
#include "test/UnitTest/Test.h"

// An ASCII wide character must convert to a single byte of the same value.
TEST(LlvmLibcWCRToMBTest, OneByte) {
  mbstate_t ps;
  LIBC_NAMESPACE::memset(&ps, 0, sizeof(ps));

  char out[4];
  size_t written = LIBC_NAMESPACE::wcrtomb(out, L'U', &ps);

  ASSERT_EQ(written, static_cast<size_t>(1));
  ASSERT_EQ(out[0], 'U');
}

// utf32 0xff must encode as the two utf8 bytes 0xc3 0xbf.
TEST(LlvmLibcWCRToMBTest, TwoByte) {
  mbstate_t ps;
  LIBC_NAMESPACE::memset(&ps, 0, sizeof(ps));

  const wchar_t input = 0xff;
  const char expected[] = {static_cast<char>(0xc3), static_cast<char>(0xbf)};

  char out[4];
  size_t written = LIBC_NAMESPACE::wcrtomb(out, input, &ps);

  ASSERT_EQ(written, static_cast<size_t>(2));
  for (size_t i = 0; i < sizeof(expected); ++i)
    ASSERT_EQ(out[i], expected[i]);
}

// utf32 0xac15 must encode as the three utf8 bytes 0xea 0xb0 0x95.
TEST(LlvmLibcWCRToMBTest, ThreeByte) {
  mbstate_t ps;
  LIBC_NAMESPACE::memset(&ps, 0, sizeof(ps));

  const wchar_t input = 0xac15;
  const char expected[] = {static_cast<char>(0xea), static_cast<char>(0xb0),
                           static_cast<char>(0x95)};

  char out[4];
  size_t written = LIBC_NAMESPACE::wcrtomb(out, input, &ps);

  ASSERT_EQ(written, static_cast<size_t>(3));
  for (size_t i = 0; i < sizeof(expected); ++i)
    ASSERT_EQ(out[i], expected[i]);
}

// utf32 0x1f921 must encode as the four utf8 bytes 0xf0 0x9f 0xa4 0xa1.
TEST(LlvmLibcWCRToMBTest, FourByte) {
  mbstate_t ps;
  LIBC_NAMESPACE::memset(&ps, 0, sizeof(ps));

  const wchar_t input = 0x1f921;
  const char expected[] = {static_cast<char>(0xf0), static_cast<char>(0x9f),
                           static_cast<char>(0xa4), static_cast<char>(0xa1)};

  char out[4];
  size_t written = LIBC_NAMESPACE::wcrtomb(out, input, &ps);

  ASSERT_EQ(written, static_cast<size_t>(4));
  for (size_t i = 0; i < sizeof(expected); ++i)
    ASSERT_EQ(out[i], expected[i]);
}

// A null destination must make the call equivalent to
// wcrtomb(buf, L'\0', state), regardless of the wide character passed in.
TEST(LlvmLibcWCRToMBTest, NullString) {
  mbstate_t ps;
  LIBC_NAMESPACE::memset(&ps, 0, sizeof(ps));

  char out[4];

  size_t null_dest_count = LIBC_NAMESPACE::wcrtomb(nullptr, L'A', &ps);
  size_t nul_char_count = LIBC_NAMESPACE::wcrtomb(out, L'\0', &ps);

  ASSERT_EQ(null_dest_count, nul_char_count);
}

// Passing a null state must still convert an ASCII character to one byte.
TEST(LlvmLibcWCRToMBTest, NullState) {
  char out[4];
  size_t written = LIBC_NAMESPACE::wcrtomb(out, L'A', nullptr);
  ASSERT_EQ(written, static_cast<size_t>(1));
}

// 0x12ffff lies above the last Unicode code point (0x10ffff); the conversion
// must fail with (size_t)-1 and set errno to EILSEQ.
TEST(LlvmLibcWCRToMBTest, InvalidWchar) {
  mbstate_t ps;
  LIBC_NAMESPACE::memset(&ps, 0, sizeof(ps));

  const wchar_t invalid = 0x12ffff;
  char out[4];
  size_t result = LIBC_NAMESPACE::wcrtomb(out, invalid, &ps);

  ASSERT_EQ(result, static_cast<size_t>(-1));
  ASSERT_EQ(static_cast<int>(libc_errno), EILSEQ);
}
Loading