Skip to content

Commit e464684

Browse files
authored
[Clang] Allow raw string literals in C as an extension (#88265)
This enables raw R"" string literals in C in some language modes and adds an option to disable or enable them explicitly as an extension. Background: GCC supports raw string literals in C in `-std=gnuXY` modes starting with gnu99. This PR both enables raw string literals in gnu99 mode and later in C and adds an `-f[no-]raw-string-literals` flag to override this behaviour. The decision not to enable raw string literals in gnu89 mode, according to the GCC devs, is intentional as that mode is supposed to be used for ‘old code’ that they don’t want to break; we’ve decided to match GCC’s behaviour here as well. The `-fraw-string-literals` flag can additionally be used to enable raw string literals in modes where they aren’t enabled by default (such as c99—as opposed to gnu99—or even e.g. C++03); conversely, the negated flag can be used to disable them in any gnuXY modes that *do* provide them by default, or to override a previous flag. However, we do *not* support disabling raw string literals (or indeed using either of these two options) in C++11 mode and later, because we don’t want to just start supporting disabling features that are actually part of the language in the general case. This fixes #85703.
1 parent 397daea commit e464684

File tree

13 files changed

+110
-9
lines changed

13 files changed

+110
-9
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ code bases.
4343
C/C++ Language Potentially Breaking Changes
4444
-------------------------------------------
4545

46+
- Clang now supports raw string literals in ``-std=gnuXY`` mode as an extension in
47+
C99 and later. This behaviour can also be overridden using ``-f[no-]raw-string-literals``.
48+
Support of raw string literals in C++ is not affected. Fixes (#GH85703).
49+
4650
C++ Specific Potentially Breaking Changes
4751
-----------------------------------------
4852
- Clang now diagnoses function/variable templates that shadow their own template parameters, e.g. ``template<class T> void T();``.

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,9 @@ def err_drv_negative_columns : Error<
375375
"invalid value '%1' in '%0', value must be 'none' or a positive integer">;
376376
def err_drv_small_columns : Error<
377377
"invalid value '%1' in '%0', value must be '%2' or greater">;
378+
def warn_drv_fraw_string_literals_in_cxx11 : Warning<
379+
"ignoring '-f%select{no-|}0raw-string-literals', which is only valid for C and C++ standards before C++11">,
380+
InGroup<UnusedCommandLineArgument>;
378381

379382
def err_drv_invalid_malign_branch_EQ : Error<
380383
"invalid argument '%0' to -malign-branch=; each element must be one of: %1">;

clang/include/clang/Basic/LangOptions.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,8 @@ LANGOPT(MatrixTypes, 1, 0, "Enable or disable the builtin matrix type")
465465

466466
LANGOPT(CXXAssumptions, 1, 1, "Enable or disable codegen and compile-time checks for C++23's [[assume]] attribute")
467467

468+
LANGOPT(RawStringLiterals, 1, 1, "Enable or disable raw string literals")
469+
468470
ENUM_LANGOPT(StrictFlexArraysLevel, StrictFlexArraysLevelKind, 2,
469471
StrictFlexArraysLevelKind::Default,
470472
"Rely on strict definition of flexible arrays")

clang/include/clang/Basic/LangStandard.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,13 @@ struct LangStandard {
134134
/// hasDigraphs - Language supports digraphs.
135135
bool hasDigraphs() const { return Flags & Digraphs; }
136136

137+
/// hasRawStringLiterals - Language standard enables R"()" raw string
/// literals by default: either C++11 or later, or a GNU-mode C standard
/// that is C99 or later.
138+
bool hasRawStringLiterals() const {
139+
// Raw string literals are always available from C++11 onwards. In C they
// are only on by default when the standard is both C99-or-later and a GNU
// mode, matching GCC's -std=gnuXY behaviour; C++ standards
140+
// before C++11 do not get them by default, GNU mode or not.
141+
return isCPlusPlus11() || (!isCPlusPlus() && isC99() && isGNUMode());
142+
}
143+
137144
/// isGNUMode - Language includes GNU extensions.
138145
bool isGNUMode() const { return Flags & GNUMode; }
139146

clang/include/clang/Driver/Options.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4240,6 +4240,12 @@ def fenable_matrix : Flag<["-"], "fenable-matrix">, Group<f_Group>,
42404240
HelpText<"Enable matrix data type and related builtin functions">,
42414241
MarshallingInfoFlag<LangOpts<"MatrixTypes">>;
42424242

4243+
defm raw_string_literals : BoolFOption<"raw-string-literals",
4244+
LangOpts<"RawStringLiterals">, Default<std#".hasRawStringLiterals()">,
4245+
PosFlag<SetTrue, [], [], "Enable">,
4246+
NegFlag<SetFalse, [], [], "Disable">,
4247+
BothFlags<[], [ClangOption, CC1Option], " raw string literals">>;
4248+
42434249
def fzero_call_used_regs_EQ
42444250
: Joined<["-"], "fzero-call-used-regs=">, Group<f_Group>,
42454251
Visibility<[ClangOption, CC1Option]>,

clang/lib/Basic/LangOptions.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ void LangOptions::setLangDefaults(LangOptions &Opts, Language Lang,
125125
Opts.HexFloats = Std.hasHexFloats();
126126
Opts.WChar = Std.isCPlusPlus();
127127
Opts.Digraphs = Std.hasDigraphs();
128+
Opts.RawStringLiterals = Std.hasRawStringLiterals();
128129

129130
Opts.HLSL = Lang == Language::HLSL;
130131
if (Opts.HLSL && Opts.IncludeDefaultHeader)

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6519,6 +6519,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
65196519
Args.AddLastArg(CmdArgs, options::OPT_fheinous_gnu_extensions);
65206520
Args.AddLastArg(CmdArgs, options::OPT_fdigraphs, options::OPT_fno_digraphs);
65216521
Args.AddLastArg(CmdArgs, options::OPT_fzero_call_used_regs_EQ);
6522+
Args.AddLastArg(CmdArgs, options::OPT_fraw_string_literals,
6523+
options::OPT_fno_raw_string_literals);
65226524

65236525
if (Args.hasFlag(options::OPT_femulated_tls, options::OPT_fno_emulated_tls,
65246526
Triple.hasDefaultEmulatedTLS()))

clang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,19 @@ static bool FixupInvocation(CompilerInvocation &Invocation,
610610
LangOpts.NewAlignOverride = 0;
611611
}
612612

613+
// The -f[no-]raw-string-literals option is only valid in C and in C++
614+
// standards before C++11.
615+
if (LangOpts.CPlusPlus11) {
616+
if (Args.hasArg(OPT_fraw_string_literals, OPT_fno_raw_string_literals)) {
617+
Args.claimAllArgs(OPT_fraw_string_literals, OPT_fno_raw_string_literals);
618+
Diags.Report(diag::warn_drv_fraw_string_literals_in_cxx11)
619+
<< bool(LangOpts.RawStringLiterals);
620+
}
621+
622+
// Do not allow disabling raw string literals in C++11 or later.
623+
LangOpts.RawStringLiterals = true;
624+
}
625+
613626
// Prevent the user from specifying both -fsycl-is-device and -fsycl-is-host.
614627
if (LangOpts.SYCLIsDevice && LangOpts.SYCLIsHost)
615628
Diags.Report(diag::err_drv_argument_not_allowed_with) << "-fsycl-is-device"

clang/lib/Lex/DependencyDirectivesScanner.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ struct Scanner {
7373
// Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'.
7474
LangOpts.ObjC = true;
7575
LangOpts.LineComment = true;
76-
// FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"" and
77-
// R"()" literals.
76+
LangOpts.RawStringLiterals = true;
77+
// FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"".
7878
return LangOpts;
7979
}
8080

clang/lib/Lex/Lexer.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3876,7 +3876,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
38763876
tok::utf16_char_constant);
38773877

38783878
// UTF-16 raw string literal
3879-
if (Char == 'R' && LangOpts.CPlusPlus11 &&
3879+
if (Char == 'R' && LangOpts.RawStringLiterals &&
38803880
getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
38813881
return LexRawStringLiteral(Result,
38823882
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
@@ -3898,7 +3898,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
38983898
SizeTmp2, Result),
38993899
tok::utf8_char_constant);
39003900

3901-
if (Char2 == 'R' && LangOpts.CPlusPlus11) {
3901+
if (Char2 == 'R' && LangOpts.RawStringLiterals) {
39023902
unsigned SizeTmp3;
39033903
char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
39043904
// UTF-8 raw string literal
@@ -3934,7 +3934,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
39343934
tok::utf32_char_constant);
39353935

39363936
// UTF-32 raw string literal
3937-
if (Char == 'R' && LangOpts.CPlusPlus11 &&
3937+
if (Char == 'R' && LangOpts.RawStringLiterals &&
39383938
getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
39393939
return LexRawStringLiteral(Result,
39403940
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
@@ -3949,7 +3949,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
39493949
// Notify MIOpt that we read a non-whitespace/non-comment token.
39503950
MIOpt.ReadToken();
39513951

3952-
if (LangOpts.CPlusPlus11) {
3952+
if (LangOpts.RawStringLiterals) {
39533953
Char = getCharAndSize(CurPtr, SizeTmp);
39543954

39553955
if (Char == '"')
@@ -3972,7 +3972,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
39723972
tok::wide_string_literal);
39733973

39743974
// Wide raw string literal.
3975-
if (LangOpts.CPlusPlus11 && Char == 'R' &&
3975+
if (LangOpts.RawStringLiterals && Char == 'R' &&
39763976
getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
39773977
return LexRawStringLiteral(Result,
39783978
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// RUN: %clang -fraw-string-literals -fsyntax-only -std=c++03 %s 2>&1 | FileCheck --check-prefix=CHECK-PRE-CXX11 --allow-empty %s
2+
// RUN: %clang -fraw-string-literals -fsyntax-only -std=gnu++03 %s 2>&1 | FileCheck --check-prefix=CHECK-PRE-CXX11 --allow-empty %s
3+
// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=c++03 %s 2>&1 | FileCheck --check-prefix=CHECK-PRE-CXX11 --allow-empty %s
4+
// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=gnu++03 %s 2>&1 | FileCheck --check-prefix=CHECK-PRE-CXX11 --allow-empty %s
5+
// RUN: %clang -fraw-string-literals -fsyntax-only -std=c++11 %s 2>&1 | FileCheck --check-prefix=CHECK-POS %s
6+
// RUN: %clang -fraw-string-literals -fsyntax-only -std=gnu++11 %s 2>&1 | FileCheck --check-prefix=CHECK-POS %s
7+
// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=c++11 %s 2>&1 | FileCheck --check-prefix=CHECK-NEG %s
8+
// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=gnu++11 %s 2>&1 | FileCheck --check-prefix=CHECK-NEG %s
9+
// RUN: %clang -fraw-string-literals -fsyntax-only -std=c++20 %s 2>&1 | FileCheck --check-prefix=CHECK-POS %s
10+
// RUN: %clang -fraw-string-literals -fsyntax-only -std=gnu++20 %s 2>&1 | FileCheck --check-prefix=CHECK-POS %s
11+
// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=c++20 %s 2>&1 | FileCheck --check-prefix=CHECK-NEG %s
12+
// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=gnu++20 %s 2>&1 | FileCheck --check-prefix=CHECK-NEG %s
13+
14+
// CHECK-PRE-CXX11-NOT: ignoring '-fraw-string-literals'
15+
// CHECK-PRE-CXX11-NOT: ignoring '-fno-raw-string-literals'
16+
// CHECK-POS: ignoring '-fraw-string-literals', which is only valid for C and C++ standards before C++11
17+
// CHECK-NEG: ignoring '-fno-raw-string-literals', which is only valid for C and C++ standards before C++11

clang/test/Lexer/raw-string-ext.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// RUN: %clang_cc1 -fsyntax-only -std=gnu11 -verify=supported %s
2+
// RUN: %clang_cc1 -fsyntax-only -std=c11 -DUNICODE -fraw-string-literals -verify=supported %s
3+
// RUN: %clang_cc1 -fsyntax-only -std=gnu89 -verify=unsupported %s
4+
// RUN: %clang_cc1 -fsyntax-only -std=c11 -DUNICODE -verify=unsupported %s
5+
// RUN: %clang_cc1 -fsyntax-only -std=gnu11 -DUNICODE -fno-raw-string-literals -verify=unsupported %s
6+
7+
// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++03 -verify=unsupported,cxx-unsupported %s
8+
// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=gnu++03 -verify=unsupported,cxx-unsupported %s
9+
// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++03 -fraw-string-literals -verify=supported %s
10+
// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=gnu++03 -fraw-string-literals -verify=supported %s
11+
// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++11 -DUNICODE -verify=supported,cxx %s
12+
// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=gnu++11 -DUNICODE -verify=supported,cxx %s
13+
// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++11 -DUNICODE -fraw-string-literals -verify=supported,yes %s
14+
// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=gnu++11 -DUNICODE -fraw-string-literals -verify=supported,yes %s
15+
// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++11 -DUNICODE -fno-raw-string-literals -verify=supported,no %s
16+
// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=gnu++11 -DUNICODE -fno-raw-string-literals -verify=supported,no %s
17+
18+
// GCC supports raw string literals in C99 and later in '-std=gnuXY' mode; we
19+
// additionally provide '-f[no-]raw-string-literals' to enable/disable them
20+
// explicitly in C.
21+
//
22+
// We do not allow disabling raw string literals in C++ mode if they’re enabled
23+
// by the language standard, i.e. in C++11 or later.
24+
25+
// Driver warnings.
26+
// yes-warning@* {{ignoring '-fraw-string-literals'}}
27+
// no-warning@* {{ignoring '-fno-raw-string-literals'}}
28+
29+
// Uses every raw string literal prefix: R and LR always, plus uR, u8R and UR
// when UNICODE is defined. In the RUN configurations where raw string literals
// are not available, each prefix is expected to be diagnosed as an undeclared
// identifier (with an additional missing-';' error in the C++ runs).
void f() {
30+
(void) R"foo()foo"; // unsupported-error {{use of undeclared identifier 'R'}} cxx-unsupported-error {{expected ';' after expression}}
31+
(void) LR"foo()foo"; // unsupported-error {{use of undeclared identifier 'LR'}} cxx-unsupported-error {{expected ';' after expression}}
32+
33+
#ifdef UNICODE
34+
(void) uR"foo()foo"; // unsupported-error {{use of undeclared identifier 'uR'}} cxx-unsupported-error {{expected ';' after expression}}
35+
(void) u8R"foo()foo"; // unsupported-error {{use of undeclared identifier 'u8R'}} cxx-unsupported-error {{expected ';' after expression}}
36+
(void) UR"foo()foo"; // unsupported-error {{use of undeclared identifier 'UR'}} cxx-unsupported-error {{expected ';' after expression}}
37+
#endif
38+
}
39+
40+
// supported-error@* {{missing terminating delimiter}}
41+
// supported-error@* {{expected expression}}
42+
// supported-error@* {{expected ';' after top level declarator}}
43+
#define R "bar"
44+
const char* s = R"foo(";

clang/unittests/Lex/DependencyDirectivesScannerTest.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -583,10 +583,12 @@ TEST(MinimizeSourceToDependencyDirectivesTest, UnderscorePragma) {
583583
R"(_Pragma(u"clang module import"))", Out));
584584
EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
585585

586-
// FIXME: R"()" strings depend on using C++11 language mode
586+
// R"()" strings are enabled by default.
587587
ASSERT_FALSE(minimizeSourceToDependencyDirectives(
588588
R"(_Pragma(R"abc(clang module import)abc"))", Out));
589-
EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
589+
EXPECT_STREQ(R"(_Pragma(R"abc(clang module import)abc"))"
590+
"\n",
591+
Out.data());
590592
}
591593

592594
TEST(MinimizeSourceToDependencyDirectivesTest, Include) {

0 commit comments

Comments
 (0)