Skip to content

Commit 31d39cc

Browse files
committed
[clang][modules-driver] Add scanner to detect C++20 module presence
This PR is part of a series aimed at implementing native support for explicit module builds from the Clang driver. This introduces a new scanner that detects C++20 module usage in source files without using the preprocessor or lexer. For now, it is enabled only with the -fmodules-driver flag and serves solely diagnostic purposes. In the future, the scanner will help the driver determine whether to implicitly perform an explicit module build and will be enabled for any (modules-driver compatible) compilation with 2 or more inputs. Since the scanner adds very little overhead, we are also exploring enabling it for compilations with only a single input. This approach could allow us to detect `import std` usage in a single-file compilation, which would then activate the modules driver. For performance measurements, see https://github.com/naveen-seth/llvm-dev-cxx-modules-check-benchmark. RFC: https://discourse.llvm.org/t/rfc-modules-support-simple-c-20-modules-use-from-the-clang-driver-without-a-build-system
1 parent df7db44 commit 31d39cc

File tree

6 files changed

+438
-0
lines changed

6 files changed

+438
-0
lines changed

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,19 @@ def err_drv_reduced_module_output_overrided : Warning<
577577
"please consider use '-fmodule-output=' to specify the output file for reduced BMI explicitly">,
578578
InGroup<DiagGroup<"reduced-bmi-output-overrided">>;
579579

580+
def remark_fmodules_driver_enabled : Remark<
581+
"support for explicit module builds enabled (experimental)">,
582+
InGroup<ModulesDriver>;
583+
def remark_found_cxx20_module_usage : Remark<
584+
"found C++20 module usage in file '%0'">,
585+
InGroup<ModulesDriver>;
586+
def remark_performing_explicit_module_build : Remark<
587+
"performing explicit module build">,
588+
InGroup<ModulesDriver>;
589+
def warn_modules_driver_unsupported_standard : Warning<
590+
"'-fmodules-driver' is not supported before C++20">,
591+
InGroup<ModulesDriver>;
592+
580593
def warn_drv_delayed_template_parsing_after_cxx20 : Warning<
581594
"-fdelayed-template-parsing is deprecated after C++20">,
582595
InGroup<DiagGroup<"delayed-template-parsing-in-cxx20">>;

clang/include/clang/Basic/DiagnosticGroups.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,7 @@ def ModuleConflict : DiagGroup<"module-conflict">;
625625
def ModuleFileExtension : DiagGroup<"module-file-extension">;
626626
def ModuleIncludeDirectiveTranslation : DiagGroup<"module-include-translation">;
627627
def ModuleMap : DiagGroup<"module-map">;
628+
def ModulesDriver : DiagGroup<"modules-driver">;
628629
def RoundTripCC1Args : DiagGroup<"round-trip-cc1-args">;
629630
def NewlineEOF : DiagGroup<"newline-eof">;
630631
def Nullability : DiagGroup<"nullability">;

clang/include/clang/Driver/Driver.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -505,6 +505,8 @@ class Driver {
505505

506506
/// BuildActions - Construct the list of actions to perform for the
507507
/// given arguments, which are only done for a single architecture.
508+
/// If the compilation is an explicit module build, delegates to
509+
/// BuildExplicitModuleBuildActions. Otherwise, uses BuildDefaultActions.
508510
///
509511
/// \param C - The compilation that is being built.
510512
/// \param Args - The input arguments.
@@ -790,6 +792,35 @@ class Driver {
790792
/// compilation based on which -f(no-)?lto(=.*)? option occurs last.
791793
void setLTOMode(const llvm::opt::ArgList &Args);
792794

795+
/// BuildDefaultActions - Constructs the list of actions to perform
796+
/// for the provided arguments, which are only done for a single architecture.
797+
///
798+
/// \param C - The compilation that is being built.
799+
/// \param Args - The input arguments.
800+
/// \param Actions - The list to store the resulting actions onto.
801+
void BuildDefaultActions(Compilation &C, llvm::opt::DerivedArgList &Args,
802+
const InputList &Inputs, ActionList &Actions) const;
803+
804+
/// BuildExplicitModuleBuildActions - Performs a dependency scan and
805+
/// constructs the list of actions to perform for dependency order and
806+
/// the provided arguments. This is only done for a single architecture.
807+
///
808+
/// \param C - The compilation that is being built.
809+
/// \param Args - The input arguments.
810+
/// \param Actions - The list to store the resulting actions onto.
811+
void BuildExplicitModuleBuildActions(Compilation &C,
812+
llvm::opt::DerivedArgList &Args,
813+
const InputList &Inputs,
814+
ActionList &Actions) const;
815+
816+
/// Scans the leading lines of the C++ source inputs to detect C++20 module
817+
/// usage.
818+
///
819+
/// \returns True if module usage is detected, false otherwise, or an error on
820+
/// read failure.
821+
llvm::ErrorOr<bool>
822+
ScanInputsForCXXModuleUsage(const InputList &Inputs) const;
823+
793824
/// Retrieves a ToolChain for a particular \p Target triple.
794825
///
795826
/// Will cache ToolChains for the life of the driver object, and create them

clang/include/clang/Driver/Options.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3240,6 +3240,13 @@ def modules_reduced_bmi : Flag<["-"], "fmodules-reduced-bmi">,
32403240
HelpText<"Generate the reduced BMI">,
32413241
MarshallingInfoFlag<FrontendOpts<"GenReducedBMI">>;
32423242

3243+
def fmodules_driver : Flag<["-"], "fmodules-driver">,
3244+
Group<f_Group>, Visibility<[ClangOption]>,
3245+
HelpText<"Enable support for explicit module builds from the driver (experimental)">;
3246+
def fno_modules_driver : Flag<["-"], "fno-modules-driver">,
3247+
Group<f_Group>, Visibility<[ClangOption]>,
3248+
HelpText<"Disable support for explicit module builds from the driver (experimental)">;
3249+
32433250
def experimental_modules_reduced_bmi : Flag<["-"], "fexperimental-modules-reduced-bmi">,
32443251
Group<f_Group>, Visibility<[ClangOption, CC1Option]>, Alias<modules_reduced_bmi>;
32453252

clang/lib/Driver/Driver.cpp

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
#include "ToolChains/WebAssembly.h"
5454
#include "ToolChains/XCore.h"
5555
#include "ToolChains/ZOS.h"
56+
#include "clang/Basic/CharInfo.h"
5657
#include "clang/Basic/DiagnosticDriver.h"
5758
#include "clang/Basic/TargetID.h"
5859
#include "clang/Basic/Version.h"
@@ -4285,6 +4286,13 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args,
42854286
YcArg = nullptr;
42864287
}
42874288

4289+
if (Args.hasArgNoClaim(options::OPT_fmodules_driver))
4290+
// TODO: Check against all incompatible -fmodules-driver arguments
4291+
if (!ModulesModeCXX20) {
4292+
Diag(diag::warn_modules_driver_unsupported_standard);
4293+
Args.eraseArg(options::OPT_fmodules_driver);
4294+
}
4295+
42884296
Arg *FinalPhaseArg;
42894297
phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg);
42904298

@@ -4403,6 +4411,177 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args,
44034411
}
44044412
}
44054413

4414+
namespace {
4415+
static void skipWhitespace(const char *&Ptr) {
4416+
while (isWhitespace(*Ptr))
4417+
++Ptr;
4418+
}
4419+
4420+
// Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n).
4421+
static unsigned isEOL(const char *Ptr) {
4422+
if (*Ptr == '\0')
4423+
return 0;
4424+
if (*(Ptr + 1) != '\0' && isVerticalWhitespace(Ptr[0]) &&
4425+
isVerticalWhitespace(Ptr[1]) && Ptr[0] != Ptr[1])
4426+
return 2;
4427+
return !!isVerticalWhitespace(Ptr[0]);
4428+
}
4429+
4430+
static void skipLine(const char *&Ptr) {
4431+
for (;;) {
4432+
char LastNonWhitespace = ' ';
4433+
while (!isVerticalWhitespace(*Ptr) && *Ptr != '\0') {
4434+
if (!isHorizontalWhitespace(*Ptr))
4435+
LastNonWhitespace = *Ptr;
4436+
++Ptr;
4437+
}
4438+
4439+
const unsigned Len = isEOL(Ptr);
4440+
if (!Len)
4441+
return;
4442+
4443+
Ptr += Len;
4444+
if (LastNonWhitespace != '\\')
4445+
break;
4446+
}
4447+
}
4448+
4449+
// Returns the length of a line splice sequence (including trailing
4450+
// whitespace), or 0 if no line splice is found.
4451+
static unsigned isLineSplice(const char *Start) {
4452+
if (*Start != '\\')
4453+
return 0;
4454+
4455+
const char *Ptr = Start + 1;
4456+
while (isHorizontalWhitespace(*Ptr))
4457+
++Ptr;
4458+
4459+
if (unsigned Len = isEOL(Ptr))
4460+
return Ptr - Start + Len;
4461+
return 0;
4462+
}
4463+
4464+
static bool trySkipLineSplice(const char *&Ptr) {
4465+
if (unsigned Len = isLineSplice(Ptr); Len) {
4466+
Ptr += Len;
4467+
return true;
4468+
}
4469+
return false;
4470+
}
4471+
4472+
// If Ptr is positioned at '#', consumes it together with the rest of the
// (possibly backslash-continued) line and returns true. Otherwise leaves Ptr
// untouched and returns false.
static bool trySkipDirective(const char *&Ptr) {
  const bool IsDirective = *Ptr == '#';
  if (IsDirective) {
    ++Ptr; // '#'
    skipLine(Ptr);
  }
  return IsDirective;
}
4480+
4481+
// If Ptr is positioned at "//", consumes the comment up to and including the
// end of its (possibly backslash-continued) line and returns true. Otherwise
// leaves Ptr untouched and returns false.
static bool trySkipLineComment(const char *&Ptr) {
  const bool StartsLineComment = Ptr[0] == '/' && Ptr[1] == '/';
  if (!StartsLineComment)
    return false;

  Ptr += 2; // '//'
  skipLine(Ptr);
  return true;
}
4489+
4490+
// If Ptr is positioned at "/*", advances Ptr past the matching "*/" and
// returns true. An unterminated comment also returns true, with Ptr left on
// the terminating '\0'. If Ptr is not at "/*", it is left untouched and false
// is returned.
static bool trySkipBlockComment(const char *&Ptr) {
  const bool StartsBlockComment = Ptr[0] == '/' && Ptr[1] == '*';
  if (!StartsBlockComment)
    return false;

  // Step over the opener before searching so that "/*/" is not mistaken for a
  // complete comment.
  for (Ptr += 2; *Ptr != '\0'; ++Ptr) {
    if (Ptr[0] == '*' && Ptr[1] == '/') {
      Ptr += 2; // '*/'
      break;
    }
  }
  return true;
}
4504+
4505+
static bool trySkipComment(const char *&Ptr) {
4506+
return trySkipLineComment(Ptr) || trySkipBlockComment(Ptr);
4507+
}
4508+
4509+
// Skipps over comments and (non-module) directives
4510+
static void skipToRelevantCXXModuleText(const char *&Ptr) {
4511+
while (*Ptr != '\0') {
4512+
skipWhitespace(Ptr);
4513+
if (trySkipComment(Ptr) || trySkipDirective(Ptr) || trySkipLineSplice(Ptr))
4514+
continue;
4515+
break; // Found relevant text!
4516+
}
4517+
}
4518+
4519+
// Reports whether the first relevant text in Buffer (i.e. the first text after
// leading whitespace, comments, '#' directives and line splices) is one of the
// keywords 'module', 'import' or 'export', followed by something that properly
// ends the keyword.
//
// Scanning stops at the first '\0' character encountered.
//
// \returns true if such a keyword is found, false otherwise.
static bool scanBufferForCXXModuleUsage(const llvm::MemoryBuffer &Buffer) {
  const char *Ptr = Buffer.getBufferStart();
  skipToRelevantCXXModuleText(Ptr);

  // Check if buffer has enough bytes left to check for the module-related
  // declaration fragment we want to check without making potentially
  // memory-mapped buffer load unnecessary pages.
  constexpr int MinKeywordLength = 6;
  const char *Begin = Ptr;
  for (int i = 0; i < MinKeywordLength; ++i) {
    if (*Ptr == '\0')
      return false;
    ++Ptr;
  }
  StringRef Text(Begin, MinKeywordLength);

  const bool IsGlobalModule = Text.starts_with("module");
  if (!IsGlobalModule && !Text.starts_with("import") &&
      !Text.starts_with("export"))
    return false;

  // Ensure the keyword has a proper ending and isn't part of an identifier
  // or namespace. For this we might have to skip comments and line
  // continuations.
  while (*Ptr != '\0') {
    // Whitespace ends the keyword; ';' may directly follow 'module' to
    // introduce the global module fragment.
    if (isWhitespace(*Ptr) || (IsGlobalModule && *Ptr == ';'))
      return true;
    // A comment is replaced by a single space during translation, so it ends
    // the keyword exactly like whitespace does (e.g. 'import/**/std;').
    if (trySkipComment(Ptr))
      return true;
    // A line splice does not end the keyword: the token may continue on the
    // next physical line, so keep looking.
    if (trySkipLineSplice(Ptr))
      continue;
    return false;
  }

  return false;
}
4553+
4554+
// Returns true if at least one entry in Inputs has the input type
// types::TY_CXXModule.
static bool hasCXXModuleInputType(const Driver::InputList &Inputs) {
  for (const auto &Input : Inputs) {
    if (Input.first == types::TY_CXXModule)
      return true;
  }
  return false;
}
4561+
4562+
} // anonymous namespace
4563+
4564+
// Scans every input whose type satisfies types::isCXX() for C++20 module
// usage. Stops at the first input in which usage is detected, emits
// remark_found_cxx20_module_usage naming that file, and returns true. Returns
// false if no scanned input uses modules, or the error code reported by the
// VFS if an input's buffer cannot be obtained.
llvm::ErrorOr<bool>
Driver::ScanInputsForCXXModuleUsage(const InputList &Inputs) const {
  for (const auto &Input : Inputs) {
    if (!types::isCXX(Input.first))
      continue;

    const StringRef Path = Input.second->getSpelling();
    auto BufferOrErr = VFS->getBufferForFile(Path);
    if (std::error_code EC = BufferOrErr.getError())
      return EC;

    if (!scanBufferForCXXModuleUsage(**BufferOrErr))
      continue;

    Diags.Report(diag::remark_found_cxx20_module_usage) << Path;
    return true;
  }

  return false;
}
4584+
44064585
void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
44074586
const InputList &Inputs, ActionList &Actions) const {
44084587
llvm::PrettyStackTraceString CrashInfo("Building compilation actions");
@@ -4414,6 +4593,34 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
44144593

44154594
handleArguments(C, Args, Inputs, Actions);
44164595

4596+
if (Args.hasFlag(options::OPT_fmodules_driver,
4597+
options::OPT_fno_modules_driver, false)) {
4598+
Diags.Report(diag::remark_fmodules_driver_enabled);
4599+
// TODO: Move the logic for implicitly enabling explicit-module-builds out
4600+
// of -fmodules-driver once it is no longer experimental.
4601+
// Currently, this serves diagnostic purposes only.
4602+
bool UsesCXXModules = hasCXXModuleInputType(Inputs);
4603+
if (!UsesCXXModules) {
4604+
const auto ErrOrScanResult = ScanInputsForCXXModuleUsage(Inputs);
4605+
if (!ErrOrScanResult) {
4606+
Diags.Report(diag::err_cannot_open_file)
4607+
<< ErrOrScanResult.getError().message();
4608+
return;
4609+
}
4610+
UsesCXXModules = *ErrOrScanResult;
4611+
}
4612+
if (UsesCXXModules)
4613+
BuildExplicitModuleBuildActions(C, Args, Inputs, Actions);
4614+
return;
4615+
}
4616+
4617+
Driver::BuildDefaultActions(C, Args, Inputs, Actions);
4618+
}
4619+
4620+
void Driver::BuildDefaultActions(Compilation &C, DerivedArgList &Args,
4621+
const InputList &Inputs,
4622+
ActionList &Actions) const {
4623+
44174624
bool UseNewOffloadingDriver =
44184625
C.isOffloadingHostKind(Action::OFK_OpenMP) ||
44194626
C.isOffloadingHostKind(Action::OFK_SYCL) ||
@@ -4693,6 +4900,14 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
46934900
Args.ClaimAllArgs(options::OPT_cl_ignored_Group);
46944901
}
46954902

4903+
// Currently only emits remark_performing_explicit_module_build; none of the
// parameters are used and nothing is added to Actions.
void Driver::BuildExplicitModuleBuildActions(Compilation &C,
                                             llvm::opt::DerivedArgList &Args,
                                             const InputList &Inputs,
                                             ActionList &Actions) const {
  Diags.Report(diag::remark_performing_explicit_module_build);
}
4910+
46964911
/// Returns the canonical name for the offloading architecture when using a HIP
46974912
/// or CUDA architecture.
46984913
static StringRef getCanonicalArchString(Compilation &C,

0 commit comments

Comments
 (0)