diff --git a/clang/examples/AnnotateFunctions/AnnotateFunctions.cpp b/clang/examples/AnnotateFunctions/AnnotateFunctions.cpp index d872020c2d8a3..22a3eb97f938b 100644 --- a/clang/examples/AnnotateFunctions/AnnotateFunctions.cpp +++ b/clang/examples/AnnotateFunctions/AnnotateFunctions.cpp @@ -65,7 +65,7 @@ class PragmaAnnotateHandler : public PragmaHandler { Token Tok; PP.LexUnexpandedToken(Tok); if (Tok.isNot(tok::eod)) - PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma"; + PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "#pragma"; if (HandledDecl) { DiagnosticsEngine &D = PP.getDiagnostics(); diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 723f5d48b4f5f..b2df3e7929434 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -466,6 +466,9 @@ def err_pp_embed_device_file : Error< def ext_pp_extra_tokens_at_eol : ExtWarn< "extra tokens at end of #%0 directive">, InGroup; +def ext_pp_extra_tokens_at_module_directive_eol + : ExtWarn<"extra tokens at end of '%0' directive">, + InGroup; def ext_pp_comma_expr : Extension<"comma operator in operand of #if">; def ext_pp_bad_vaargs_use : Extension< @@ -495,8 +498,8 @@ def warn_cxx98_compat_variadic_macro : Warning< InGroup, DefaultIgnore; def ext_named_variadic_macro : Extension< "named variadic macros are a GNU extension">, InGroup; -def err_embedded_directive : Error< - "embedding a #%0 directive within macro arguments is not supported">; +def err_embedded_directive : Error<"embedding a %select{#|C++ }0%1 directive " + "within macro arguments is not supported">; def ext_embedded_directive : Extension< "embedding a directive within macro arguments has undefined behavior">, InGroup>; @@ -983,6 +986,19 @@ def warn_module_conflict : Warning< InGroup; // C++20 modules +def err_pp_expected_module_name_or_header_name + : Error<"expected module name or header name">; +def err_pp_expected_semi_after_module_or_import + : Error<"'%select{module|import}0' directive must end with a ';' on the " + "same line">; +def err_module_decl_in_header + : Error<"module declaration must not come from an #include directive">; +def err_pp_cond_span_module_decl + : Error<"preprocessor conditionals shall not span a module declaration">; +def err_pp_module_expected_ident + : Error<"expected a module name after '%select{module|import}0'">; +def err_pp_unsupported_module_partition + : Error<"module partitions are only supported for C++20 onwards">; def err_header_import_semi_in_macro : Error< "semicolon terminating header import declaration cannot be produced " "by a macro">; diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 6c30da376dafb..54a2fc169a5b2 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1762,38 +1762,36 @@ def ext_bit_int : Extension< } // end of Parse Issue category. let CategoryName = "Modules Issue" in { -def err_unexpected_module_decl : Error< - "module declaration can only appear at the top level">; -def err_module_expected_ident : Error< - "expected a module name after '%select{module|import}0'">; -def err_attribute_not_module_attr : Error< - "%0 attribute cannot be applied to a module">; -def err_keyword_not_module_attr : Error< - "%0 cannot be applied to a module">; -def err_attribute_not_import_attr : Error< - "%0 attribute cannot be applied to a module import">; -def err_keyword_not_import_attr : Error< - "%0 cannot be applied to a module import">; -def err_module_expected_semi : Error< - "expected ';' after module name">; -def err_global_module_introducer_not_at_start : Error< - "'module;' introducing a global module fragment can appear only " - "at the start of the translation unit">; -def err_module_fragment_exported : Error< - "%select{global|private}0 module fragment cannot be exported">; -def err_private_module_fragment_expected_semi : Error< - "expected ';' after private module fragment declaration">; -def err_missing_before_module_end : Error<"expected %0 at end of module">; -def err_unsupported_module_partition : Error< - "module partitions are only supported for C++20 onwards">; -def err_import_not_allowed_here : Error< - "imports must immediately follow the module declaration">; -def err_partition_import_outside_module : Error< - "module partition imports must be within a module purview">; -def err_import_in_wrong_fragment : Error< - "module%select{| partition}0 imports cannot be in the %select{global|private}1 module fragment">; - -def err_export_empty : Error<"export declaration cannot be empty">; + def err_unexpected_module_import_decl + : Error<"%select{module|import}0 declaration can only appear at the top " + "level">; + def err_module_expected_ident + : Error<"expected a module name after '%select{module|import}0'">; + def err_attribute_not_module_attr + : Error<"%0 attribute cannot be applied to a module">; + def err_keyword_not_module_attr : Error<"%0 cannot be applied to a module">; + def err_attribute_not_import_attr + : Error<"%0 attribute cannot be applied to a module import">; + def err_keyword_not_import_attr + : Error<"%0 cannot be applied to a module import">; + def err_module_expected_semi : Error<"expected ';' after module name">; + def err_global_module_introducer_not_at_start + : Error<"'module;' introducing a global module fragment can appear only " + "at the start of the translation unit">; + def err_module_fragment_exported + : Error<"%select{global|private}0 module fragment cannot be exported">; + def err_private_module_fragment_expected_semi + : Error<"expected ';' after private module fragment declaration">; + def err_missing_before_module_end : Error<"expected %0 at end of module">; + def err_import_not_allowed_here + : Error<"imports must immediately follow the module declaration">; + def err_partition_import_outside_module + : Error<"module partition imports must be within a module purview">; + def err_import_in_wrong_fragment + : Error<"module%select{| partition}0 imports cannot be in the " + "%select{global|private}1 module fragment">; + + def err_export_empty : Error<"export declaration cannot be empty">; } let CategoryName = "Generics Issue" in { diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h index e4044bcdfcc60..488c1bf9599a0 100644 --- a/clang/include/clang/Basic/IdentifierTable.h +++ b/clang/include/clang/Basic/IdentifierTable.h @@ -179,6 +179,10 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { LLVM_PREFERRED_TYPE(bool) unsigned IsModulesImport : 1; + // True if this is the 'module' contextual keyword. + LLVM_PREFERRED_TYPE(bool) + unsigned IsModulesDecl : 1; + // True if this is a mangled OpenMP variant name. LLVM_PREFERRED_TYPE(bool) unsigned IsMangledOpenMPVariantName : 1; @@ -215,8 +219,9 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false), IsModulesImport(false), - IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false), - IsRestrictExpansion(false), IsFinal(false), IsKeywordInCpp(false) {} + IsModulesDecl(false), IsMangledOpenMPVariantName(false), + IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false), + IsKeywordInCpp(false) {} public: IdentifierInfo(const IdentifierInfo &) = delete; @@ -528,6 +533,18 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { RecomputeNeedsHandleIdentifier(); } + /// Determine whether this is the contextual keyword \c module. + bool isModulesDeclaration() const { return IsModulesDecl; } + + /// Set whether this identifier is the contextual keyword \c module. + void setModulesDeclaration(bool I) { + IsModulesDecl = I; + if (I) + NeedsHandleIdentifier = true; + else + RecomputeNeedsHandleIdentifier(); + } + /// Determine whether this is the mangled name of an OpenMP variant. bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; } @@ -745,10 +762,11 @@ class IdentifierTable { // contents. II->Entry = &Entry; - // If this is the 'import' contextual keyword, mark it as such. + // If this is the 'import' or 'module' contextual keyword, mark it as such. if (Name == "import") II->setModulesImport(true); - + else if (Name == "module") + II->setModulesDeclaration(true); return *II; } diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 94e72fea56a68..7750c84dbef78 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -133,6 +133,9 @@ PPKEYWORD(pragma) // C23 & C++26 #embed PPKEYWORD(embed) +// C++20 Module Directive +PPKEYWORD(module) + // GNU Extensions. PPKEYWORD(import) PPKEYWORD(include_next) @@ -1023,6 +1026,9 @@ ANNOTATION(module_include) ANNOTATION(module_begin) ANNOTATION(module_end) +// Annotations for C++, Clang and Objective-C named modules. +ANNOTATION(module_name) + // Annotation for a header_name token that has been looked up and transformed // into the name of a header unit. ANNOTATION(header_unit) diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h index 0ae490f0e8073..112d3b00160fd 100644 --- a/clang/include/clang/Frontend/CompilerInstance.h +++ b/clang/include/clang/Frontend/CompilerInstance.h @@ -863,7 +863,7 @@ class CompilerInstance : public ModuleLoader { /// load it. ModuleLoadResult findOrCompileModuleAndReadAST(StringRef ModuleName, SourceLocation ImportLoc, - SourceLocation ModuleNameLoc, + SourceRange ModuleNameRange, bool IsInclusionDirective); /// Creates a \c CompilerInstance for compiling a module. diff --git a/clang/include/clang/Lex/CodeCompletionHandler.h b/clang/include/clang/Lex/CodeCompletionHandler.h index bd3e05a36bb33..2ef29743415ae 100644 --- a/clang/include/clang/Lex/CodeCompletionHandler.h +++ b/clang/include/clang/Lex/CodeCompletionHandler.h @@ -13,12 +13,15 @@ #ifndef LLVM_CLANG_LEX_CODECOMPLETIONHANDLER_H #define LLVM_CLANG_LEX_CODECOMPLETIONHANDLER_H +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/SourceLocation.h" #include "llvm/ADT/StringRef.h" namespace clang { class IdentifierInfo; class MacroInfo; +using ModuleIdPath = ArrayRef; /// Callback handler that receives notifications when performing code /// completion within the preprocessor. @@ -70,6 +73,11 @@ class CodeCompletionHandler { /// file where we expect natural language, e.g., a comment, string, or /// \#error directive. virtual void CodeCompleteNaturalLanguage() { } + + /// Callback invoked when performing code completion inside the module name + /// part of an import directive. + virtual void CodeCompleteModuleImport(SourceLocation ImportLoc, + ModuleIdPath Path) {} }; } diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 4d82e20e5d4f3..f6fc425f004d1 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -48,6 +48,7 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Registry.h" +#include "llvm/Support/TrailingObjects.h" #include #include #include @@ -82,6 +83,7 @@ class PreprocessorLexer; class PreprocessorOptions; class ScratchBuffer; class TargetInfo; +class ModuleNameLoc; namespace Builtin { class Context; @@ -135,6 +137,24 @@ struct CXXStandardLibraryVersionInfo { std::uint64_t Version; }; +class ExportContextualKeywordInfo { + Token ExportTok; + bool AtPhysicalStartOfLine = false; + +public: + ExportContextualKeywordInfo() = default; + ExportContextualKeywordInfo(const Token &Tok, bool AtPhysicalStartOfLine) + : ExportTok(Tok), AtPhysicalStartOfLine(AtPhysicalStartOfLine) {} + + bool isValid() const { return ExportTok.is(tok::kw_export); } + bool isAtPhysicalStartOfLine() const { return AtPhysicalStartOfLine; } + Token getExportTok() const { return ExportTok; } + void reset() { + ExportTok.startToken(); + AtPhysicalStartOfLine = false; + } +}; + /// Engages in a tight little dance with the lexer to efficiently /// preprocess tokens. /// @@ -338,8 +358,9 @@ class Preprocessor { /// lexed, if any. SourceLocation ModuleImportLoc; - /// The import path for named module that we're currently processing. - SmallVector NamedModuleImportPath; + /// The source location of the \c module contextual keyword we just + /// lexed, if any. + SourceLocation ModuleDeclLoc; llvm::DenseMap> CheckPoints; unsigned CheckPointCounter = 0; @@ -350,6 +371,15 @@ class Preprocessor { /// Whether the last token we lexed was an '@'. bool LastTokenWasAt = false; + /// Whether we're importing a standard C++20 named Modules. + bool ImportingCXXNamedModules = false; + + /// Whether we're declaring a standard C++20 named Modules. + bool DeclaringCXXNamedModules = false; + + /// Whether the last token we lexed was an 'export' keyword. + ExportContextualKeywordInfo LastTokenWasExportKeyword; + /// First pp-token source location in current translation unit. SourceLocation FirstPPTokenLoc; @@ -556,12 +586,7 @@ class Preprocessor { reset(); } - void handleIdentifier(IdentifierInfo *Identifier) { - if (isModuleCandidate() && Identifier) - Name += Identifier->getName().str(); - else if (!isNamedModule()) - reset(); - } + void handleModuleName(ModuleNameLoc *Path); void handleColon() { if (isModuleCandidate()) @@ -570,13 +595,6 @@ class Preprocessor { reset(); } - void handlePeriod() { - if (isModuleCandidate()) - Name += "."; - else if (!isNamedModule()) - reset(); - } - void handleSemi() { if (!Name.empty() && isModuleCandidate()) { if (State == InterfaceCandidate) @@ -631,10 +649,6 @@ class Preprocessor { ModuleDeclSeq ModuleDeclState; - /// Whether the module import expects an identifier next. Otherwise, - /// it expects a '.' or ';'. - bool ModuleImportExpectsIdentifier = false; - /// The identifier and source location of the currently-active /// \#pragma clang arc_cf_code_audited begin. IdentifierLoc PragmaARCCFCodeAuditedInfo; @@ -1768,6 +1782,21 @@ class Preprocessor { /// Lex the parameters for an #embed directive, returns nullopt on error. std::optional LexEmbedParameters(Token &Current, bool ForHasEmbed); + bool LexModuleNameContinue(Token &Tok, SourceLocation UseLoc, + SmallVectorImpl &Path, + bool AllowMacroExpansion = true); + void EnterModuleSuffixTokenStream(ArrayRef Toks); + void HandleCXXImportDirective(Token Import); + void HandleCXXModuleDirective(Token Module); + + /// Callback invoked when the lexer sees one of export, import or module token + /// at the start of a line. + /// + /// This consumes the import, module directive, modifies the + /// lexer/preprocessor state, and advances the lexer(s) so that the next token + /// read is the correct one. + bool HandleModuleContextualKeyword(Token &Result, + bool TokAtPhysicalStartOfLine); /// Get the start location of the first pp-token in main file. SourceLocation getMainFileFirstPPTokenLoc() const { @@ -2391,7 +2420,7 @@ class Preprocessor { /// /// \return The location of the end of the directive (the terminating /// newline). - SourceLocation CheckEndOfDirective(const char *DirType, + SourceLocation CheckEndOfDirective(StringRef DirType, bool EnableMacros = false); /// Read and discard all tokens remaining on the current line until @@ -2473,11 +2502,12 @@ class Preprocessor { } /// If we're importing a standard C++20 Named Modules. - bool isInImportingCXXNamedModules() const { - // NamedModuleImportPath will be non-empty only if we're importing - // Standard C++ named modules. - return !NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules && - !IsAtImport; + bool isImportingCXXNamedModules() const { + return getLangOpts().CPlusPlusModules && ImportingCXXNamedModules; + } + + bool isDeclaringCXXNamedModules() const { + return getLangOpts().CPlusPlusModules && DeclaringCXXNamedModules; } /// Allocate a new MacroInfo object with the provided SourceLocation. @@ -2710,6 +2740,10 @@ class Preprocessor { void removeCachedMacroExpandedTokensOfLastLexer(); + /// Peek the next token. If so, return the token, if not, this + /// method should have no observable side-effect on the lexed tokens. + std::optional peekNextPPToken(); + /// After reading "MACRO(", this method is invoked to read all of the formal /// arguments specified for the macro invocation. Returns null on error. MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI, @@ -3136,6 +3170,53 @@ struct EmbedAnnotationData { StringRef FileName; }; +/// Represents module name annotation data. +/// +/// module-name: +/// module-name-qualifier[opt] identifier +/// +/// partition-name: [C++20] +/// : module-name-qualifier[opt] identifier +/// +/// module-name-qualifier +/// module-name-qualifier[opt] identifier . +class ModuleNameLoc final + : llvm::TrailingObjects { + friend TrailingObjects; + unsigned NumIdentifierLocs; + + unsigned numTrailingObjects(OverloadToken) const { + return getNumIdentifierLocs(); + } + + ModuleNameLoc(ModuleIdPath Path) : NumIdentifierLocs(Path.size()) { + (void)llvm::copy(Path, getTrailingObjects()); + } + +public: + static std::string stringFromModuleIdPath(ModuleIdPath Path); + static ModuleNameLoc *Create(Preprocessor &PP, ModuleIdPath Path); + static Token CreateAnnotToken(Preprocessor &PP, ModuleIdPath Path); + unsigned getNumIdentifierLocs() const { return NumIdentifierLocs; } + ModuleIdPath getModuleIdPath() const { + return {getTrailingObjects(), getNumIdentifierLocs()}; + } + + SourceLocation getBeginLoc() const { + return getModuleIdPath().front().getLoc(); + } + SourceLocation getEndLoc() const { + auto &Last = getModuleIdPath().back(); + return Last.getLoc().getLocWithOffset( + Last.getIdentifierInfo()->getLength()); + } + SourceRange getRange() const { return {getBeginLoc(), getEndLoc()}; } + + std::string str() const; + void print(llvm::raw_ostream &OS) const; + void dump() const { print(llvm::errs()); } +}; + /// Registry of pragma handlers added by plugins using PragmaHandlerRegistry = llvm::Registry; diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h index fc43e72593b94..ded14b815067e 100644 --- a/clang/include/clang/Lex/Token.h +++ b/clang/include/clang/Lex/Token.h @@ -233,6 +233,9 @@ class Token { PtrData = const_cast(Ptr); } + template T getAnnotationValueAs() const { + return static_cast(getAnnotationValue()); + } void *getAnnotationValue() const { assert(isAnnotation() && "Used AnnotVal on non-annotation token"); return PtrData; @@ -291,6 +294,10 @@ class Token { /// Return the ObjC keyword kind. tok::ObjCKeywordKind getObjCKeywordID() const; + /// Return true if we have an C++20 Modules contextual keyword(export, import + /// or module). + bool isModuleContextualKeyword(bool AllowExport = true) const; + bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const; /// Return true if this token has trigraphs or escaped newlines in it. diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index a47e23ffbd357..0b3226ad9959e 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -1077,6 +1077,8 @@ class Parser : public CodeCompletionHandler { unsigned ArgumentIndex) override; void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled) override; void CodeCompleteNaturalLanguage() override; + void CodeCompleteModuleImport(SourceLocation ImportLoc, + ModuleIdPath Path) override; ///@} diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 9397546c8fc5d..d9fe68a08a55c 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -146,6 +146,7 @@ class MangleNumberingContext; typedef ArrayRef ModuleIdPath; class ModuleLoader; class MultiLevelTemplateArgumentList; +class ModuleNameLoc; struct NormalizedConstraint; class ObjCInterfaceDecl; class ObjCMethodDecl; @@ -9847,7 +9848,7 @@ class Sema final : public SemaBase { /// \param IsPartition If the name is for a partition. DeclResult ActOnModuleImport(SourceLocation StartLoc, SourceLocation ExportLoc, - SourceLocation ImportLoc, ModuleIdPath Path, + SourceLocation ImportLoc, ModuleNameLoc *PathLoc, bool IsPartition = false); DeclResult ActOnModuleImport(SourceLocation StartLoc, SourceLocation ExportLoc, diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index 4a2b77cd16bfc..8b740da54c305 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -349,8 +349,9 @@ void IdentifierTable::AddKeywords(const LangOptions &LangOpts) { if (LangOpts.IEEE128) AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this); - // Add the 'import' contextual keyword. + // Add the 'import' and 'module' contextual keyword. get("import").setModulesImport(true); + get("module").setModulesDeclaration(true); } /// Checks if the specified token kind represents a keyword in the @@ -482,6 +483,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { CASE( 6, 'd', 'f', define); CASE( 6, 'i', 'n', ifndef); CASE( 6, 'i', 'p', import); + CASE(6, 'm', 'd', module); CASE( 6, 'p', 'a', pragma); CASE( 7, 'd', 'f', defined); diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 09a66b652518f..99c6c8f48f856 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -1865,8 +1865,8 @@ static ModuleSource selectModuleSource( } ModuleLoadResult CompilerInstance::findOrCompileModuleAndReadAST( - StringRef ModuleName, SourceLocation ImportLoc, - SourceLocation ModuleNameLoc, bool IsInclusionDirective) { + StringRef ModuleName, SourceLocation ImportLoc, SourceRange ModuleNameRange, + bool IsInclusionDirective) { // Search for a module with the given name. HeaderSearch &HS = PP->getHeaderSearchInfo(); Module *M = @@ -1883,10 +1883,11 @@ ModuleLoadResult CompilerInstance::findOrCompileModuleAndReadAST( std::string ModuleFilename; ModuleSource Source = selectModuleSource(M, ModuleName, ModuleFilename, BuiltModules, HS); + SourceLocation ModuleNameLoc = ModuleNameRange.getBegin(); if (Source == MS_ModuleNotFound) { // We can't find a module, error out here. getDiagnostics().Report(ModuleNameLoc, diag::err_module_not_found) - << ModuleName << SourceRange(ImportLoc, ModuleNameLoc); + << ModuleName << ModuleNameRange; return nullptr; } if (ModuleFilename.empty()) { @@ -2072,8 +2073,11 @@ CompilerInstance::loadModule(SourceLocation ImportLoc, MM.cacheModuleLoad(*Path[0].getIdentifierInfo(), Module); } else { + SourceLocation ModuleNameEndLoc = Path.back().getLoc().getLocWithOffset( + Path.back().getIdentifierInfo()->getLength()); ModuleLoadResult Result = findOrCompileModuleAndReadAST( - ModuleName, ImportLoc, ModuleNameLoc, IsInclusionDirective); + ModuleName, ImportLoc, SourceRange{ModuleNameLoc, ModuleNameEndLoc}, + IsInclusionDirective); if (!Result.isNormal()) return Result; if (!Result) diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp index 9e046633328d7..68f736ddffd67 100644 --- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp +++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp @@ -758,7 +758,8 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok, if (Tok.is(tok::eof) || (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) && !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) && - !Tok.is(tok::annot_repl_input_end) && !Tok.is(tok::annot_embed))) + !Tok.is(tok::annot_repl_input_end) && !Tok.is(tok::annot_embed) && + !Tok.is(tok::annot_module_name))) return; // EmittedDirectiveOnThisLine takes priority over RequireSameLine. @@ -978,6 +979,11 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, *Callbacks->OS << static_cast(Byte); PrintComma = true; } + } else if (Tok.is(tok::annot_module_name)) { + Tok.getAnnotationValueAs()->print(*Callbacks->OS); + PP.Lex(Tok); + IsStartOfLine = true; + continue; } else if (Tok.isAnnotation()) { // Ignore annotation tokens created by pragmas - the pragmas themselves // will be reproduced in the preprocessed output. diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp index 1b6b16c561141..f43498444c8d5 100644 --- a/clang/lib/Lex/DependencyDirectivesScanner.cpp +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -517,21 +517,32 @@ bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First, First = Previous; return false; } - if (Tok.is(tok::eof)) + if (Tok.is(tok::eof) || (LangOpts.CPlusPlusModules && Tok.is(tok::eod))) return reportError( DirectiveLoc, diag::err_dep_source_scanner_missing_semi_after_at_import); if (Tok.is(tok::semi)) break; } + + // Skip extra tokens after semi in C++20 Modules directive. + bool IsCXXModules = Kind == DirectiveKind::cxx_export_import_decl || + Kind == DirectiveKind::cxx_export_module_decl || + Kind == DirectiveKind::cxx_import_decl || + Kind == DirectiveKind::cxx_module_decl; + if (IsCXXModules) + lexPPDirectiveBody(First, End); pushDirective(Kind); skipWhitespace(First, End); if (First == End) return false; - if (!isVerticalWhitespace(*First)) - return reportError( - DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import); - skipNewline(First, End); + if (!IsCXXModules) { + if (!isVerticalWhitespace(*First)) + return reportError( + DirectiveLoc, + diag::err_dep_source_scanner_unexpected_tokens_at_import); + skipNewline(First, End); + } return false; } @@ -866,10 +877,6 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) { if (*First == '@') return lexAt(First, End); - // Handle module directives for C++20 modules. - if (*First == 'i' || *First == 'e' || *First == 'm') - return lexModule(First, End); - if (*First == '_') { if (isNextIdentifierOrSkipLine("_Pragma", First, End)) return lex_Pragma(First, End); @@ -882,6 +889,26 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) { auto ScEx2 = make_scope_exit( [&]() { TheLexer.setParsingPreprocessorDirective(false); }); + // Since P1857R3, the standard handling C++ module/import as a directive: + // + // [cpp.pre]p1: + // A preprocessing directive consists of a sequence of preprocessing tokens + // that satisfies the following constraints: At the start of translation + // phase 4, the first preprocessing token in the sequence, referred to as a + // directive-introducing token, begins with the first character in the source + // file (optionally after whitespace containing no new-line characters) or + // follows whitespace containing at least one new-line character, and is + // - a # preprocessing token, or + // - an import preprocessing token immediately followed on the same logical + // source line by a header-name, <, identifier, string-literal, or : + // preprocessing token, or + // - a module preprocessing token immediately followed on the same logical + // source line by an identifier, :, or ; preprocessing token, or + // - an export preprocessing token immediately followed on the same logical + // source line by one of the two preceding forms. + if (*First == 'i' || *First == 'e' || *First == 'm') + return lexModule(First, End); + // Lex '#'. const dependency_directives_scan::Token &HashTok = lexToken(First, End); if (HashTok.is(tok::hashhash)) { diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index dfdba2317ee89..97945534d0ff5 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -73,6 +73,19 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const { return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword; } +/// Return true if we have an C++20 Modules contextual keyword(export, import +/// or module). +bool Token::isModuleContextualKeyword(bool AllowExport) const { + if (AllowExport && is(tok::kw_export)) + return true; + if (isOneOf(tok::kw_import, tok::kw_module)) + return true; + if (isNot(tok::identifier)) + return false; + const auto *II = getIdentifierInfo(); + return II->isModulesImport() || II->isModulesDeclaration(); +} + /// Determine whether the token kind starts a simple-type-specifier. bool Token::isSimpleTypeSpecifier(const LangOptions &LangOpts) const { switch (getKind()) { @@ -3738,6 +3751,7 @@ bool Lexer::Lex(Token &Result) { bool isRawLex = isLexingRawMode(); (void) isRawLex; bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine); + // (After the LexTokenInternal call, the lexer might be destroyed.) assert((returnedToken || !isRawLex) && "Raw lex must succeed"); return returnedToken; @@ -4031,11 +4045,17 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': /*'u'*/ case 'v': case 'w': case 'x': case 'y': case 'z': - case '_': + case '_': { // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); - return LexIdentifierContinue(Result, CurPtr); - + bool returnedToken = LexIdentifierContinue(Result, CurPtr); + if (returnedToken && Result.isModuleContextualKeyword() && + LangOpts.CPlusPlusModules && + PP->HandleModuleContextualKeyword(Result, TokAtPhysicalStartOfLine) && + !LexingRawMode && !Is_PragmaLexer) + goto HandleDirective; + return returnedToken; + } case '$': // $ in identifiers. if (LangOpts.DollarIdents) { if (!isLexingRawMode()) @@ -4518,8 +4538,8 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { HandleDirective: // We parsed a # character and it's the start of a preprocessing directive. - - FormTokenWithChars(Result, CurPtr, tok::hash); + if (!Result.isOneOf(tok::kw_import, tok::kw_module)) + FormTokenWithChars(Result, CurPtr, tok::hash); PP->HandleDirective(Result); if (PP->hadModuleLoaderFatalFailure()) @@ -4542,6 +4562,10 @@ const char *Lexer::convertDependencyDirectiveToken( Result.setKind(DDTok.Kind); Result.setFlag((Token::TokenFlags)DDTok.Flags); Result.setLength(DDTok.Length); + if (Result.is(tok::raw_identifier)) + Result.setRawIdentifierData(TokPtr); + else if (Result.isLiteral()) + Result.setLiteralData(TokPtr); BufferPtr = TokPtr + DDTok.Length; return TokPtr; } @@ -4596,15 +4620,19 @@ bool Lexer::LexDependencyDirectiveToken(Token &Result) { Result.setRawIdentifierData(TokPtr); if (!isLexingRawMode()) { const IdentifierInfo *II = PP->LookUpIdentifierInfo(Result); + if (Result.isModuleContextualKeyword() && + PP->HandleModuleContextualKeyword(Result, Result.isAtStartOfLine())) { + PP->HandleDirective(Result); + return false; + } if (II->isHandleIdentifierCase()) return PP->HandleIdentifier(Result); } return true; } - if (Result.isLiteral()) { - Result.setLiteralData(TokPtr); + if (Result.isLiteral()) return true; - } + if (Result.is(tok::colon)) { // Convert consecutive colons to 'tok::coloncolon'. if (*BufferPtr == ':') { diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index e6da19d24f1c5..97cbb80b30a5e 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -439,7 +439,7 @@ void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef, /// true, then we consider macros that expand to zero tokens as being ok. /// /// Returns the location of the end of the directive. -SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType, +SourceLocation Preprocessor::CheckEndOfDirective(StringRef DirType, bool EnableMacros) { Token Tmp; // Lex unexpanded tokens for most directives: macros might expand to zero @@ -466,7 +466,14 @@ SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType, if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) && !CurTokenLexer) Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//"); - Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint; + + unsigned DiagID = diag::ext_pp_extra_tokens_at_eol; + // C++20 import or module directive has no '#' prefix. + if (getLangOpts().CPlusPlusModules && + (DirType == "import" || DirType == "module")) + DiagID = diag::ext_pp_extra_tokens_at_module_directive_eol; + + Diag(Tmp, DiagID) << DirType << Hint; return DiscardUntilEndOfDirective().getEnd(); } @@ -1245,9 +1252,14 @@ void Preprocessor::HandleDirective(Token &Result) { // Save the '#' token in case we need to return it later. Token SavedHash = Result; + bool IsCXX20ImportOrModuleDirective = + getLangOpts().CPlusPlusModules && + Result.isModuleContextualKeyword(/*AllowExport=*/false); + // Read the next token, the directive flavor. This isn't expanded due to // C99 6.10.3p8. - LexUnexpandedToken(Result); + if (!IsCXX20ImportOrModuleDirective) + LexUnexpandedToken(Result); // C99 6.10.3p11: Is this preprocessor directive in macro invocation? e.g.: // #define A(x) #x @@ -1266,7 +1278,9 @@ void Preprocessor::HandleDirective(Token &Result) { case tok::pp___include_macros: case tok::pp_pragma: case tok::pp_embed: - Diag(Result, diag::err_embedded_directive) << II->getName(); + case tok::pp_module: + Diag(Result, diag::err_embedded_directive) + << IsCXX20ImportOrModuleDirective << II->getName(); Diag(*ArgMacro, diag::note_macro_expansion_here) << ArgMacro->getIdentifierInfo(); DiscardUntilEndOfDirective(); @@ -1357,9 +1371,12 @@ void Preprocessor::HandleDirective(Token &Result) { // C99 6.10.6 - Pragma Directive. case tok::pp_pragma: return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()}); - + case tok::pp_module: + return HandleCXXModuleDirective(Result); // GNU Extensions. case tok::pp_import: + if (IsCXX20ImportOrModuleDirective) + return HandleCXXImportDirective(Result); return HandleImportDirective(SavedHash.getLocation(), Result); case tok::pp_include_next: return HandleIncludeNextDirective(SavedHash.getLocation(), Result); @@ -4065,3 +4082,242 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok, StringRef(static_cast(Mem), OriginalFilename.size()); HandleEmbedDirectiveImpl(HashLoc, *Params, BinaryContents, FilenameToGo); } + +void Preprocessor::HandleCXXImportDirective(Token ImportTok) { + assert(getLangOpts().CPlusPlusModules && ImportTok.is(tok::kw_import)); + llvm::SaveAndRestore SaveImportingCXXModules( + this->ImportingCXXNamedModules); + ImportingCXXNamedModules = true; + + if (LastTokenWasExportKeyword.isValid()) + LastTokenWasExportKeyword.reset(); + + Token Tok; + if (LexHeaderName(Tok)) { + if (Tok.isNot(tok::eod)) + CheckEndOfDirective(ImportTok.getIdentifierInfo()->getName()); + return; + } + + SourceLocation UseLoc = ImportTok.getLocation(); + SmallVector DirToks{ImportTok}; + SmallVector Path; + bool ImportingHeader = false; + bool IsPartition = false; + std::string FlatName; + switch (Tok.getKind()) { + case tok::header_name: + ImportingHeader = true; + DirToks.push_back(Tok); + break; + case tok::colon: + IsPartition = true; + DirToks.push_back(Tok); + UseLoc = Tok.getLocation(); + Lex(Tok); + [[fallthrough]]; + case tok::identifier: { + if (LexModuleNameContinue(Tok, UseLoc, Path)) { + if (Tok.isNot(tok::eod)) + CheckEndOfDirective(ImportTok.getIdentifierInfo()->getName()); + return; + } + + bool IsValid = + (IsPartition && ModuleDeclState.isNamedModule()) || !IsPartition; + if (Callbacks && IsValid) { + if (IsPartition && ModuleDeclState.isNamedModule()) { + FlatName += ModuleDeclState.getPrimaryName(); + FlatName += ":"; + } + + FlatName += ModuleNameLoc::stringFromModuleIdPath(Path); + SourceLocation StartLoc = IsPartition ? UseLoc : Path[0].getLoc(); + IdentifierLoc FlatNameLoc(StartLoc, getIdentifierInfo(FlatName)); + + // We don't/shouldn't load the standard c++20 modules when preprocessing. + // so the imported module is nullptr. + Callbacks->moduleImport(ImportTok.getLocation(), + ModuleIdPath(FlatNameLoc), + /*Imported=*/nullptr); + } + DirToks.push_back(ModuleNameLoc::CreateAnnotToken(*this, Path)); + DirToks.push_back(Tok); + break; + } + default: + Diag(ImportTok, diag::err_pp_expected_module_name_or_header_name); + break; + } + + // Consume the pp-import-suffix and expand any macros in it now, if we're not + // at the semicolon already. + if (!DirToks.back().isOneOf(tok::semi, tok::eod)) + CollectPpImportSuffix(DirToks); + + // This is not a pp-import after all. + if (DirToks.back().isNot(tok::semi)) { + Diag(DirToks.back(), diag::err_pp_expected_semi_after_module_or_import) + << /*IsImport*/ true + << FixItHint::CreateInsertion(DirToks.back().getLocation(), + tok::getPunctuatorSpelling(tok::semi)); + return; + } + + if (DirToks.back().isNot(tok::eod)) + CheckEndOfDirective(ImportTok.getIdentifierInfo()->getName()); + else + DirToks.pop_back(); + + // C++2a [cpp.module]p1: + // The ';' preprocessing-token terminating a pp-import shall not have + // been produced by macro replacement. + SourceLocation SemiLoc = DirToks.back().getLocation(); + if (SemiLoc.isMacroID()) + Diag(SemiLoc, diag::err_header_import_semi_in_macro); + + if (ImportingHeader) { + auto Action = HandleHeaderIncludeOrImport( + /*HashLoc*/ SourceLocation(), ImportTok, Tok, SemiLoc); + switch (Action.Kind) { + case ImportAction::None: + break; + + case ImportAction::ModuleBegin: + // Let the parser know we're textually entering the module. + DirToks.emplace_back(); + DirToks.back().startToken(); + DirToks.back().setKind(tok::annot_module_begin); + DirToks.back().setLocation(SemiLoc); + DirToks.back().setAnnotationEndLoc(SemiLoc); + DirToks.back().setAnnotationValue(Action.ModuleForHeader); + [[fallthrough]]; + + case ImportAction::ModuleImport: + case ImportAction::HeaderUnitImport: + case ImportAction::SkippedModuleImport: + // We chose to import (or textually enter) the file. Convert the + // header-name token into a header unit annotation token. + DirToks[1].setKind(tok::annot_header_unit); + DirToks[1].setAnnotationEndLoc(DirToks[0].getLocation()); + DirToks[1].setAnnotationValue(Action.ModuleForHeader); + // FIXME: Call the moduleImport callback? + break; + case ImportAction::Failure: + assert(TheModuleLoader.HadFatalFailure && + "This should be an early exit only to a fatal error"); + CurLexer->cutOffLexing(); + return; + } + } + + EnterModuleSuffixTokenStream(DirToks); +} + +void Preprocessor::HandleCXXModuleDirective(Token ModuleTok) { + assert(getLangOpts().CPlusPlusModules && ModuleTok.is(tok::kw_module)); + Token Introducer = ModuleTok; + if (LastTokenWasExportKeyword.isValid()) { + Introducer = LastTokenWasExportKeyword.getExportTok(); + LastTokenWasExportKeyword.reset(); + } + + SourceLocation StartLoc = Introducer.getLocation(); + if (!IncludeMacroStack.empty()) { + SourceLocation End = DiscardUntilEndOfDirective().getEnd(); + Diag(StartLoc, diag::err_module_decl_in_header) + << SourceRange(StartLoc, End); + return; + } + + if (CurPPLexer->getConditionalStackDepth() != 0) { + SourceLocation End = DiscardUntilEndOfDirective().getEnd(); + Diag(StartLoc, diag::err_pp_cond_span_module_decl) + << SourceRange(StartLoc, End); + return; + } + + Token Tok; + SourceLocation UseLoc = ModuleTok.getLocation(); + SmallVector DirToks{ModuleTok}; + SmallVector Path, Partition; + LexUnexpandedToken(Tok); + + switch (Tok.getKind()) { + // Global Module Fragment. + case tok::semi: + DirToks.push_back(Tok); + break; + case tok::colon: + DirToks.push_back(Tok); + LexUnexpandedToken(Tok); + if (Tok.isNot(tok::kw_private)) { + Diag(DirToks.back().getLocation(), diag::err_pp_module_expected_ident) + << /*IsImport=*/false + << FixItHint::CreateReplacement( + {Tok.getLocation(), Tok.getEndLoc()}, + tok::getKeywordSpelling(tok::kw_private)); + return; + } + DirToks.push_back(Tok); + break; + case tok::identifier: { + // C++ [cpp.module]p3: Any preprocessing tokens after the module + // preprocessing token in the module directive are processed just as in + // normal text. + // + // P3034R1 Module Declarations Shouldn’t be Macros. + if (LexModuleNameContinue(Tok, UseLoc, Path, + /*AllowMacroExpansion=*/false)) { + if (Tok.isNot(tok::eod)) + CheckEndOfDirective(ModuleTok.getIdentifierInfo()->getName()); + return; + } + + DirToks.push_back(ModuleNameLoc::CreateAnnotToken(*this, Path)); + + // C++20 [cpp.module]p + // The pp-tokens, if any, of a pp-module shall be of the form: + // pp-module-name pp-module-partition[opt] pp-tokens[opt] + if (Tok.is(tok::colon)) { + DirToks.push_back(Tok); + LexUnexpandedToken(Tok); + if (LexModuleNameContinue(Tok, UseLoc, Partition)) { + if (Tok.isNot(tok::eod)) + CheckEndOfDirective(ModuleTok.getIdentifierInfo()->getName()); + return; + } + DirToks.push_back(ModuleNameLoc::CreateAnnotToken(*this, Partition)); + } + DirToks.push_back(Tok); + break; + } + default: + break; + ; + } + + // Consume the pp-import-suffix and expand any macros in it now, if we're not + // at the semicolon already. + SourceLocation End = DirToks.back().getLocation(); + if (!DirToks.back().isOneOf(tok::semi, tok::eod)) { + CollectPpImportSuffix(DirToks); + End = DirToks.back().getLocation(); + } + + // This is not a pp-import after all. + if (DirToks.back().isNot(tok::semi)) { + Diag(DirToks.back(), diag::err_pp_expected_semi_after_module_or_import) + << /*IsImport*/ false + << FixItHint::CreateInsertion(DirToks.back().getLocation(), + tok::getPunctuatorSpelling(tok::semi)); + return; + } + + if (DirToks.back().isNot(tok::eod)) + End = CheckEndOfDirective(ModuleTok.getIdentifierInfo()->getName()); + else + End = DirToks.pop_back_val().getLocation(); + + EnterModuleSuffixTokenStream(DirToks); +} diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index bcd3ea60ce3da..555a5d4eaeccd 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -54,6 +54,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Capacity.h" @@ -876,17 +877,21 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { if (((LastTokenWasAt && II.isModulesImport()) || Identifier.is(tok::kw_import)) && !InMacroArgs && !DisableMacroExpansion && - (getLangOpts().Modules || getLangOpts().DebuggerSupport) && CurLexerCallback != CLK_CachingLexer) { ModuleImportLoc = Identifier.getLocation(); - NamedModuleImportPath.clear(); IsAtImport = true; - ModuleImportExpectsIdentifier = true; CurLexerCallback = CLK_LexAfterModuleImport; } return true; } +void Preprocessor::ModuleDeclSeq::handleModuleName(ModuleNameLoc *Path) { + if (isModuleCandidate() && Path) + Name += Path->str(); + else if (!isNamedModule()) + reset(); +} + void Preprocessor::Lex(Token &Result) { ++LexLevel; @@ -930,6 +935,7 @@ void Preprocessor::Lex(Token &Result) { // This token is injected to represent the translation of '#include "a.h"' // into "import a.h;". Mimic the notional ';'. case tok::annot_module_include: + case tok::annot_repl_input_end: case tok::semi: TrackGMFState.handleSemi(); StdCXXImportSeqState.handleSemi(); @@ -947,31 +953,21 @@ void Preprocessor::Lex(Token &Result) { case tok::colon: ModuleDeclState.handleColon(); break; - case tok::period: - ModuleDeclState.handlePeriod(); + case tok::kw_import: + if (StdCXXImportSeqState.atTopLevel()) { + TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq()); + StdCXXImportSeqState.handleImport(); + } break; - case tok::identifier: - // Check "import" and "module" when there is no open bracket. The two - // identifiers are not meaningful with open brackets. + case tok::kw_module: if (StdCXXImportSeqState.atTopLevel()) { - if (Result.getIdentifierInfo()->isModulesImport()) { - TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq()); - StdCXXImportSeqState.handleImport(); - if (StdCXXImportSeqState.afterImportSeq()) { - ModuleImportLoc = Result.getLocation(); - NamedModuleImportPath.clear(); - IsAtImport = false; - ModuleImportExpectsIdentifier = true; - CurLexerCallback = CLK_LexAfterModuleImport; - } - break; - } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) { - TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq()); - ModuleDeclState.handleModule(); - break; - } + TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq()); + ModuleDeclState.handleModule(); } - ModuleDeclState.handleIdentifier(Result.getIdentifierInfo()); + break; + case tok::annot_module_name: + ModuleDeclState.handleModuleName( + Result.getAnnotationValueAs()); if (ModuleDeclState.isModuleCandidate()) break; [[fallthrough]]; @@ -989,6 +985,9 @@ void Preprocessor::Lex(Token &Result) { } LastTokenWasAt = Result.is(tok::at); + if (Result.isNot(tok::kw_export)) + LastTokenWasExportKeyword.reset(); + --LexLevel; if ((LexLevel == 0 || PreprocessToken) && @@ -1111,43 +1110,173 @@ bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) { return false; } +ModuleNameLoc *ModuleNameLoc::Create(Preprocessor &PP, ModuleIdPath Path) { + assert(!Path.empty() && "expect at least one identifier in a module name"); + void *Mem = PP.getPreprocessorAllocator().Allocate( + totalSizeToAlloc(Path.size()), alignof(ModuleNameLoc)); + return new (Mem) ModuleNameLoc(Path); +} + +Token ModuleNameLoc::CreateAnnotToken(Preprocessor &PP, ModuleIdPath Path) { + auto *NameLoc = Create(PP, Path); + Token ModuleNameTok; + ModuleNameTok.startToken(); + ModuleNameTok.setKind(tok::annot_module_name); + ModuleNameTok.setAnnotationRange(NameLoc->getRange()); + ModuleNameTok.setAnnotationValue(static_cast(NameLoc)); + return ModuleNameTok; +} + +// We represent the primary and partition names as 'Paths' which are sections +// of the hierarchical access path for a clang module. However for C++20 +// the periods in a name are just another character, and we will need to +// flatten them into a string. +std::string ModuleNameLoc::stringFromModuleIdPath(ModuleIdPath Path) { + std::string Name; + if (Path.empty()) + return Name; + + for (auto &Piece : Path) { + assert(Piece.getIdentifierInfo() && Piece.getLoc().isValid()); + if (!Name.empty()) + Name += "."; + Name += Piece.getIdentifierInfo()->getName(); + } + return Name; +} + +std::string ModuleNameLoc::str() const { + return stringFromModuleIdPath(getModuleIdPath()); +} + +void ModuleNameLoc::print(llvm::raw_ostream &OS) const { OS << str(); } + +bool Preprocessor::LexModuleNameContinue(Token &Tok, SourceLocation UseLoc, + SmallVectorImpl &Path, + bool AllowMacroExpansion) { + auto ConsumeToken = [&]() { + return AllowMacroExpansion ? Lex(Tok) : LexUnexpandedToken(Tok); + }; + + while (true) { + if (Tok.isNot(tok::identifier)) { + if (Tok.is(tok::code_completion)) { + CurLexer->cutOffLexing(); + Tok.setKind(tok::eof); + this->getCodeCompletionHandler()->CodeCompleteModuleImport(UseLoc, + Path); + } + Diag(Tok.getLocation(), diag::err_pp_expected_module_name) + << Path.empty(); + return true; + } + + // Record this part of the module path. + Path.emplace_back(Tok.getLocation(), Tok.getIdentifierInfo()); + ConsumeToken(); + + if (Tok.isNot(tok::period)) + return false; + + ConsumeToken(); + } +} + +/// P1857R3: Modules Dependency Discovery +/// +/// At the start of phase 4 an import or module token is treated as starting a +/// directive and are converted to their respective keywords iff: +/// - After skipping horizontal whitespace are +/// - at the start of a logical line, or +/// - preceded by an 'export' at the start of the logical line. +/// - Are followed by an identifier pp token (before macro expansion), or +/// - <, ", or : (but not ::) pp tokens for 'import', or +/// - ; for 'module' +/// Otherwise the token is treated as an identifier. +bool Preprocessor::HandleModuleContextualKeyword( + Token &Result, bool TokAtPhysicalStartOfLine) { + if (!getLangOpts().CPlusPlusModules || !Result.isModuleContextualKeyword()) + return false; + + if (Result.is(tok::kw_export)) { + LastTokenWasExportKeyword = {Result, TokAtPhysicalStartOfLine}; + return false; + } + + if (LastTokenWasExportKeyword.isValid()) { + // The export keyword was not at the start of line, it's not a + // directive-introducing token. + if (!LastTokenWasExportKeyword.isAtPhysicalStartOfLine()) + return false; + // [cpp.pre]/1.4 + // export // not a preprocessing directive + // import foo; // preprocessing directive (ill-formed at phase + // 7) + if (TokAtPhysicalStartOfLine) + return false; + } else if (!TokAtPhysicalStartOfLine) + return false; + + bool SavedParsingPreprocessorDirective = + CurPPLexer->ParsingPreprocessorDirective; + CurPPLexer->ParsingPreprocessorDirective = true; + auto _ = llvm::make_scope_exit([&]() { + CurPPLexer->ParsingPreprocessorDirective = + SavedParsingPreprocessorDirective; + }); + + if (Result.getIdentifierInfo()->isModulesImport() && + isNextPPTokenOneOf(tok::raw_identifier, tok::less, tok::string_literal, + tok::colon)) { + Result.setKind(tok::kw_import); + ModuleImportLoc = Result.getLocation(); + IsAtImport = false; + return true; + } + + if (Result.getIdentifierInfo()->isModulesDeclaration() && + isNextPPTokenOneOf(tok::raw_identifier, tok::colon, tok::semi)) { + Result.setKind(tok::kw_module); + ModuleDeclLoc = Result.getLocation(); + return true; + } + + // Ok, it's an identifier. + return false; +} + /// Collect the tokens of a C++20 pp-import-suffix. void Preprocessor::CollectPpImportSuffix(SmallVectorImpl &Toks) { // FIXME: For error recovery, consider recognizing attribute syntax here // and terminating / diagnosing a missing semicolon if we find anything // else? (Can we leave that to the parser?) - unsigned BracketDepth = 0; while (true) { Toks.emplace_back(); Lex(Toks.back()); switch (Toks.back().getKind()) { - case tok::l_paren: case tok::l_square: case tok::l_brace: - ++BracketDepth; - break; - - case tok::r_paren: case tok::r_square: case tok::r_brace: - if (BracketDepth == 0) - return; - --BracketDepth; - break; - case tok::semi: - if (BracketDepth == 0) - return; - break; - + case tok::eod: case tok::eof: return; - default: break; } } } +// Allocate a holding buffer for a sequence of tokens and introduce it into +// the token stream. +void Preprocessor::EnterModuleSuffixTokenStream(ArrayRef Toks) { + if (Toks.empty()) + return; + auto ToksCopy = std::make_unique(Toks.size()); + std::copy(Toks.begin(), Toks.end(), ToksCopy.get()); + EnterTokenStream(std::move(ToksCopy), Toks.size(), + /*DisableMacroExpansion*/ true, /*IsReinject*/ false); +} -/// Lex a token following the 'import' contextual keyword. +// Lex a token following the 'import' contextual keyword. /// /// pp-import: [C++20] /// import header-name pp-import-suffix[opt] ; @@ -1170,186 +1299,42 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { // Figure out what kind of lexer we actually have. recomputeCurLexerKind(); - // Lex the next token. The header-name lexing rules are used at the start of - // a pp-import. - // - // For now, we only support header-name imports in C++20 mode. - // FIXME: Should we allow this in all language modes that support an import - // declaration as an extension? - if (NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules) { - if (LexHeaderName(Result)) - return true; - - if (Result.is(tok::colon) && ModuleDeclState.isNamedModule()) { - std::string Name = ModuleDeclState.getPrimaryName().str(); - Name += ":"; - NamedModuleImportPath.emplace_back(Result.getLocation(), - getIdentifierInfo(Name)); - CurLexerCallback = CLK_LexAfterModuleImport; - return true; - } - } else { - Lex(Result); - } - - // Allocate a holding buffer for a sequence of tokens and introduce it into - // the token stream. - auto EnterTokens = [this](ArrayRef Toks) { - auto ToksCopy = std::make_unique(Toks.size()); - std::copy(Toks.begin(), Toks.end(), ToksCopy.get()); - EnterTokenStream(std::move(ToksCopy), Toks.size(), - /*DisableMacroExpansion*/ true, /*IsReinject*/ false); - }; - - bool ImportingHeader = Result.is(tok::header_name); - // Check for a header-name. SmallVector Suffix; - if (ImportingHeader) { - // Enter the header-name token into the token stream; a Lex action cannot - // both return a token and cache tokens (doing so would corrupt the token - // cache if the call to Lex comes from CachingLex / PeekAhead). - Suffix.push_back(Result); - - // Consume the pp-import-suffix and expand any macros in it now. We'll add - // it back into the token stream later. - CollectPpImportSuffix(Suffix); - if (Suffix.back().isNot(tok::semi)) { - // This is not a pp-import after all. - EnterTokens(Suffix); - return false; - } - - // C++2a [cpp.module]p1: - // The ';' preprocessing-token terminating a pp-import shall not have - // been produced by macro replacement. - SourceLocation SemiLoc = Suffix.back().getLocation(); - if (SemiLoc.isMacroID()) - Diag(SemiLoc, diag::err_header_import_semi_in_macro); - - // Reconstitute the import token. - Token ImportTok; - ImportTok.startToken(); - ImportTok.setKind(tok::kw_import); - ImportTok.setLocation(ModuleImportLoc); - ImportTok.setIdentifierInfo(getIdentifierInfo("import")); - ImportTok.setLength(6); - - auto Action = HandleHeaderIncludeOrImport( - /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc); - switch (Action.Kind) { - case ImportAction::None: - break; - - case ImportAction::ModuleBegin: - // Let the parser know we're textually entering the module. - Suffix.emplace_back(); - Suffix.back().startToken(); - Suffix.back().setKind(tok::annot_module_begin); - Suffix.back().setLocation(SemiLoc); - Suffix.back().setAnnotationEndLoc(SemiLoc); - Suffix.back().setAnnotationValue(Action.ModuleForHeader); - [[fallthrough]]; - - case ImportAction::ModuleImport: - case ImportAction::HeaderUnitImport: - case ImportAction::SkippedModuleImport: - // We chose to import (or textually enter) the file. Convert the - // header-name token into a header unit annotation token. - Suffix[0].setKind(tok::annot_header_unit); - Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation()); - Suffix[0].setAnnotationValue(Action.ModuleForHeader); - // FIXME: Call the moduleImport callback? - break; - case ImportAction::Failure: - assert(TheModuleLoader.HadFatalFailure && - "This should be an early exit only to a fatal error"); - Result.setKind(tok::eof); - CurLexer->cutOffLexing(); - EnterTokens(Suffix); - return true; - } - - EnterTokens(Suffix); + SmallVector Path; + Lex(Result); + if (LexModuleNameContinue(Result, ModuleImportLoc, Path)) return false; - } - // The token sequence - // - // import identifier (. identifier)* - // - // indicates a module import directive. We already saw the 'import' - // contextual keyword, so now we're looking for the identifiers. - if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { - // We expected to see an identifier here, and we did; continue handling - // identifiers. - NamedModuleImportPath.emplace_back(Result.getLocation(), - Result.getIdentifierInfo()); - ModuleImportExpectsIdentifier = false; - CurLexerCallback = CLK_LexAfterModuleImport; - return true; - } - - // If we're expecting a '.' or a ';', and we got a '.', then wait until we - // see the next identifier. (We can also see a '[[' that begins an - // attribute-specifier-seq here under the Standard C++ Modules.) - if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { - ModuleImportExpectsIdentifier = true; - CurLexerCallback = CLK_LexAfterModuleImport; - return true; - } - - // If we didn't recognize a module name at all, this is not a (valid) import. - if (NamedModuleImportPath.empty() || Result.is(tok::eof)) - return true; + Suffix.push_back(ModuleNameLoc::CreateAnnotToken(*this, Path)); + Suffix.push_back(Result); // Consume the pp-import-suffix and expand any macros in it now, if we're not // at the semicolon already. - SourceLocation SemiLoc = Result.getLocation(); - if (Result.isNot(tok::semi)) { - Suffix.push_back(Result); - CollectPpImportSuffix(Suffix); + SourceLocation SemiLoc = Suffix.back().getLocation(); + if (Suffix.back().isNot(tok::semi)) { + if (Result.isNot(tok::eof)) + CollectPpImportSuffix(Suffix); if (Suffix.back().isNot(tok::semi)) { // This is not an import after all. - EnterTokens(Suffix); + EnterModuleSuffixTokenStream(Suffix); return false; } SemiLoc = Suffix.back().getLocation(); } - // Under the standard C++ Modules, the dot is just part of the module name, - // and not a real hierarchy separator. Flatten such module names now. - // - // FIXME: Is this the right level to be performing this transformation? - std::string FlatModuleName; - if (getLangOpts().CPlusPlusModules) { - for (auto &Piece : NamedModuleImportPath) { - // If the FlatModuleName ends with colon, it implies it is a partition. - if (!FlatModuleName.empty() && FlatModuleName.back() != ':') - FlatModuleName += "."; - FlatModuleName += Piece.getIdentifierInfo()->getName(); - } - SourceLocation FirstPathLoc = NamedModuleImportPath[0].getLoc(); - NamedModuleImportPath.clear(); - NamedModuleImportPath.emplace_back(FirstPathLoc, - getIdentifierInfo(FlatModuleName)); - } - Module *Imported = nullptr; - // We don't/shouldn't load the standard c++20 modules when preprocessing. - if (getLangOpts().Modules && !isInImportingCXXNamedModules()) { - Imported = TheModuleLoader.loadModule(ModuleImportLoc, - NamedModuleImportPath, - Module::Hidden, + if (getLangOpts().Modules) { + Imported = TheModuleLoader.loadModule(ModuleImportLoc, Path, Module::Hidden, /*IsInclusionDirective=*/false); if (Imported) makeModuleVisible(Imported, SemiLoc); } if (Callbacks) - Callbacks->moduleImport(ModuleImportLoc, NamedModuleImportPath, Imported); + Callbacks->moduleImport(ModuleImportLoc, Path, Imported); if (!Suffix.empty()) { - EnterTokens(Suffix); + EnterModuleSuffixTokenStream(Suffix); return false; } return true; diff --git a/clang/lib/Lex/TokenConcatenation.cpp b/clang/lib/Lex/TokenConcatenation.cpp index 05f4203bd722b..f94caee24dc11 100644 --- a/clang/lib/Lex/TokenConcatenation.cpp +++ b/clang/lib/Lex/TokenConcatenation.cpp @@ -161,7 +161,8 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok, const Token &Tok) const { // No space is required between header unit name in quote and semi. - if (PrevTok.is(tok::annot_header_unit) && Tok.is(tok::semi)) + if (PrevTok.isOneOf(tok::annot_header_unit, tok::annot_module_name) && + Tok.is(tok::semi)) return false; // Conservatively assume that every annotation token that has a printable @@ -197,11 +198,12 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, if (Tok.isAnnotation()) { // Modules annotation can show up when generated automatically for includes. assert(Tok.isOneOf(tok::annot_module_include, tok::annot_module_begin, - tok::annot_module_end, tok::annot_embed) && + tok::annot_module_end, tok::annot_embed, + tok::annot_module_name) && "unexpected annotation in AvoidConcat"); ConcatInfo = 0; - if (Tok.is(tok::annot_embed)) + if (Tok.isOneOf(tok::annot_embed, tok::annot_module_name)) return true; } diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp index 47f4134fb1465..676fbe6f98f44 100644 --- a/clang/lib/Lex/TokenLexer.cpp +++ b/clang/lib/Lex/TokenLexer.cpp @@ -699,7 +699,9 @@ bool TokenLexer::Lex(Token &Tok) { HasLeadingSpace = false; // Handle recursive expansion! - if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) { + if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr && + (!PP.getLangOpts().CPlusPlusModules || + !Tok.isModuleContextualKeyword())) { // Change the kind of this identifier to the appropriate token kind, e.g. // turning "for" into a keyword. IdentifierInfo *II = Tok.getIdentifierInfo(); diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index 18f399aca59e8..76297386e0f1e 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -17,6 +17,7 @@ #include "clang/AST/DeclTemplate.h" #include "clang/Basic/DiagnosticParse.h" #include "clang/Basic/StackExhaustionHandler.h" +#include "clang/Lex/ModuleLoader.h" #include "clang/Parse/RAIIObjectsForParser.h" #include "clang/Sema/DeclSpec.h" #include "clang/Sema/EnterExpressionEvaluationContext.h" @@ -517,8 +518,6 @@ void Parser::Initialize() { Ident_trivially_relocatable_if_eligible = nullptr; Ident_replaceable_if_eligible = nullptr; Ident_GNU_final = nullptr; - Ident_import = nullptr; - Ident_module = nullptr; Ident_super = &PP.getIdentifierTable().get("super"); @@ -574,11 +573,6 @@ void Parser::Initialize() { PP.SetPoisonReason(Ident_AbnormalTermination,diag::err_seh___finally_block); } - if (getLangOpts().CPlusPlusModules) { - Ident_import = PP.getIdentifierInfo("import"); - Ident_module = PP.getIdentifierInfo("module"); - } - Actions.Initialize(); // Prime the lexer look-ahead. @@ -626,24 +620,8 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result, switch (NextToken().getKind()) { case tok::kw_module: goto module_decl; - - // Note: no need to handle kw_import here. We only form kw_import under - // the Standard C++ Modules, and in that case 'export import' is parsed as - // an export-declaration containing an import-declaration. - - // Recognize context-sensitive C++20 'export module' and 'export import' - // declarations. - case tok::identifier: { - IdentifierInfo *II = NextToken().getIdentifierInfo(); - if ((II == Ident_module || II == Ident_import) && - GetLookAheadToken(2).isNot(tok::coloncolon)) { - if (II == Ident_module) - goto module_decl; - else - goto import_decl; - } - break; - } + case tok::kw_import: + goto import_decl; default: break; @@ -713,21 +691,6 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result, //else don't tell Sema that we ended parsing: more input might come. return true; - case tok::identifier: - // C++2a [basic.link]p3: - // A token sequence beginning with 'export[opt] module' or - // 'export[opt] import' and not immediately followed by '::' - // is never interpreted as the declaration of a top-level-declaration. - if ((Tok.getIdentifierInfo() == Ident_module || - Tok.getIdentifierInfo() == Ident_import) && - NextToken().isNot(tok::coloncolon)) { - if (Tok.getIdentifierInfo() == Ident_module) - goto module_decl; - else - goto import_decl; - } - break; - default: break; } @@ -920,8 +883,9 @@ Parser::ParseExternalDeclaration(ParsedAttributes &Attrs, case tok::kw_import: { Sema::ModuleImportState IS = Sema::ModuleImportState::NotACXX20Module; if (getLangOpts().CPlusPlusModules) { - llvm_unreachable("not expecting a c++20 import here"); - ProhibitAttributes(Attrs); + Diag(Tok, diag::err_unexpected_module_import_decl) << /*IsImport*/ true; + SkipUntil(tok::semi); + return nullptr; } SingleDecl = ParseModuleImport(SourceLocation(), IS); } break; @@ -1013,7 +977,7 @@ Parser::ParseExternalDeclaration(ParsedAttributes &Attrs, return nullptr; case tok::kw_module: - Diag(Tok, diag::err_unexpected_module_decl); + Diag(Tok, diag::err_unexpected_module_import_decl) << /*IsImport*/ false; SkipUntil(tok::semi); return nullptr; @@ -2236,6 +2200,11 @@ void Parser::CodeCompleteNaturalLanguage() { Actions.CodeCompletion().CodeCompleteNaturalLanguage(); } +void Parser::CodeCompleteModuleImport(SourceLocation ImportLoc, + ModuleIdPath Path) { + Actions.CodeCompletion().CodeCompleteModuleImport(ImportLoc, Path); +} + bool Parser::ParseMicrosoftIfExistsCondition(IfExistsCondition& Result) { assert((Tok.is(tok::kw___if_exists) || Tok.is(tok::kw___if_not_exists)) && "Expected '__if_exists' or '__if_not_exists'"); @@ -2390,20 +2359,20 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) { return Actions.ActOnPrivateModuleFragmentDecl(ModuleLoc, PrivateLoc); } - SmallVector Path; - if (ParseModuleName(ModuleLoc, Path, /*IsImport*/ false)) + ModuleNameLoc *Path = nullptr; + if (Tok.isNot(tok::annot_module_name)) return nullptr; + Path = Tok.getAnnotationValueAs(); + ConsumeAnnotationToken(); // Parse the optional module-partition. - SmallVector Partition; + ModuleNameLoc *Partition = nullptr; if (Tok.is(tok::colon)) { - SourceLocation ColonLoc = ConsumeToken(); - if (!getLangOpts().CPlusPlusModules) - Diag(ColonLoc, diag::err_unsupported_module_partition) - << SourceRange(ColonLoc, Partition.back().getLoc()); - // Recover by ignoring the partition name. - else if (ParseModuleName(ModuleLoc, Partition, /*IsImport*/ false)) + ConsumeToken(); + if (Tok.isNot(tok::annot_module_name)) return nullptr; + Partition = Tok.getAnnotationValueAs(); + ConsumeAnnotationToken(); } // We don't support any module attributes yet; just parse them and diagnose. @@ -2416,8 +2385,10 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) { ExpectAndConsumeSemi(diag::err_module_expected_semi); - return Actions.ActOnModuleDecl(StartLoc, ModuleLoc, MDK, Path, Partition, - ImportState, Introducer.isFirstPPToken()); + return Actions.ActOnModuleDecl( + StartLoc, ModuleLoc, MDK, Path->getModuleIdPath(), + Partition ? Partition->getModuleIdPath() : ModuleIdPath{}, ImportState, + Introducer.isFirstPPToken()); } Decl *Parser::ParseModuleImport(SourceLocation AtLoc, @@ -2434,7 +2405,7 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc, SourceLocation ImportLoc = ConsumeToken(); // For C++20 modules, we can have "name" or ":Partition name" as valid input. - SmallVector Path; + ModuleNameLoc *Path = nullptr; bool IsPartition = false; Module *HeaderUnit = nullptr; if (Tok.is(tok::header_name)) { @@ -2447,18 +2418,17 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc, HeaderUnit = reinterpret_cast(Tok.getAnnotationValue()); ConsumeAnnotationToken(); } else if (Tok.is(tok::colon)) { - SourceLocation ColonLoc = ConsumeToken(); - if (!getLangOpts().CPlusPlusModules) - Diag(ColonLoc, diag::err_unsupported_module_partition) - << SourceRange(ColonLoc, Path.back().getLoc()); - // Recover by leaving partition empty. - else if (ParseModuleName(ColonLoc, Path, /*IsImport*/ true)) + ConsumeToken(); + if (Tok.isNot(tok::annot_module_name)) return nullptr; - else - IsPartition = true; + IsPartition = true; + Path = Tok.getAnnotationValueAs(); + ConsumeAnnotationToken(); } else { - if (ParseModuleName(ImportLoc, Path, /*IsImport*/ true)) + if (Tok.isNot(tok::annot_module_name)) return nullptr; + Path = Tok.getAnnotationValueAs(); + ConsumeAnnotationToken(); } ParsedAttributes Attrs(AttrFactory); @@ -2526,7 +2496,7 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc, if (HeaderUnit) Import = Actions.ActOnModuleImport(StartLoc, ExportLoc, ImportLoc, HeaderUnit); - else if (!Path.empty()) + else if (Path) Import = Actions.ActOnModuleImport(StartLoc, ExportLoc, ImportLoc, Path, IsPartition); if (Import.isInvalid()) diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index 7c982bcd63d73..174ae33e880a8 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -13,7 +13,9 @@ #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTMutationListener.h" +#include "clang/Basic/SourceLocation.h" #include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/ModuleLoader.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/ParsedAttr.h" #include "clang/Sema/SemaInternal.h" @@ -57,23 +59,6 @@ static void checkModuleImportContext(Sema &S, Module *M, } } -// We represent the primary and partition names as 'Paths' which are sections -// of the hierarchical access path for a clang module. However for C++20 -// the periods in a name are just another character, and we will need to -// flatten them into a string. -static std::string stringFromPath(ModuleIdPath Path) { - std::string Name; - if (Path.empty()) - return Name; - - for (auto &Piece : Path) { - if (!Name.empty()) - Name += "."; - Name += Piece.getIdentifierInfo()->getName(); - } - return Name; -} - /// Helper function for makeTransitiveImportsVisible to decide whether /// the \param Imported module unit is in the same module with the \param /// CurrentModule. @@ -303,7 +288,7 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, // We were asked to compile a module interface unit but this is a module // implementation unit. Diag(ModuleLoc, diag::err_module_interface_implementation_mismatch) - << FixItHint::CreateInsertion(ModuleLoc, "export "); + << FixItHint::CreateInsertion(ModuleLoc, "export "); MDK = ModuleDeclKind::Interface; break; @@ -369,10 +354,10 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, // Flatten the dots in a module name. Unlike Clang's hierarchical module map // modules, the dots here are just another character that can appear in a // module name. - std::string ModuleName = stringFromPath(Path); + std::string ModuleName = ModuleNameLoc::stringFromModuleIdPath(Path); if (IsPartition) { ModuleName += ":"; - ModuleName += stringFromPath(Partition); + ModuleName += ModuleNameLoc::stringFromModuleIdPath(Partition); } // If a module name was explicitly specified on the command line, it must be // correct. @@ -385,7 +370,7 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, << getLangOpts().CurrentModule; return nullptr; } - const_cast(getLangOpts()).CurrentModule = ModuleName; + const_cast(getLangOpts()).CurrentModule = ModuleName; auto &Map = PP.getHeaderSearchInfo().getModuleMap(); Module *Mod; // The module we are creating. @@ -430,7 +415,7 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, Interface = getModuleLoader().loadModule(ModuleLoc, {ModuleNameLoc}, Module::AllVisible, /*IsInclusionDirective=*/false); - const_cast(getLangOpts()).CurrentModule = ModuleName; + const_cast(getLangOpts()).CurrentModule = ModuleName; if (!Interface) { Diag(ModuleLoc, diag::err_module_not_defined) << ModuleName; @@ -574,8 +559,8 @@ Sema::ActOnPrivateModuleFragmentDecl(SourceLocation ModuleLoc, DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc, SourceLocation ExportLoc, - SourceLocation ImportLoc, ModuleIdPath Path, - bool IsPartition) { + SourceLocation ImportLoc, + ModuleNameLoc *PathLoc, bool IsPartition) { assert((!IsPartition || getLangOpts().CPlusPlusModules) && "partition seen in non-C++20 code?"); @@ -584,6 +569,7 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc, IdentifierLoc ModuleNameLoc; std::string ModuleName; + ModuleIdPath Path; if (IsPartition) { // We already checked that we are in a module purview in the parser. assert(!ModuleScopes.empty() && "in a module purview, but no module?"); @@ -592,15 +578,17 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc, // otherwise, the name of the importing named module. ModuleName = NamedMod->getPrimaryModuleInterfaceName().str(); ModuleName += ":"; - ModuleName += stringFromPath(Path); + ModuleName += PathLoc->str(); ModuleNameLoc = - IdentifierLoc(Path[0].getLoc(), PP.getIdentifierInfo(ModuleName)); + IdentifierLoc(PathLoc->getBeginLoc(), PP.getIdentifierInfo(ModuleName)); Path = ModuleIdPath(ModuleNameLoc); } else if (getLangOpts().CPlusPlusModules) { - ModuleName = stringFromPath(Path); + ModuleName = PathLoc->str(); ModuleNameLoc = - IdentifierLoc(Path[0].getLoc(), PP.getIdentifierInfo(ModuleName)); + IdentifierLoc(PathLoc->getBeginLoc(), PP.getIdentifierInfo(ModuleName)); Path = ModuleIdPath(ModuleNameLoc); + } else { + Path = PathLoc->getModuleIdPath(); } // Diagnose self-import before attempting a load. diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index fa86d714ff69a..8546f64e3a31a 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -653,7 +653,7 @@ void ModuleDepCollectorPP::InclusionDirective( void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, const Module *Imported) { - if (MDC.ScanInstance.getPreprocessor().isInImportingCXXNamedModules()) { + if (MDC.ScanInstance.getPreprocessor().isImportingCXXNamedModules()) { P1689ModuleInfo RequiredModule; RequiredModule.ModuleName = Path[0].getIdentifierInfo()->getName().str(); RequiredModule.Type = P1689ModuleInfo::ModuleType::NamedCXXModule; diff --git a/clang/test/CXX/basic/basic.link/p3.cpp b/clang/test/CXX/basic/basic.link/p3.cpp index 01202264d2591..1cf2b750a8a81 100644 --- a/clang/test/CXX/basic/basic.link/p3.cpp +++ b/clang/test/CXX/basic/basic.link/p3.cpp @@ -1,35 +1,18 @@ -// RUN: %clang_cc1 -std=c++2a -verify %s -// RUN: %clang_cc1 -std=c++2a -verify %s -DIMPORT_ERROR=1 -// RUN: %clang_cc1 -std=c++2a -verify %s -DIMPORT_ERROR=2 +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: %clang_cc1 -std=c++2a -verify %t/M.cppm +// RUN: %clang_cc1 -std=c++2a -verify %t/ImportError1.cppm +// RUN: %clang_cc1 -std=c++2a -verify %t/ImportError2.cppm + +//--- M.cppm module; -#if IMPORT_ERROR != 2 struct import { struct inner {}; }; -#endif struct module { struct inner {}; }; - constexpr int n = 123; export module m; // #1 - -// Import errors are fatal, so we test them in isolation. -#if IMPORT_ERROR == 1 -import x = {}; // expected-error {{expected ';' after module name}} - // expected-error@-1 {{module 'x' not found}} - -#elif IMPORT_ERROR == 2 -struct X; -template struct import; -template<> struct import { - static X y; -}; - -// This is not valid because the 'import ' is a pp-import, even though it -// grammatically can't possibly be an import declaration. -struct X {} import::y; // expected-error {{'n' file not found}} - -#else module y = {}; // expected-error {{multiple module declarations}} expected-error 2{{}} // expected-note@#1 {{previous module declaration}} @@ -40,8 +23,8 @@ import::inner xi = {}; module::inner yi = {}; namespace N { - module a; - import b; + module a; // expected-error {{module declaration can only appear at the top level}} + import b; // expected-error {{import declaration can only appear at the top level}} } extern "C++" module cxxm; @@ -51,4 +34,34 @@ template module module_var_template; // This is a variable named 'import' that shadows the type 'import' above. struct X {} import; -#endif + +//--- ImportError1.cppm +module; + +struct import { struct inner {}; }; +struct module { struct inner {}; }; + +constexpr int n = 123; + +export module m; // #1 + +import x = {}; // expected-error {{expected ';' after module name}} + // expected-error@-1 {{module 'x' not found}} + +//--- ImportError2.cppm +// expected-no-diagnostics +module; + +struct module { struct inner {}; }; + +constexpr int n = 123; + +export module m; // #1 + +struct X; +template struct import; +template<> struct import { + static X y; +}; + +struct X {} import::y; diff --git a/clang/test/CXX/basic/basic.scope/basic.scope.namespace/p2.cpp b/clang/test/CXX/basic/basic.scope/basic.scope.namespace/p2.cpp index fd0038b3f7745..a57919f48afdd 100644 --- a/clang/test/CXX/basic/basic.scope/basic.scope.namespace/p2.cpp +++ b/clang/test/CXX/basic/basic.scope/basic.scope.namespace/p2.cpp @@ -107,4 +107,4 @@ void test_late() { // expected-error@-2 {{undeclared identifier}} internal_private = 1; // expected-error {{use of undeclared identifier 'internal_private'}} -} \ No newline at end of file +} diff --git a/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp b/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp index 0e0e5fec6e9d8..f51066806947f 100644 --- a/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp +++ b/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp @@ -15,7 +15,7 @@ import ; // CHECK: import ; import ; -// CHECK: foo; import ; +// CHECK: foo; import ; foo; import ; // CHECK: foo import ; @@ -45,7 +45,7 @@ export export import ; import ; UNBALANCED_PAREN -// CHECK: import ; +// CHECK: import ; import ; ) @@ -57,14 +57,19 @@ import ; // CHECK: import ; import HEADER; -// CHECK: import ; +// CHECK: {{^}}foo{{$}} +// CHECK-NEXT: {{^}} bar{{$}} +// CHECK-NEXT: {{^}}>;{{$}} import < foo bar >; // CHECK: import{{$}} -// CHECK: {{^}}; +// CHECK-NEXT: {{^}}<{{$}} +// CHECK-NEXT: {{^}}foo{{$}} +// CHECK-NEXT: {{^}} bar{{$}} +// CHECK-NEXT: {{^}}>;{{$}} import < foo @@ -72,7 +77,7 @@ foo >; // CHECK: import{{$}} -// CHECK: {{^}}; +// CHECK: {{^}}; import ; diff --git a/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm b/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm index f65f050a3c7bd..53fd3ea29eccb 100644 --- a/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm +++ b/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm @@ -44,8 +44,9 @@ import x [[noreturn]]; // expected-error {{'noreturn' attribute cannot be applie import x [[blarg::noreturn]]; // expected-warning-re {{unknown attribute 'blarg::noreturn' ignored{{.*}}}} import x.y; -import x.; // expected-error {{expected a module name after 'import'}} -import .x; // expected-error {{expected a module name after 'import'}} +import x.; // expected-error {{expected identifier after '.' in module name}} +import .x; // expected-error {{unknown type name 'import'}} \ + // expected-error {{cannot use dot operator on a type}} import blarg; // expected-error {{module 'blarg' not found}} @@ -62,8 +63,9 @@ import x [[noreturn]]; // expected-error {{'noreturn' attribute cannot be applie import x [[blarg::noreturn]]; // expected-warning-re {{unknown attribute 'blarg::noreturn' ignored{{.*}}}} import x.y; -import x.; // expected-error {{expected a module name after 'import'}} -import .x; // expected-error {{expected a module name after 'import'}} +import x.; // expected-error {{expected identifier after '.' in module name}} +import .x; // expected-error {{unknown type name 'import'}} \ + // expected-error {{cannot use dot operator on a type}} import blarg; // expected-error {{module 'blarg' not found}} diff --git a/clang/test/CXX/module/module.interface/p1.cpp b/clang/test/CXX/module/module.interface/p1.cpp index 54a201e502323..9b3ec1348f9cf 100644 --- a/clang/test/CXX/module/module.interface/p1.cpp +++ b/clang/test/CXX/module/module.interface/p1.cpp @@ -1,28 +1,19 @@ -// RUN: %clang_cc1 -std=c++2a %s -DERRORS -verify -// RUN: %clang_cc1 -std=c++2a %s -emit-module-interface -o %t.pcm -// RUN: %clang_cc1 -std=c++2a %s -fmodule-file=M=%t.pcm -DIMPLEMENTATION -verify -Db=b2 -Dc=c2 +// RUN: rm -rf %t +// RUN: split-file %s %t -module; +// RUN: %clang_cc1 -std=c++2a %t/errors.cppm -verify +// RUN: %clang_cc1 -std=c++2a %t/M.cppm -emit-module-interface -o %t/M.pcm +// RUN: %clang_cc1 -std=c++2a %t/impl.cppm -fmodule-file=M=%t/M.pcm -verify -#ifdef ERRORS +//--- errors.cppm +module; export int a; // expected-error {{export declaration can only be used within a module purview}} -#endif - -#ifndef IMPLEMENTATION -export -#else -// expected-error@#1 {{export declaration can only be used within a module purview}} -// expected-error@#2 {{export declaration can only be used within a module purview}} -// expected-note@+2 1+{{add 'export'}} -#endif -module M; - +export module M; export int b; // #1 namespace N { export int c; // #2 } -#ifdef ERRORS namespace { // expected-note 2{{anonymous namespace begins here}} export int d1; // expected-error {{export declaration appears within anonymous namespace}} namespace X { @@ -35,4 +26,19 @@ export { export int f; } // expected-error {{within another export declaration}} module :private; // expected-note {{private module fragment begins here}} export int priv; // expected-error {{export declaration cannot be used in a private module fragment}} -#endif + +//--- M.cppm +export module M; +export int b; +namespace N { + export int c; +} + +//--- impl.cppm +module M; // #M + +export int b2; // expected-error {{export declaration can only be used within a module purview}} +namespace N { + export int c2; // expected-error {{export declaration can only be used within a module purview}} +} +// expected-note@#M 2+{{add 'export'}} diff --git a/clang/test/Modules/pr121066.cpp b/clang/test/Modules/pr121066.cpp index e92a81c53d683..849488e938d50 100644 --- a/clang/test/Modules/pr121066.cpp +++ b/clang/test/Modules/pr121066.cpp @@ -1,4 +1,3 @@ // RUN: %clang_cc1 -std=c++20 -fsyntax-only %s -verify -import mod // expected-error {{expected ';' after module name}} - // expected-error@-1 {{module 'mod' not found}} +import mod // expected-error {{'import' directive must end with a ';' on the same line}} diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp index 07450a0c59ec6..042e8bd80a50b 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp @@ -193,7 +193,8 @@ TEST_P(ASTMatchersTest, ExportDecl) { if (!GetParam().isCXX20OrLater()) { return; } - const std::string moduleHeader = "module;export module ast_matcher_test;"; + const std::string moduleHeader = + "module;\n export module ast_matcher_test;\n"; EXPECT_TRUE(matches(moduleHeader + "export void foo();", exportDecl(has(functionDecl())))); EXPECT_TRUE(matches(moduleHeader + "export { void foo(); int v; }", diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp index 61f74929c1e98..9398175d4336f 100644 --- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp +++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp @@ -1092,11 +1092,11 @@ ort \ ASSERT_FALSE( minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives)); EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;" - "exp\\\nort import:l[[rename]];" - "import<<=3;import a b d e d e f e;" - "import foo[[no_unique_address]];import foo();" - "import f(:sefse);import f(->a=3);" - "\n", + "\nexp\\\nort import:l[[rename]];" + "\nimport<<=3;\nimport a b d e d e f e;" + "\nimport foo[[no_unique_address]];\nimport foo();" + "\nimport f(:sefse);\nimport f(->a=3);" + "\n\n", Out.data()); ASSERT_EQ(Directives.size(), 11u); EXPECT_EQ(Directives[0].Kind, pp_include); diff --git a/clang/unittests/Lex/ModuleDeclStateTest.cpp b/clang/unittests/Lex/ModuleDeclStateTest.cpp index 6ecba4de3187c..052dccd6fa8d5 100644 --- a/clang/unittests/Lex/ModuleDeclStateTest.cpp +++ b/clang/unittests/Lex/ModuleDeclStateTest.cpp @@ -40,7 +40,7 @@ class CheckNamedModuleImportingCB : public PPCallbacks { void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, const Module *Imported) override { ASSERT_TRUE(NextCheckingIndex < IsImportingNamedModulesAssertions.size()); - EXPECT_EQ(PP.isInImportingCXXNamedModules(), + EXPECT_EQ(PP.isImportingCXXNamedModules(), IsImportingNamedModulesAssertions[NextCheckingIndex]); NextCheckingIndex++;