diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 0ec1cb4d0c5d8..55661e8d4256f 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -350,8 +350,8 @@ class Preprocessor { /// Whether the last token we lexed was an '@'. bool LastTokenWasAt = false; - /// First pp-token in current translation unit. - std::optional FirstPPToken; + /// First pp-token source location in current translation unit. + SourceLocation FirstPPTokenLoc; /// A position within a C++20 import-seq. class StdCXXImportSeq { @@ -1769,20 +1769,13 @@ class Preprocessor { std::optional LexEmbedParameters(Token &Current, bool ForHasEmbed); - /// Whether the preprocessor already seen the first pp-token in main file. - bool hasSeenMainFileFirstPPToken() const { return FirstPPToken.has_value(); } - - /// Record first pp-token and check if it has a Token::FirstPPToken flag. - void HandleMainFileFirstPPToken(const Token &Tok) { - if (!hasSeenMainFileFirstPPToken() && Tok.isFirstPPToken() && - SourceMgr.isWrittenInMainFile(Tok.getLocation())) - FirstPPToken = Tok; + /// Get the start location of the first pp-token in main file. 
+ SourceLocation getMainFileFirstPPTokenLoc() const { + assert(FirstPPTokenLoc.isValid() && + "Did not see the first pp-token in the main file"); + return FirstPPTokenLoc; } - Token getMainFileFirstPPToken() const { - assert(FirstPPToken && "First main file pp-token doesn't exists"); - return *FirstPPToken; - } bool LexAfterModuleImport(Token &Result); void CollectPpImportSuffix(SmallVectorImpl &Toks); diff --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h index 777b4e6266c71..7ac933d8f9d45 100644 --- a/clang/include/clang/Lex/TokenLexer.h +++ b/clang/include/clang/Lex/TokenLexer.h @@ -139,8 +139,9 @@ class TokenLexer { void Init(const Token *TokArray, unsigned NumToks, bool DisableMacroExpansion, bool OwnsTokens, bool IsReinject); - /// If the next token lexed will pop this macro off the expansion stack, - /// return std::nullopt, otherwise return the next unexpanded token. + /// If TokenLexer::isAtEnd returns true (the next token lexed will pop this + /// macro off the expansion stack), return std::nullopt, otherwise return the + /// next unexpanded token. std::optional peekNextPPToken() const; /// Lex and return a token from this macro stream. diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index f4d16ecce393c..42ea7edf3aaad 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -3228,6 +3228,7 @@ std::optional Lexer::peekNextPPToken() { bool atStartOfLine = IsAtStartOfLine; bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine; bool leadingSpace = HasLeadingSpace; + bool isFirstPPToken = IsFirstPPToken; Token Tok; Lex(Tok); @@ -3238,7 +3239,7 @@ std::optional Lexer::peekNextPPToken() { HasLeadingSpace = leadingSpace; IsAtStartOfLine = atStartOfLine; IsAtPhysicalStartOfLine = atPhysicalStartOfLine; - + IsFirstPPToken = isFirstPPToken; // Restore the lexer back to non-skipping mode.
LexingRawMode = false; @@ -3740,10 +3741,6 @@ bool Lexer::Lex(Token &Result) { bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine); // (After the LexTokenInternal call, the lexer might be destroyed.) assert((returnedToken || !isRawLex) && "Raw lex must succeed"); - - if (returnedToken && Result.isFirstPPToken() && PP && - !PP->hasSeenMainFileFirstPPToken()) - PP->HandleMainFileFirstPPToken(Result); return returnedToken; } @@ -4547,8 +4544,6 @@ const char *Lexer::convertDependencyDirectiveToken( Result.setFlag((Token::TokenFlags)DDTok.Flags); Result.setLength(DDTok.Length); BufferPtr = TokPtr + DDTok.Length; - if (PP && !PP->hasSeenMainFileFirstPPToken() && Result.isFirstPPToken()) - PP->HandleMainFileFirstPPToken(Result); return TokPtr; } diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index c8974e5a3528c..be061f462f65a 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -1242,9 +1242,6 @@ void Preprocessor::HandleDirective(Token &Result) { // pp-directive. bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal(); - if (!hasSeenMainFileFirstPPToken()) - HandleMainFileFirstPPToken(Result); - // Save the '#' token in case we need to return it later. Token SavedHash = Result; diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 709cf3bb87c8e..b8b91e32179af 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -431,9 +431,6 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // to disable the optimization in this case. if (CurPPLexer) CurPPLexer->MIOpt.ExpandedMacro(); - if (!hasSeenMainFileFirstPPToken()) - HandleMainFileFirstPPToken(Identifier); - // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially. 
if (MI->isBuiltinMacro()) { if (Callbacks) diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 7fecbe9eee53c..9329f9fd4460a 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -566,6 +566,21 @@ void Preprocessor::EnterMainSourceFile() { // #imported, it won't be re-entered. if (OptionalFileEntryRef FE = SourceMgr.getFileEntryRefForID(MainFileID)) markIncluded(*FE); + + // Record the first PP token in the main file. This is used to generate + // better diagnostics for C++ modules. + // + // // This is a comment. + // #define FOO int // note: add 'module;' to the start of the file + // ^ FirstPPToken // to introduce a global module fragment. + // + // export module M; // error: module declaration must occur + // // at the start of the translation unit. + if (getLangOpts().CPlusPlusModules) { + std::optional FirstPPTok = CurLexer->peekNextPPToken(); + if (FirstPPTok && FirstPPTok->isFirstPPToken()) + FirstPPTokenLoc = FirstPPTok->getLocation(); + } } // Preprocess Predefines to populate the initial preprocessor state. diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index fe70ce3fba6a5..7c982bcd63d73 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -337,11 +337,9 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, // tokens in a file (excluding the global module fragment.). 
if (getLangOpts().CPlusPlusModules && !IntroducerIsFirstPPToken && !SeenGMF) { Diag(ModuleLoc, diag::err_module_decl_not_at_start); - SourceLocation BeginLoc = PP.getMainFileFirstPPToken().getLocation(); - if (BeginLoc.isValid()) { - Diag(BeginLoc, diag::note_global_module_introducer_missing) - << FixItHint::CreateInsertion(BeginLoc, "module;\n"); - } + SourceLocation BeginLoc = PP.getMainFileFirstPPTokenLoc(); + Diag(BeginLoc, diag::note_global_module_introducer_missing) + << FixItHint::CreateInsertion(BeginLoc, "module;\n"); } // C++23 [module.unit]p1: ... The identifiers module and import shall not diff --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp index 33c8abbec35a3..2adb55484be88 100644 --- a/clang/unittests/Lex/LexerTest.cpp +++ b/clang/unittests/Lex/LexerTest.cpp @@ -49,8 +49,7 @@ class LexerTest : public ::testing::Test { } std::unique_ptr CreatePP(StringRef Source, - TrivialModuleLoader &ModLoader, - StringRef PreDefines = {}) { + TrivialModuleLoader &ModLoader) { std::unique_ptr Buf = llvm::MemoryBuffer::getMemBuffer(Source); SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf))); @@ -63,7 +62,7 @@ class LexerTest : public ::testing::Test { /*IILookup =*/nullptr, /*OwnsHeaderSearch =*/false); if (!PreDefines.empty()) - PP->setPredefines(PreDefines.str()); + PP->setPredefines(PreDefines); PP->Initialize(*Target); PP->EnterMainSourceFile(); return PP; @@ -111,6 +110,7 @@ class LexerTest : public ::testing::Test { std::shared_ptr TargetOpts; IntrusiveRefCntPtr Target; std::unique_ptr PP; + std::string PreDefines; }; TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) { @@ -773,6 +773,7 @@ TEST(LexerPreambleTest, PreambleBounds) { } TEST_F(LexerTest, CheckFirstPPToken) { + LangOpts.CPlusPlusModules = true; { TrivialModuleLoader ModLoader; auto PP = CreatePP("// This is a comment\n" @@ -781,9 +782,8 @@ TEST_F(LexerTest, CheckFirstPPToken) { Token Tok; PP->Lex(Tok); EXPECT_TRUE(Tok.is(tok::kw_int)); - 
EXPECT_TRUE(PP->hasSeenMainFileFirstPPToken()); - EXPECT_TRUE(PP->getMainFileFirstPPToken().isFirstPPToken()); - EXPECT_TRUE(PP->getMainFileFirstPPToken().is(tok::kw_int)); + EXPECT_TRUE(PP->getMainFileFirstPPTokenLoc().isValid()); + EXPECT_EQ(PP->getMainFileFirstPPTokenLoc(), Tok.getLocation()); } { TrivialModuleLoader ModLoader; @@ -794,24 +794,28 @@ TEST_F(LexerTest, CheckFirstPPToken) { Token Tok; PP->Lex(Tok); EXPECT_TRUE(Tok.is(tok::kw_int)); - EXPECT_TRUE(PP->hasSeenMainFileFirstPPToken()); - EXPECT_TRUE(PP->getMainFileFirstPPToken().isFirstPPToken()); - EXPECT_TRUE(PP->getMainFileFirstPPToken().is(tok::hash)); + EXPECT_FALSE(Lexer::getRawToken(PP->getMainFileFirstPPTokenLoc(), Tok, + PP->getSourceManager(), PP->getLangOpts(), + /*IgnoreWhiteSpace=*/false)); + EXPECT_TRUE(Tok.isFirstPPToken()); + EXPECT_TRUE(Tok.is(tok::hash)); } { + PreDefines = "#define FOO int\n"; TrivialModuleLoader ModLoader; auto PP = CreatePP("// This is a comment\n" "FOO a;", - ModLoader, "#define FOO int\n"); + ModLoader); Token Tok; PP->Lex(Tok); EXPECT_TRUE(Tok.is(tok::kw_int)); - EXPECT_TRUE(PP->hasSeenMainFileFirstPPToken()); - EXPECT_TRUE(PP->getMainFileFirstPPToken().isFirstPPToken()); - EXPECT_TRUE(PP->getMainFileFirstPPToken().is(tok::identifier)); - EXPECT_TRUE( - PP->getMainFileFirstPPToken().getIdentifierInfo()->isStr("FOO")); + EXPECT_FALSE(Lexer::getRawToken(PP->getMainFileFirstPPTokenLoc(), Tok, + PP->getSourceManager(), PP->getLangOpts(), + /*IgnoreWhiteSpace=*/false)); + EXPECT_TRUE(Tok.isFirstPPToken()); + EXPECT_TRUE(Tok.is(tok::raw_identifier)); + EXPECT_TRUE(Tok.getRawIdentifier() == "FOO"); } } } // anonymous namespace