r-lib · MichaelChirico · Dec 4, 2023 · Dec 1, 2023 · Dec 1, 2023 · Dec 1, 2023
diff --git a/NEWS.md b/NEWS.md
@@ -31,6 +31,7 @@
 * `unnecessary_lambda_linter` is extended to encourage vectorized comparisons where possible, e.g. `sapply(x, sum) > 0` instead of `sapply(x, function(x) sum(x) > 0)` (part of #884, @MichaelChirico). Toggle this behavior with argument `allow_comparison`.
 * `backport_linter()` is slightly faster by moving expensive computations outside the linting function (#2339, #2348, @AshesITR and @MichaelChirico).
 * `Linter()` has a new argument `linter_level` (default `NA`). This is used by `lint()` to more efficiently check for expression levels than the idiom `if (!is_lint_level(...)) { return(list()) }` (#2351, @AshesITR).
+* `string_boundary_linter()` recognizes regular expression calls like `grepl("^abc$", x)` that can be replaced by using `==` instead (#1613, @MichaelChirico).
 * `unreachable_code_linter()` has an argument `allow_comment_regex` for customizing which "terminal" comments to exclude (#2327, @MichaelChirico). `# nolint end` comments are always excluded, as are {covr} exclusions (e.g. `# nocov end`) by default.
 * `format()` and `print()` methods for `lint` and `lints` classes get a new option `width` to control the printing width of lint messages (#1884, @MichaelChirico). The default is controlled by a new option `lintr.format_width`; if unset, no wrapping occurs (matching earlier behavior).
 

diff --git a/R/string_boundary_linter.R b/R/string_boundary_linter.R
@@ -66,6 +66,11 @@ string_boundary_linter <- function(allow_grepl = FALSE) {
     /following-sibling::expr[2]
     /STR_CONST[ {str_cond} ]
   ")
+  str_detect_message_map <- c(
+    both = "Use == to check for an exact string match.",
+    initial = "Use startsWith() to detect a fixed initial substring.",
+    terminal = "Use endsWith() to detect a fixed terminal substring."
+  )
 
   if (!allow_grepl) {
     grepl_xpath <- glue("
@@ -84,16 +89,34 @@ string_boundary_linter <- function(allow_grepl = FALSE) {
       /expr[2]
       /STR_CONST[ {str_cond} ]
     ")
+    grepl_lint_fmt <- paste(
+      "Use !is.na(x) & %1$s(x, string) to detect a fixed %2$s substring, or,",
+      "if missingness is not a concern, just %1$s()."
+    )
+    grepl_message_map <- c(
+      both = "Use == to check for an exact string match.",
+      initial = sprintf(grepl_lint_fmt, "startsWith", "initial"),
+      terminal = sprintf(grepl_lint_fmt, "endsWith", "terminal")
+    )
   }
 
   get_regex_lint_data <- function(xml, xpath) {
     expr <- xml_find_all(xml, xpath)
     patterns <- get_r_string(expr)
     initial_anchor <- startsWith(patterns, "^")
+    terminal_anchor <- endsWith(patterns, "$")
     search_start <- 1L + initial_anchor
-    search_end <- nchar(patterns) - 1L + initial_anchor
+    search_end <- nchar(patterns) - terminal_anchor
     can_replace <- is_not_regex(substr(patterns, search_start, search_end))
-    list(lint_expr = expr[can_replace], initial_anchor = initial_anchor[can_replace])
+    initial_anchor <- initial_anchor[can_replace]
+    terminal_anchor <- terminal_anchor[can_replace]
+
+    lint_type <- character(length(initial_anchor))
+
+    lint_type[initial_anchor & terminal_anchor] <- "both"
+    lint_type[initial_anchor & !terminal_anchor] <- "initial"
+    lint_type[!initial_anchor & terminal_anchor] <- "terminal"
+    list(lint_expr = expr[can_replace], lint_type = lint_type)
   }
 
   substr_xpath_parts <- glue("
@@ -125,38 +148,23 @@ string_boundary_linter <- function(allow_grepl = FALSE) {
     lints <- list()
 
     str_detect_lint_data <- get_regex_lint_data(xml, str_detect_xpath)
-    str_detect_lint_message <- paste(
-      ifelse(
-        str_detect_lint_data$initial_anchor,
-        "Use startsWith() to detect a fixed initial substring.",
-        "Use endsWith() to detect a fixed terminal substring."
-      ),
-      "Doing so is more readable and more efficient."
-    )
+    str_detect_lint_message <- str_detect_message_map[str_detect_lint_data$lint_type]
 
     lints <- c(lints, xml_nodes_to_lints(
       str_detect_lint_data$lint_expr,
       source_expression = source_expression,
-      lint_message = str_detect_lint_message,
+      lint_message =  paste(str_detect_lint_message, "Doing so is more readable and more efficient."),
       type = "warning"
     ))
 
     if (!allow_grepl) {
       grepl_lint_data <- get_regex_lint_data(xml, grepl_xpath)
-      grepl_replacement <- ifelse(grepl_lint_data$initial_anchor, "startsWith", "endsWith")
-      grepl_type <- ifelse(grepl_lint_data$initial_anchor, "initial", "terminal")
-      grepl_lint_message <- paste(
-        sprintf(
-          "Use !is.na(x) & %s(x, string) to detect a fixed %s substring, or, if missingness is not a concern, just %s.",
-          grepl_replacement, grepl_type, grepl_replacement
-        ),
-        "Doing so is more readable and more efficient."
-      )
+      grepl_lint_message <- grepl_message_map[grepl_lint_data$lint_type]
 
       lints <- c(lints, xml_nodes_to_lints(
         grepl_lint_data$lint_expr,
         source_expression = source_expression,
-        lint_message = grepl_lint_message,
+        lint_message =  paste(grepl_lint_message, "Doing so is more readable and more efficient."),
         type = "warning"
       ))
     }

diff --git a/tests/testthat/test-string_boundary_linter.R b/tests/testthat/test-string_boundary_linter.R
@@ -71,78 +71,112 @@ test_that("string_boundary_linter blocks simple disallowed grepl() usages", {
 })
 
 test_that("string_boundary_linter blocks simple disallowed str_detect() usages", {
+  linter <- string_boundary_linter()
+
   expect_lint(
     "str_detect(x, '^a')",
     rex::rex("Use startsWith() to detect a fixed initial substring."),
-    string_boundary_linter()
+    linter
   )
   expect_lint(
     "str_detect(x, 'a$')",
     rex::rex("Use endsWith() to detect a fixed terminal substring."),
-    string_boundary_linter()
+    linter
   )
 })
 
 test_that("string_boundary_linter blocks disallowed substr()/substring() usage", {
-  expect_lint(
-    "substr(x, 1L, 2L) == 'ab'",
-    rex::rex("Use startsWith() to detect an initial substring."),
-    string_boundary_linter()
-  )
+  linter <- string_boundary_linter()
+  starts_message <- rex::rex("Use startsWith() to detect an initial substring.")
+  ends_message <- rex::rex("Use endsWith() to detect a terminal substring.")
+
+  expect_lint("substr(x, 1L, 2L) == 'ab'", starts_message, linter)
   # end doesn't matter, just anchoring to 1L
-  expect_lint(
-    "substr(x, 1L, end) == 'ab'",
-    rex::rex("Use startsWith() to detect an initial substring."),
-    string_boundary_linter()
-  )
-  expect_lint(
-    "substring(x, nchar(x) - 4L, nchar(x)) == 'abcde'",
-    rex::rex("Use endsWith() to detect a terminal substring."),
-    string_boundary_linter()
-  )
+  expect_lint("substr(x, 1L, end) == 'ab'", starts_message, linter)
+  expect_lint("substring(x, nchar(x) - 4L, nchar(x)) == 'abcde'", ends_message, linter)
   # start doesn't matter, just anchoring to nchar(x)
-  expect_lint(
-    "substring(x, start, nchar(x)) == 'abcde'",
-    rex::rex("Use endsWith() to detect a terminal substring."),
-    string_boundary_linter()
-  )
+  expect_lint("substring(x, start, nchar(x)) == 'abcde'", ends_message, linter)
   # more complicated expressions
-  expect_lint(
-    "substring(colnames(x), start, nchar(colnames(x))) == 'abc'",
-    rex::rex("Use endsWith() to detect a terminal substring."),
-    string_boundary_linter()
-  )
+  expect_lint("substring(colnames(x), start, nchar(colnames(x))) == 'abc'", ends_message, linter)
 })
 
 test_that("plain ^ or $ are skipped", {
-  expect_lint('grepl("^", x)', NULL, string_boundary_linter())
-  expect_lint('grepl("$", x)', NULL, string_boundary_linter())
+  linter <- string_boundary_linter()
+
+  expect_lint('grepl("^", x)', NULL, linter)
+  expect_lint('grepl("$", x)', NULL, linter)
 })
 
 test_that("substr inverted tests are caught as well", {
+  linter <- string_boundary_linter()
+
   expect_lint(
     "substr(x, 1L, 2L) != 'ab'",
     rex::rex("Use startsWith() to detect an initial substring."),
-    string_boundary_linter()
+    linter
   )
   expect_lint(
     "substring(x, nchar(x) - 4L, nchar(x)) != 'abcde'",
     rex::rex("Use endsWith() to detect a terminal substring."),
-    string_boundary_linter()
+    linter
   )
 })
 
 test_that("R>=4 raw strings are detected", {
+  linter <- string_boundary_linter()
+
   skip_if_not_r_version("4.0.0")
-  expect_lint('grepl(R"(^.{3})", x)', NULL, string_boundary_linter())
+  expect_lint('grepl(R"(^.{3})", x)', NULL, linter)
   expect_lint(
     'grepl(R"(^abc)", x)',
     rex::rex("Use !is.na(x) & startsWith(x, string) to detect a fixed initial substring,"),
-    string_boundary_linter()
+    linter
   )
 })
 
 test_that("grepl() can optionally be ignored", {
-  expect_lint("grepl('^abc', x)", NULL, string_boundary_linter(allow_grepl = TRUE))
-  expect_lint("grepl('xyz$', x)", NULL, string_boundary_linter(allow_grepl = TRUE))
+  linter <- string_boundary_linter(allow_grepl = TRUE)
+
+  expect_lint("grepl('^abc', x)", NULL, linter)
+  expect_lint("grepl('xyz$', x)", NULL, linter)
+})
+
+test_that("whole-string regex recommends ==, not {starts,ends}With()", {
+  linter <- string_boundary_linter()
+  lint_msg <- rex::rex("Use == to check for an exact string match.")
+
+  expect_lint("grepl('^abc$', x)", lint_msg, linter)
+  expect_lint("grepl('^a\\\\.b$', x)", lint_msg, linter)
+  expect_lint("str_detect(x, '^abc$')", lint_msg, linter)
+  expect_lint("str_detect(x, '^a[.]b$')", lint_msg, linter)
+})
+
+test_that("vectorization + metadata work as intended", {
+  expect_lint(
+    trim_some("{
+      substring(a, 1, 3) == 'abc'
+      substring(b, nchar(b) - 3, nchar(b)) == 'defg'
+      substr(c, 1, 3) == 'hij'
+      substr(d, nchar(d) - 3, nchar(d)) == 'klmn'
+      grepl('^abc', e)
+      grepl('abc$', f)
+      grepl('^abc$', g)
+      str_detect(h, '^abc')
+      str_detect(i, 'abc$')
+      str_detect(j, '^abc$')
+    }"),
+    list(
+      list("startsWith", line_number = 2L),
+      list("endsWith", line_number = 3L),
+      list("startsWith", line_number = 4L),
+      list("endsWith", line_number = 5L),
+      list("startsWith", line_number = 6L),
+      list("endsWith", line_number = 7L),
+      list("==", line_number = 8L),
+      list("startsWith", line_number = 9L),
+      list("endsWith", line_number = 10L),
+      list("==", line_number = 11L)
+    ),
+    string_boundary_linter()
+  )
 })