diff --git a/NEWS.md b/NEWS.md index ca1f80525..0fdee44e7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -31,6 +31,7 @@ * `unnecessary_lambda_linter` is extended to encourage vectorized comparisons where possible, e.g. `sapply(x, sum) > 0` instead of `sapply(x, function(x) sum(x) > 0)` (part of #884, @MichaelChirico). Toggle this behavior with argument `allow_comparison`. * `backport_linter()` is slightly faster by moving expensive computations outside the linting function (#2339, #2348, @AshesITR and @MichaelChirico). * `Linter()` has a new argument `linter_level` (default `NA`). This is used by `lint()` to more efficiently check for expression levels than the idiom `if (!is_lint_level(...)) { return(list()) }` (#2351, @AshesITR). +* `string_boundary_linter()` recognizes regular expression calls like `grepl("^abc$", x)` that can be replaced by using `==` instead (#1613, @MichaelChirico). * `unreachable_code_linter()` has an argument `allow_comment_regex` for customizing which "terminal" comments to exclude (#2327, @MichaelChirico). `# nolint end` comments are always excluded, as are {covr} exclusions (e.g. `# nocov end`) by default. * `format()` and `print()` methods for `lint` and `lints` classes get a new option `width` to control the printing width of lint messages (#1884, @MichaelChirico). The default is controlled by a new option `lintr.format_width`; if unset, no wrapping occurs (matching earlier behavior). diff --git a/R/string_boundary_linter.R b/R/string_boundary_linter.R index 9813f8564..6121061e9 100644 --- a/R/string_boundary_linter.R +++ b/R/string_boundary_linter.R @@ -66,6 +66,11 @@ string_boundary_linter <- function(allow_grepl = FALSE) { /following-sibling::expr[2] /STR_CONST[ {str_cond} ] ") + str_detect_message_map <- c( + both = "Use == to check for an exact string match.", + initial = "Use startsWith() to detect a fixed initial substring.", + terminal = "Use endsWith() to detect a fixed terminal substring." + ) if (!allow_grepl) { grepl_xpath <- glue(" @@ -84,16 +89,34 @@ string_boundary_linter <- function(allow_grepl = FALSE) { /expr[2] /STR_CONST[ {str_cond} ] ") + grepl_lint_fmt <- paste( + "Use !is.na(x) & %1$s(x, string) to detect a fixed %2$s substring, or,", + "if missingness is not a concern, just %1$s()." + ) + grepl_message_map <- c( + both = "Use == to check for an exact string match.", + initial = sprintf(grepl_lint_fmt, "startsWith", "initial"), + terminal = sprintf(grepl_lint_fmt, "endsWith", "terminal") + ) } get_regex_lint_data <- function(xml, xpath) { expr <- xml_find_all(xml, xpath) patterns <- get_r_string(expr) initial_anchor <- startsWith(patterns, "^") + terminal_anchor <- endsWith(patterns, "$") search_start <- 1L + initial_anchor - search_end <- nchar(patterns) - 1L + initial_anchor + search_end <- nchar(patterns) - terminal_anchor can_replace <- is_not_regex(substr(patterns, search_start, search_end)) - list(lint_expr = expr[can_replace], initial_anchor = initial_anchor[can_replace]) + initial_anchor <- initial_anchor[can_replace] + terminal_anchor <- terminal_anchor[can_replace] + + lint_type <- character(length(initial_anchor)) + + lint_type[initial_anchor & terminal_anchor] <- "both" + lint_type[initial_anchor & !terminal_anchor] <- "initial" + lint_type[!initial_anchor & terminal_anchor] <- "terminal" + list(lint_expr = expr[can_replace], lint_type = lint_type) } substr_xpath_parts <- glue(" @@ -125,38 +148,23 @@ string_boundary_linter <- function(allow_grepl = FALSE) { lints <- list() str_detect_lint_data <- get_regex_lint_data(xml, str_detect_xpath) - str_detect_lint_message <- paste( - ifelse( - str_detect_lint_data$initial_anchor, - "Use startsWith() to detect a fixed initial substring.", - "Use endsWith() to detect a fixed terminal substring." - ), - "Doing so is more readable and more efficient." - ) + str_detect_lint_message <- str_detect_message_map[str_detect_lint_data$lint_type] lints <- c(lints, xml_nodes_to_lints( str_detect_lint_data$lint_expr, source_expression = source_expression, - lint_message = str_detect_lint_message, + lint_message = paste(str_detect_lint_message, "Doing so is more readable and more efficient."), type = "warning" )) if (!allow_grepl) { grepl_lint_data <- get_regex_lint_data(xml, grepl_xpath) - grepl_replacement <- ifelse(grepl_lint_data$initial_anchor, "startsWith", "endsWith") - grepl_type <- ifelse(grepl_lint_data$initial_anchor, "initial", "terminal") - grepl_lint_message <- paste( - sprintf( - "Use !is.na(x) & %s(x, string) to detect a fixed %s substring, or, if missingness is not a concern, just %s.", - grepl_replacement, grepl_type, grepl_replacement - ), - "Doing so is more readable and more efficient." - ) + grepl_lint_message <- grepl_message_map[grepl_lint_data$lint_type] lints <- c(lints, xml_nodes_to_lints( grepl_lint_data$lint_expr, source_expression = source_expression, - lint_message = grepl_lint_message, + lint_message = paste(grepl_lint_message, "Doing so is more readable and more efficient."), type = "warning" )) } diff --git a/tests/testthat/test-string_boundary_linter.R b/tests/testthat/test-string_boundary_linter.R index 875304048..a76630675 100644 --- a/tests/testthat/test-string_boundary_linter.R +++ b/tests/testthat/test-string_boundary_linter.R @@ -71,78 +71,112 @@ test_that("string_boundary_linter blocks simple disallowed grepl() usages", { }) test_that("string_boundary_linter blocks simple disallowed str_detect() usages", { + linter <- string_boundary_linter() + expect_lint( "str_detect(x, '^a')", rex::rex("Use startsWith() to detect a fixed initial substring."), - string_boundary_linter() + linter ) expect_lint( "str_detect(x, 'a$')", rex::rex("Use endsWith() to detect a fixed terminal substring."), - string_boundary_linter() + linter ) }) test_that("string_boundary_linter blocks disallowed substr()/substring() usage", { - expect_lint( - "substr(x, 1L, 2L) == 'ab'", - rex::rex("Use startsWith() to detect an initial substring."), - string_boundary_linter() - ) + linter <- string_boundary_linter() + starts_message <- rex::rex("Use startsWith() to detect an initial substring.") + ends_message <- rex::rex("Use endsWith() to detect a terminal substring.") + + expect_lint("substr(x, 1L, 2L) == 'ab'", starts_message, linter) # end doesn't matter, just anchoring to 1L - expect_lint( - "substr(x, 1L, end) == 'ab'", - rex::rex("Use startsWith() to detect an initial substring."), - string_boundary_linter() - ) - expect_lint( - "substring(x, nchar(x) - 4L, nchar(x)) == 'abcde'", - rex::rex("Use endsWith() to detect a terminal substring."), - string_boundary_linter() - ) + expect_lint("substr(x, 1L, end) == 'ab'", starts_message, linter) + expect_lint("substring(x, nchar(x) - 4L, nchar(x)) == 'abcde'", ends_message, linter) # start doesn't matter, just anchoring to nchar(x) - expect_lint( - "substring(x, start, nchar(x)) == 'abcde'", - rex::rex("Use endsWith() to detect a terminal substring."), - string_boundary_linter() - ) + expect_lint("substring(x, start, nchar(x)) == 'abcde'", ends_message, linter) # more complicated expressions - expect_lint( - "substring(colnames(x), start, nchar(colnames(x))) == 'abc'", - rex::rex("Use endsWith() to detect a terminal substring."), - string_boundary_linter() - ) + expect_lint("substring(colnames(x), start, nchar(colnames(x))) == 'abc'", ends_message, linter) }) test_that("plain ^ or $ are skipped", { - expect_lint('grepl("^", x)', NULL, string_boundary_linter()) - expect_lint('grepl("$", x)', NULL, string_boundary_linter()) + linter <- string_boundary_linter() + + expect_lint('grepl("^", x)', NULL, linter) + expect_lint('grepl("$", x)', NULL, linter) }) test_that("substr inverted tests are caught as well", { + linter <- string_boundary_linter() + expect_lint( "substr(x, 1L, 2L) != 'ab'", rex::rex("Use startsWith() to detect an initial substring."), - string_boundary_linter() + linter ) expect_lint( "substring(x, nchar(x) - 4L, nchar(x)) != 'abcde'", rex::rex("Use endsWith() to detect a terminal substring."), - string_boundary_linter() + linter ) }) test_that("R>=4 raw strings are detected", { + linter <- string_boundary_linter() + skip_if_not_r_version("4.0.0") - expect_lint('grepl(R"(^.{3})", x)', NULL, string_boundary_linter()) + expect_lint('grepl(R"(^.{3})", x)', NULL, linter) expect_lint( 'grepl(R"(^abc)", x)', rex::rex("Use !is.na(x) & startsWith(x, string) to detect a fixed initial substring,"), - string_boundary_linter() + linter ) }) test_that("grepl() can optionally be ignored", { - expect_lint("grepl('^abc', x)", NULL, string_boundary_linter(allow_grepl = TRUE)) - expect_lint("grepl('xyz$', x)", NULL, string_boundary_linter(allow_grepl = TRUE)) + linter <- string_boundary_linter(allow_grepl = TRUE) + + expect_lint("grepl('^abc', x)", NULL, linter) + expect_lint("grepl('xyz$', x)", NULL, linter) +}) + +test_that("whole-string regex recommends ==, not {starts,ends}With()", { + linter <- string_boundary_linter() + lint_msg <- rex::rex("Use == to check for an exact string match.") + + expect_lint("grepl('^abc$', x)", lint_msg, linter) + expect_lint("grepl('^a\\\\.b$', x)", lint_msg, linter) + expect_lint("str_detect(x, '^abc$')", lint_msg, linter) + expect_lint("str_detect(x, '^a[.]b$')", lint_msg, linter) +}) + +test_that("vectorization + metadata work as intended", { + expect_lint( + trim_some("{ + substring(a, 1, 3) == 'abc' + substring(b, nchar(b) - 3, nchar(b)) == 'defg' + substr(c, 1, 3) == 'hij' + substr(d, nchar(d) - 3, nchar(d)) == 'klmn' + grepl('^abc', e) + grepl('abc$', f) + grepl('^abc$', g) + str_detect(h, '^abc') + str_detect(i, 'abc$') + str_detect(j, '^abc$') + }"), + list( + list("startsWith", line_number = 2L), + list("endsWith", line_number = 3L), + list("startsWith", line_number = 4L), + list("endsWith", line_number = 5L), + list("startsWith", line_number = 6L), + list("endsWith", line_number = 7L), + list("==", line_number = 8L), + list("startsWith", line_number = 9L), + list("endsWith", line_number = 10L), + list("==", line_number = 11L) + ), + string_boundary_linter() + ) })