Skip to content

Commit dc93b54

Browse files
Merge 560d296 into c6dab82
2 parents c6dab82 + 560d296 commit dc93b54

File tree

3 files changed

+99
-56
lines changed

3 files changed

+99
-56
lines changed

NEWS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
* `unnecessary_lambda_linter` is extended to encourage vectorized comparisons where possible, e.g. `sapply(x, sum) > 0` instead of `sapply(x, function(x) sum(x) > 0)` (part of #884, @MichaelChirico). Toggle this behavior with argument `allow_comparison`.
3232
* `backport_linter()` is slightly faster by moving expensive computations outside the linting function (#2339, #2348, @AshesITR and @MichaelChirico).
3333
* `Linter()` has a new argument `linter_level` (default `NA`). This is used by `lint()` to more efficiently check for expression levels than the idiom `if (!is_lint_level(...)) { return(list()) }` (#2351, @AshesITR).
34+
* `string_boundary_linter()` recognizes regular expression calls like `grepl("^abc$", x)` that can be replaced by using `==` instead (#1613, @MichaelChirico).
3435
* `unreachable_code_linter()` has an argument `allow_comment_regex` for customizing which "terminal" comments to exclude (#2327, @MichaelChirico). `# nolint end` comments are always excluded, as are {covr} exclusions (e.g. `# nocov end`) by default.
3536
* `format()` and `print()` methods for `lint` and `lints` classes get a new option `width` to control the printing width of lint messages (#1884, @MichaelChirico). The default is controlled by a new option `lintr.format_width`; if unset, no wrapping occurs (matching earlier behavior).
3637

R/string_boundary_linter.R

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,11 @@ string_boundary_linter <- function(allow_grepl = FALSE) {
6666
/following-sibling::expr[2]
6767
/STR_CONST[ {str_cond} ]
6868
")
69+
str_detect_message_map <- c(
70+
both = "Use == to check for an exact string match.",
71+
initial = "Use startsWith() to detect a fixed initial substring.",
72+
terminal = "Use endsWith() to detect a fixed terminal substring."
73+
)
6974

7075
if (!allow_grepl) {
7176
grepl_xpath <- glue("
@@ -84,16 +89,34 @@ string_boundary_linter <- function(allow_grepl = FALSE) {
8489
/expr[2]
8590
/STR_CONST[ {str_cond} ]
8691
")
92+
grepl_lint_fmt <- paste(
93+
"Use !is.na(x) & %1$s(x, string) to detect a fixed %2$s substring, or,",
94+
"if missingness is not a concern, just %1$s()."
95+
)
96+
grepl_message_map <- c(
97+
both = "Use == to check for an exact string match.",
98+
initial = sprintf(grepl_lint_fmt, "startsWith", "initial"),
99+
terminal = sprintf(grepl_lint_fmt, "endsWith", "terminal")
100+
)
87101
}
88102

89103
get_regex_lint_data <- function(xml, xpath) {
90104
expr <- xml_find_all(xml, xpath)
91105
patterns <- get_r_string(expr)
92106
initial_anchor <- startsWith(patterns, "^")
107+
terminal_anchor <- endsWith(patterns, "$")
93108
search_start <- 1L + initial_anchor
94-
search_end <- nchar(patterns) - 1L + initial_anchor
109+
search_end <- nchar(patterns) - terminal_anchor
95110
can_replace <- is_not_regex(substr(patterns, search_start, search_end))
96-
list(lint_expr = expr[can_replace], initial_anchor = initial_anchor[can_replace])
111+
initial_anchor <- initial_anchor[can_replace]
112+
terminal_anchor <- terminal_anchor[can_replace]
113+
114+
lint_type <- character(length(initial_anchor))
115+
116+
lint_type[initial_anchor & terminal_anchor] <- "both"
117+
lint_type[initial_anchor & !terminal_anchor] <- "initial"
118+
lint_type[!initial_anchor & terminal_anchor] <- "terminal"
119+
list(lint_expr = expr[can_replace], lint_type = lint_type)
97120
}
98121

99122
substr_xpath_parts <- glue("
@@ -125,38 +148,23 @@ string_boundary_linter <- function(allow_grepl = FALSE) {
125148
lints <- list()
126149

127150
str_detect_lint_data <- get_regex_lint_data(xml, str_detect_xpath)
128-
str_detect_lint_message <- paste(
129-
ifelse(
130-
str_detect_lint_data$initial_anchor,
131-
"Use startsWith() to detect a fixed initial substring.",
132-
"Use endsWith() to detect a fixed terminal substring."
133-
),
134-
"Doing so is more readable and more efficient."
135-
)
151+
str_detect_lint_message <- str_detect_message_map[str_detect_lint_data$lint_type]
136152

137153
lints <- c(lints, xml_nodes_to_lints(
138154
str_detect_lint_data$lint_expr,
139155
source_expression = source_expression,
140-
lint_message = str_detect_lint_message,
156+
lint_message = paste(str_detect_lint_message, "Doing so is more readable and more efficient."),
141157
type = "warning"
142158
))
143159

144160
if (!allow_grepl) {
145161
grepl_lint_data <- get_regex_lint_data(xml, grepl_xpath)
146-
grepl_replacement <- ifelse(grepl_lint_data$initial_anchor, "startsWith", "endsWith")
147-
grepl_type <- ifelse(grepl_lint_data$initial_anchor, "initial", "terminal")
148-
grepl_lint_message <- paste(
149-
sprintf(
150-
"Use !is.na(x) & %s(x, string) to detect a fixed %s substring, or, if missingness is not a concern, just %s.",
151-
grepl_replacement, grepl_type, grepl_replacement
152-
),
153-
"Doing so is more readable and more efficient."
154-
)
162+
grepl_lint_message <- grepl_message_map[grepl_lint_data$lint_type]
155163

156164
lints <- c(lints, xml_nodes_to_lints(
157165
grepl_lint_data$lint_expr,
158166
source_expression = source_expression,
159-
lint_message = grepl_lint_message,
167+
lint_message = paste(grepl_lint_message, "Doing so is more readable and more efficient."),
160168
type = "warning"
161169
))
162170
}

tests/testthat/test-string_boundary_linter.R

Lines changed: 69 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -71,78 +71,112 @@ test_that("string_boundary_linter blocks simple disallowed grepl() usages", {
7171
})
7272

7373
test_that("string_boundary_linter blocks simple disallowed str_detect() usages", {
74+
linter <- string_boundary_linter()
75+
7476
expect_lint(
7577
"str_detect(x, '^a')",
7678
rex::rex("Use startsWith() to detect a fixed initial substring."),
77-
string_boundary_linter()
79+
linter
7880
)
7981
expect_lint(
8082
"str_detect(x, 'a$')",
8183
rex::rex("Use endsWith() to detect a fixed terminal substring."),
82-
string_boundary_linter()
84+
linter
8385
)
8486
})
8587

8688
test_that("string_boundary_linter blocks disallowed substr()/substring() usage", {
87-
expect_lint(
88-
"substr(x, 1L, 2L) == 'ab'",
89-
rex::rex("Use startsWith() to detect an initial substring."),
90-
string_boundary_linter()
91-
)
89+
linter <- string_boundary_linter()
90+
starts_message <- rex::rex("Use startsWith() to detect an initial substring.")
91+
ends_message <- rex::rex("Use endsWith() to detect a terminal substring.")
92+
93+
expect_lint("substr(x, 1L, 2L) == 'ab'", starts_message, linter)
9294
# end doesn't matter, just anchoring to 1L
93-
expect_lint(
94-
"substr(x, 1L, end) == 'ab'",
95-
rex::rex("Use startsWith() to detect an initial substring."),
96-
string_boundary_linter()
97-
)
98-
expect_lint(
99-
"substring(x, nchar(x) - 4L, nchar(x)) == 'abcde'",
100-
rex::rex("Use endsWith() to detect a terminal substring."),
101-
string_boundary_linter()
102-
)
95+
expect_lint("substr(x, 1L, end) == 'ab'", starts_message, linter)
96+
expect_lint("substring(x, nchar(x) - 4L, nchar(x)) == 'abcde'", ends_message, linter)
10397
# start doesn't matter, just anchoring to nchar(x)
104-
expect_lint(
105-
"substring(x, start, nchar(x)) == 'abcde'",
106-
rex::rex("Use endsWith() to detect a terminal substring."),
107-
string_boundary_linter()
108-
)
98+
expect_lint("substring(x, start, nchar(x)) == 'abcde'", ends_message, linter)
10999
# more complicated expressions
110-
expect_lint(
111-
"substring(colnames(x), start, nchar(colnames(x))) == 'abc'",
112-
rex::rex("Use endsWith() to detect a terminal substring."),
113-
string_boundary_linter()
114-
)
100+
expect_lint("substring(colnames(x), start, nchar(colnames(x))) == 'abc'", ends_message, linter)
115101
})
116102

117103
test_that("plain ^ or $ are skipped", {
118-
expect_lint('grepl("^", x)', NULL, string_boundary_linter())
119-
expect_lint('grepl("$", x)', NULL, string_boundary_linter())
104+
linter <- string_boundary_linter()
105+
106+
expect_lint('grepl("^", x)', NULL, linter)
107+
expect_lint('grepl("$", x)', NULL, linter)
120108
})
121109

122110
test_that("substr inverted tests are caught as well", {
111+
linter <- string_boundary_linter()
112+
123113
expect_lint(
124114
"substr(x, 1L, 2L) != 'ab'",
125115
rex::rex("Use startsWith() to detect an initial substring."),
126-
string_boundary_linter()
116+
linter
127117
)
128118
expect_lint(
129119
"substring(x, nchar(x) - 4L, nchar(x)) != 'abcde'",
130120
rex::rex("Use endsWith() to detect a terminal substring."),
131-
string_boundary_linter()
121+
linter
132122
)
133123
})
134124

135125
test_that("R>=4 raw strings are detected", {
126+
linter <- string_boundary_linter()
127+
136128
skip_if_not_r_version("4.0.0")
137-
expect_lint('grepl(R"(^.{3})", x)', NULL, string_boundary_linter())
129+
expect_lint('grepl(R"(^.{3})", x)', NULL, linter)
138130
expect_lint(
139131
'grepl(R"(^abc)", x)',
140132
rex::rex("Use !is.na(x) & startsWith(x, string) to detect a fixed initial substring,"),
141-
string_boundary_linter()
133+
linter
142134
)
143135
})
144136

145137
test_that("grepl() can optionally be ignored", {
146-
expect_lint("grepl('^abc', x)", NULL, string_boundary_linter(allow_grepl = TRUE))
147-
expect_lint("grepl('xyz$', x)", NULL, string_boundary_linter(allow_grepl = TRUE))
138+
linter <- string_boundary_linter(allow_grepl = TRUE)
139+
140+
expect_lint("grepl('^abc', x)", NULL, linter)
141+
expect_lint("grepl('xyz$', x)", NULL, linter)
142+
})
143+
144+
test_that("whole-string regex recommends ==, not {starts,ends}With()", {
145+
linter <- string_boundary_linter()
146+
lint_msg <- rex::rex("Use == to check for an exact string match.")
147+
148+
expect_lint("grepl('^abc$', x)", lint_msg, linter)
149+
expect_lint("grepl('^a\\\\.b$', x)", lint_msg, linter)
150+
expect_lint("str_detect(x, '^abc$')", lint_msg, linter)
151+
expect_lint("str_detect(x, '^a[.]b$')", lint_msg, linter)
152+
})
153+
154+
test_that("vectorization + metadata work as intended", {
155+
expect_lint(
156+
trim_some("{
157+
substring(a, 1, 3) == 'abc'
158+
substring(b, nchar(b) - 3, nchar(b)) == 'defg'
159+
substr(c, 1, 3) == 'hij'
160+
substr(d, nchar(d) - 3, nchar(d)) == 'klmn'
161+
grepl('^abc', e)
162+
grepl('abc$', f)
163+
grepl('^abc$', g)
164+
str_detect(h, '^abc')
165+
str_detect(i, 'abc$')
166+
str_detect(j, '^abc$')
167+
}"),
168+
list(
169+
list("startsWith", line_number = 2L),
170+
list("endsWith", line_number = 3L),
171+
list("startsWith", line_number = 4L),
172+
list("endsWith", line_number = 5L),
173+
list("startsWith", line_number = 6L),
174+
list("endsWith", line_number = 7L),
175+
list("==", line_number = 8L),
176+
list("startsWith", line_number = 9L),
177+
list("endsWith", line_number = 10L),
178+
list("==", line_number = 11L)
179+
),
180+
string_boundary_linter()
181+
)
148182
})

0 commit comments

Comments
 (0)