-
Notifications
You must be signed in to change notification settings - Fork 187
New nzchar_linter #2275
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
New nzchar_linter #2275
Changes from all commits
e258c58
7de3816
4620aa7
377b7a4
0a0a021
91b1910
9d7b3b3
ab8ebbe
dc1e515
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
#' Require usage of nzchar where appropriate | ||
#' | ||
#' [nzchar()] efficiently determines which of a vector of strings are empty | ||
#' (i.e., are `""`). It should in most cases be used instead of | ||
#' constructions like `string == ""` or `nchar(string) == 0`. | ||
#' | ||
#' One crucial difference is in the default handling of `NA_character_`, i.e., | ||
#' missing strings. `nzchar(NA_character_)` is `TRUE`, while `NA_character_ == ""` | ||
#' and `nchar(NA_character_) == 0` are both `NA`. | ||
#' | ||
#' @examples | ||
#' # will produce lints | ||
#' lint( | ||
#' text = "x[x == '']", | ||
#' linters = nzchar_linter() | ||
#' ) | ||
#' | ||
#' lint( | ||
#' text = "x[nchar(x) > 0]", | ||
#' linters = nzchar_linter() | ||
#' ) | ||
#' | ||
#' # okay | ||
#' lint( | ||
#' text = "x[nchar(x) > 1]", | ||
#' linters = nzchar_linter() | ||
#' ) | ||
#' | ||
#' # nzchar()'s primary benefit is for vector input; | ||
#' # for guaranteed-scalar cases like if() conditions, comparing to "" is OK. | ||
#' lint( | ||
#' text = "if (x == '') y", | ||
#' linters = nzchar_linter() | ||
#' ) | ||
#' | ||
#' @evalRd rd_tags("nzchar_linter") | ||
#' @seealso [linters] for a complete list of linters available in lintr. | ||
#' @export | ||
nzchar_linter <- function() { | ||
comparator_nodes <- infix_metadata$xml_tag[infix_metadata$comparator] | ||
|
||
# use string-length to capture both "" and '' | ||
# if (any(x == "")) is not treated like it's part of if(), but | ||
# any(if (x == "") y else z) _is_ treated so. this condition looks for the | ||
# expr to be inside a call that's _not_ above an IF/WHILE. | ||
comparison_xpath <- glue(" | ||
//STR_CONST[string-length(text()) = 2] | ||
/parent::expr | ||
/parent::expr[ | ||
({ xp_or(comparator_nodes) }) | ||
and ( | ||
not(ancestor-or-self::expr[ | ||
preceding-sibling::IF | ||
or preceding-sibling::WHILE | ||
]) | ||
or ancestor-or-self::expr[ | ||
( | ||
preceding-sibling::expr[SYMBOL_FUNCTION_CALL] | ||
or preceding-sibling::OP-LEFT-BRACKET | ||
) and not( | ||
descendant-or-self::expr[IF or WHILE] | ||
) | ||
] | ||
) | ||
] | ||
") | ||
|
||
# nchar(., type="width") not strictly compatible with nzchar | ||
# unsure allowNA compatible, so allow it just in case (see TODO in tests) | ||
nchar_xpath <- glue(" | ||
//SYMBOL_FUNCTION_CALL[text() = 'nchar'] | ||
/parent::expr | ||
/parent::expr | ||
/parent::expr[ | ||
({ xp_or(comparator_nodes) }) | ||
and not(expr[SYMBOL_SUB[ | ||
( | ||
text() = 'type' | ||
and following-sibling::expr[1][STR_CONST[contains(text(), 'width')]] | ||
) or ( | ||
text() = 'allowNA' | ||
and following-sibling::expr[1][NUM_CONST[text() = 'TRUE']] | ||
) | ||
]]) | ||
and expr[NUM_CONST[text() = '0' or text() = '0L' or text() = '0.0']] | ||
] | ||
") | ||
|
||
keepna_note <- paste( | ||
"Whenever missing data is possible,", | ||
"please take care to use nzchar(., keepNA = TRUE);", | ||
"nzchar(NA) is TRUE by default." | ||
) | ||
|
||
Linter(function(source_expression) { | ||
if (!is_lint_level(source_expression, "expression")) { | ||
return(list()) | ||
} | ||
|
||
xml <- source_expression$xml_parsed_content | ||
|
||
comparison_expr <- xml_find_all(xml, comparison_xpath) | ||
comparison_lints <- xml_nodes_to_lints( | ||
comparison_expr, | ||
source_expression = source_expression, | ||
lint_message = paste( | ||
'Instead of comparing strings to "", use nzchar().', | ||
"Note that if x is a factor, you'll have use ", | ||
'as.character() to replicate an implicit conversion that happens in x == "".', | ||
keepna_note | ||
), | ||
type = "warning" | ||
) | ||
|
||
nchar_expr <- xml_find_all(xml, nchar_xpath) | ||
nchar_lints <- xml_nodes_to_lints( | ||
nchar_expr, | ||
source_expression = source_expression, | ||
lint_message = paste( | ||
"Instead of comparing nchar(x) to 0, use nzchar().", | ||
keepna_note | ||
), | ||
type = "warning" | ||
) | ||
|
||
c(comparison_lints, nchar_lints) | ||
}) | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
test_that("nzchar_linter skips allowed usages", { | ||
linter <- nzchar_linter() | ||
|
||
expect_lint("if (any(nzchar(x))) TRUE", NULL, linter) | ||
|
||
expect_lint("letters == 'a'", NULL, linter) | ||
|
||
expect_lint("which(nchar(x) == 4)", NULL, linter) | ||
expect_lint("which(nchar(x) != 2)", NULL, linter) | ||
}) | ||
|
||
test_that("nzchar_linter skips as appropriate for other nchar args", { | ||
linter <- nzchar_linter() | ||
|
||
# using type="width" can lead to 0-width strings that are counted as | ||
# nzchar, c.f. nchar("\u200b", type="width"), so don't lint this. | ||
# type="bytes" should be >= the value for the default (type="chars") | ||
expect_lint("nchar(x, type='width') == 0L", NULL, linter) | ||
|
||
# TODO(michaelchirico): check compatibility of nchar(., allowNA=TRUE). | ||
# there are no examples in ?nchar, nor any relevant usages on StackOverflow. | ||
# just assume they are incompatible now to be conservative. | ||
expect_lint("nchar(x, allowNA=TRUE) == 0L", NULL, linter) | ||
|
||
# nzchar also has keepNA argument so a drop-in switch is easy | ||
expect_lint( | ||
"nchar(x, keepNA=TRUE) == 0", | ||
rex::rex("Instead of comparing nchar(x) to 0"), | ||
linter | ||
) | ||
}) | ||
|
||
test_that("nzchar_linter blocks simple disallowed usages", { | ||
linter <- nzchar_linter() | ||
lint_msg_quote <- rex::rex('Instead of comparing strings to "", use nzchar()') | ||
lint_msg_nchar <- rex::rex("Instead of comparing nchar(x) to 0") | ||
|
||
expect_lint("which(x == '')", lint_msg_quote, linter) | ||
expect_lint("any(nchar(x) >= 0)", lint_msg_nchar, linter) | ||
expect_lint("all(nchar(x) == 0L)", lint_msg_nchar, linter) | ||
expect_lint("sum(0.0 < nchar(x))", lint_msg_nchar, linter) | ||
}) | ||
|
||
test_that("nzchar_linter skips comparison to '' in if/while statements", { | ||
linter <- nzchar_linter() | ||
lint_msg_quote <- rex::rex('Instead of comparing strings to "", use nzchar()') | ||
lint_msg_nchar <- rex::rex("Instead of comparing nchar(x) to 0") | ||
|
||
# still lint nchar() comparisons | ||
expect_lint("if (nchar(x) > 0) TRUE", lint_msg_nchar, linter) | ||
expect_lint('if (x == "") TRUE', NULL, linter) | ||
expect_lint('while (x == "") TRUE', NULL, linter) | ||
|
||
# nested versions, a la nesting issues with vector_logic_linter | ||
expect_lint('if (TRUE || (x == "" && FALSE)) TRUE', NULL, linter) | ||
expect_lint('if (TRUE && x == "" && FALSE) TRUE', NULL, linter) | ||
expect_lint('if (any(x == "")) TRUE', lint_msg_quote, linter) | ||
expect_lint('if (TRUE || any(x == "" | FALSE)) TRUE', lint_msg_quote, linter) | ||
expect_lint('foo(if (x == "") y else z)', NULL, linter) | ||
}) | ||
|
||
test_that("multiple lints are generated correctly", { | ||
expect_lint( | ||
trim_some("{ | ||
a == '' | ||
nchar(b) != 0 | ||
}"), | ||
list( | ||
list(rex::rex('Instead of comparing strings to ""'), line_number = 2L), | ||
list(rex::rex("Instead of comparing nchar(x) to 0"), line_number = 3L) | ||
), | ||
nzchar_linter() | ||
) | ||
}) |
Uh oh!
There was an error while loading. Please reload this page.