From 5c16444c4017ba2cf7d20127ae2673f6d3f39698 Mon Sep 17 00:00:00 2001
From: Brock
Date: Mon, 22 Nov 2021 19:28:22 -0800
Subject: [PATCH] CLN: TODOs/noqas/xfails

---
 asv_bench/benchmarks/pandas_vb_common.py      |  2 +-
 pandas/core/tools/datetimes.py                |  2 +-
 pandas/io/parquet.py                          |  2 +-
 .../arrays/categorical/test_constructors.py   |  6 +-
 pandas/tests/arrays/sparse/test_libsparse.py  |  5 +-
 pandas/tests/extension/base/dim2.py           | 10 ++--
 .../tests/groupby/aggregate/test_aggregate.py |  2 -
 pandas/tests/indexing/test_coercion.py        |  7 +--
 pandas/tests/io/pytables/test_append.py       |  4 +-
 pandas/tests/io/pytables/test_select.py       | 12 ++--
 pandas/tests/io/pytables/test_store.py        |  4 +-
 pandas/tests/io/test_sql.py                   |  2 +-
 pandas/tests/libs/test_join.py                |  8 +--
 pandas/tests/plotting/test_boxplot_method.py  |  1 -
 pandas/tests/series/test_arithmetic.py        | 59 ++++++-------------
 pandas/tests/test_downstream.py               | 16 +++--
 pandas/tests/tseries/offsets/test_dst.py      | 12 ++--
 17 files changed, 64 insertions(+), 90 deletions(-)

diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py
index ed44102700dc6..d3168bde0a783 100644
--- a/asv_bench/benchmarks/pandas_vb_common.py
+++ b/asv_bench/benchmarks/pandas_vb_common.py
@@ -17,7 +17,7 @@
 try:
     import pandas._testing as tm
 except ImportError:
-    import pandas.util.testing as tm  # noqa
+    import pandas.util.testing as tm  # noqa:F401


 numeric_dtypes = [
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 67a6975c21fdd..f40f227259998 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -28,7 +28,7 @@
     nat_strings,
     parsing,
 )
-from pandas._libs.tslibs.parsing import (  # noqa
+from pandas._libs.tslibs.parsing import (  # noqa:F401
     DateParseError,
     format_is_iso,
     guess_datetime_format,
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index c4b9e36472092..40cd795c4350d 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -151,7 +151,7 @@ def __init__(self):
         import pyarrow.parquet

         # import utils to register the pyarrow extension types
-        import pandas.core.arrays._arrow_utils  # noqa
+        import pandas.core.arrays._arrow_utils  # noqa:F401

         self.api = pyarrow
diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
index 50ecbb9eb705a..c144c82486be9 100644
--- a/pandas/tests/arrays/categorical/test_constructors.py
+++ b/pandas/tests/arrays/categorical/test_constructors.py
@@ -236,14 +236,14 @@ def test_constructor(self):
         # - when the first is an integer dtype and the second is not
         # - when the resulting codes are all -1/NaN
         with tm.assert_produces_warning(None):
-            c_old = Categorical([0, 1, 2, 0, 1, 2], categories=["a", "b", "c"])
+            Categorical([0, 1, 2, 0, 1, 2], categories=["a", "b", "c"])

         with tm.assert_produces_warning(None):
-            c_old = Categorical([0, 1, 2, 0, 1, 2], categories=[3, 4, 5])  # noqa
+            Categorical([0, 1, 2, 0, 1, 2], categories=[3, 4, 5])

         # the next one are from the old docs
         with tm.assert_produces_warning(None):
-            c_old2 = Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3])  # noqa
+            Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3])
             cat = Categorical([1, 2], categories=[1, 2, 3])

         # this is a legitimate constructor
diff --git a/pandas/tests/arrays/sparse/test_libsparse.py b/pandas/tests/arrays/sparse/test_libsparse.py
index c1466882b8443..db63bba4d4eaf 100644
--- a/pandas/tests/arrays/sparse/test_libsparse.py
+++ b/pandas/tests/arrays/sparse/test_libsparse.py
@@ -460,11 +460,10 @@ def test_check_integrity(self):
         lengths = []

         # 0-length OK
-        # TODO: index variables are not used...is that right?
-        index = BlockIndex(0, locs, lengths)
+        BlockIndex(0, locs, lengths)

         # also OK even though empty
-        index = BlockIndex(1, locs, lengths)  # noqa
+        BlockIndex(1, locs, lengths)

         msg = "Block 0 extends beyond end"
         with pytest.raises(ValueError, match=msg):
diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py
index b4a817cbc37ec..b4ddc05367af5 100644
--- a/pandas/tests/extension/base/dim2.py
+++ b/pandas/tests/extension/base/dim2.py
@@ -4,6 +4,7 @@
 import numpy as np
 import pytest

+from pandas._libs.missing import is_matching_na
 from pandas.compat import (
     IS64,
     is_platform_windows,
@@ -168,7 +169,7 @@ def test_reductions_2d_axis_none(self, data, method, request):
             assert type(err_result) == type(err_expected)
             return

-        assert result == expected  # TODO: or matching NA
+        assert is_matching_na(result, expected) or result == expected

     @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
     def test_reductions_2d_axis0(self, data, method, request):
@@ -247,8 +248,5 @@ def test_reductions_2d_axis1(self, data, method, request):
         # not necessarily type/dtype-preserving, so weaker assertions
         assert result.shape == (1,)
         expected_scalar = getattr(data, method)()
-        if pd.isna(result[0]):
-            # TODO: require matching NA
-            assert pd.isna(expected_scalar), expected_scalar
-        else:
-            assert result[0] == expected_scalar
+        res = result[0]
+        assert is_matching_na(res, expected_scalar) or res == expected_scalar
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index f178f85154319..9ebe6f8d8c97e 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -57,8 +57,6 @@ def test_agg_must_agg(df):


 def test_agg_ser_multi_key(df):
-    # TODO(wesm): unused
-    ser = df.C  # noqa

     f = lambda x: x.sum()
     results = df.C.groupby([df.A, df.B]).aggregate(f)
diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py
index 27aeb411e36f0..0174219892d92 100644
--- a/pandas/tests/indexing/test_coercion.py
+++ b/pandas/tests/indexing/test_coercion.py
@@ -780,7 +780,6 @@ def test_where_index_datetime(self, fill_val):
         self._assert_where_conversion(obj, cond, values, exp, exp_dtype)

-    @pytest.mark.xfail(reason="GH 22839: do not ignore timezone, must be object")
     def test_where_index_datetime64tz(self):
         fill_val = pd.Timestamp("2012-01-01", tz="US/Eastern")
         exp_dtype = object
@@ -795,9 +794,9 @@ def test_where_index_datetime64tz(self):
         assert obj.dtype == "datetime64[ns]"
         cond = pd.Index([True, False, True, False])

-        msg = "Index\\(\\.\\.\\.\\) must be called with a collection of some kind"
-        with pytest.raises(TypeError, match=msg):
-            obj.where(cond, fill_val)
+        res = obj.where(cond, fill_val)
+        expected = pd.Index([obj[0], fill_val, obj[2], fill_val], dtype=object)
+        tm.assert_index_equal(res, expected)

         values = pd.Index(pd.date_range(fill_val, periods=4))
         exp = pd.Index(
diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py
index b5f9e6e74ece9..5845bdc5df437 100644
--- a/pandas/tests/io/pytables/test_append.py
+++ b/pandas/tests/io/pytables/test_append.py
@@ -896,9 +896,7 @@ def test_append_to_multiple_dropna(setup_path):
         tm.assert_index_equal(store.select("df1").index, store.select("df2").index)


-@pytest.mark.xfail(
-    run=False, reason="append_to_multiple_dropna_false is not raising as failed"
-)
+@pytest.mark.xfail(reason="append_to_multiple_dropna_false is not raising as failed")
 def test_append_to_multiple_dropna_false(setup_path):
     df1 = tm.makeTimeDataFrame()
     df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py
index 56d48945d5852..fc8d4506abda0 100644
--- a/pandas/tests/io/pytables/test_select.py
+++ b/pandas/tests/io/pytables/test_select.py
@@ -265,7 +265,7 @@ def test_select_dtypes(setup_path):
         expected = df[df["A"] > 0]

         store.append("df", df, data_columns=True)
-        np_zero = np.float64(0)  # noqa
+        np_zero = np.float64(0)  # noqa:F841
         result = store.select("df", where=["A>np_zero"])
         tm.assert_frame_equal(expected, result)
@@ -683,17 +683,17 @@ def test_frame_select_complex2(setup_path):
     expected = read_hdf(hh, "df", where="l1=[2, 3, 4]")

     # scope with list like
-    l = selection.index.tolist()  # noqa
+    l0 = selection.index.tolist()  # noqa:F841
     store = HDFStore(hh)
-    result = store.select("df", where="l1=l")
+    result = store.select("df", where="l1=l0")
     tm.assert_frame_equal(result, expected)
     store.close()

-    result = read_hdf(hh, "df", where="l1=l")
+    result = read_hdf(hh, "df", where="l1=l0")
     tm.assert_frame_equal(result, expected)

     # index
-    index = selection.index  # noqa
+    index = selection.index  # noqa:F841
     result = read_hdf(hh, "df", where="l1=index")
     tm.assert_frame_equal(result, expected)
@@ -928,7 +928,7 @@ def test_query_compare_column_type(setup_path):
     with ensure_clean_store(setup_path) as store:
         store.append("test", df, format="table", data_columns=True)

-        ts = Timestamp("2014-01-01")  # noqa
+        ts = Timestamp("2014-01-01")  # noqa:F841
         result = store.select("test", where="real_date > ts")
         expected = df.loc[[1], :]
         tm.assert_frame_equal(expected, result)
diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
index 856a2ca15ec4a..83c86d4da05e6 100644
--- a/pandas/tests/io/pytables/test_store.py
+++ b/pandas/tests/io/pytables/test_store.py
@@ -533,7 +533,9 @@ def test_same_name_scoping(setup_path):
         result = store.select("df", "index>datetime.datetime(2013,1,5)")
         tm.assert_frame_equal(result, expected)

-        from datetime import datetime  # noqa
+        # changes what 'datetime' points to in the namespace where
+        # 'select' does the lookup
+        from datetime import datetime  # noqa:F401

         # technically an error, but allow it
         result = store.select("df", "index>datetime.datetime(2013,1,5)")
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 52c1fc51a4c8d..92a53a443b217 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -1251,7 +1251,7 @@ def test_database_uri_string(self, test_frame1):
         # in sqlalchemy.create_engine -> test passing of this error to user
         try:
             # the rest of this test depends on pg8000's being absent
-            import pg8000  # noqa
+            import pg8000  # noqa:F401

             pytest.skip("pg8000 is installed")
         except ImportError:
diff --git a/pandas/tests/libs/test_join.py b/pandas/tests/libs/test_join.py
index 17601d30739e3..ba2e6e7130929 100644
--- a/pandas/tests/libs/test_join.py
+++ b/pandas/tests/libs/test_join.py
@@ -112,8 +112,8 @@ def test_cython_right_outer_join(self):
         exp_rs = exp_rs.take(exp_ri)
         exp_rs[exp_ri == -1] = -1

-        tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False)
-        tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False)
+        tm.assert_numpy_array_equal(ls, exp_ls)
+        tm.assert_numpy_array_equal(rs, exp_rs)

     def test_cython_inner_join(self):
         left = np.array([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.intp)
@@ -134,8 +134,8 @@ def test_cython_inner_join(self):
         exp_rs = exp_rs.take(exp_ri)
         exp_rs[exp_ri == -1] = -1

-        tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False)
-        tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False)
+        tm.assert_numpy_array_equal(ls, exp_ls)
+        tm.assert_numpy_array_equal(rs, exp_rs)


 @pytest.mark.parametrize("readonly", [True, False])
diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py
index ce32e5801e461..5c543f96cb55f 100644
--- a/pandas/tests/plotting/test_boxplot_method.py
+++ b/pandas/tests/plotting/test_boxplot_method.py
@@ -93,7 +93,6 @@ def test_boxplot_return_type_none(self):

     def test_boxplot_return_type_legacy(self):
         # API change in https://github.com/pandas-dev/pandas/pull/7096
-        import matplotlib as mpl  # noqa

         df = DataFrame(
             np.random.randn(6, 4),
diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py
index 099a6bada1460..efb7b61534619 100644
--- a/pandas/tests/series/test_arithmetic.py
+++ b/pandas/tests/series/test_arithmetic.py
@@ -96,10 +96,10 @@ def _constructor(self):
     def test_flex_add_scalar_fill_value(self):
         # GH12723
-        s = Series([0, 1, np.nan, 3, 4, 5])
+        ser = Series([0, 1, np.nan, 3, 4, 5])

-        exp = s.fillna(0).add(2)
-        res = s.add(2, fill_value=0)
+        exp = ser.fillna(0).add(2)
+        res = ser.add(2, fill_value=0)
         tm.assert_series_equal(res, exp)


 pairings = [(Series.div, operator.truediv, 1), (Series.rdiv, ops.rtruediv, 1)]
@@ -226,12 +226,12 @@ def test_add_na_handling(self):
         from datetime import date
         from decimal import Decimal

-        s = Series(
+        ser = Series(
             [Decimal("1.3"), Decimal("2.3")], index=[date(2012, 1, 1), date(2012, 1, 2)]
         )

-        result = s + s.shift(1)
-        result2 = s.shift(1) + s
+        result = ser + ser.shift(1)
+        result2 = ser.shift(1) + ser
         assert isna(result[0])
         assert isna(result2[0])
@@ -399,15 +399,12 @@ def test_ser_flex_cmp_return_dtypes_empty(self, opname):
         expected = np.dtype("bool")
         assert result == expected

-    @pytest.mark.parametrize(
-        "op",
-        [operator.eq, operator.ne, operator.le, operator.lt, operator.ge, operator.gt],
-    )
     @pytest.mark.parametrize(
         "names", [(None, None, None), ("foo", "bar", None), ("baz", "baz", "baz")]
     )
-    def test_ser_cmp_result_names(self, names, op):
+    def test_ser_cmp_result_names(self, names, comparison_op):
         # datetime64 dtype
+        op = comparison_op
         dti = date_range("1949-06-07 03:00:00", freq="H", periods=5, name=names[0])
         ser = Series(dti).rename(names[1])
         result = op(ser, dti)
@@ -583,9 +580,10 @@ def test_comparison_tuples(self):
         expected = Series([False, False])
         tm.assert_series_equal(result, expected)

-        s = Series([frozenset([1]), frozenset([1, 2])])
+    def test_comparison_frozenset(self):
+        ser = Series([frozenset([1]), frozenset([1, 2])])

-        result = s == frozenset([1])
+        result = ser == frozenset([1])
         expected = Series([True, False])
         tm.assert_series_equal(result, expected)
@@ -649,8 +647,8 @@ def test_comp_ops_df_compat(self, left, right, frame_or_series):

     def test_compare_series_interval_keyword(self):
         # GH#25338
-        s = Series(["IntervalA", "IntervalB", "IntervalC"])
-        result = s == "IntervalA"
+        ser = Series(["IntervalA", "IntervalB", "IntervalC"])
+        result = ser == "IntervalA"
         expected = Series([True, False, False])
         tm.assert_series_equal(result, expected)
@@ -662,19 +660,6 @@ def test_compare_series_interval_keyword(self):


 class TestTimeSeriesArithmetic:
-    # TODO: De-duplicate with test below
-    def test_series_add_tz_mismatch_converts_to_utc_duplicate(self):
-        rng = date_range("1/1/2011", periods=10, freq="H", tz="US/Eastern")
-        ser = Series(np.random.randn(len(rng)), index=rng)
-
-        ts_moscow = ser.tz_convert("Europe/Moscow")
-
-        result = ser + ts_moscow
-        assert result.index.tz is pytz.utc
-
-        result = ts_moscow + ser
-        assert result.index.tz is pytz.utc
-
     def test_series_add_tz_mismatch_converts_to_utc(self):
         rng = date_range("1/1/2011", periods=100, freq="H", tz="utc")
@@ -697,16 +682,6 @@ def test_series_add_tz_mismatch_converts_to_utc(self):
         assert result.index.tz == pytz.UTC
         tm.assert_series_equal(result, expected)

-    # TODO: redundant with test_series_add_tz_mismatch_converts_to_utc?
-    def test_series_arithmetic_mismatched_tzs_convert_to_utc(self):
-        base = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz="UTC")
-        idx1 = base.tz_convert("Asia/Tokyo")[:2]
-        idx2 = base.tz_convert("US/Eastern")[1:]
-
-        res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2)
-        expected = Series([np.nan, 3, np.nan], index=base)
-        tm.assert_series_equal(res, expected)
-
     def test_series_add_aware_naive_raises(self):
         rng = date_range("1/1/2011", periods=10, freq="H")
         ser = Series(np.random.randn(len(rng)), index=rng)
@@ -871,20 +846,20 @@ def test_none_comparison(series_with_simple_index):
     series.iloc[0] = np.nan

     # noinspection PyComparisonWithNone
-    result = series == None  # noqa
+    result = series == None  # noqa:E711
     assert not result.iat[0]
     assert not result.iat[1]

     # noinspection PyComparisonWithNone
-    result = series != None  # noqa
+    result = series != None  # noqa:E711
     assert result.iat[0]
     assert result.iat[1]

-    result = None == series  # noqa
+    result = None == series  # noqa:E711
     assert not result.iat[0]
     assert not result.iat[1]

-    result = None != series  # noqa
+    result = None != series  # noqa:E711
     assert result.iat[0]
     assert result.iat[1]
diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py
index 1afd431a5da2d..3ea95b8a56eea 100644
--- a/pandas/tests/test_downstream.py
+++ b/pandas/tests/test_downstream.py
@@ -32,8 +32,8 @@ def df():

 @pytest.mark.filterwarnings("ignore:.*64Index is deprecated:FutureWarning")
 def test_dask(df):
-    toolz = import_module("toolz")  # noqa
-    dask = import_module("dask")  # noqa
+    toolz = import_module("toolz")  # noqa:F841
+    dask = import_module("dask")  # noqa:F841

     import dask.dataframe as dd
@@ -44,7 +44,7 @@ def test_dask(df):


 def test_xarray(df):
-    xarray = import_module("xarray")  # noqa
+    xarray = import_module("xarray")  # noqa:F841

     assert df.to_xarray() is not None
@@ -109,7 +109,7 @@ def test_statsmodels():

 @pytest.mark.filterwarnings("ignore:can't:ImportWarning")
 def test_scikit_learn(df):
-    sklearn = import_module("sklearn")  # noqa
+    sklearn = import_module("sklearn")  # noqa:F841
     from sklearn import (
         datasets,
         svm,
@@ -133,10 +133,14 @@ def test_seaborn():


 def test_pandas_gbq(df):
-    pandas_gbq = import_module("pandas_gbq")  # noqa
+    pandas_gbq = import_module("pandas_gbq")  # noqa:F841


-@pytest.mark.xfail(reason="0.8.1 tries to import urlencode from pd.io.common")
+@pytest.mark.xfail(
+    raises=ValueError,
+    reason="The Quandl API key must be provided either through the api_key "
+    "variable or through the environmental variable QUANDL_API_KEY",
+)
 @tm.network
 def test_pandas_datareader():
diff --git a/pandas/tests/tseries/offsets/test_dst.py b/pandas/tests/tseries/offsets/test_dst.py
index 9721d7fbd9067..50c5a91fc2390 100644
--- a/pandas/tests/tseries/offsets/test_dst.py
+++ b/pandas/tests/tseries/offsets/test_dst.py
@@ -177,18 +177,20 @@ def test_all_offset_classes(self, tup):
         assert first == second


-@pytest.mark.xfail(
-    strict=False, reason="'Africa/Kinshasa' test case fails under pytz=2017.3"
-)
 @pytest.mark.parametrize(
     "original_dt, target_dt, offset, tz",
     [
-        (
+        pytest.param(
             Timestamp("1900-01-01"),
             Timestamp("1905-07-01"),
             MonthBegin(66),
             "Africa/Kinshasa",
-        ),  # GH41906
+            marks=pytest.mark.xfail(
+                # error: Module has no attribute "__version__"
+                float(pytz.__version__) <= 2020.1,  # type: ignore[attr-defined]
+                reason="GH#41906",
+            ),
+        ),
         (
             Timestamp("2021-10-01 01:15"),
             Timestamp("2021-10-31 01:15"),