Skip to content

TYPING: type hints for core.indexing #27527

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
43 changes: 27 additions & 16 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import textwrap
from typing import Tuple
from typing import TYPE_CHECKING, List, Optional, Tuple, Union
import warnings

import numpy as np
Expand All @@ -25,10 +25,15 @@
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
from pandas.core.dtypes.missing import _infer_fill_value, isna

from pandas._typing import Axis
import pandas.core.common as com
from pandas.core.index import Index, InvalidIndexError, MultiIndex
from pandas.core.indexers import is_list_like_indexer, length_of_indexer

if TYPE_CHECKING:
from pandas.core.generic import NDFrame
from pandas import DataFrame, Series, DatetimeArray # noqa: F401


# the supported indexers
def get_indexers_list():
Expand Down Expand Up @@ -104,7 +109,7 @@ class _NDFrameIndexer(_NDFrameIndexerBase):
_exception = Exception
axis = None

def __call__(self, axis=None):
def __call__(self, axis: Optional[Axis] = None) -> "_NDFrameIndexer":
# we need to return a copy of ourselves
new_self = self.__class__(self.name, self.obj)

Expand Down Expand Up @@ -193,7 +198,7 @@ def _get_setitem_indexer(self, key):
raise
raise IndexingError(key)

def __setitem__(self, key, value):
def __setitem__(self, key, value) -> None:
if isinstance(key, tuple):
key = tuple(com.apply_if_callable(x, self.obj) for x in key)
else:
Expand Down Expand Up @@ -260,7 +265,7 @@ def _convert_tuple(self, key, is_setter: bool = False):
keyidx.append(idx)
return tuple(keyidx)

def _convert_range(self, key, is_setter: bool = False):
def _convert_range(self, key: range, is_setter: bool = False) -> List[int]:
""" convert a range argument """
return list(key)

Expand Down Expand Up @@ -638,7 +643,9 @@ def _setitem_with_indexer_missing(self, indexer, value):
self.obj._maybe_update_cacher(clear=True)
return self.obj

def _align_series(self, indexer, ser, multiindex_indexer=False):
def _align_series(
self, indexer, ser: "Series", multiindex_indexer: bool = False
) -> Union[np.ndarray, "DatetimeArray"]:
"""
Parameters
----------
Expand Down Expand Up @@ -734,7 +741,7 @@ def ravel(i):

raise ValueError("Incompatible indexer with Series")

def _align_frame(self, indexer, df):
def _align_frame(self, indexer, df: "DataFrame") -> np.ndarray:
is_frame = self.obj.ndim == 2

if isinstance(indexer, tuple):
Expand Down Expand Up @@ -1328,12 +1335,12 @@ class _IXIndexer(_NDFrameIndexer):
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated""" # noqa: E501
)

def __init__(self, name, obj):
def __init__(self, name: str, obj: "NDFrame"):
warnings.warn(self._ix_deprecation_warning, FutureWarning, stacklevel=2)
super().__init__(name, obj)

@Appender(_NDFrameIndexer._validate_key.__doc__)
def _validate_key(self, key, axis: int):
def _validate_key(self, key, axis: int) -> bool:
if isinstance(key, slice):
return True

Expand All @@ -1349,7 +1356,7 @@ def _validate_key(self, key, axis: int):

return True

def _convert_for_reindex(self, key, axis: int):
def _convert_for_reindex(self, key, axis: int) -> Union[Index, np.ndarray]:
"""
Transform a list of keys into a new array ready to be used as axis of
the object we return (e.g. including NaNs).
Expand Down Expand Up @@ -1418,7 +1425,7 @@ def _getitem_scalar(self, key):
def _getitem_axis(self, key, axis: int):
raise NotImplementedError()

def _getbool_axis(self, key, axis: int):
def _getbool_axis(self, key, axis: int) -> "NDFrame":
# caller is responsible for ensuring non-None axis
labels = self.obj._get_axis(axis)
key = check_bool_indexer(labels, key)
Expand All @@ -1428,7 +1435,7 @@ def _getbool_axis(self, key, axis: int):
except Exception as detail:
raise self._exception(detail)

def _get_slice_axis(self, slice_obj: slice, axis: int):
def _get_slice_axis(self, slice_obj: slice, axis: int) -> "NDFrame":
""" this is pretty simple as we just have to deal with labels """
# caller is responsible for ensuring non-None axis
obj = self.obj
Expand Down Expand Up @@ -1694,7 +1701,7 @@ class _LocIndexer(_LocationIndexer):
_exception = KeyError

@Appender(_NDFrameIndexer._validate_key.__doc__)
def _validate_key(self, key, axis: int):
def _validate_key(self, key, axis: int) -> None:

# valid for a collection of labels (we check their presence later)
# slice of labels (where start-end in labels)
Expand All @@ -1710,7 +1717,7 @@ def _validate_key(self, key, axis: int):
if not is_list_like_indexer(key):
self._convert_scalar_indexer(key, axis)

def _is_scalar_access(self, key: Tuple):
def _is_scalar_access(self, key: Tuple) -> bool:
# this is a shortcut accessor to both .loc and .iloc
# that provide the equivalent access of .at and .iat
# a) avoid getting things via sections and (to minimize dtype changes)
Expand All @@ -1737,22 +1744,26 @@ def _getitem_scalar(self, key):
values = self.obj._get_value(*key)
return values

def _get_partial_string_timestamp_match_key(self, key, labels):
def _get_partial_string_timestamp_match_key(self, key, labels: Index):
"""Translate any partial string timestamp matches in key, returning the
new key (GH 10331)"""
if isinstance(labels, MultiIndex):
if isinstance(key, str) and labels.levels[0].is_all_dates:
# Convert key '2016-01-01' to
# ('2016-01-01'[, slice(None, None, None)]+)
key = tuple([key] + [slice(None)] * (len(labels.levels) - 1))
list_items = [key] # type: List[Union[slice,str]]
list_items += [slice(None)] * (len(labels.levels) - 1)
key = tuple(list_items)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not happy changing the code here: the original is perfectly valid. I will probably revert this, add a `# type: ignore` comment, and wait for a mypy update.

@WillAyd should we add `--warn-unused-ignores` to the CI command, or `warn_unused_ignores = True` to the ini file?

Alternatively, I'm happy to just run it ad hoc periodically.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we not reuse key and just call it something else?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The issue is that mypy won't let you add a list of slices to a list of strings in an inline expression.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, OK, that's strange. Let me open an issue on the mypy tracker, and we can reference it in the ignore comment here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually I think the slice is a red herring. This fails with just int and str as well:

values: Union[List[int], List[str]] = [1]  + [""]
# error: List item 0 has incompatible type "str"; expected "int"

values: List[Union[int, str]] = [1]  + [""]
# error: Incompatible types in assignment (expression has type "List[int]", variable has type "List[Union[int, str]]")
# note: "List" is invariant -- see http://mypy.readthedocs.io/en/latest/common_issues.html#variance
# note: Consider using "Sequence" instead, which is covariant
# error: List item 0 has incompatible type "str"; expected "int"

So I think

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, good find. We can just add that as a comment and ignore the error (this is mypy's suggested approach for dealing with these kinds of things).


if isinstance(key, tuple):
# Convert (..., '2016-01-01', ...) in tuple to
# (..., slice('2016-01-01', '2016-01-01', None), ...)
new_key = []
for i, component in enumerate(key):
if isinstance(component, str) and labels.levels[i].is_all_dates:
new_key.append(slice(component, component, None))
new_key.append(
slice(component, component, None) # type: ignore
)
else:
new_key.append(component)
key = tuple(new_key)
Expand Down