Skip to content

Commit 82c449a

Browse files
authored
Merge branch 'main' into detect-sas-encoding
2 parents 6426fc6 + e5bfbdc commit 82c449a

File tree

151 files changed

+2086
-1139
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

151 files changed

+2086
-1139
lines changed

.pre-commit-config.yaml

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ repos:
1111
- id: absolufy-imports
1212
files: ^pandas/
1313
- repo: https://github.com/jendrikseipp/vulture
14-
rev: 'v2.4'
14+
rev: 'v2.5'
1515
hooks:
1616
- id: vulture
1717
entry: python scripts/run_vulture.py
@@ -46,20 +46,19 @@ repos:
4646
exclude: ^pandas/_libs/src/(klib|headers)/
4747
args: [--quiet, '--extensions=c,h', '--headers=h', --recursive, '--filter=-readability/casting,-runtime/int,-build/include_subdir']
4848
- repo: https://github.com/PyCQA/flake8
49-
rev: 4.0.1
49+
rev: 5.0.4
5050
hooks:
5151
- id: flake8
5252
additional_dependencies: &flake8_dependencies
53-
- flake8==4.0.1
54-
- flake8-comprehensions==3.7.0
55-
- flake8-bugbear==21.3.2
53+
- flake8==5.0.4
54+
- flake8-bugbear==22.7.1
5655
- pandas-dev-flaker==0.5.0
5756
- repo: https://github.com/PyCQA/isort
5857
rev: 5.10.1
5958
hooks:
6059
- id: isort
6160
- repo: https://github.com/asottile/pyupgrade
62-
rev: v2.34.0
61+
rev: v2.37.3
6362
hooks:
6463
- id: pyupgrade
6564
args: [--py38-plus]
@@ -239,8 +238,8 @@ repos:
239238
types: [pyi]
240239
language: python
241240
additional_dependencies:
242-
- flake8==4.0.1
243-
- flake8-pyi==22.7.0
241+
- flake8==5.0.4
242+
- flake8-pyi==22.8.1
244243
- id: future-annotations
245244
name: import annotations from __future__
246245
entry: 'from __future__ import annotations'

asv_bench/benchmarks/series_methods.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,16 @@ def time_clip(self, n):
144144
self.s.clip(0, 1)
145145

146146

147+
class ClipDt:
148+
def setup(self):
149+
dr = date_range("20220101", periods=100_000, freq="s", tz="UTC")
150+
self.clipper_dt = dr[0:1_000].repeat(100)
151+
self.s = Series(dr)
152+
153+
def time_clip(self):
154+
self.s.clip(upper=self.clipper_dt)
155+
156+
147157
class ValueCounts:
148158

149159
params = [[10**3, 10**4, 10**5], ["int", "uint", "float", "object"]]

ci/deps/actions-310.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ dependencies:
4747
- scipy
4848
- sqlalchemy
4949
- tabulate
50+
- tzdata>=2022a
5051
- xarray
5152
- xlrd
5253
- xlsxwriter

ci/deps/actions-38-minimum_versions.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ dependencies:
4949
- scipy=1.7.1
5050
- sqlalchemy=1.4.16
5151
- tabulate=0.8.9
52+
- tzdata=2022a
5253
- xarray=0.19.0
5354
- xlrd=2.0.1
5455
- xlsxwriter=1.4.3

ci/deps/actions-39.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ dependencies:
4747
- scipy
4848
- sqlalchemy
4949
- tabulate
50+
- tzdata>=2022a
5051
- xarray
5152
- xlrd
5253
- xlsxwriter

doc/source/conf.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -653,12 +653,20 @@ def linkcode_resolve(domain, info):
653653
try:
654654
fn = inspect.getsourcefile(inspect.unwrap(obj))
655655
except TypeError:
656-
fn = None
656+
try: # property
657+
fn = inspect.getsourcefile(inspect.unwrap(obj.fget))
658+
except (AttributeError, TypeError):
659+
fn = None
657660
if not fn:
658661
return None
659662

660663
try:
661664
source, lineno = inspect.getsourcelines(obj)
665+
except TypeError:
666+
try: # property
667+
source, lineno = inspect.getsourcelines(obj.fget)
668+
except (AttributeError, TypeError):
669+
lineno = None
662670
except OSError:
663671
lineno = None
664672

doc/source/getting_started/install.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,23 @@ For example, :func:`pandas.read_hdf` requires the ``pytables`` package, while
270270
optional dependency is not installed, pandas will raise an ``ImportError`` when
271271
the method requiring that dependency is called.
272272

273+
Timezones
274+
^^^^^^^^^
275+
276+
========================= ========================= =============================================================
277+
Dependency Minimum Version Notes
278+
========================= ========================= =============================================================
279+
tzdata 2022.1(pypi)/ Allows the use of ``zoneinfo`` timezones with pandas.
280+
2022a(for system tzdata) **Note**: You only need to install the pypi package if your
281+
system does not already provide the IANA tz database.
282+
However, the minimum tzdata version still applies, even if it
283+
is not enforced through an error.
284+
285+
If you would like to keep your system tzdata version updated,
286+
it is recommended to use the ``tzdata`` package from
287+
conda-forge.
288+
========================= ========================= =============================================================
289+
273290
Visualization
274291
^^^^^^^^^^^^^
275292

doc/source/user_guide/io.rst

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2743,6 +2743,30 @@ succeeds, the function will return*.
27432743
27442744
dfs = pd.read_html(url, "Metcalf Bank", index_col=0, flavor=["lxml", "bs4"])
27452745
2746+
Links can be extracted from cells along with the text using ``extract_links="all"``.
2747+
2748+
.. ipython:: python
2749+
2750+
html_table = """
2751+
<table>
2752+
<tr>
2753+
<th>GitHub</th>
2754+
</tr>
2755+
<tr>
2756+
<td><a href="https://github.com/pandas-dev/pandas">pandas</a></td>
2757+
</tr>
2758+
</table>
2759+
"""
2760+
2761+
df = pd.read_html(
2762+
html_table,
2763+
extract_links="all"
2764+
)[0]
2765+
df
2766+
df[("GitHub", None)]
2767+
df[("GitHub", None)].str[1]
2768+
2769+
.. versionadded:: 1.5.0
27462770

27472771
.. _io.html:
27482772

doc/source/whatsnew/v1.5.0.rst

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -287,13 +287,17 @@ Other enhancements
287287
- ``times`` argument in :class:`.ExponentialMovingWindow` now accepts ``np.timedelta64`` (:issue:`47003`)
288288
- :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, :class:`.IndexingError`, :class:`.PyperclipException`, :class:`.PyperclipWindowsException`, :class:`.CSSWarning`, :class:`.PossibleDataLossError`, :class:`.ClosedFileError`, :class:`.IncompatibilityWarning`, :class:`.AttributeConflictWarning`, :class:`.DatabaseError`, :class:`.PossiblePrecisionLoss`, :class:`.ValueLabelTypeMismatch`, :class:`.InvalidColumnName`, and :class:`.CategoricalConversionWarning` are now exposed in ``pandas.errors`` (:issue:`27656`)
289289
- Added ``check_like`` argument to :func:`testing.assert_series_equal` (:issue:`47247`)
290+
- Add support for :meth:`GroupBy.ohlc` for extension array dtypes (:issue:`37493`)
290291
- Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files)
291292
- :func:`read_sas` now supports using ``encoding='infer'`` to correctly read and use the encoding specified by the SAS file (:issue:`48048`)
293+
- :func:`pandas.read_html` now supports extracting links from table cells (:issue:`13141`)
292294
- :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`)
293295
- :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`)
294296
- :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of an :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`)
295297
- :meth:`DataFrame.compare` now accepts an argument ``result_names`` to allow the user to specify the result's names of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`)
298+
- :class:`Interval` now supports checking whether one interval is contained by another interval (:issue:`46613`)
296299
- :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support a ``copy`` argument. If ``False``, the underlying data is not copied in the returned object (:issue:`47934`)
300+
- :meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`)
297301

298302
.. ---------------------------------------------------------------------------
299303
.. _whatsnew_150.notable_bug_fixes:
@@ -405,7 +409,6 @@ upon serialization. (Related issue :issue:`12997`)
405409
# Roundtripping now works
406410
pd.read_json(a.to_json(date_format='iso'), typ="series").index == a.index
407411
408-
409412
.. _whatsnew_150.notable_bug_fixes.groupby_value_counts_categorical:
410413

411414
DataFrameGroupBy.value_counts with non-grouping categorical columns and ``observed=True``
@@ -844,6 +847,8 @@ Other Deprecations
844847
- Deprecated setting a categorical's categories with ``cat.categories = ['a', 'b', 'c']``, use :meth:`Categorical.rename_categories` instead (:issue:`37643`)
845848
- Deprecated unused arguments ``encoding`` and ``verbose`` in :meth:`Series.to_excel` and :meth:`DataFrame.to_excel` (:issue:`47912`)
846849
- Deprecated producing a single element when iterating over a :class:`DataFrameGroupBy` or a :class:`SeriesGroupBy` that has been grouped by a list of length 1; A tuple of length one will be returned instead (:issue:`42795`)
850+
- Fixed up warning message of deprecation of :meth:`MultiIndex.lexsort_depth` as public method, as the message previously referred to :meth:`MultiIndex.is_lexsorted` instead (:issue:`38701`)
851+
- Deprecated the ``sort_columns`` argument in :meth:`DataFrame.plot` and :meth:`Series.plot` (:issue:`47563`).
847852

848853
.. ---------------------------------------------------------------------------
849854
.. _whatsnew_150.performance:
@@ -884,8 +889,9 @@ Bug fixes
884889

885890
Categorical
886891
^^^^^^^^^^^
887-
- Bug in :meth:`Categorical.view` not accepting integer dtypes (:issue:`25464`)
888-
- Bug in :meth:`CategoricalIndex.union` when the index's categories are integer-dtype and the index contains ``NaN`` values incorrectly raising instead of casting to ``float64`` (:issue:`45362`)
892+
- Bug in :meth:`.Categorical.view` not accepting integer dtypes (:issue:`25464`)
893+
- Bug in :meth:`.CategoricalIndex.union` when the index's categories are integer-dtype and the index contains ``NaN`` values incorrectly raising instead of casting to ``float64`` (:issue:`45362`)
894+
- Bug in :func:`concat` when concatenating two (or more) unordered ``CategoricalIndex`` variables, whose categories are permutations, yielding incorrect index values (:issue:`24845`)
889895

890896
Datetimelike
891897
^^^^^^^^^^^^
@@ -898,6 +904,7 @@ Datetimelike
898904
- Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`46903`)
899905
- Bug in :class:`Timestamp` with an integer or float value and ``unit="Y"`` or ``unit="M"`` giving slightly-wrong results (:issue:`47266`)
900906
- Bug in :class:`.DatetimeArray` construction when passed another :class:`.DatetimeArray` and ``freq=None`` incorrectly inferring the freq from the given array (:issue:`47296`)
907+
- Bug in :func:`to_datetime` where ``OutOfBoundsDatetime`` would be raised even if ``errors="coerce"`` if there were more than 50 rows (:issue:`45319`)
901908
- Bug when adding a :class:`DateOffset` to a :class:`Series` would not add the ``nanoseconds`` field (:issue:`47856`)
902909
-
903910

@@ -933,6 +940,8 @@ Conversion
933940
- Bug in :meth:`DataFrame.to_dict` for ``orient="list"`` or ``orient="index"`` was not returning native types (:issue:`46751`)
934941
- Bug in :meth:`DataFrame.apply` that returns a :class:`DataFrame` instead of a :class:`Series` when applied to an empty :class:`DataFrame` and ``axis=1`` (:issue:`39111`)
935942
- Bug when inferring the dtype from an iterable that is *not* a NumPy ``ndarray`` consisting of all NumPy unsigned integer scalars did not result in an unsigned integer dtype (:issue:`47294`)
943+
- Bug in :meth:`DataFrame.eval` when pandas objects (e.g. ``'Timestamp'``) were column names (:issue:`44603`)
944+
-
936945

937946
Strings
938947
^^^^^^^
@@ -1067,10 +1076,12 @@ Groupby/resample/rolling
10671076
- Bug when using ``engine="numba"`` would return the same jitted function when modifying ``engine_kwargs`` (:issue:`46086`)
10681077
- Bug in :meth:`.DataFrameGroupBy.transform` failing when ``axis=1`` and ``func`` is ``"first"`` or ``"last"`` (:issue:`45986`)
10691078
- Bug in :meth:`DataFrameGroupBy.cumsum` with ``skipna=False`` giving incorrect results (:issue:`46216`)
1079+
- Bug in :meth:`GroupBy.sum` with integer dtypes losing precision (:issue:`37493`)
10701080
- Bug in :meth:`.GroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`)
10711081
- Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`)
10721082
- Bug in :meth:`DataFrame.groupby` raising error when ``None`` is in first level of :class:`MultiIndex` (:issue:`47348`)
10731083
- Bug in :meth:`.GroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`46382`)
1084+
- Bug in :meth:`GroupBy.cumprod` where ``NaN`` influences the calculation in different columns with ``skipna=False`` (:issue:`48064`)
10741085
- Bug in :meth:`.GroupBy.max` with empty groups and ``uint64`` dtype incorrectly raising ``RuntimeError`` (:issue:`46408`)
10751086
- Bug in :meth:`.GroupBy.apply` would fail when ``func`` was a string and args or kwargs were supplied (:issue:`46479`)
10761087
- Bug in :meth:`SeriesGroupBy.apply` would incorrectly name its result when there was a unique group (:issue:`46369`)

environment.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ dependencies:
4848
- scipy
4949
- sqlalchemy
5050
- tabulate
51+
- tzdata>=2022a
5152
- xarray
5253
- xlrd
5354
- xlsxwriter
@@ -84,9 +85,8 @@ dependencies:
8485
# code checks
8586
- black=22.3.0
8687
- cpplint
87-
- flake8=4.0.1
88-
- flake8-bugbear=21.3.2 # used by flake8, find likely bugs
89-
- flake8-comprehensions=3.7.0 # used by flake8, linting of unnecessary comprehensions
88+
- flake8=5.0.4
89+
- flake8-bugbear=22.7.1 # used by flake8, find likely bugs
9090
- isort>=5.2.1 # check that imports are in the right order
9191
- mypy=0.971
9292
- pre-commit>=2.15.0

pandas/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747

4848
from pandas.core.api import (
4949
# dtype
50+
ArrowDtype,
5051
Int8Dtype,
5152
Int16Dtype,
5253
Int32Dtype,
@@ -308,6 +309,7 @@ def __getattr__(name):
308309
# Pandas is not (yet) a py.typed library: the public API is determined
309310
# based on the documentation.
310311
__all__ = [
312+
"ArrowDtype",
311313
"BooleanDtype",
312314
"Categorical",
313315
"CategoricalDtype",

pandas/_libs/algos.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ def ensure_int8(arr: object, copy=...) -> npt.NDArray[np.int8]: ...
132132
def ensure_int16(arr: object, copy=...) -> npt.NDArray[np.int16]: ...
133133
def ensure_int32(arr: object, copy=...) -> npt.NDArray[np.int32]: ...
134134
def ensure_int64(arr: object, copy=...) -> npt.NDArray[np.int64]: ...
135+
def ensure_uint64(arr: object, copy=...) -> npt.NDArray[np.uint64]: ...
135136
def take_1d_int8_int8(
136137
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
137138
) -> None: ...

pandas/_libs/algos_common_helper.pxi.in

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,12 @@ dtypes = [('float64', 'FLOAT64', 'float64'),
4141
('int16', 'INT16', 'int16'),
4242
('int32', 'INT32', 'int32'),
4343
('int64', 'INT64', 'int64'),
44+
('uint64', 'UINT64', 'uint64'),
4445
# Disabling uint and complex dtypes because we do not use them
45-
# (and compiling them increases wheel size)
46+
# (and compiling them increases wheel size) (except uint64)
4647
# ('uint8', 'UINT8', 'uint8'),
4748
# ('uint16', 'UINT16', 'uint16'),
4849
# ('uint32', 'UINT32', 'uint32'),
49-
# ('uint64', 'UINT64', 'uint64'),
5050
# ('complex64', 'COMPLEX64', 'complex64'),
5151
# ('complex128', 'COMPLEX128', 'complex128')
5252
]

pandas/_libs/groupby.pyi

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,12 @@ def group_any_all(
5151
skipna: bool,
5252
) -> None: ...
5353
def group_sum(
54-
out: np.ndarray, # complexfloating_t[:, ::1]
54+
out: np.ndarray, # complexfloatingintuint_t[:, ::1]
5555
counts: np.ndarray, # int64_t[::1]
56-
values: np.ndarray, # ndarray[complexfloating_t, ndim=2]
56+
values: np.ndarray, # ndarray[complexfloatingintuint_t, ndim=2]
5757
labels: np.ndarray, # const intp_t[:]
58+
mask: np.ndarray | None,
59+
result_mask: np.ndarray | None = ...,
5860
min_count: int = ...,
5961
is_datetimelike: bool = ...,
6062
) -> None: ...
@@ -84,11 +86,13 @@ def group_mean(
8486
result_mask: np.ndarray | None = ...,
8587
) -> None: ...
8688
def group_ohlc(
87-
out: np.ndarray, # floating[:, ::1]
89+
out: np.ndarray, # floatingintuint_t[:, ::1]
8890
counts: np.ndarray, # int64_t[::1]
89-
values: np.ndarray, # ndarray[floating, ndim=2]
91+
values: np.ndarray, # ndarray[floatingintuint_t, ndim=2]
9092
labels: np.ndarray, # const intp_t[:]
9193
min_count: int = ...,
94+
mask: np.ndarray | None = ...,
95+
result_mask: np.ndarray | None = ...,
9296
) -> None: ...
9397
def group_quantile(
9498
out: npt.NDArray[np.float64],

0 commit comments

Comments
 (0)