pandas-dev
diff --git a/‎.pre-commit-config.yaml
Lines changed: 7 additions & 8 deletions b/‎.pre-commit-config.yaml
Lines changed: 7 additions & 8 deletions
diff --git a/‎asv_bench/benchmarks/series_methods.py
Lines changed: 10 additions & 0 deletions b/‎asv_bench/benchmarks/series_methods.py
Lines changed: 10 additions & 0 deletions
diff --git a/‎ci/deps/actions-310.yaml
Lines changed: 1 addition & 0 deletions b/‎ci/deps/actions-310.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/‎ci/deps/actions-38-minimum_versions.yaml
Lines changed: 1 addition & 0 deletions b/‎ci/deps/actions-38-minimum_versions.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/‎ci/deps/actions-39.yaml
Lines changed: 1 addition & 0 deletions b/‎ci/deps/actions-39.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/‎doc/source/conf.py
Lines changed: 9 additions & 1 deletion b/‎doc/source/conf.py
Lines changed: 9 additions & 1 deletion
diff --git a/‎doc/source/getting_started/install.rst
Lines changed: 17 additions & 0 deletions b/‎doc/source/getting_started/install.rst
Lines changed: 17 additions & 0 deletions
diff --git a/‎doc/source/user_guide/io.rst
Lines changed: 24 additions & 0 deletions b/‎doc/source/user_guide/io.rst
Lines changed: 24 additions & 0 deletions
diff --git a/‎doc/source/whatsnew/v1.5.0.rst
Lines changed: 14 additions & 3 deletions b/‎doc/source/whatsnew/v1.5.0.rst
Lines changed: 14 additions & 3 deletions
diff --git a/‎environment.yml
Lines changed: 3 additions & 3 deletions b/‎environment.yml
Lines changed: 3 additions & 3 deletions
diff --git a/‎pandas/__init__.py
Lines changed: 2 additions & 0 deletions b/‎pandas/__init__.py
Lines changed: 2 additions & 0 deletions
diff --git a/‎pandas/_libs/algos.pyi
Lines changed: 1 addition & 0 deletions b/‎pandas/_libs/algos.pyi
Lines changed: 1 addition & 0 deletions
diff --git a/‎pandas/_libs/algos_common_helper.pxi.in
Lines changed: 2 additions & 2 deletions b/‎pandas/_libs/algos_common_helper.pxi.in
Lines changed: 2 additions & 2 deletions
diff --git a/‎pandas/_libs/groupby.pyi
Lines changed: 8 additions & 4 deletions b/‎pandas/_libs/groupby.pyi
Lines changed: 8 additions & 4 deletions
@@ -11,7 +11,7 @@ repos:
     -   id: absolufy-imports
         files: ^pandas/
 -   repo: https://github.com/jendrikseipp/vulture
-    rev: 'v2.4'
+    rev: 'v2.5'
     hooks:
       - id: vulture
         entry: python scripts/run_vulture.py
@@ -46,20 +46,19 @@ repos:
         exclude: ^pandas/_libs/src/(klib|headers)/
         args: [--quiet, '--extensions=c,h', '--headers=h', --recursive, '--filter=-readability/casting,-runtime/int,-build/include_subdir']
 -   repo: https://github.com/PyCQA/flake8
-    rev: 4.0.1
+    rev: 5.0.4
     hooks:
     -   id: flake8
         additional_dependencies: &flake8_dependencies
-        - flake8==4.0.1
-        - flake8-comprehensions==3.7.0
-        - flake8-bugbear==21.3.2
+        - flake8==5.0.4
+        - flake8-bugbear==22.7.1
         - pandas-dev-flaker==0.5.0
 -   repo: https://github.com/PyCQA/isort
     rev: 5.10.1
     hooks:
     -   id: isort
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v2.34.0
+    rev: v2.37.3
     hooks:
     -   id: pyupgrade
         args: [--py38-plus]
@@ -239,8 +238,8 @@ repos:
         types: [pyi]
         language: python
         additional_dependencies:
-        - flake8==4.0.1
-        - flake8-pyi==22.7.0
+        - flake8==5.0.4
+        - flake8-pyi==22.8.1
     -   id: future-annotations
         name: import annotations from __future__
         entry: 'from __future__ import annotations'
 
@@ -144,6 +144,16 @@ def time_clip(self, n):
         self.s.clip(0, 1)
 
 
+class ClipDt:
+    def setup(self):
+        dr = date_range("20220101", periods=100_000, freq="s", tz="UTC")
+        self.clipper_dt = dr[0:1_000].repeat(100)
+        self.s = Series(dr)
+
+    def time_clip(self):
+        self.s.clip(upper=self.clipper_dt)
+
+
 class ValueCounts:
 
     params = [[10**3, 10**4, 10**5], ["int", "uint", "float", "object"]]
 
@@ -47,6 +47,7 @@ dependencies:
   - scipy
   - sqlalchemy
   - tabulate
+  - tzdata>=2022a
   - xarray
   - xlrd
   - xlsxwriter
 
@@ -49,6 +49,7 @@ dependencies:
   - scipy=1.7.1
   - sqlalchemy=1.4.16
   - tabulate=0.8.9
+  - tzdata=2022a
   - xarray=0.19.0
   - xlrd=2.0.1
   - xlsxwriter=1.4.3
 
@@ -47,6 +47,7 @@ dependencies:
   - scipy
   - sqlalchemy
   - tabulate
+  - tzdata>=2022a
   - xarray
   - xlrd
   - xlsxwriter
 
@@ -653,12 +653,20 @@ def linkcode_resolve(domain, info):
     try:
         fn = inspect.getsourcefile(inspect.unwrap(obj))
     except TypeError:
-        fn = None
+        try:  # property
+            fn = inspect.getsourcefile(inspect.unwrap(obj.fget))
+        except (AttributeError, TypeError):
+            fn = None
     if not fn:
         return None
 
     try:
         source, lineno = inspect.getsourcelines(obj)
+    except TypeError:
+        try:  # property
+            source, lineno = inspect.getsourcelines(obj.fget)
+        except (AttributeError, TypeError):
+            lineno = None
     except OSError:
         lineno = None
 
 
@@ -270,6 +270,23 @@ For example, :func:`pandas.read_hdf` requires the ``pytables`` package, while
 optional dependency is not installed, pandas will raise an ``ImportError`` when
 the method requiring that dependency is called.
 
+Timezones
+^^^^^^^^^
+
+========================= ========================= =============================================================
+Dependency                Minimum Version           Notes
+========================= ========================= =============================================================
+tzdata                    2022.1(pypi)/             Allows the use of ``zoneinfo`` timezones with pandas.
+                          2022a(for system tzdata)  **Note**: You only need to install the pypi package if your
+                                                    system does not already provide the IANA tz database.
+                                                    However, the minimum tzdata version still applies, even if it
+                                                    is not enforced through an error.
+
+                                                    If you would like to keep your system tzdata version updated,
+                                                    it is recommended to use the ``tzdata`` package from
+                                                    conda-forge.
+========================= ========================= =============================================================
+
 Visualization
 ^^^^^^^^^^^^^
 
 
@@ -2743,6 +2743,30 @@ succeeds, the function will return*.
 
    dfs = pd.read_html(url, "Metcalf Bank", index_col=0, flavor=["lxml", "bs4"])
 
+Links can be extracted from cells along with the text using ``extract_links="all"``.
+
+.. ipython:: python
+
+    html_table = """
+    <table>
+      <tr>
+        <th>GitHub</th>
+      </tr>
+      <tr>
+        <td><a href="https://github.com/pandas-dev/pandas">pandas</a></td>
+      </tr>
+    </table>
+    """
+
+    df = pd.read_html(
+        html_table,
+        extract_links="all"
+    )[0]
+    df
+    df[("GitHub", None)]
+    df[("GitHub", None)].str[1]
+
+.. versionadded:: 1.5.0
 
 .. _io.html:
 
 
@@ -287,13 +287,17 @@ Other enhancements
 - ``times`` argument in :class:`.ExponentialMovingWindow` now accepts ``np.timedelta64`` (:issue:`47003`)
 - :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, :class:`.IndexingError`, :class:`.PyperclipException`, :class:`.PyperclipWindowsException`, :class:`.CSSWarning`, :class:`.PossibleDataLossError`, :class:`.ClosedFileError`, :class:`.IncompatibilityWarning`, :class:`.AttributeConflictWarning`, :class:`.DatabaseError`, :class:`.PossiblePrecisionLoss`, :class:`.ValueLabelTypeMismatch`, :class:`.InvalidColumnName`, and :class:`.CategoricalConversionWarning` are now exposed in ``pandas.errors`` (:issue:`27656`)
 - Added ``check_like`` argument to :func:`testing.assert_series_equal` (:issue:`47247`)
+- Add support for :meth:`GroupBy.ohlc` for extension array dtypes (:issue:`37493`)
 - Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files)
 - :func:`read_sas` now supports using ``encoding='infer'`` to correctly read and use the encoding specified by the sas file. (:issue:`48048`)
+- :func:`pandas.read_html` now supports extracting links from table cells (:issue:`13141`)
 - :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`)
 - :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`)
 - :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`)
 - :meth:`DataFrame.compare` now accepts an argument ``result_names`` to allow the user to specify the result's names of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`)
+- :class:`Interval` now supports checking whether one interval is contained by another interval (:issue:`46613`)
 - :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support a ``copy`` argument. If ``False``, the underlying data is not copied in the returned object (:issue:`47934`)
+- :meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_150.notable_bug_fixes:
@@ -405,7 +409,6 @@ upon serialization. (Related issue :issue:`12997`)
     # Roundtripping now works
     pd.read_json(a.to_json(date_format='iso'), typ="series").index == a.index
 
-
 .. _whatsnew_150.notable_bug_fixes.groupby_value_counts_categorical:
 
 DataFrameGroupBy.value_counts with non-grouping categorical columns and ``observed=True``
@@ -844,6 +847,8 @@ Other Deprecations
 - Deprecated setting a categorical's categories with ``cat.categories = ['a', 'b', 'c']``, use :meth:`Categorical.rename_categories` instead (:issue:`37643`)
 - Deprecated unused arguments ``encoding`` and ``verbose`` in :meth:`Series.to_excel` and :meth:`DataFrame.to_excel` (:issue:`47912`)
 - Deprecated producing a single element when iterating over a :class:`DataFrameGroupBy` or a :class:`SeriesGroupBy` that has been grouped by a list of length 1; A tuple of length one will be returned instead (:issue:`42795`)
+- Fixed the deprecation warning message for :meth:`MultiIndex.lexsort_depth` as a public method, as the message previously referred to :meth:`MultiIndex.is_lexsorted` instead (:issue:`38701`)
+- Deprecated the ``sort_columns`` argument in :meth:`DataFrame.plot` and :meth:`Series.plot` (:issue:`47563`).
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_150.performance:
@@ -884,8 +889,9 @@ Bug fixes
 
 Categorical
 ^^^^^^^^^^^
-- Bug in :meth:`Categorical.view` not accepting integer dtypes (:issue:`25464`)
-- Bug in :meth:`CategoricalIndex.union` when the index's categories are integer-dtype and the index contains ``NaN`` values incorrectly raising instead of casting to ``float64`` (:issue:`45362`)
+- Bug in :meth:`.Categorical.view` not accepting integer dtypes (:issue:`25464`)
+- Bug in :meth:`.CategoricalIndex.union` when the index's categories are integer-dtype and the index contains ``NaN`` values incorrectly raising instead of casting to ``float64`` (:issue:`45362`)
+- Bug in :meth:`DataFrame.concat` when concatenating two (or more) unordered ``CategoricalIndex`` variables, whose categories are permutations, yielding incorrect index values (:issue:`24845`)
 
 Datetimelike
 ^^^^^^^^^^^^
@@ -898,6 +904,7 @@ Datetimelike
 - Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`46903`)
 - Bug in :class:`Timestamp` with an integer or float value and ``unit="Y"`` or ``unit="M"`` giving slightly-wrong results (:issue:`47266`)
 - Bug in :class:`.DatetimeArray` construction when passed another :class:`.DatetimeArray` and ``freq=None`` incorrectly inferring the freq from the given array (:issue:`47296`)
+- Bug in :func:`to_datetime` where ``OutOfBoundsDatetime`` would be raised even with ``errors="coerce"`` if there were more than 50 rows (:issue:`45319`)
 - Bug when adding a :class:`DateOffset` to a :class:`Series` would not add the ``nanoseconds`` field (:issue:`47856`)
 -
 
@@ -933,6 +940,8 @@ Conversion
 - Bug in :meth:`DataFrame.to_dict` for ``orient="list"`` or ``orient="index"`` was not returning native types (:issue:`46751`)
 - Bug in :meth:`DataFrame.apply` that returns a :class:`DataFrame` instead of a :class:`Series` when applied to an empty :class:`DataFrame` and ``axis=1`` (:issue:`39111`)
 - Bug when inferring the dtype from an iterable that is *not* a NumPy ``ndarray`` consisting of all NumPy unsigned integer scalars did not result in an unsigned integer dtype (:issue:`47294`)
+- Bug in :meth:`DataFrame.eval` when pandas objects (e.g. ``'Timestamp'``) were column names (:issue:`44603`)
+-
 
 Strings
 ^^^^^^^
@@ -1067,10 +1076,12 @@ Groupby/resample/rolling
 - Bug when using ``engine="numba"`` would return the same jitted function when modifying ``engine_kwargs`` (:issue:`46086`)
 - Bug in :meth:`.DataFrameGroupBy.transform` fails when ``axis=1`` and ``func`` is ``"first"`` or ``"last"`` (:issue:`45986`)
 - Bug in :meth:`DataFrameGroupBy.cumsum` with ``skipna=False`` giving incorrect results (:issue:`46216`)
+- Bug in :meth:`GroupBy.sum` with integer dtypes losing precision (:issue:`37493`)
 - Bug in :meth:`.GroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`)
 - Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`)
 - Bug in :meth:`DataFrame.groupby` raising error when ``None`` is in first level of :class:`MultiIndex` (:issue:`47348`)
 - Bug in :meth:`.GroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`46382`)
+- Bug in :meth:`GroupBy.cumprod` where ``NaN`` influenced the calculation in different columns with ``skipna=False`` (:issue:`48064`)
 - Bug in :meth:`.GroupBy.max` with empty groups and ``uint64`` dtype incorrectly raising ``RuntimeError`` (:issue:`46408`)
 - Bug in :meth:`.GroupBy.apply` would fail when ``func`` was a string and args or kwargs were supplied (:issue:`46479`)
 - Bug in :meth:`SeriesGroupBy.apply` would incorrectly name its result when there was a unique group (:issue:`46369`)
 
@@ -48,6 +48,7 @@ dependencies:
   - scipy
   - sqlalchemy
   - tabulate
+  - tzdata>=2022a
   - xarray
   - xlrd
   - xlsxwriter
@@ -84,9 +85,8 @@ dependencies:
   # code checks
   - black=22.3.0
   - cpplint
-  - flake8=4.0.1
-  - flake8-bugbear=21.3.2  # used by flake8, find likely bugs
-  - flake8-comprehensions=3.7.0  # used by flake8, linting of unnecessary comprehensions
+  - flake8=5.0.4
+  - flake8-bugbear=22.7.1 # used by flake8, find likely bugs
   - isort>=5.2.1  # check that imports are in the right order
   - mypy=0.971
   - pre-commit>=2.15.0
 
@@ -47,6 +47,7 @@
 
 from pandas.core.api import (
     # dtype
+    ArrowDtype,
     Int8Dtype,
     Int16Dtype,
     Int32Dtype,
@@ -308,6 +309,7 @@ def __getattr__(name):
 # Pandas is not (yet) a py.typed library: the public API is determined
 # based on the documentation.
 __all__ = [
+    "ArrowDtype",
     "BooleanDtype",
     "Categorical",
     "CategoricalDtype",
 
@@ -132,6 +132,7 @@ def ensure_int8(arr: object, copy=...) -> npt.NDArray[np.int8]: ...
 def ensure_int16(arr: object, copy=...) -> npt.NDArray[np.int16]: ...
 def ensure_int32(arr: object, copy=...) -> npt.NDArray[np.int32]: ...
 def ensure_int64(arr: object, copy=...) -> npt.NDArray[np.int64]: ...
+def ensure_uint64(arr: object, copy=...) -> npt.NDArray[np.uint64]: ...
 def take_1d_int8_int8(
     values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
 ) -> None: ...
 
@@ -41,12 +41,12 @@ dtypes = [('float64', 'FLOAT64', 'float64'),
           ('int16', 'INT16', 'int16'),
           ('int32', 'INT32', 'int32'),
           ('int64', 'INT64', 'int64'),
+          ('uint64', 'UINT64', 'uint64'),
           # Disabling uint and complex dtypes because we do not use them
-          #  (and compiling them increases wheel size)
+          #  (and compiling them increases wheel size) (except uint64)
           # ('uint8', 'UINT8', 'uint8'),
           # ('uint16', 'UINT16', 'uint16'),
           # ('uint32', 'UINT32', 'uint32'),
-          # ('uint64', 'UINT64', 'uint64'),
           # ('complex64', 'COMPLEX64', 'complex64'),
           # ('complex128', 'COMPLEX128', 'complex128')
 ]
 
@@ -51,10 +51,12 @@ def group_any_all(
     skipna: bool,
 ) -> None: ...
 def group_sum(
-    out: np.ndarray,  # complexfloating_t[:, ::1]
+    out: np.ndarray,  # complexfloatingintuint_t[:, ::1]
     counts: np.ndarray,  # int64_t[::1]
-    values: np.ndarray,  # ndarray[complexfloating_t, ndim=2]
+    values: np.ndarray,  # ndarray[complexfloatingintuint_t, ndim=2]
     labels: np.ndarray,  # const intp_t[:]
+    mask: np.ndarray | None,
+    result_mask: np.ndarray | None = ...,
     min_count: int = ...,
     is_datetimelike: bool = ...,
 ) -> None: ...
@@ -84,11 +86,13 @@ def group_mean(
     result_mask: np.ndarray | None = ...,
 ) -> None: ...
 def group_ohlc(
-    out: np.ndarray,  # floating[:, ::1]
+    out: np.ndarray,  # floatingintuint_t[:, ::1]
     counts: np.ndarray,  # int64_t[::1]
-    values: np.ndarray,  # ndarray[floating, ndim=2]
+    values: np.ndarray,  # ndarray[floatingintuint_t, ndim=2]
     labels: np.ndarray,  # const intp_t[:]
     min_count: int = ...,
+    mask: np.ndarray | None = ...,
+    result_mask: np.ndarray | None = ...,
 ) -> None: ...
 def group_quantile(
     out: npt.NDArray[np.float64],