Skip to content

Commit b03ecd8

Browse files
committed
Merge remote-tracking branch 'upstream/main' into enh/convert_dtypes/pyarrow
2 parents 30c5c16 + 627d1b6 commit b03ecd8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

75 files changed

+1111
-412
lines changed

asv_bench/benchmarks/io/excel.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,8 @@ def setup(self, engine):
4242
def time_write_excel(self, engine):
4343
bio = BytesIO()
4444
bio.seek(0)
45-
writer = ExcelWriter(bio, engine=engine)
46-
self.df.to_excel(writer, sheet_name="Sheet1")
47-
writer.save()
45+
with ExcelWriter(bio, engine=engine) as writer:
46+
self.df.to_excel(writer, sheet_name="Sheet1")
4847

4948

5049
class WriteExcelStyled:
@@ -57,13 +56,12 @@ def setup(self, engine):
5756
def time_write_excel_style(self, engine):
5857
bio = BytesIO()
5958
bio.seek(0)
60-
writer = ExcelWriter(bio, engine=engine)
61-
df_style = self.df.style
62-
df_style.applymap(lambda x: "border: red 1px solid;")
63-
df_style.applymap(lambda x: "color: blue")
64-
df_style.applymap(lambda x: "border-color: green black", subset=["float1"])
65-
df_style.to_excel(writer, sheet_name="Sheet1")
66-
writer.save()
59+
with ExcelWriter(bio, engine=engine) as writer:
60+
df_style = self.df.style
61+
df_style.applymap(lambda x: "border: red 1px solid;")
62+
df_style.applymap(lambda x: "color: blue")
63+
df_style.applymap(lambda x: "border-color: green black", subset=["float1"])
64+
df_style.to_excel(writer, sheet_name="Sheet1")
6765

6866

6967
class ReadExcel:

ci/deps/actions-38-downstream_compat.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ dependencies:
3939
- numexpr
4040
- openpyxl
4141
- odfpy
42-
- pandas-gbq
4342
- psycopg2
4443
- pyarrow<10
4544
- pymysql
@@ -68,5 +67,6 @@ dependencies:
6867
- statsmodels
6968
- coverage
7069
- pandas-datareader
70+
- pandas-gbq
7171
- pyyaml
7272
- py

doc/source/reference/extensions.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ objects.
3232
.. autosummary::
3333
:toctree: api/
3434
35+
api.extensions.ExtensionArray._accumulate
3536
api.extensions.ExtensionArray._concat_same_type
3637
api.extensions.ExtensionArray._formatter
3738
api.extensions.ExtensionArray._from_factorized

doc/source/user_guide/basics.rst

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -827,20 +827,54 @@ In this case, provide ``pipe`` with a tuple of ``(callable, data_keyword)``.
827827

828828
For example, we can fit a regression using statsmodels. Their API expects a formula first and a ``DataFrame`` as the second argument, ``data``. We pass in the function, keyword pair ``(sm.ols, 'data')`` to ``pipe``:
829829

830-
.. ipython:: python
831-
:okwarning:
832-
833-
import statsmodels.formula.api as sm
834-
835-
bb = pd.read_csv("data/baseball.csv", index_col="id")
830+
.. code-block:: ipython
836831
837-
(
838-
bb.query("h > 0")
839-
.assign(ln_h=lambda df: np.log(df.h))
840-
.pipe((sm.ols, "data"), "hr ~ ln_h + year + g + C(lg)")
841-
.fit()
842-
.summary()
843-
)
832+
In [147]: import statsmodels.formula.api as sm
833+
834+
In [148]: bb = pd.read_csv("data/baseball.csv", index_col="id")
835+
836+
In [149]: (
837+
.....: bb.query("h > 0")
838+
.....: .assign(ln_h=lambda df: np.log(df.h))
839+
.....: .pipe((sm.ols, "data"), "hr ~ ln_h + year + g + C(lg)")
840+
.....: .fit()
841+
.....: .summary()
842+
.....: )
843+
.....:
844+
Out[149]:
845+
<class 'statsmodels.iolib.summary.Summary'>
846+
"""
847+
OLS Regression Results
848+
==============================================================================
849+
Dep. Variable: hr R-squared: 0.685
850+
Model: OLS Adj. R-squared: 0.665
851+
Method: Least Squares F-statistic: 34.28
852+
Date: Tue, 22 Nov 2022 Prob (F-statistic): 3.48e-15
853+
Time: 05:34:17 Log-Likelihood: -205.92
854+
No. Observations: 68 AIC: 421.8
855+
Df Residuals: 63 BIC: 432.9
856+
Df Model: 4
857+
Covariance Type: nonrobust
858+
===============================================================================
859+
coef std err t P>|t| [0.025 0.975]
860+
-------------------------------------------------------------------------------
861+
Intercept -8484.7720 4664.146 -1.819 0.074 -1.78e+04 835.780
862+
C(lg)[T.NL] -2.2736 1.325 -1.716 0.091 -4.922 0.375
863+
ln_h -1.3542 0.875 -1.547 0.127 -3.103 0.395
864+
year 4.2277 2.324 1.819 0.074 -0.417 8.872
865+
g 0.1841 0.029 6.258 0.000 0.125 0.243
866+
==============================================================================
867+
Omnibus: 10.875 Durbin-Watson: 1.999
868+
Prob(Omnibus): 0.004 Jarque-Bera (JB): 17.298
869+
Skew: 0.537 Prob(JB): 0.000175
870+
Kurtosis: 5.225 Cond. No. 1.49e+07
871+
==============================================================================
872+
873+
Notes:
874+
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
875+
[2] The condition number is large, 1.49e+07. This might indicate that there are
876+
strong multicollinearity or other numerical problems.
877+
"""
844878
845879
The pipe method is inspired by unix pipes and more recently dplyr_ and magrittr_, which
846880
have introduced the popular ``(%>%)`` (read pipe) operator for R_.

doc/source/whatsnew/v0.16.2.rst

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -61,21 +61,55 @@ In the example above, the functions ``f``, ``g``, and ``h`` each expected the Da
6161
When the function you wish to apply takes its data anywhere other than the first argument, pass a tuple
6262
of ``(function, keyword)`` indicating where the DataFrame should flow. For example:
6363

64-
.. ipython:: python
65-
:okwarning:
66-
67-
import statsmodels.formula.api as sm
68-
69-
bb = pd.read_csv("data/baseball.csv", index_col="id")
70-
71-
# sm.ols takes (formula, data)
72-
(
73-
bb.query("h > 0")
74-
.assign(ln_h=lambda df: np.log(df.h))
75-
.pipe((sm.ols, "data"), "hr ~ ln_h + year + g + C(lg)")
76-
.fit()
77-
.summary()
78-
)
64+
.. code-block:: ipython
65+
66+
In [1]: import statsmodels.formula.api as sm
67+
68+
In [2]: bb = pd.read_csv("data/baseball.csv", index_col="id")
69+
70+
# sm.ols takes (formula, data)
71+
In [3]: (
72+
...: bb.query("h > 0")
73+
...: .assign(ln_h=lambda df: np.log(df.h))
74+
...: .pipe((sm.ols, "data"), "hr ~ ln_h + year + g + C(lg)")
75+
...: .fit()
76+
...: .summary()
77+
...: )
78+
...:
79+
Out[3]:
80+
<class 'statsmodels.iolib.summary.Summary'>
81+
"""
82+
OLS Regression Results
83+
==============================================================================
84+
Dep. Variable: hr R-squared: 0.685
85+
Model: OLS Adj. R-squared: 0.665
86+
Method: Least Squares F-statistic: 34.28
87+
Date: Tue, 22 Nov 2022 Prob (F-statistic): 3.48e-15
88+
Time: 05:35:23 Log-Likelihood: -205.92
89+
No. Observations: 68 AIC: 421.8
90+
Df Residuals: 63 BIC: 432.9
91+
Df Model: 4
92+
Covariance Type: nonrobust
93+
===============================================================================
94+
coef std err t P>|t| [0.025 0.975]
95+
-------------------------------------------------------------------------------
96+
Intercept -8484.7720 4664.146 -1.819 0.074 -1.78e+04 835.780
97+
C(lg)[T.NL] -2.2736 1.325 -1.716 0.091 -4.922 0.375
98+
ln_h -1.3542 0.875 -1.547 0.127 -3.103 0.395
99+
year 4.2277 2.324 1.819 0.074 -0.417 8.872
100+
g 0.1841 0.029 6.258 0.000 0.125 0.243
101+
==============================================================================
102+
Omnibus: 10.875 Durbin-Watson: 1.999
103+
Prob(Omnibus): 0.004 Jarque-Bera (JB): 17.298
104+
Skew: 0.537 Prob(JB): 0.000175
105+
Kurtosis: 5.225 Cond. No. 1.49e+07
106+
==============================================================================
107+
108+
Notes:
109+
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
110+
[2] The condition number is large, 1.49e+07. This might indicate that there are
111+
strong multicollinearity or other numerical problems.
112+
"""
79113
80114
The pipe method is inspired by unix pipes, which stream text through
81115
processes. More recently dplyr_ and magrittr_ have introduced the

doc/source/whatsnew/v1.5.3.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ Bug fixes
2828
~~~~~~~~~
2929
- Bug in :meth:`.Styler.to_excel` leading to error when unrecognized ``border-style`` (e.g. ``"hair"``) provided to Excel writers (:issue:`48649`)
3030
- Bug when chaining several :meth:`.Styler.concat` calls, only the last styler was concatenated (:issue:`49207`)
31+
- Fixed bug when instantiating a :class:`DataFrame` subclass inheriting from ``typing.Generic`` that triggered a ``UserWarning`` on Python 3.11 (:issue:`49649`)
3132
-
3233

3334
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v2.0.0.rst

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ The ``use_nullable_dtypes`` keyword argument has been expanded to the following
3737

3838
* :func:`read_csv`
3939
* :func:`read_excel`
40+
* :func:`read_sql`
4041

4142
Additionally, a new global configuration, ``mode.nullable_backend``, can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
4243
to select the nullable dtypes implementation.
@@ -82,13 +83,15 @@ Other enhancements
8283
- Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`)
8384
- Added support for extension array dtypes in :func:`merge` (:issue:`44240`)
8485
- Added metadata propagation for binary operators on :class:`DataFrame` (:issue:`28283`)
86+
- Added ``cumsum``, ``cumprod``, ``cummin`` and ``cummax`` to the ``ExtensionArray`` interface via ``_accumulate`` (:issue:`28385`)
8587
- :class:`.CategoricalConversionWarning`, :class:`.InvalidComparison`, :class:`.InvalidVersion`, :class:`.LossySetitemError`, and :class:`.NoBufferPresent` are now exposed in ``pandas.errors`` (:issue:`27656`)
8688
- Fix ``test`` optional_extra by adding missing test package ``pytest-asyncio`` (:issue:`48361`)
8789
- :func:`DataFrame.astype` exception message thrown improved to include column name when type conversion is not possible. (:issue:`47571`)
8890
- :func:`date_range` now supports a ``unit`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49106`)
8991
- :func:`timedelta_range` now supports a ``unit`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49824`)
9092
- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. The default is 'w'; 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`)
9193
- Added ``name`` parameter to :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_arrays` and :meth:`IntervalIndex.from_tuples` (:issue:`48911`)
94+
- Improved error message for :func:`merge_asof` when join-columns were duplicated (:issue:`50102`)
9295
- Added :meth:`Index.infer_objects` analogous to :meth:`Series.infer_objects` (:issue:`50034`)
9396
- Added ``copy`` parameter to :meth:`Series.infer_objects` and :meth:`DataFrame.infer_objects`, passing ``False`` will avoid making copies for series or columns that are already non-object or where no better dtype can be inferred (:issue:`50096`)
9497
- :meth:`DataFrame.plot.hist` now recognizes ``xlabel`` and ``ylabel`` arguments (:issue:`49793`)
@@ -318,6 +321,40 @@ The new behavior, as for datetime64, either gives exactly the requested dtype or
318321
ser.astype("timedelta64[s]")
319322
ser.astype("timedelta64[D]")
320323
324+
.. _whatsnew_200.api_breaking.default_to_stdlib_tzinfos:
325+
326+
UTC and fixed-offset timezones default to standard-library tzinfo objects
327+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
328+
In previous versions, the default ``tzinfo`` object used to represent UTC
329+
was ``pytz.UTC``. In pandas 2.0, we default to ``datetime.timezone.utc`` instead.
330+
Similarly, for timezones that represent fixed UTC offsets, we use ``datetime.timezone``
331+
objects instead of ``pytz.FixedOffset`` objects. See (:issue:`34916`)
332+
333+
*Previous behavior*:
334+
335+
.. code-block:: ipython
336+
337+
In [2]: ts = pd.Timestamp("2016-01-01", tz="UTC")
338+
In [3]: type(ts.tzinfo)
339+
Out[3]: pytz.UTC
340+
341+
In [4]: ts2 = pd.Timestamp("2016-01-01 04:05:06-07:00")
342+
In [5]: type(ts2.tzinfo)
343+
Out[5]: pytz._FixedOffset
344+
345+
*New behavior*:
346+
347+
.. ipython:: python
348+
349+
ts = pd.Timestamp("2016-01-01", tz="UTC")
350+
type(ts.tzinfo)
351+
352+
ts2 = pd.Timestamp("2016-01-01 04:05:06-07:00")
353+
type(ts2.tzinfo)
354+
355+
For timezones that are neither UTC nor fixed offsets, e.g. "US/Pacific", we
356+
continue to default to ``pytz`` objects.
357+
321358
.. _whatsnew_200.api_breaking.zero_len_indexes:
322359

323360
Empty DataFrames/Series will now default to have a ``RangeIndex``
@@ -646,6 +683,8 @@ Removal of prior version deprecations/changes
646683
- Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`)
647684
- Using the method :meth:`DataFrameGroupBy.transform` with a callable that returns DataFrames will align to the input's index (:issue:`47244`)
648685
- When providing a list of columns of length one to :meth:`DataFrame.groupby`, the keys that are returned by iterating over the resulting :class:`DataFrameGroupBy` object will now be tuples of length one (:issue:`47761`)
686+
- Removed deprecated methods :meth:`ExcelWriter.write_cells`, :meth:`ExcelWriter.save`, :meth:`ExcelWriter.cur_sheet`, :meth:`ExcelWriter.handles`, :meth:`ExcelWriter.path` (:issue:`45795`)
687+
- The :class:`ExcelWriter` attribute ``book`` can no longer be set; it is still available to be accessed and mutated (:issue:`48943`)
649688
-
650689

651690
.. ---------------------------------------------------------------------------
@@ -760,6 +799,7 @@ Indexing
760799
- Bug in :meth:`DataFrame.loc` raising ``ValueError`` with ``bool`` indexer and :class:`MultiIndex` (:issue:`47687`)
761800
- Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` when right hand side is :class:`DataFrame` with :class:`MultiIndex` columns (:issue:`49121`)
762801
- Bug in :meth:`DataFrame.reindex` casting dtype to ``object`` when :class:`DataFrame` has single extension array column when re-indexing ``columns`` and ``index`` (:issue:`48190`)
802+
- Bug in :meth:`DataFrame.iloc` raising ``IndexError`` when indexer is a :class:`Series` with numeric extension array dtype (:issue:`49521`)
763803
- Bug in :func:`~DataFrame.describe` when formatting percentiles in the resulting index showed more decimals than needed (:issue:`46362`)
764804
- Bug in :meth:`DataFrame.compare` not recognizing differences when comparing ``NA`` with a value in nullable dtypes (:issue:`48939`)
765805
-

environment.yml

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ dependencies:
1717
- psutil
1818
- pytest-asyncio>=0.17
1919
- boto3
20+
- coverage
2021

2122
# required dependencies
2223
- python-dateutil
@@ -27,20 +28,22 @@ dependencies:
2728
- beautifulsoup4
2829
- blosc
2930
- brotlipy
31+
- botocore
3032
- bottleneck
3133
- fastparquet
3234
- fsspec
3335
- html5lib
3436
- hypothesis
3537
- gcsfs
38+
- ipython
3639
- jinja2
3740
- lxml
3841
- matplotlib>=3.6.1
3942
- numba>=0.53.1
4043
- numexpr>=2.8.0 # pin for "Run checks on imported code" job
4144
- openpyxl
4245
- odfpy
43-
- pandas-gbq
46+
- py
4447
- psycopg2
4548
- pyarrow<10
4649
- pymysql
@@ -60,17 +63,8 @@ dependencies:
6063

6164
# downstream packages
6265
- aiobotocore<2.0.0 # GH#44311 pinned to fix docbuild
63-
- botocore
64-
- cftime
65-
- dask
66-
- ipython
67-
- seaborn
68-
- scikit-learn
69-
- statsmodels
70-
- coverage
71-
- pandas-datareader
72-
- pyyaml
73-
- py
66+
- dask-core
67+
- seaborn-base
7468

7569
# local testing dependencies
7670
- moto

pandas/_libs/lib.pyx

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
from collections import abc
22
from decimal import Decimal
33
from enum import Enum
4-
from typing import Literal
4+
from typing import (
5+
Literal,
6+
_GenericAlias,
7+
)
58

69
cimport cython
710
from cpython.datetime cimport (
@@ -1119,7 +1122,8 @@ cdef bint c_is_list_like(object obj, bint allow_sets) except -1:
11191122
# equiv: `isinstance(obj, abc.Iterable)`
11201123
getattr(obj, "__iter__", None) is not None and not isinstance(obj, type)
11211124
# we do not count strings/unicode/bytes as list-like
1122-
and not isinstance(obj, (str, bytes))
1125+
# exclude Generic types that have __iter__
1126+
and not isinstance(obj, (str, bytes, _GenericAlias))
11231127
# exclude zero-dimensional duck-arrays, effectively scalars
11241128
and not (hasattr(obj, "ndim") and obj.ndim == 0)
11251129
# exclude sets if allow_sets is False

0 commit comments

Comments
 (0)