Skip to content

Commit f85d72c

Browse files
committed
Merge remote-tracking branch 'upstream/master' into deprecate-nonkeyword-args-replace
2 parents f90896e + a246270 commit f85d72c

File tree

118 files changed

+2027
-1628
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

118 files changed

+2027
-1628
lines changed

.github/workflows/database.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,7 @@ jobs:
7070
- uses: conda-incubator/setup-miniconda@v2
7171
with:
7272
activate-environment: pandas-dev
73-
channel-priority: strict
73+
channel-priority: flexible
7474
environment-file: ${{ matrix.ENV_FILE }}
7575
use-only-tar-bz2: true
7676

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ repos:
2121
- repo: https://github.com/pre-commit/pre-commit-hooks
2222
rev: v3.4.0
2323
hooks:
24+
- id: debug-statements
2425
- id: end-of-file-fixer
2526
exclude: \.txt$
2627
- id: trailing-whitespace

asv_bench/benchmarks/strings.py

Lines changed: 45 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,19 @@
1111
from .pandas_vb_common import tm
1212

1313

14+
class Dtypes:
15+
params = ["str", "string", "arrow_string"]
16+
param_names = ["dtype"]
17+
18+
def setup(self, dtype):
19+
from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401
20+
21+
try:
22+
self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype)
23+
except ImportError:
24+
raise NotImplementedError
25+
26+
1427
class Construction:
1528

1629
params = ["str", "string"]
@@ -49,18 +62,7 @@ def peakmem_cat_frame_construction(self, dtype):
4962
DataFrame(self.frame_cat_arr, dtype=dtype)
5063

5164

52-
class Methods:
53-
params = ["str", "string", "arrow_string"]
54-
param_names = ["dtype"]
55-
56-
def setup(self, dtype):
57-
from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401
58-
59-
try:
60-
self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype)
61-
except ImportError:
62-
raise NotImplementedError
63-
65+
class Methods(Dtypes):
6466
def time_center(self, dtype):
6567
self.s.str.center(100)
6668

@@ -83,6 +85,9 @@ def time_find(self, dtype):
8385
def time_rfind(self, dtype):
8486
self.s.str.rfind("[A-Z]+")
8587

88+
def time_fullmatch(self, dtype):
89+
self.s.str.fullmatch("A")
90+
8691
def time_get(self, dtype):
8792
self.s.str.get(0)
8893

@@ -211,35 +216,26 @@ def time_cat(self, other_cols, sep, na_rep, na_frac):
211216
self.s.str.cat(others=self.others, sep=sep, na_rep=na_rep)
212217

213218

214-
class Contains:
219+
class Contains(Dtypes):
215220

216-
params = (["str", "string", "arrow_string"], [True, False])
221+
params = (Dtypes.params, [True, False])
217222
param_names = ["dtype", "regex"]
218223

219224
def setup(self, dtype, regex):
220-
from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401
221-
222-
try:
223-
self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype)
224-
except ImportError:
225-
raise NotImplementedError
225+
super().setup(dtype)
226226

227227
def time_contains(self, dtype, regex):
228228
self.s.str.contains("A", regex=regex)
229229

230230

231-
class Split:
231+
class Split(Dtypes):
232232

233-
params = (["str", "string", "arrow_string"], [True, False])
233+
params = (Dtypes.params, [True, False])
234234
param_names = ["dtype", "expand"]
235235

236236
def setup(self, dtype, expand):
237-
from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401
238-
239-
try:
240-
self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype).str.join("--")
241-
except ImportError:
242-
raise NotImplementedError
237+
super().setup(dtype)
238+
self.s = self.s.str.join("--")
243239

244240
def time_split(self, dtype, expand):
245241
self.s.str.split("--", expand=expand)
@@ -248,17 +244,23 @@ def time_rsplit(self, dtype, expand):
248244
self.s.str.rsplit("--", expand=expand)
249245

250246

251-
class Dummies:
252-
params = ["str", "string", "arrow_string"]
253-
param_names = ["dtype"]
247+
class Extract(Dtypes):
254248

255-
def setup(self, dtype):
256-
from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401
249+
params = (Dtypes.params, [True, False])
250+
param_names = ["dtype", "expand"]
257251

258-
try:
259-
self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype).str.join("|")
260-
except ImportError:
261-
raise NotImplementedError
252+
def setup(self, dtype, expand):
253+
super().setup(dtype)
254+
255+
def time_extract_single_group(self, dtype, expand):
256+
with warnings.catch_warnings(record=True):
257+
self.s.str.extract("(\\w*)A", expand=expand)
258+
259+
260+
class Dummies(Dtypes):
261+
def setup(self, dtype):
262+
super().setup(dtype)
263+
self.s = self.s.str.join("|")
262264

263265
def time_get_dummies(self, dtype):
264266
self.s.str.get_dummies("|")
@@ -279,3 +281,9 @@ def setup(self):
279281
def time_vector_slice(self):
280282
# GH 2602
281283
self.s.str[:5]
284+
285+
286+
class Iter(Dtypes):
287+
def time_iter(self, dtype):
288+
for i in self.s:
289+
pass

ci/deps/actions-37-db-min.yaml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,8 @@ dependencies:
3131
- openpyxl
3232
- pandas-gbq
3333
- google-cloud-bigquery>=1.27.2 # GH 36436
34-
- pyarrow=0.17 # GH 38803
34+
- protobuf>=3.12.4
35+
- pyarrow=0.17.1 # GH 38803
3536
- pytables>=3.5.1
3637
- scipy
3738
- xarray=0.12.3

ci/deps/actions-37-db.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ dependencies:
3131
- pandas-gbq
3232
- google-cloud-bigquery>=1.27.2 # GH 36436
3333
- psycopg2
34-
- pyarrow>=0.15.0
34+
- pyarrow>=0.17.0
3535
- pymysql
3636
- pytables
3737
- python-snappy

ci/deps/actions-37-minimum_versions.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ dependencies:
2323
- pytables=3.5.1
2424
- python-dateutil=2.7.3
2525
- pytz=2017.3
26-
- pyarrow=0.15
26+
- pyarrow=0.17.0
2727
- scipy=1.2
2828
- xlrd=1.2.0
2929
- xlsxwriter=1.0.2

ci/deps/actions-37.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ dependencies:
1818
- numpy=1.19
1919
- python-dateutil
2020
- nomkl
21-
- pyarrow=0.15.1
21+
- pyarrow
2222
- pytz
2323
- s3fs>=0.4.0
2424
- moto>=1.3.14

ci/deps/azure-macos-37.yaml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
name: pandas-dev
22
channels:
33
- defaults
4+
- conda-forge
45
dependencies:
56
- python=3.7.*
67

@@ -21,7 +22,7 @@ dependencies:
2122
- numexpr
2223
- numpy=1.17.3
2324
- openpyxl
24-
- pyarrow=0.15.1
25+
- pyarrow=0.17.0
2526
- pytables
2627
- python-dateutil==2.7.3
2728
- pytz

ci/deps/azure-windows-37.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ dependencies:
2626
- numexpr
2727
- numpy=1.17.*
2828
- openpyxl
29-
- pyarrow=0.15
29+
- pyarrow=0.17.0
3030
- pytables
3131
- python-dateutil
3232
- pytz

ci/deps/azure-windows-38.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ dependencies:
2525
- numpy=1.18.*
2626
- openpyxl
2727
- jinja2
28-
- pyarrow>=0.15.0
28+
- pyarrow>=0.17.0
2929
- pytables
3030
- python-dateutil
3131
- pytz

0 commit comments

Comments
 (0)