Commit 51a9d4a

Merge master
2 parents: 95a9503 + f5cc078

175 files changed (+2078, -1363 lines)


.github/CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 
 Whether you are a novice or experienced software developer, all contributions and suggestions are welcome!
 
-Our main contributing guide can be found [in this repo](https://github.com/pandas-dev/pandas/blob/master/doc/source/development/contributing.rst) or [on the website](https://pandas-docs.github.io/pandas-docs-travis/contributing.html). If you do not want to read it in its entirety, we will summarize the main ways in which you can contribute and point to relevant sections of that document for further information.
+Our main contributing guide can be found [in this repo](https://github.com/pandas-dev/pandas/blob/master/doc/source/development/contributing.rst) or [on the website](https://pandas-docs.github.io/pandas-docs-travis/development/contributing.html). If you do not want to read it in its entirety, we will summarize the main ways in which you can contribute and point to relevant sections of that document for further information.
 
 ## Getting Started
 

.travis.yml

Lines changed: 8 additions & 0 deletions
@@ -86,6 +86,14 @@ install:
   - ci/submit_cython_cache.sh
   - echo "install done"
 
+before_script:
+  # display server (for clipboard functionality) needs to be started here,
+  # does not work if done in install:setup_env.sh (GH-26103)
+  - export DISPLAY=":99.0"
+  - echo "sh -e /etc/init.d/xvfb start"
+  - sh -e /etc/init.d/xvfb start
+  - sleep 3
+
 script:
   - echo "script start"
   - source activate pandas-dev
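
For context on why this diff starts a virtual display: pandas' clipboard round-trip goes through the system clipboard, which on Linux requires an X server (here the Xvfb server on :99.0). A minimal sketch of the behaviour being exercised, assuming a clipboard backend such as xclip or xsel is installed; the DataFrame contents are purely illustrative:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

    # Without a DISPLAY these calls fail on Linux, because there is
    # no clipboard to talk to.
    df.to_clipboard(index=False)
    roundtrip = pd.read_clipboard()
    print(roundtrip)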

asv_bench/benchmarks/io/csv.py

Lines changed: 56 additions & 2 deletions
@@ -3,7 +3,7 @@
 
 import numpy as np
 import pandas.util.testing as tm
-from pandas import DataFrame, Categorical, date_range, read_csv
+from pandas import DataFrame, Categorical, date_range, read_csv, to_datetime
 from pandas.io.parsers import _parser_defaults
 from io import StringIO
 
@@ -96,6 +96,35 @@ def time_read_csv(self, infer_datetime_format, format):
                  infer_datetime_format=infer_datetime_format)
 
 
+class ReadCSVConcatDatetime(StringIORewind):
+
+    iso8601 = '%Y-%m-%d %H:%M:%S'
+
+    def setup(self):
+        rng = date_range('1/1/2000', periods=50000, freq='S')
+        self.StringIO_input = StringIO('\n'.join(
+            rng.strftime(self.iso8601).tolist()))
+
+    def time_read_csv(self):
+        read_csv(self.data(self.StringIO_input),
+                 header=None, names=['foo'], parse_dates=['foo'],
+                 infer_datetime_format=False)
+
+
+class ReadCSVConcatDatetimeBadDateValue(StringIORewind):
+
+    params = (['nan', '0', ''],)
+    param_names = ['bad_date_value']
+
+    def setup(self, bad_date_value):
+        self.StringIO_input = StringIO(('%s,\n' % bad_date_value) * 50000)
+
+    def time_read_csv(self, bad_date_value):
+        read_csv(self.data(self.StringIO_input),
+                 header=None, names=['foo', 'bar'], parse_dates=['foo'],
+                 infer_datetime_format=False)
+
+
 class ReadCSVSkipRows(BaseIO):
 
     fname = '__test__.csv'
@@ -273,7 +302,7 @@ def mem_parser_chunks(self):
 
 class ReadCSVParseSpecialDate(StringIORewind):
     params = (['mY', 'mdY', 'hm'],)
-    params_name = ['value']
+    param_names = ['value']
     objects = {
         'mY': '01-2019\n10-2019\n02/2000\n',
         'mdY': '12/02/2010\n',
@@ -290,4 +319,29 @@ def time_read_special_date(self, value):
                  names=['Date'], parse_dates=['Date'])
 
 
+class ParseDateComparison(StringIORewind):
+    params = ([False, True],)
+    param_names = ['cache_dates']
+
+    def setup(self, cache_dates):
+        count_elem = 10000
+        data = '12-02-2010\n' * count_elem
+        self.StringIO_input = StringIO(data)
+
+    def time_read_csv_dayfirst(self, cache_dates):
+        read_csv(self.data(self.StringIO_input), sep=',', header=None,
+                 names=['Date'], parse_dates=['Date'], cache_dates=cache_dates,
+                 dayfirst=True)
+
+    def time_to_datetime_dayfirst(self, cache_dates):
+        df = read_csv(self.data(self.StringIO_input),
+                      dtype={'date': str}, names=['date'])
+        to_datetime(df['date'], cache=cache_dates, dayfirst=True)
+
+    def time_to_datetime_format_DD_MM_YYYY(self, cache_dates):
+        df = read_csv(self.data(self.StringIO_input),
+                      dtype={'date': str}, names=['date'])
+        to_datetime(df['date'], cache=cache_dates, format='%d-%m-%Y')
+
+
 from ..pandas_vb_common import setup  # noqa: F401
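
The new ParseDateComparison class times three equivalent ways of turning a day-first text column into datetimes. A minimal standalone sketch of the same calls, assuming a pandas version that has the cache_dates argument of read_csv and the cache argument of to_datetime exercised by the benchmark (the input data here is illustrative):

    from io import StringIO

    import pandas as pd

    data = StringIO("12-02-2010\n" * 10000)

    # 1) parse while reading; cache_dates reuses conversions of repeated strings
    df = pd.read_csv(data, header=None, names=["Date"],
                     parse_dates=["Date"], dayfirst=True, cache_dates=True)

    # 2) read as text, then convert; an explicit format avoids dayfirst guessing
    data.seek(0)
    raw = pd.read_csv(data, header=None, names=["Date"], dtype={"Date": str})
    parsed = pd.to_datetime(raw["Date"], cache=True, format="%d-%m-%Y")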

asv_bench/benchmarks/io/parsers.py

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
+import numpy as np
+
+from pandas._libs.tslibs.parsing import (
+    _concat_date_cols, _does_string_look_like_datetime)
+
+
+class DoesStringLookLikeDatetime(object):
+
+    params = (['2Q2005', '0.0', '10000'],)
+    param_names = ['value']
+
+    def setup(self, value):
+        self.objects = [value] * 1000000
+
+    def time_check_datetimes(self, value):
+        for obj in self.objects:
+            _does_string_look_like_datetime(obj)
+
+
+class ConcatDateCols(object):
+
+    params = ([1234567890, 'AAAA'], [1, 2])
+    param_names = ['value', 'dim']
+
+    def setup(self, value, dim):
+        count_elem = 10000
+        if dim == 1:
+            self.object = (np.array([value] * count_elem),)
+        if dim == 2:
+            self.object = (np.array([value] * count_elem),
+                           np.array([value] * count_elem))
+
+    def time_check_concat(self, value, dim):
+        _concat_date_cols(self.object)
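
These classes follow the usual asv layout: params and param_names define the parameter grid, setup builds the inputs, and each time_* method is what gets timed. For a quick local spot check without asv, a rough timeit equivalent of DoesStringLookLikeDatetime might look like the sketch below; it assumes the private helper stays importable from pandas._libs.tslibs.parsing:

    import timeit

    from pandas._libs.tslibs.parsing import _does_string_look_like_datetime

    for value in ['2Q2005', '0.0', '10000']:
        # time one million calls, mirroring the benchmark's [value] * 1000000 setup
        elapsed = timeit.timeit(
            lambda: _does_string_look_like_datetime(value), number=1000000)
        print('%-8s %.3fs' % (value, elapsed))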

ci/azure/windows.yml

Lines changed: 7 additions & 8 deletions
@@ -17,16 +17,15 @@ jobs:
         CONDA_PY: "37"
 
   steps:
-    - task: CondaEnvironment@1
-      inputs:
-        updateConda: no
-        packageSpecs: ''
-
-    - script: |
-        ci\\incremental\\setup_conda_environment.cmd
-      displayName: 'Before Install'
+    - powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
+      displayName: Add conda to PATH
+    - script: conda update -q -n base conda
+      displayName: Update conda
+    - script: conda env create -q --file ci\\deps\\azure-windows-$(CONDA_PY).yaml
+      displayName: Create anaconda environment
     - script: |
         call activate pandas-dev
+        call conda list
         ci\\incremental\\build.cmd
       displayName: 'Build'
     - script: |

ci/code_checks.sh

Lines changed: 4 additions & 9 deletions
@@ -169,15 +169,6 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     invgrep -r -E --include '*.py' '(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch)' pandas/tests/
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
-    # Check that we use pytest.raises only as a context manager
-    #
-    # For any flake8-compliant code, the only way this regex gets
-    # matched is if there is no "with" statement preceding "pytest.raises"
-    MSG='Check for pytest.raises as context manager (a line starting with `pytest.raises` is invalid, needs a `with` to precede it)' ; echo $MSG
-    MSG='TODO: This check is currently skipped because so many files fail this. Please enable when all are corrected (xref gh-24332)' ; echo $MSG
-    # invgrep -R --include '*.py' -E '[[:space:]] pytest.raises' pandas/tests
-    # RET=$(($RET + $?)) ; echo $MSG "DONE"
-
     MSG='Check for wrong space after code-block directive and before colon (".. code-block ::" instead of ".. code-block::")' ; echo $MSG
     invgrep -R --include="*.rst" ".. code-block ::" doc/source
     RET=$(($RET + $?)) ; echo $MSG "DONE"
@@ -239,6 +230,10 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
     pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe"
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Doctests datetimes.py' ; echo $MSG
+    pytest -q --doctest-modules pandas/core/tools/datetimes.py
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     MSG='Doctests top-level reshaping functions' ; echo $MSG
     pytest -q --doctest-modules \
         pandas/core/reshape/concat.py \

ci/incremental/setup_conda_environment.cmd

Lines changed: 0 additions & 21 deletions
This file was deleted.

ci/run_with_env.cmd

Lines changed: 0 additions & 95 deletions
This file was deleted.

ci/setup_env.sh

Lines changed: 0 additions & 6 deletions
@@ -118,16 +118,10 @@ echo "conda list"
 conda list
 
 # Install DB for Linux
-export DISPLAY=":99."
 if [ ${TRAVIS_OS_NAME} == "linux" ]; then
     echo "installing dbs"
     mysql -e 'create database pandas_nosetest;'
     psql -c 'create database pandas_nosetest;' -U postgres
-
-    echo
-    echo "sh -e /etc/init.d/xvfb start"
-    sh -e /etc/init.d/xvfb start
-    sleep 3
 else
     echo "not using dbs on non-linux"
 fi

doc/source/ecosystem.rst

Lines changed: 5 additions & 0 deletions
@@ -285,6 +285,11 @@ provides a familiar ``DataFrame`` interface for out-of-core, parallel and distri
 
 Dask-ML enables parallel and distributed machine learning using Dask alongside existing machine learning libraries like Scikit-Learn, XGBoost, and TensorFlow.
 
+`Koalas <https://koalas.readthedocs.io/en/latest/>`__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Koalas provides a familiar pandas DataFrame interface on top of Apache Spark. It enables users to leverage multi-cores on one machine or a cluster of machines to speed up or scale their DataFrame code.
+
 `Odo <http://odo.pydata.org>`__
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

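For readers unfamiliar with Koalas, a hypothetical sketch of the "familiar interface" claim follows; it assumes the databricks.koalas package and its from_pandas helper as described in the Koalas documentation, plus a working PySpark installation:

    import pandas as pd
    import databricks.koalas as ks  # assumed import path from the Koalas docs

    pdf = pd.DataFrame({"x": [1, 2, 3], "y": [10.0, 20.0, 30.0]})

    # Same DataFrame-style API, but the work is distributed by Spark under the hood.
    kdf = ks.from_pandas(pdf)
    print(kdf.groupby("x").sum())  # mirrors the pandas groupby call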