From 901837fc78a0eb41ae8ff094fdc987a4007e4380 Mon Sep 17 00:00:00 2001 From: Ondrej Kokes Date: Mon, 27 Aug 2018 15:00:10 +0200 Subject: [PATCH 1/5] adding new botocore - allows for streaming --- ci/environment-dev.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/environment-dev.yaml b/ci/environment-dev.yaml index f3323face4144..844b984e58237 100644 --- a/ci/environment-dev.yaml +++ b/ci/environment-dev.yaml @@ -9,6 +9,7 @@ dependencies: - flake8-comprehensions - hypothesis>=3.58.0 - moto + - botocore>=1.10.47 - pytest>=3.6 - python-dateutil>=2.5.0 - python=3 From 0b74701cd6b5b7f901bd2ef60d7bb3639bb4f891 Mon Sep 17 00:00:00 2001 From: Ondrej Kokes Date: Mon, 27 Aug 2018 15:01:35 +0200 Subject: [PATCH 2/5] testing streaming of CSV files from S3 --- pandas/tests/io/test_s3.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/io/test_s3.py b/pandas/tests/io/test_s3.py index 7a3062f470ce8..9b295e1a07dbc 100644 --- a/pandas/tests/io/test_s3.py +++ b/pandas/tests/io/test_s3.py @@ -1,3 +1,7 @@ +from six import BytesIO + +from botocore.response import StreamingBody +from pandas import read_csv from pandas.io.common import is_s3_url @@ -6,3 +10,13 @@ class TestS3URL(object): def test_is_s3_url(self): assert is_s3_url("s3://pandas/somethingelse.com") assert not is_s3_url("s4://pandas/somethingelse.com") + + +def test_streaming_s3_objects(): + data = [ + b'foo,bar,baz\n1,2,3\n4,5,6\n', + b'just,the,header\n', + ] + for el in data: + body = StreamingBody(BytesIO(el), content_length=len(el)) + read_csv(body) From 86cc36ba59c6c17e2765505dd9aa09eeb402b1b9 Mon Sep 17 00:00:00 2001 From: Ondrej Kokes Date: Mon, 27 Aug 2018 16:27:49 +0200 Subject: [PATCH 3/5] skip test if botocore not installed --- pandas/tests/io/test_s3.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_s3.py b/pandas/tests/io/test_s3.py index 9b295e1a07dbc..8aca974476312 100644 --- a/pandas/tests/io/test_s3.py +++ b/pandas/tests/io/test_s3.py @@ -1,7 +1,7 @@ -from six import BytesIO +import pytest -from botocore.response import StreamingBody from pandas import read_csv +from pandas.compat import BytesIO from pandas.io.common import is_s3_url @@ -13,6 +13,9 @@ def test_is_s3_url(self): def test_streaming_s3_objects(): + pytest.importorskip('botocore', minversion='1.10.47') + from botocore.response import StreamingBody + data = [ b'foo,bar,baz\n1,2,3\n4,5,6\n', b'just,the,header\n', From 16d92aacc8add3e2a20ea95adbf2e4454aa85347 Mon Sep 17 00:00:00 2001 From: Ondrej Kokes Date: Mon, 27 Aug 2018 16:28:23 +0200 Subject: [PATCH 4/5] reverting addition of a dependency --- ci/environment-dev.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/environment-dev.yaml b/ci/environment-dev.yaml index 844b984e58237..f3323face4144 100644 --- a/ci/environment-dev.yaml +++ b/ci/environment-dev.yaml @@ -9,7 +9,6 @@ dependencies: - flake8-comprehensions - hypothesis>=3.58.0 - moto - - botocore>=1.10.47 - pytest>=3.6 - python-dateutil>=2.5.0 - python=3 From 14634b114460f1ac427feb0d03cc70b53f0d8b0d Mon Sep 17 00:00:00 2001 From: Ondrej Kokes Date: Tue, 28 Aug 2018 14:44:41 +0200 Subject: [PATCH 5/5] test docstring --- pandas/tests/io/test_s3.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/io/test_s3.py b/pandas/tests/io/test_s3.py index 8aca974476312..a2c3d17f8754a 100644 --- a/pandas/tests/io/test_s3.py +++ b/pandas/tests/io/test_s3.py @@ -13,6 +13,8 @@ def test_is_s3_url(self): def test_streaming_s3_objects(): + # GH17135 + # botocore gained iteration support in 1.10.47, can now be used in read_* pytest.importorskip('botocore', minversion='1.10.47') from botocore.response import StreamingBody