-
-
Notifications
You must be signed in to change notification settings - Fork 18.7k
ENH: Add defaultdict support for dtype in read_csv #46051
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
1f7534c
ac687e7
a1bdbc7
829f7b0
019fc52
759bed4
25fdc85
cdf7f2f
07060de
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
Tests dtype specification during parsing | ||
for all of the parsers defined in parsers.py | ||
""" | ||
from collections import defaultdict | ||
from io import StringIO | ||
|
||
import numpy as np | ||
|
@@ -335,3 +336,40 @@ def test_nullable_int_dtype(all_parsers, any_int_ea_dtype): | |
) | ||
actual = parser.read_csv(StringIO(data), dtype=dtype) | ||
tm.assert_frame_equal(actual, expected) | ||
|
||
mroeschke marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
@pytest.mark.parametrize("default", ["float", "float64"]) | ||
def test_dtypes_defaultdict(all_parsers, default): | ||
mroeschke marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# GH#41574 | ||
data = """a,b | ||
1,2 | ||
""" | ||
dtype = defaultdict(lambda: default, a="int64") | ||
parser = all_parsers | ||
result = parser.read_csv(StringIO(data), dtype=dtype) | ||
expected = DataFrame({"a": [1], "b": 2.0}) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
|
||
def test_dtypes_defaultdict_mangle_dup_cols(all_parsers): | ||
# GH#41574 | ||
data = """a,b,a,b,b.1 | ||
1,2,3,4,5 | ||
""" | ||
dtype = defaultdict(lambda: "float64", a="int64") | ||
dtype["b.1"] = "int64" | ||
parser = all_parsers | ||
result = parser.read_csv(StringIO(data), dtype=dtype) | ||
expected = DataFrame({"a": [1], "b": 2.0, "a.1": [3], "b.2": [4.0], "b.1": [5]}) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Did you miss the [] on "b"? It looks different, but I don't think there is a reason. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, I forgot it, but it does not really make a difference. Added it nevertheless to be more consistent. |
||
tm.assert_frame_equal(result, expected) | ||
|
||
|
||
def test_dtypes_defaultdict_invalid(all_parsers): | ||
# GH#41574 | ||
data = """a,b | ||
1,2 | ||
""" | ||
dtype = defaultdict(lambda: "invalid_dtype", a="int64") | ||
parser = all_parsers | ||
with pytest.raises(TypeError, match="not understood"): | ||
parser.read_csv(StringIO(data), dtype=dtype) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could also support defaultdicts directly instead of converting here, but this simplifies adding support for converters too, and we also do not have to keep defaultdicts in mind every time we access the dtypes.