-
-
Notifications
You must be signed in to change notification settings - Fork 18.7k
Support dicts with default values in series.map #16002
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
e8f9d27
96d12a6
961ea46
d73cee8
2a2bab7
4f3dc6b
24e1478
11f5769
ddb0480
1f56c81
79cfd11
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2079,8 +2079,8 @@ def map(self, arg, na_action=None): | |
two bar | ||
three baz | ||
|
||
Mapping a dictionary keys on the index labels works similar as | ||
with a `Series`: | ||
If `arg` is a dictionary, return a new Series with values converted | ||
according to the dictionary's mapping: | ||
|
||
>>> z = {1: 'A', 2: 'B', 3: 'C'} | ||
|
||
|
@@ -2089,21 +2089,33 @@ def map(self, arg, na_action=None): | |
two B | ||
three C | ||
|
||
Values in Series that are not in the dictionary (as keys) are converted | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: put this in |
||
to ``NaN``. However, if the dictionary is a ``dict`` subclass that | ||
defines ``__missing__`` (i.e. provides a method for default values), | ||
then this default is used rather than ``NaN``: | ||
|
||
>>> from collections import Counter | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: put this example at the end |
||
>>> counter = Counter() | ||
>>> counter['bar'] += 1 | ||
>>> y.map(counter) | ||
1 0 | ||
2 1 | ||
3 0 | ||
dtype: int64 | ||
|
||
Use na_action to control whether NA values are affected by the mapping | ||
function. | ||
|
||
>>> s = pd.Series([1, 2, 3, np.nan]) | ||
|
||
>>> s2 = s.map(lambda x: 'this is a string {}'.format(x), | ||
na_action=None) | ||
>>> s2 = s.map('this is a string {}'.format, na_action=None) | ||
0 this is a string 1.0 | ||
1 this is a string 2.0 | ||
2 this is a string 3.0 | ||
3 this is a string nan | ||
dtype: object | ||
|
||
>>> s3 = s.map(lambda x: 'this is a string {}'.format(x), | ||
na_action='ignore') | ||
>>> s3 = s.map('this is a string {}'.format, na_action='ignore') | ||
0 this is a string 1.0 | ||
1 this is a string 2.0 | ||
2 this is a string 3.0 | ||
|
@@ -2132,13 +2144,23 @@ def map_f(values, f): | |
else: | ||
map_f = lib.map_infer | ||
|
||
if isinstance(arg, (dict, Series)): | ||
if isinstance(arg, dict): | ||
if isinstance(arg, dict): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: can you add a 1-line comment here on what you are doing |
||
if hasattr(arg, '__missing__'): | ||
# If a dictionary subclass defines a default value method, | ||
# convert arg to a lookup function (GH #15999). | ||
dict_with_default = arg | ||
arg = lambda x: dict_with_default[x] | ||
else: | ||
# Dictionary does not have a default. Thus it's safe to | ||
# convert to an indexed series for efficiency. | ||
arg = self._constructor(arg, index=arg.keys()) | ||
|
||
if isinstance(arg, Series): | ||
# arg is a Series | ||
indexer = arg.index.get_indexer(values) | ||
new_values = algorithms.take_1d(arg._values, indexer) | ||
else: | ||
# arg is a function | ||
new_values = map_f(values, arg) | ||
|
||
return self._constructor(new_values, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
# coding=utf-8 | ||
# pylint: disable-msg=E1101,W0612 | ||
|
||
from collections import OrderedDict | ||
from collections import Counter, defaultdict, OrderedDict | ||
import numpy as np | ||
import pandas as pd | ||
|
||
|
@@ -411,6 +411,42 @@ def test_map_dict_with_tuple_keys(self): | |
tm.assert_series_equal(df['labels'], df['expected_labels'], | ||
check_names=False) | ||
|
||
def test_map_counter(self):
    # Mapping through a Counter: a value with no entry ('a') is expected
    # to come back as the Counter's default count of 0, not NaN.
    labels = Series(['a', 'b', 'c'], index=[1, 2, 3])
    tally = Counter()
    tally['b'] = 5
    tally['c'] += 1  # increments from the implicit zero default
    wanted = Series([0, 5, 1], index=[1, 2, 3])
    assert_series_equal(labels.map(tally), wanted)
|
||
def test_map_defaultdict(self):
    # Mapping through a defaultdict: values without an explicit entry
    # (2 and 3) are expected to take the factory's value, 'blank'.
    values = Series([1, 2, 3], index=['a', 'b', 'c'])
    lookup = defaultdict(lambda: 'blank')
    lookup[1] = 'stuff'
    mapped = values.map(lookup)
    wanted = Series(['stuff', 'blank', 'blank'], index=['a', 'b', 'c'])
    assert_series_equal(mapped, wanted)
|
||
def test_map_dict_subclass_with_missing(self):
    # GH #15999: a dict subclass that defines ``__missing__`` provides
    # the value for keys absent from the mapping, so those entries are
    # expected to be 'missing' rather than NaN.
    class DictWithMissing(dict):
        def __missing__(self, key):
            return 'missing'

    s = Series([1, 2, 3])
    dictionary = DictWithMissing({3: 'three'})
    result = s.map(dictionary)
    expected = Series(['missing', 'missing', 'three'])
    assert_series_equal(result, expected)
|
||
def test_map_dict_subclass_without_missing(self):
    # A dict subclass that does not define ``__missing__`` has no default,
    # so values without a matching key are expected to map to NaN.
    class DictWithoutMissing(dict):
        pass

    source = Series([1, 2, 3])
    mapping = DictWithoutMissing({3: 'three'})
    wanted = Series([np.nan, np.nan, 'three'])
    assert_series_equal(source.map(mapping), wanted)
|
||
def test_map_box(self): | ||
vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')] | ||
s = pd.Series(vals) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jreback let me know if you want me to revert the deletion of these blank lines.