-
-
Notifications
You must be signed in to change notification settings - Fork 18.7k
BUG: Categorical.copy deep kwarg #27024
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -457,11 +457,14 @@ def _formatter(self, boxed=False): | |
# Defer to CategoricalFormatter's formatter. | ||
return None | ||
|
||
def copy(self): | ||
def copy(self, deep: bool = False): | ||
""" | ||
Copy constructor. | ||
""" | ||
return self._constructor(values=self._codes.copy(), | ||
values = self._codes | ||
if deep: | ||
values = values.copy() | ||
return self._constructor(values=values, | ||
dtype=self.dtype, | ||
fastpath=True) | ||
|
||
|
@@ -483,7 +486,7 @@ def astype(self, dtype, copy=True): | |
if is_categorical_dtype(dtype): | ||
# GH 10696/18593 | ||
dtype = self.dtype.update_dtype(dtype) | ||
self = self.copy() if copy else self | ||
self = self.copy(deep=True) if copy else self | ||
if dtype == self.dtype: | ||
return self | ||
return self._set_dtype(dtype) | ||
|
@@ -578,7 +581,7 @@ def _from_inferred_categories(cls, inferred_categories, inferred_codes, | |
codes = _recode_for_categories(inferred_codes, cats, categories) | ||
elif not cats.is_monotonic_increasing: | ||
# Sort categories and recode for unknown categories. | ||
unsorted = cats.copy() | ||
unsorted = cats.copy(deep=True) | ||
categories = cats.sort_values() | ||
|
||
codes = _recode_for_categories(inferred_codes, unsorted, | ||
|
@@ -751,7 +754,7 @@ def set_ordered(self, value, inplace=False): | |
""" | ||
inplace = validate_bool_kwarg(inplace, 'inplace') | ||
new_dtype = CategoricalDtype(self.categories, ordered=value) | ||
cat = self if inplace else self.copy() | ||
cat = self if inplace else self.copy(deep=True) | ||
cat._dtype = new_dtype | ||
if not inplace: | ||
return cat | ||
|
@@ -849,7 +852,7 @@ def set_categories(self, new_categories, ordered=None, rename=False, | |
ordered = self.dtype.ordered | ||
new_dtype = CategoricalDtype(new_categories, ordered=ordered) | ||
|
||
cat = self if inplace else self.copy() | ||
cat = self if inplace else self.copy(deep=True) | ||
if rename: | ||
if (cat.dtype.categories is not None and | ||
len(new_dtype.categories) < len(cat.dtype.categories)): | ||
|
@@ -937,7 +940,7 @@ def rename_categories(self, new_categories, inplace=False): | |
Categories (2, object): [A, B] | ||
""" | ||
inplace = validate_bool_kwarg(inplace, 'inplace') | ||
cat = self if inplace else self.copy() | ||
cat = self if inplace else self.copy(deep=True) | ||
|
||
if isinstance(new_categories, ABCSeries): | ||
msg = ("Treating Series 'new_categories' as a list-like and using " | ||
|
@@ -1045,7 +1048,7 @@ def add_categories(self, new_categories, inplace=False): | |
new_categories = list(self.dtype.categories) + list(new_categories) | ||
new_dtype = CategoricalDtype(new_categories, self.ordered) | ||
|
||
cat = self if inplace else self.copy() | ||
cat = self if inplace else self.copy(deep=True) | ||
cat._dtype = new_dtype | ||
cat._codes = coerce_indexer_dtype(cat._codes, new_dtype.categories) | ||
if not inplace: | ||
|
@@ -1127,7 +1130,7 @@ def remove_unused_categories(self, inplace=False): | |
set_categories | ||
""" | ||
inplace = validate_bool_kwarg(inplace, 'inplace') | ||
cat = self if inplace else self.copy() | ||
cat = self if inplace else self.copy(deep=True) | ||
idx, inv = np.unique(cat._codes, return_inverse=True) | ||
|
||
if idx.size != 0 and idx[0] == -1: # na sentinel | ||
|
@@ -2295,7 +2298,7 @@ def unique(self): | |
|
||
# unlike np.unique, unique1d does not sort | ||
unique_codes = unique1d(self.codes) | ||
cat = self.copy() | ||
cat = self.copy() # Don't need deep here | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm...if you're going to state this, I might briefly explain why. |
||
|
||
# keep nan in codes | ||
cat._codes = unique_codes | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -720,7 +720,10 @@ def copy(self, deep=True): | |
""" copy constructor """ | ||
values = self.values | ||
if deep: | ||
values = values.copy() | ||
if self.is_extension: | ||
values = values.copy(deep=True) | ||
else: | ||
values = values.copy() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is a deep-copy needed here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @gfyoung just pushed addressing most of your comments. for this one: because without deep=True, it isn't a "real" copy. i.e. |
||
return self.make_block_same_class(values, ndim=self.ndim) | ||
|
||
def replace(self, to_replace, value, inplace=False, filter=None, | ||
|
@@ -1855,7 +1858,7 @@ def where(self, other, cond, align=True, errors='raise', | |
dtype = self.dtype | ||
|
||
try: | ||
result = self.values.copy() | ||
result = self.values.copy(deep=True) | ||
icond = ~cond | ||
if lib.is_scalar(other): | ||
result[icond] = other | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -243,3 +243,28 @@ def _compare_other(self, s, data, op_name, other): | |
|
||
class TestParsing(base.BaseParsingTests): | ||
pass | ||
|
||
|
||
def test_copy_deep(data): | ||
assert data[0] != data[1] | ||
gfyoung marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
orig = data.copy(deep=True) | ||
other = data.copy(deep=True) | ||
|
||
# Modifying other will _not_ modify `data` | ||
other[0] = other[1] | ||
assert other[0] == other[1] | ||
assert data[0] != data[1] | ||
|
||
# Modifying other _will_ modify `data` | ||
other2 = data.copy(deep=False) | ||
other2[0] = other2[1] | ||
assert other2[0] == other2[1] | ||
assert data[0] == data[1] | ||
|
||
# Default behavior should be deep=False | ||
data = orig.copy(deep=True) | ||
other3 = data.copy() | ||
|
||
other3[0] = other3[1] | ||
assert data[0] == data[1] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Lots of tests bundled into one function. I would consider breaking this up into at least four different tests, especially since you "reset" every time by doing a copy of some kind of |
Uh oh!
There was an error while loading. Please reload this page.