From 0f754b02b8c36ff88ae4230c293c0bf6e9f7e88d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 17 Feb 2016 17:33:48 -0500 Subject: [PATCH 1/2] API: correctly provide __name__, __qualname__ for cum functions closes #12021 --- doc/source/whatsnew/v0.18.1.txt | 2 + pandas/core/generic.py | 71 ++++++++++++++++++--------------- pandas/tests/test_generic.py | 14 ++++++- 3 files changed, 54 insertions(+), 33 deletions(-) diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index c14a0c0961a2d..87525e6edfba0 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -103,6 +103,7 @@ API changes - ``CParserError`` is now a ``ValueError`` instead of just an ``Exception`` (:issue:`12551`) - ``pd.show_versions()`` now includes ``pandas_datareader`` version (:issue:`12740`) +- Provide a proper ``__name__`` and ``__qualname__`` attributes for generic functions (:issue:`12021`) .. _whatsnew_0181.apply_resample: @@ -170,6 +171,7 @@ Performance Improvements +- Bug in ``__name__`` of ``.cum*`` functions (:issue:`12021`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 848ed7c3baa94..4ffa65deb0b3c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -20,7 +20,7 @@ import pandas.core.missing as mis import pandas.core.datetools as datetools from pandas import compat -from pandas.compat import map, zip, lrange, string_types, isidentifier +from pandas.compat import map, zip, lrange, string_types, isidentifier, PY3 from pandas.core.common import (isnull, notnull, is_list_like, _values_from_object, _maybe_promote, _maybe_box_datetimelike, ABCSeries, @@ -4976,11 +4976,11 @@ def _add_numeric_operations(cls): axis_descr, name, name2 = _doc_parms(cls) cls.any = _make_logical_function( - 'any', name, name2, axis_descr, + cls, 'any', name, name2, axis_descr, 'Return whether any element is True over requested axis', nanops.nanany) cls.all = _make_logical_function( - 'all', name, name2, axis_descr, + cls, 'all', name, name2, axis_descr, 'Return whether all elements are True over requested axis', nanops.nanall) @@ -5008,18 +5008,18 @@ def mad(self, axis=None, skipna=None, level=None): cls.mad = mad cls.sem = _make_stat_function_ddof( - 'sem', name, name2, axis_descr, + cls, 'sem', name, name2, axis_descr, "Return unbiased standard error of the mean over requested " "axis.\n\nNormalized by N-1 by default. This can be changed " "using the ddof argument", nanops.nansem) cls.var = _make_stat_function_ddof( - 'var', name, name2, axis_descr, + cls, 'var', name, name2, axis_descr, "Return unbiased variance over requested axis.\n\nNormalized by " "N-1 by default. This can be changed using the ddof argument", nanops.nanvar) cls.std = _make_stat_function_ddof( - 'std', name, name2, axis_descr, + cls, 'std', name, name2, axis_descr, "Return sample standard deviation over requested axis." "\n\nNormalized by N-1 by default. 
This can be changed using the " "ddof argument", @@ -5038,54 +5038,54 @@ def compound(self, axis=None, skipna=None, level=None): cls.compound = compound cls.cummin = _make_cum_function( - 'min', name, name2, axis_descr, "cumulative minimum", + cls, 'cummin', name, name2, axis_descr, "cumulative minimum", lambda y, axis: np.minimum.accumulate(y, axis), np.inf, np.nan) cls.cumsum = _make_cum_function( - 'sum', name, name2, axis_descr, "cumulative sum", + cls, 'cumsum', name, name2, axis_descr, "cumulative sum", lambda y, axis: y.cumsum(axis), 0., np.nan) cls.cumprod = _make_cum_function( - 'prod', name, name2, axis_descr, "cumulative product", + cls, 'cumprod', name, name2, axis_descr, "cumulative product", lambda y, axis: y.cumprod(axis), 1., np.nan) cls.cummax = _make_cum_function( - 'max', name, name2, axis_descr, "cumulative max", + cls, 'cummax', name, name2, axis_descr, "cumulative max", lambda y, axis: np.maximum.accumulate(y, axis), -np.inf, np.nan) cls.sum = _make_stat_function( - 'sum', name, name2, axis_descr, + cls, 'sum', name, name2, axis_descr, 'Return the sum of the values for the requested axis', nanops.nansum) cls.mean = _make_stat_function( - 'mean', name, name2, axis_descr, + cls, 'mean', name, name2, axis_descr, 'Return the mean of the values for the requested axis', nanops.nanmean) cls.skew = _make_stat_function( - 'skew', name, name2, axis_descr, + cls, 'skew', name, name2, axis_descr, 'Return unbiased skew over requested axis\nNormalized by N-1', nanops.nanskew) cls.kurt = _make_stat_function( - 'kurt', name, name2, axis_descr, + cls, 'kurt', name, name2, axis_descr, "Return unbiased kurtosis over requested axis using Fisher's " "definition of\nkurtosis (kurtosis of normal == 0.0). Normalized " "by N-1\n", nanops.nankurt) cls.kurtosis = cls.kurt cls.prod = _make_stat_function( - 'prod', name, name2, axis_descr, + cls, 'prod', name, name2, axis_descr, 'Return the product of the values for the requested axis', nanops.nanprod) cls.product = cls.prod cls.median = _make_stat_function( - 'median', name, name2, axis_descr, + cls, 'median', name, name2, axis_descr, 'Return the median of the values for the requested axis', nanops.nanmedian) cls.max = _make_stat_function( - 'max', name, name2, axis_descr, + cls, 'max', name, name2, axis_descr, """This method returns the maximum of the values in the object. If you want the *index* of the maximum, use ``idxmax``. This is the equivalent of the ``numpy.ndarray`` method ``argmax``.""", nanops.nanmax) cls.min = _make_stat_function( - 'min', name, name2, axis_descr, + cls, 'min', name, name2, axis_descr, """This method returns the minimum of the values in the object. If you want the *index* of the minimum, use ``idxmin``. This is the equivalent of the ``numpy.ndarray`` method ``argmin``.""", @@ -5105,7 +5105,7 @@ def nanptp(values, axis=0, skipna=True): return nmax - nmin cls.ptp = _make_stat_function( - 'ptp', name, name2, axis_descr, + cls, 'ptp', name, name2, axis_descr, """Returns the difference between the maximum value and the minimum value in the object. 
This is the equivalent of the ``numpy.ndarray`` method ``ptp``.""", @@ -5238,7 +5238,17 @@ def _doc_parms(cls): %(outname)s : %(name1)s\n""" -def _make_stat_function(name, name1, name2, axis_descr, desc, f): +def _set_function_name(f, name, cls): + f.__name__ = name + if PY3: + f.__qualname__ = '{klass}.{name}'.format( + klass=cls.__name__, + name=name) + f.__module__ = cls.__module__ + return f + + +def _make_stat_function(cls, name, name1, name2, axis_descr, desc, f): @Substitution(outname=name, desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) @Appender(_num_doc) @@ -5255,11 +5265,10 @@ def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None, return self._reduce(f, name, axis=axis, skipna=skipna, numeric_only=numeric_only) - stat_func.__name__ = name - return stat_func + return _set_function_name(stat_func, name, cls) -def _make_stat_function_ddof(name, name1, name2, axis_descr, desc, f): +def _make_stat_function_ddof(cls, name, name1, name2, axis_descr, desc, f): @Substitution(outname=name, desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) @Appender(_num_ddof_doc) @@ -5276,17 +5285,16 @@ def stat_func(self, axis=None, skipna=None, level=None, ddof=1, return self._reduce(f, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof) - stat_func.__name__ = name - return stat_func + return _set_function_name(stat_func, name, cls) -def _make_cum_function(name, name1, name2, axis_descr, desc, accum_func, +def _make_cum_function(cls, name, name1, name2, axis_descr, desc, accum_func, mask_a, mask_b): @Substitution(outname=name, desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) @Appender("Return cumulative {0} over requested axis.".format(name) + _cnum_doc) - def func(self, axis=None, dtype=None, out=None, skipna=True, **kwargs): + def cum_func(self, axis=None, dtype=None, out=None, skipna=True, **kwargs): validate_kwargs(name, kwargs, 'out', 'dtype') if axis is None: axis = self._stat_axis_number @@ -5312,11 +5320,10 @@ def func(self, axis=None, dtype=None, out=None, skipna=True, **kwargs): d['copy'] = False return self._constructor(result, **d).__finalize__(self) - func.__name__ = name - return func + return _set_function_name(cum_func, name, cls) -def _make_logical_function(name, name1, name2, axis_descr, desc, f): +def _make_logical_function(cls, name, name1, name2, axis_descr, desc, f): @Substitution(outname=name, desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) @Appender(_bool_doc) @@ -5337,8 +5344,8 @@ def logical_func(self, axis=None, bool_only=None, skipna=None, level=None, numeric_only=bool_only, filter_type='bool', name=name) - logical_func.__name__ = name - return logical_func + return _set_function_name(logical_func, name, cls) + # install the indexes for _name, _indexer in indexing.get_indexers_list(): diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 68cc74e010781..71f2551e89ccf 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -15,7 +15,7 @@ import pandas.core.common as com import pandas.lib as lib -from pandas.compat import range, zip +from pandas.compat import range, zip, PY3 from pandas import compat from pandas.util.testing import (assertRaisesRegexp, assert_series_equal, @@ -549,6 +549,18 @@ def test_stat_unexpected_keyword(self): with assertRaisesRegexp(TypeError, 'unexpected keyword'): obj.any(epic=starwars) # logical_function + def test_api_compat(self): + + # GH 12021 + # compat for __name__, __qualname__ + + obj = self._construct(5) + for 
func in ['sum', 'cumsum', 'any', 'var']: + f = getattr(obj, func) + self.assertEqual(f.__name__, func) + if PY3: + self.assertTrue(f.__qualname__.endswith(func)) + class TestSeries(tm.TestCase, Generic): _typ = Series From 2630345f78fb57c1a3b283c88802b80e57e31aed Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 5 Apr 2016 08:31:57 -0400 Subject: [PATCH 2/2] CLN: remove pandas.util.misc CLN: move _fill_zeros -> missing.py CLN: rename missing.* methods w/o leading _ --- pandas/compat/__init__.py | 18 ++++++ pandas/core/common.py | 80 ------------------------- pandas/core/generic.py | 35 ++++------- pandas/core/internals.py | 32 +++++----- pandas/core/missing.py | 68 ++++++++++++++++++--- pandas/core/ops.py | 7 ++- pandas/core/panel.py | 3 +- pandas/indexes/base.py | 4 +- pandas/indexes/category.py | 4 +- pandas/indexes/multi.py | 5 +- pandas/io/tests/test_pickle.py | 5 +- pandas/tests/frame/test_constructors.py | 5 +- pandas/util/misc.py | 12 ---- 13 files changed, 125 insertions(+), 153 deletions(-) delete mode 100644 pandas/util/misc.py diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index a364a7ffa1c32..e9f8e9757cbae 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -242,6 +242,15 @@ def import_lzma(): import lzma return lzma + def set_function_name(f, name, cls): + """ Bind the name/qualname attributes of the function """ + f.__name__ = name + f.__qualname__ = '{klass}.{name}'.format( + klass=cls.__name__, + name=name) + f.__module__ = cls.__module__ + return f + else: string_types = basestring, integer_types = (int, long) @@ -284,6 +293,11 @@ def import_lzma(): from backports import lzma return lzma + def set_function_name(f, name, cls): + """ Bind the name attributes of the function """ + f.__name__ = name + return f + string_and_binary_types = string_types + (binary_type,) @@ -369,6 +383,10 @@ def __reduce__(self): # optional, for pickle support # https://github.com/pydata/pandas/pull/9123 +def is_platform_little_endian(): + """ am I little endian """ + return sys.byteorder == 'little' + def is_platform_windows(): return sys.platform == 'win32' or sys.platform == 'cygwin' diff --git a/pandas/core/common.py b/pandas/core/common.py index 6de6da4afedc8..4275870cb8543 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -5,7 +5,6 @@ import re import collections import numbers -import types from datetime import datetime, timedelta from functools import partial @@ -130,31 +129,6 @@ def __instancecheck__(cls, inst): ABCGeneric = _ABCGeneric("ABCGeneric", tuple(), {}) -def bind_method(cls, name, func): - """Bind a method to class, python 2 and python 3 compatible. 
- - Parameters - ---------- - - cls : type - class to receive bound method - name : basestring - name of method on class instance - func : function - function to be bound as method - - - Returns - ------- - None - """ - # only python 2 has bound/unbound method issue - if not compat.PY3: - setattr(cls, name, types.MethodType(func, None, cls)) - else: - setattr(cls, name, func) - - def isnull(obj): """Detect missing values (NaN in numeric arrays, None/NaN in object arrays) @@ -1466,60 +1440,6 @@ def _lcd_dtypes(a_dtype, b_dtype): return np.object -def _fill_zeros(result, x, y, name, fill): - """ - if this is a reversed op, then flip x,y - - if we have an integer value (or array in y) - and we have 0's, fill them with the fill, - return the result - - mask the nan's from x - """ - if fill is None or is_float_dtype(result): - return result - - if name.startswith(('r', '__r')): - x, y = y, x - - is_typed_variable = (hasattr(y, 'dtype') or hasattr(y, 'type')) - is_scalar = lib.isscalar(y) - - if not is_typed_variable and not is_scalar: - return result - - if is_scalar: - y = np.array(y) - - if is_integer_dtype(y): - - if (y == 0).any(): - - # GH 7325, mask and nans must be broadcastable (also: PR 9308) - # Raveling and then reshaping makes np.putmask faster - mask = ((y == 0) & ~np.isnan(result)).ravel() - - shape = result.shape - result = result.astype('float64', copy=False).ravel() - - np.putmask(result, mask, fill) - - # if we have a fill of inf, then sign it correctly - # (GH 6178 and PR 9308) - if np.isinf(fill): - signs = np.sign(y if name.startswith(('r', '__r')) else x) - negative_inf_mask = (signs.ravel() < 0) & mask - np.putmask(result, negative_inf_mask, -fill) - - if "floordiv" in name: # (PR 9308) - nan_mask = ((y == 0) & (x == 0)).ravel() - np.putmask(result, nan_mask, np.nan) - - result = result.reshape(shape) - - return result - - def _consensus_name_attr(objs): name = objs[0].name for obj in objs[1:]: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4ffa65deb0b3c..d8ee85df58e11 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -17,10 +17,11 @@ from pandas.core.internals import BlockManager import pandas.core.algorithms as algos import pandas.core.common as com -import pandas.core.missing as mis +import pandas.core.missing as missing import pandas.core.datetools as datetools from pandas import compat -from pandas.compat import map, zip, lrange, string_types, isidentifier, PY3 +from pandas.compat import (map, zip, lrange, string_types, + isidentifier, set_function_name) from pandas.core.common import (isnull, notnull, is_list_like, _values_from_object, _maybe_promote, _maybe_box_datetimelike, ABCSeries, @@ -51,7 +52,7 @@ def _single_replace(self, to_replace, method, inplace, limit): orig_dtype = self.dtype result = self if inplace else self.copy() - fill_f = mis._get_fill_func(method) + fill_f = missing.get_fill_func(method) mask = com.mask_missing(result.values, to_replace) values = fill_f(result.values, limit=limit, mask=mask) @@ -2189,7 +2190,7 @@ def reindex(self, *args, **kwargs): # construct the args axes, kwargs = self._construct_axes_from_arguments(args, kwargs) - method = mis._clean_reindex_fill_method(kwargs.pop('method', None)) + method = missing.clean_reindex_fill_method(kwargs.pop('method', None)) level = kwargs.pop('level', None) copy = kwargs.pop('copy', True) limit = kwargs.pop('limit', None) @@ -2304,7 +2305,7 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, axis_name = self._get_axis_name(axis) 
axis_values = self._get_axis(axis_name) - method = mis._clean_reindex_fill_method(method) + method = missing.clean_reindex_fill_method(method) new_index, indexer = axis_values.reindex(labels, method, level, limit=limit) return self._reindex_with_indexers({axis: [new_index, indexer]}, @@ -3099,7 +3100,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, if axis is None: axis = 0 axis = self._get_axis_number(axis) - method = mis._clean_fill_method(method) + method = missing.clean_fill_method(method) from pandas import DataFrame if value is None: @@ -3132,7 +3133,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, else: # 2d or less - method = mis._clean_fill_method(method) + method = missing.clean_fill_method(method) new_data = self._data.interpolate(method=method, axis=axis, limit=limit, inplace=inplace, coerce=True, @@ -4121,7 +4122,7 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, fill_value=None, method=None, limit=None, fill_axis=0, broadcast_axis=None): from pandas import DataFrame, Series - method = mis._clean_fill_method(method) + method = missing.clean_fill_method(method) if broadcast_axis == 1 and self.ndim != other.ndim: if isinstance(self, Series): @@ -5238,16 +5239,6 @@ def _doc_parms(cls): %(outname)s : %(name1)s\n""" -def _set_function_name(f, name, cls): - f.__name__ = name - if PY3: - f.__qualname__ = '{klass}.{name}'.format( - klass=cls.__name__, - name=name) - f.__module__ = cls.__module__ - return f - - def _make_stat_function(cls, name, name1, name2, axis_descr, desc, f): @Substitution(outname=name, desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) @@ -5265,7 +5256,7 @@ def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None, return self._reduce(f, name, axis=axis, skipna=skipna, numeric_only=numeric_only) - return _set_function_name(stat_func, name, cls) + return set_function_name(stat_func, name, cls) def _make_stat_function_ddof(cls, name, name1, name2, axis_descr, desc, f): @@ -5285,7 +5276,7 @@ def stat_func(self, axis=None, skipna=None, level=None, ddof=1, return self._reduce(f, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof) - return _set_function_name(stat_func, name, cls) + return set_function_name(stat_func, name, cls) def _make_cum_function(cls, name, name1, name2, axis_descr, desc, accum_func, @@ -5320,7 +5311,7 @@ def cum_func(self, axis=None, dtype=None, out=None, skipna=True, **kwargs): d['copy'] = False return self._constructor(result, **d).__finalize__(self) - return _set_function_name(cum_func, name, cls) + return set_function_name(cum_func, name, cls) def _make_logical_function(cls, name, name1, name2, axis_descr, desc, f): @@ -5344,7 +5335,7 @@ def logical_func(self, axis=None, bool_only=None, skipna=None, level=None, numeric_only=bool_only, filter_type='bool', name=name) - return _set_function_name(logical_func, name, cls) + return set_function_name(logical_func, name, cls) # install the indexes diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 613140e242239..c5353f6fef6dc 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -27,7 +27,7 @@ from pandas.core.categorical import Categorical, maybe_to_categorical from pandas.tseries.index import DatetimeIndex import pandas.core.common as com -import pandas.core.missing as mis +import pandas.core.missing as missing import pandas.core.convert as convert from pandas.sparse.array import _maybe_to_sparse, SparseArray import pandas.lib as lib @@ -872,7 
+872,7 @@ def check_int_bool(self, inplace): # a fill na type method try: - m = mis._clean_fill_method(method) + m = missing.clean_fill_method(method) except: m = None @@ -887,7 +887,7 @@ def check_int_bool(self, inplace): downcast=downcast, mgr=mgr) # try an interp method try: - m = mis._clean_interp_method(method, **kwargs) + m = missing.clean_interp_method(method, **kwargs) except: m = None @@ -920,9 +920,9 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, values = self.values if inplace else self.values.copy() values, _, fill_value, _ = self._try_coerce_args(values, fill_value) values = self._try_operate(values) - values = mis.interpolate_2d(values, method=method, axis=axis, - limit=limit, fill_value=fill_value, - dtype=self.dtype) + values = missing.interpolate_2d(values, method=method, axis=axis, + limit=limit, fill_value=fill_value, + dtype=self.dtype) values = self._try_coerce_result(values) blocks = [self.make_block(values, klass=self.__class__, fastpath=True)] @@ -955,11 +955,11 @@ def func(x): # process a 1-d slice, returning it # should the axis argument be handled below in apply_along_axis? - # i.e. not an arg to mis.interpolate_1d - return mis.interpolate_1d(index, x, method=method, limit=limit, - limit_direction=limit_direction, - fill_value=fill_value, - bounds_error=False, **kwargs) + # i.e. not an arg to missing.interpolate_1d + return missing.interpolate_1d(index, x, method=method, limit=limit, + limit_direction=limit_direction, + fill_value=fill_value, + bounds_error=False, **kwargs) # interp each column independently interp_values = np.apply_along_axis(func, axis, data) @@ -2414,8 +2414,8 @@ def make_block_same_class(self, values, placement, sparse_index=None, def interpolate(self, method='pad', axis=0, inplace=False, limit=None, fill_value=None, **kwargs): - values = mis.interpolate_2d(self.values.to_dense(), method, axis, - limit, fill_value) + values = missing.interpolate_2d(self.values.to_dense(), method, axis, + limit, fill_value) return self.make_block_same_class(values=values, placement=self.mgr_locs) @@ -3851,8 +3851,10 @@ def reindex(self, new_axis, indexer=None, method=None, fill_value=None, # fill if needed if method is not None or limit is not None: - new_values = mis.interpolate_2d(new_values, method=method, - limit=limit, fill_value=fill_value) + new_values = missing.interpolate_2d(new_values, + method=method, + limit=limit, + fill_value=fill_value) if self._block.is_sparse: make_block = self._block.make_block_same_class diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 86640cffc136e..a8ca5e452c7ac 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -10,7 +10,7 @@ from pandas.compat import range -def _clean_fill_method(method, allow_nearest=False): +def clean_fill_method(method, allow_nearest=False): if method is None: return None method = method.lower() @@ -31,7 +31,7 @@ def _clean_fill_method(method, allow_nearest=False): return method -def _clean_interp_method(method, **kwargs): +def clean_interp_method(method, **kwargs): order = kwargs.get('order') valid = ['linear', 'time', 'index', 'values', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'polynomial', 'krogh', @@ -241,7 +241,7 @@ def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None, else: # todo create faster fill func without masking mask = com.mask_missing(transf(values), fill_value) - method = _clean_fill_method(method) + method = clean_fill_method(method) if method == 'pad': values = 
transf(pad_2d( transf(values), limit=limit, mask=mask, dtype=dtype)) @@ -385,10 +385,64 @@ def backfill_2d(values, limit=None, mask=None, dtype=None): _fill_methods = {'pad': pad_1d, 'backfill': backfill_1d} -def _get_fill_func(method): - method = _clean_fill_method(method) +def get_fill_func(method): + method = clean_fill_method(method) return _fill_methods[method] -def _clean_reindex_fill_method(method): - return _clean_fill_method(method, allow_nearest=True) +def clean_reindex_fill_method(method): + return clean_fill_method(method, allow_nearest=True) + + +def fill_zeros(result, x, y, name, fill): + """ + if this is a reversed op, then flip x,y + + if we have an integer value (or array in y) + and we have 0's, fill them with the fill, + return the result + + mask the nan's from x + """ + if fill is None or com.is_float_dtype(result): + return result + + if name.startswith(('r', '__r')): + x, y = y, x + + is_typed_variable = (hasattr(y, 'dtype') or hasattr(y, 'type')) + is_scalar = lib.isscalar(y) + + if not is_typed_variable and not is_scalar: + return result + + if is_scalar: + y = np.array(y) + + if com.is_integer_dtype(y): + + if (y == 0).any(): + + # GH 7325, mask and nans must be broadcastable (also: PR 9308) + # Raveling and then reshaping makes np.putmask faster + mask = ((y == 0) & ~np.isnan(result)).ravel() + + shape = result.shape + result = result.astype('float64', copy=False).ravel() + + np.putmask(result, mask, fill) + + # if we have a fill of inf, then sign it correctly + # (GH 6178 and PR 9308) + if np.isinf(fill): + signs = np.sign(y if name.startswith(('r', '__r')) else x) + negative_inf_mask = (signs.ravel() < 0) & mask + np.putmask(result, negative_inf_mask, -fill) + + if "floordiv" in name: # (PR 9308) + nan_mask = ((y == 0) & (x == 0)).ravel() + np.putmask(result, nan_mask, np.nan) + + result = result.reshape(shape) + + return result diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 01df9218c1936..11161d8a5d186 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -18,6 +18,7 @@ from pandas.lib import isscalar from pandas.tslib import iNaT from pandas.compat import bind_method +import pandas.core.missing as missing from pandas.core.common import (is_list_like, notnull, isnull, _values_from_object, _maybe_match_name, needs_i8_conversion, is_datetimelike_v_numeric, @@ -595,7 +596,7 @@ def na_op(x, y): result, changed = com._maybe_upcast_putmask(result, ~mask, np.nan) - result = com._fill_zeros(result, x, y, name, fill_zeros) + result = missing.fill_zeros(result, x, y, name, fill_zeros) return result def wrapper(left, right, name=name, na_op=na_op): @@ -1004,7 +1005,7 @@ def na_op(x, y): result, changed = com._maybe_upcast_putmask(result, ~mask, np.nan) result = result.reshape(x.shape) - result = com._fill_zeros(result, x, y, name, fill_zeros) + result = missing.fill_zeros(result, x, y, name, fill_zeros) return result @@ -1207,7 +1208,7 @@ def na_op(x, y): result[mask] = op(x[mask], y) result, changed = com._maybe_upcast_putmask(result, ~mask, np.nan) - result = com._fill_zeros(result, x, y, name, fill_zeros) + result = missing.fill_zeros(result, x, y, name, fill_zeros) return result # work only for scalars diff --git a/pandas/core/panel.py b/pandas/core/panel.py index adfbd6646b048..f0f3803c62566 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -11,6 +11,7 @@ import pandas.computation.expressions as expressions import pandas.core.common as com import pandas.core.ops as ops +import pandas.core.missing as missing from pandas import compat 
from pandas import lib from pandas.compat import (map, zip, range, u, OrderedDict, OrderedDefaultdict) @@ -1505,7 +1506,7 @@ def na_op(x, y): # handles discrepancy between numpy and numexpr on division/mod # by 0 though, given that these are generally (always?) # non-scalars, I'm not sure whether it's worth it at the moment - result = com._fill_zeros(result, x, y, name, fill_zeros) + result = missing.fill_zeros(result, x, y, name, fill_zeros) return result if name in _op_descriptions: diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index e1bc843eb5d88..dedabd1126b09 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -18,7 +18,7 @@ from pandas.util.decorators import (Appender, Substitution, cache_readonly, deprecate, deprecate_kwarg) import pandas.core.common as com -from pandas.core.missing import _clean_reindex_fill_method +import pandas.core.missing as missing from pandas.core.common import (isnull, array_equivalent, is_object_dtype, is_datetimetz, ABCSeries, ABCPeriodIndex, ABCMultiIndex, @@ -2034,7 +2034,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): positions matches the corresponding target values. Missing values in the target are marked by -1. """ - method = _clean_reindex_fill_method(method) + method = missing.clean_reindex_fill_method(method) target = _ensure_index(target) if tolerance is not None: tolerance = self._convert_tolerance(tolerance) diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index 5844c69c57f0e..16b8fd8df4e2a 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -5,11 +5,11 @@ from pandas import compat from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg) -from pandas.core.missing import _clean_reindex_fill_method from pandas.core.config import get_option from pandas.indexes.base import Index, _index_shared_docs import pandas.core.base as base import pandas.core.common as com +import pandas.core.missing as missing import pandas.indexes.base as ibase @@ -415,7 +415,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): ------- (indexer, mask) : (ndarray, ndarray) """ - method = _clean_reindex_fill_method(method) + method = missing.clean_reindex_fill_method(method) target = ibase._ensure_index(target) if isinstance(target, CategoricalIndex): diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index de3a67ebc1abf..773852f986fe1 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -17,7 +17,7 @@ from pandas.util.decorators import (Appender, cache_readonly, deprecate, deprecate_kwarg) import pandas.core.common as com -from pandas.core.missing import _clean_reindex_fill_method +import pandas.core.missing as missing from pandas.core.common import (isnull, array_equivalent, is_object_dtype, _values_from_object, @@ -1334,8 +1334,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): ------- (indexer, mask) : (ndarray, ndarray) """ - method = _clean_reindex_fill_method(method) - + method = missing.clean_reindex_fill_method(method) target = _ensure_index(target) target_index = target diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py index e8218ca5950ba..2d3adce236b40 100644 --- a/pandas/io/tests/test_pickle.py +++ b/pandas/io/tests/test_pickle.py @@ -9,8 +9,7 @@ import pandas as pd from pandas import Index -from pandas.compat import u -from pandas.util.misc import is_little_endian +from pandas.compat import u, is_platform_little_endian import pandas 
import pandas.util.testing as tm from pandas.tseries.offsets import Day, MonthEnd @@ -97,7 +96,7 @@ def compare_frame_dt_mixed_tzs(self, result, expected, typ, version): tm.assert_frame_equal(result, expected) def read_pickles(self, version): - if not is_little_endian(): + if not is_platform_little_endian(): raise nose.SkipTest("known failure on non-little endian") pth = tm.get_data_path('legacy_pickle/{0}'.format(str(version))) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 861be35f6a2b4..4a7c5c3b79de8 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -15,12 +15,11 @@ import numpy.ma.mrecords as mrecords from pandas.compat import (lmap, long, zip, range, lrange, lzip, - OrderedDict) + OrderedDict, is_platform_little_endian) from pandas import compat from pandas import (DataFrame, Index, Series, notnull, isnull, MultiIndex, Timedelta, Timestamp, date_range) -from pandas.util.misc import is_little_endian from pandas.core.common import PandasError import pandas as pd import pandas.core.common as com @@ -1835,7 +1834,7 @@ def test_from_records_with_datetimes(self): # this may fail on certain platforms because of a numpy issue # related GH6140 - if not is_little_endian(): + if not is_platform_little_endian(): raise nose.SkipTest("known failure of test on non-little endian") # construction with a null in a recarray diff --git a/pandas/util/misc.py b/pandas/util/misc.py deleted file mode 100644 index 2dd59043b5f63..0000000000000 --- a/pandas/util/misc.py +++ /dev/null @@ -1,12 +0,0 @@ -""" various miscellaneous utilities """ - - -def is_little_endian(): - """ am I little endian """ - import sys - return sys.byteorder == 'little' - - -def exclusive(*args): - count = sum([arg is not None for arg in args]) - return count == 1
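
For illustration only (not part of the patch): a minimal sketch of the behaviour these two commits provide, assuming a pandas build with the change applied. It mirrors the new test_api_compat test above — every generated reduction and cumulative method carries its own __name__, and on Python 3 a class-qualified __qualname__ as well (before the change, e.g. Series.cummin.__name__ reported 'min').

    import sys
    import pandas as pd

    s = pd.Series([1, 2, 3])

    for name in ['sum', 'cumsum', 'any', 'var']:
        method = getattr(s, name)
        # each generated method now reports its own name ...
        assert method.__name__ == name
        if sys.version_info[0] >= 3:
            # ... and, via compat.set_function_name, a qualified name
            # such as 'Series.cumsum'
            assert method.__qualname__.endswith(name)

    print('name/qualname metadata looks correct')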