diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a4d3e7058d7de..5a4a4fa114fa9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -118,6 +118,7 @@ to_arrays, ) from pandas.core.ops.missing import dispatch_fill_zeros +from pandas.core.reshape.concat import concat from pandas.core.series import Series from pandas.io.formats import console, format as fmt @@ -6991,8 +6992,6 @@ def append(self, other, ignore_index=False, verify_integrity=False, sort=None): if (self.columns.get_indexer(other.columns) >= 0).all(): other = other.reindex(columns=self.columns) - from pandas.core.reshape.concat import concat - if isinstance(other, (list, tuple)): to_concat = [self] + other else: @@ -7130,7 +7129,6 @@ def _join_compat( self, other, on=None, how="left", lsuffix="", rsuffix="", sort=False ): from pandas.core.reshape.merge import merge - from pandas.core.reshape.concat import concat if isinstance(other, Series): if other.name is None: @@ -7291,7 +7289,6 @@ def round(self, decimals=0, *args, **kwargs): 2 0.7 0.0 3 0.2 0.0 """ - from pandas.core.reshape.concat import concat def _dict_round(df, decimals): for col, vals in df.items(): @@ -8297,8 +8294,6 @@ def isin(self, values): dog False False """ if isinstance(values, dict): - from pandas.core.reshape.concat import concat - values = collections.defaultdict(list, values) return concat( ( diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c766fcaa4f849..2396a53374bae 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -67,6 +67,7 @@ from pandas.core.index import Index, MultiIndex, _all_indexes_same import pandas.core.indexes.base as ibase from pandas.core.internals import BlockManager, make_block +from pandas.core.reshape.concat import concat from pandas.core.series import Series from pandas.plotting import boxplot_frame_groupby @@ -275,8 +276,6 @@ def aggregate(self, func=None, *args, **kwargs): # _level handled at higher if not _level and isinstance(ret, dict): - from pandas import concat - ret = concat(ret, axis=1) return ret @@ -443,8 +442,6 @@ def transform(self, func, *args, **kwargs): # check for empty "results" to avoid concat ValueError if results: - from pandas.core.reshape.concat import concat - result = concat(results).sort_index() else: result = Series() @@ -1221,8 +1218,6 @@ def first_not_none(values): # still a series # path added as of GH 5545 elif all_indexed_same: - from pandas.core.reshape.concat import concat - return concat(values) if not all_indexed_same: @@ -1257,8 +1252,6 @@ def first_not_none(values): else: # GH5788 instead of stacking; concat gets the # dtypes correct - from pandas.core.reshape.concat import concat - result = concat( values, keys=key_index, @@ -1303,8 +1296,6 @@ def first_not_none(values): return self._concat_objects(keys, values, not_indexed_same=not_indexed_same) def _transform_general(self, func, *args, **kwargs): - from pandas.core.reshape.concat import concat - applied = [] obj = self._obj_with_exclusions gen = self.grouper.get_iterator(obj, axis=self.axis) @@ -1653,8 +1644,6 @@ def _iterate_column_groupbys(self): ) def _apply_to_column_groupbys(self, func): - from pandas.core.reshape.concat import concat - return concat( (func(col_groupby) for _, col_groupby in self._iterate_column_groupbys()), keys=self._selected_obj.columns, @@ -1741,8 +1730,6 @@ def groupby_series(obj, col=None): if isinstance(obj, Series): results = groupby_series(obj) else: - from pandas.core.reshape.concat import concat - results = [groupby_series(obj[col], col) for col in obj.columns] results = concat(results, axis=1) results.columns.names = obj.columns.names diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 7d1c74e415658..f9deea496f7b7 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -51,6 +51,7 @@ class providing the base-class of operations. from pandas.core.generic import NDFrame from pandas.core.groupby import base from pandas.core.index import CategoricalIndex, Index, MultiIndex +from pandas.core.reshape.concat import concat from pandas.core.series import Series from pandas.core.sorting import get_group_index_sorter @@ -922,8 +923,6 @@ def _python_agg_general(self, func, *args, **kwargs): return self._wrap_aggregated_output(output) def _concat_objects(self, keys, values, not_indexed_same=False): - from pandas.core.reshape.concat import concat - def reset_identity(values): # reset the identities of the components # of the values to prevent aliasing @@ -1852,7 +1851,6 @@ def quantile(self, q=0.5, interpolation="linear"): a 2.0 b 3.0 """ - from pandas import concat def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]: if is_object_dtype(vals): diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 60bab817705e3..25768c481ce51 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -6,14 +6,16 @@ import numpy as np -from pandas import DataFrame, Index, MultiIndex, Series +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries + from pandas.core import common as com from pandas.core.arrays.categorical import ( _factorize_from_iterable, _factorize_from_iterables, ) -from pandas.core.generic import NDFrame from pandas.core.index import ( + Index, + MultiIndex, _all_indexes_same, _get_consensus_names, _get_objs_combined_axis, @@ -275,7 +277,7 @@ def __init__( copy=True, sort=False, ): - if isinstance(objs, (NDFrame, str)): + if isinstance(objs, (ABCDataFrame, ABCSeries, str)): raise TypeError( "first argument must be an iterable of pandas " "objects, you passed an object of type " @@ -322,7 +324,7 @@ def __init__( # consolidate data & figure out what our result ndim is going to be ndims = set() for obj in objs: - if not isinstance(obj, (Series, DataFrame)): + if not isinstance(obj, (ABCSeries, ABCDataFrame)): msg = ( "cannot concatenate object of type '{}';" " only Series and DataFrame objs are valid".format(type(obj)) @@ -348,7 +350,7 @@ def __init__( # filter out the empties if we have not multi-index possibilities # note to keep empty Series as it affect to result columns / name non_empties = [ - obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, Series) + obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, ABCSeries) ] if len(non_empties) and ( @@ -362,17 +364,26 @@ def __init__( self.objs = objs # Standardize axis parameter to int - if isinstance(sample, Series): + # TODO: Should this really require a class import? + """ + if isinstance(sample, ABCSeries): axis = DataFrame._get_axis_number(axis) else: axis = sample._get_axis_number(axis) + """ + # TODO: implement universal axis validation; align with core.generic + if not isinstance(axis, int): + try: + axis = {"index": 0, "rows": 0, "columns": 1}[axis] + except KeyError: + raise ValueError("No axis named {}".format(axis)) # Need to flip BlockManager axis in the DataFrame special case - self._is_frame = isinstance(sample, DataFrame) + self._is_frame = isinstance(sample, ABCDataFrame) if self._is_frame: axis = 1 if axis == 0 else 0 - self._is_series = isinstance(sample, Series) + self._is_series = isinstance(sample, ABCSeries) if not 0 <= axis <= sample.ndim: raise AssertionError( "axis must be between 0 and {ndim}, input was" @@ -545,7 +556,7 @@ def _get_concat_axis(self): num = 0 has_names = False for i, x in enumerate(self.objs): - if not isinstance(x, Series): + if not isinstance(x, ABCSeries): raise TypeError( "Cannot concatenate type 'Series' " "with object of type {type!r}".format(type=type(x).__name__) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 7e593ddb91d3a..63025c45ecf44 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -44,6 +44,7 @@ import pandas.core.common as com from pandas.core.frame import _merge_doc from pandas.core.internals import _transform_index, concatenate_block_managers +from pandas.core.reshape.concat import concat import pandas.core.sorting as sorting from pandas.core.sorting import is_int64_overflow_possible @@ -156,8 +157,6 @@ def _groupby_and_merge(by, on, left, right, _merge_pieces, check_duplicates=True # preserve the original order # if we have a missing piece this can be reset - from pandas.core.reshape.concat import concat - result = concat(pieces, ignore_index=True) result = result.reindex(columns=pieces[0].columns, copy=False) return result, lby diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index ad7081fb17703..17f4da1f77f11 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -26,6 +26,7 @@ from pandas.core.construction import extract_array from pandas.core.frame import DataFrame from pandas.core.index import Index, MultiIndex +from pandas.core.reshape.concat import concat from pandas.core.series import Series from pandas.core.sorting import ( compress_group_index, @@ -469,8 +470,6 @@ def _unstack_extension_series(series, level, fill_value): # 2. Followup with a columnwise take. # We use the dummy take to discover newly-created missing values # introduced by the reshape. - from pandas.core.reshape.concat import concat - dummy_arr = np.arange(len(series)) # fill_value=-1, since we will do a series.values.take later result = _Unstacker( @@ -855,8 +854,6 @@ def get_dummies( 1 0.0 1.0 0.0 2 0.0 0.0 1.0 """ - from pandas.core.reshape.concat import concat - dtypes_to_encode = ["object", "category"] if isinstance(data, DataFrame): @@ -946,8 +943,6 @@ def _get_dummies_1d( drop_first=False, dtype=None, ): - from pandas.core.reshape.concat import concat - # Series avoids inconsistent NaN handling codes, levels = _factorize_from_iterable(Series(data)) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3e9d3d5c04559..02dcb56fadc57 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -75,6 +75,7 @@ from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexing import check_bool_indexer from pandas.core.internals import SingleBlockManager +from pandas.core.reshape.concat import concat from pandas.core.strings import StringMethods from pandas.core.tools.datetimes import to_datetime @@ -2705,8 +2706,6 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): ... ValueError: Indexes have overlapping values: [0, 1, 2] """ - from pandas.core.reshape.concat import concat - if isinstance(to_append, (list, tuple)): to_concat = [self] to_concat.extend(to_append) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 0f2920b3558c9..4e2fe56e74483 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -11,6 +11,7 @@ from pandas.core.generic import _shared_docs from pandas.core.groupby.base import GroupByMixin from pandas.core.index import MultiIndex +from pandas.core.reshape.concat import concat _shared_docs = dict(**_shared_docs) _doc_template = """ @@ -128,8 +129,6 @@ def dataframe_from_int_dict(data, frame_template): *_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j]) ) - from pandas import concat - result_index = arg1.index.union(arg2.index) if len(result_index): diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 6ddba6a297bdc..38e6025d942c5 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -71,6 +71,7 @@ from pandas.core.index import Index, ensure_index from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex +from pandas.core.reshape.concat import concat from pandas.io.common import _stringify_path from pandas.io.formats.printing import adjoin, justify, pprint_thing @@ -260,8 +261,6 @@ def __init__( self._chk_truncate() def _chk_truncate(self) -> None: - from pandas.core.reshape.concat import concat - min_rows = self.min_rows max_rows = self.max_rows # truncation determined by max_rows, actual truncated number of rows @@ -609,8 +608,6 @@ def _chk_truncate(self) -> None: Checks whether the frame should be truncated. If so, slices the frame up. """ - from pandas.core.reshape.concat import concat - # Cut the data to the information actually printed max_cols = self.max_cols max_rows = self.max_rows