pandas-dev · mroeschke · Mar 29, 2023 · Mar 25, 2023 · Mar 26, 2023 · Mar 29, 2023
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -38,6 +38,7 @@ Other enhancements
 - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`)
 - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`)
 - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`).
+- Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_210.notable_bug_fixes:

diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
@@ -253,11 +253,11 @@ def _from_values_or_dtype(
         Examples
         --------
         >>> pd.CategoricalDtype._from_values_or_dtype()
-        CategoricalDtype(categories=None, ordered=None)
+        CategoricalDtype(categories=None, ordered=None, categories_dtype=None)
         >>> pd.CategoricalDtype._from_values_or_dtype(
         ...     categories=['a', 'b'], ordered=True
         ... )
-        CategoricalDtype(categories=['a', 'b'], ordered=True)
+        CategoricalDtype(categories=['a', 'b'], ordered=True, categories_dtype=object)
         >>> dtype1 = pd.CategoricalDtype(['a', 'b'], ordered=True)
         >>> dtype2 = pd.CategoricalDtype(['x', 'y'], ordered=False)
         >>> c = pd.Categorical([0, 1], dtype=dtype1, fastpath=True)
@@ -272,7 +272,7 @@ def _from_values_or_dtype(
         The supplied dtype takes precedence over values' dtype:
 
         >>> pd.CategoricalDtype._from_values_or_dtype(c, dtype=dtype2)
-        CategoricalDtype(categories=['x', 'y'], ordered=False)
+        CategoricalDtype(categories=['x', 'y'], ordered=False, categories_dtype=object)
         """
 
         if dtype is not None:
@@ -429,13 +429,19 @@ def __eq__(self, other: Any) -> bool:
     def __repr__(self) -> str_type:
         if self.categories is None:
             data = "None"
+            dtype = "None"
         else:
             data = self.categories._format_data(name=type(self).__name__)
             if data is None:
                 # self.categories is RangeIndex
                 data = str(self.categories._range)
             data = data.rstrip(", ")
-        return f"CategoricalDtype(categories={data}, ordered={self.ordered})"
+            dtype = self.categories.dtype
+
+        return (
+            f"CategoricalDtype(categories={data}, ordered={self.ordered}, "
+            f"categories_dtype={dtype})"
+        )
 
     @cache_readonly
     def _hash_categories(self) -> int:

diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py
@@ -181,7 +181,7 @@ def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype:
     ...         "ordered": True,
     ...     }
     ... )
-    CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)
+    CategoricalDtype(categories=['a', 'b', 'c'], ordered=True, categories_dtype=object)
 
     >>> convert_json_field_to_pandas_type({"name": "a_datetime", "type": "datetime"})
     'datetime64[ns]'

diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
@@ -211,7 +211,10 @@ def test_repr_range_categories(self):
         dtype = CategoricalDtype(categories=rng, ordered=False)
         result = repr(dtype)
 
-        expected = "CategoricalDtype(categories=range(0, 3), ordered=False)"
+        expected = (
+            "CategoricalDtype(categories=range(0, 3), ordered=False, "
+            "categories_dtype=int64)"
+        )
         assert result == expected
 
     def test_update_dtype(self):
@@ -220,6 +223,15 @@ def test_update_dtype(self):
         expected = CategoricalDtype(["b"], ordered=True)
         assert result == expected
 
+    def test_repr(self):
+        cat = Categorical(pd.Index([1, 2, 3], dtype="int32"))
+        result = cat.dtype.__repr__()
+        expected = (
+            "CategoricalDtype(categories=[1, 2, 3], ordered=False, "
+            "categories_dtype=int32)"
+        )
+        assert result == expected
+
 
 class TestDatetimeTZDtype(Base):
     @pytest.fixture
@@ -980,7 +992,10 @@ def test_str_vs_repr(self, ordered):
         c1 = CategoricalDtype(["a", "b"], ordered=ordered)
         assert str(c1) == "category"
         # Py2 will have unicode prefixes
-        pat = r"CategoricalDtype\(categories=\[.*\], ordered={ordered}\)"
+        pat = (
+            r"CategoricalDtype\(categories=\[.*\], ordered={ordered}, "
+            r"categories_dtype=object\)"
+        )
         assert re.match(pat.format(ordered=ordered), repr(c1))
 
     def test_categorical_categories(self):

diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py
@@ -209,9 +209,10 @@ def test_index_equal_category_mismatch(check_categorical):
     msg = """Index are different
 
 Attribute "dtype" are different
-\\[left\\]:  CategoricalDtype\\(categories=\\['a', 'b'\\], ordered=False\\)
+\\[left\\]:  CategoricalDtype\\(categories=\\['a', 'b'\\], ordered=False, \
+categories_dtype=object\\)
 \\[right\\]: CategoricalDtype\\(categories=\\['a', 'b', 'c'\\], \
-ordered=False\\)"""
+ordered=False, categories_dtype=object\\)"""
 
     idx1 = Index(Categorical(["a", "b"]))
     idx2 = Index(Categorical(["a", "b"], categories=["a", "b", "c"]))

diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py
@@ -250,9 +250,10 @@ def test_series_equal_categorical_mismatch(check_categorical):
     msg = """Attributes of Series are different
 
 Attribute "dtype" are different
-\\[left\\]:  CategoricalDtype\\(categories=\\['a', 'b'\\], ordered=False\\)
+\\[left\\]:  CategoricalDtype\\(categories=\\['a', 'b'\\], ordered=False, \
+categories_dtype=object\\)
 \\[right\\]: CategoricalDtype\\(categories=\\['a', 'b', 'c'\\], \
-ordered=False\\)"""
+ordered=False, categories_dtype=object\\)"""
 
     s1 = Series(Categorical(["a", "b"]))
     s2 = Series(Categorical(["a", "b"], categories=list("abc")))