From ce37450deca789e6d9ac4f457416971d258fd1a7 Mon Sep 17 00:00:00 2001 From: myenugula Date: Wed, 23 Apr 2025 14:32:44 +0800 Subject: [PATCH 1/4] BUG: Fix scatter plot colors in groupby context to match line plot behavior (#59846) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/plotting/_matplotlib/core.py | 4 +-- pandas/tests/plotting/test_groupby.py | 49 +++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 184ca581902ee..b9a95951cf49b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -763,6 +763,7 @@ Plotting - Bug in :meth:`DataFrame.plot.bar` with ``stacked=True`` where labels on stacked bars with zero-height segments were incorrectly positioned at the base instead of the label position of the previous segment (:issue:`59429`) - Bug in :meth:`DataFrame.plot.line` raising ``ValueError`` when set both color and a ``dict`` style (:issue:`59461`) - Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`) +- Bug in :meth:`DataFrameGroupBy.plot` with ``kind="scatter"`` where all groups used the same color instead of different colors for each group like line plots do (:issue:`59846`) - Bug in :meth:`Series.plot` preventing a line and scatter plot from being aligned (:issue:`61005`) - Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 24aa848de1b4c..94eace2982a17 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1371,7 +1371,7 @@ def _make_plot(self, fig: Figure) -> None: # if a list of non-color strings is passed in as c, color points # by uniqueness of the strings, such same strings get same color create_colors = not self._are_valid_colors(c_values) - if create_colors: + if c_values is not None and create_colors: color_mapping = self._get_color_mapping(c_values) c_values = [color_mapping[s] for s in c_values] @@ -1422,7 +1422,7 @@ def _get_c_values(self, color, color_by_categorical: bool, c_is_column: bool): if c is not None and color is not None: raise TypeError("Specify exactly one of `c` and `color`") if c is None and color is None: - c_values = mpl.rcParams["patch.facecolor"] + c_values = None elif color is not None: c_values = color elif color_by_categorical: diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py index 0cb125d822fd1..e274610a1349e 100644 --- a/pandas/tests/plotting/test_groupby.py +++ b/pandas/tests/plotting/test_groupby.py @@ -152,3 +152,52 @@ def test_groupby_hist_series_with_legend_raises(self): with pytest.raises(ValueError, match="Cannot use both legend and label"): g.hist(legend=True, label="d") + + def test_groupby_scatter_colors_differ(self): + # GH 59846 - Test that scatter plots use different colors for different groups + # similar to how line plots do + from matplotlib.collections import PathCollection + import matplotlib.pyplot as plt + + # Create test data with distinct groups + df = DataFrame( + { + "x": [1, 2, 3, 4, 5, 6, 7, 8, 9], + "y": [1, 2, 3, 4, 5, 6, 7, 8, 9], + "group": ["A", "A", "A", "B", "B", "B", "C", "C", "C"], + } + ) + + # Set up a figure with both line and scatter plots + fig, (ax1, ax2) = plt.subplots(1, 2) + + # Plot line chart (known to use different colors for different groups) + df.groupby("group").plot(x="x", y="y", ax=ax1, kind="line") + + # Plot scatter chart (should also use different colors for different groups) + df.groupby("group").plot(x="x", y="y", ax=ax2, kind="scatter") + + # Get the colors used in the line plot and scatter plot + line_colors = [line.get_color() for line in ax1.get_lines()] + + # Get scatter colors + scatter_colors = [] + for collection in ax2.collections: + if isinstance(collection, PathCollection): # This is a scatter plot + # Get the face colors (might be array of RGBA values) + face_colors = collection.get_facecolor() + # If multiple points with same color, we get the first one + if face_colors.ndim > 1: + scatter_colors.append(tuple(face_colors[0])) + else: + scatter_colors.append(tuple(face_colors)) + + # Assert that we have the right number of colors (one per group) + assert len(line_colors) == 3 + assert len(scatter_colors) == 3 + + # Assert that the colors are all different + assert len(set(scatter_colors)) == 3 + assert len(line_colors) == 3 + + plt.close(fig) From c3d388f1d26a2cbe790d1cdf324f6ceedaf7d74c Mon Sep 17 00:00:00 2001 From: myenugula Date: Wed, 23 Apr 2025 14:49:53 +0800 Subject: [PATCH 2/4] BUG: Fix scatter plot colors in groupby context to match line plot behavior (#59846) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/plotting/_matplotlib/core.py | 4 +-- pandas/tests/plotting/test_groupby.py | 49 +++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f76d94036c6d8..2297b0db3dcc1 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -798,6 +798,7 @@ Plotting - Bug in :meth:`DataFrame.plot.bar` with ``stacked=True`` where labels on stacked bars with zero-height segments were incorrectly positioned at the base instead of the label position of the previous segment (:issue:`59429`) - Bug in :meth:`DataFrame.plot.line` raising ``ValueError`` when set both color and a ``dict`` style (:issue:`59461`) - Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`) +- Bug in :meth:`DataFrameGroupBy.plot` with ``kind="scatter"`` where all groups used the same color instead of different colors for each group like line plots do (:issue:`59846`) - Bug in :meth:`Series.plot` preventing a line and bar from being aligned on the same plot (:issue:`61161`) - Bug in :meth:`Series.plot` preventing a line and scatter plot from being aligned (:issue:`61005`) - Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index a217ee8a86a16..8b8e39c28cb19 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1365,7 +1365,7 @@ def _make_plot(self, fig: Figure) -> None: # if a list of non-color strings is passed in as c, color points # by uniqueness of the strings, such same strings get same color create_colors = not self._are_valid_colors(c_values) - if create_colors: + if c_values is not None and create_colors: color_mapping = self._get_color_mapping(c_values) c_values = [color_mapping[s] for s in c_values] @@ -1416,7 +1416,7 @@ def _get_c_values(self, color, color_by_categorical: bool, c_is_column: bool): if c is not None and color is not None: raise TypeError("Specify exactly one of `c` and `color`") if c is None and color is None: - c_values = mpl.rcParams["patch.facecolor"] + c_values = None elif color is not None: c_values = color elif color_by_categorical: diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py index 0cb125d822fd1..e274610a1349e 100644 --- a/pandas/tests/plotting/test_groupby.py +++ b/pandas/tests/plotting/test_groupby.py @@ -152,3 +152,52 @@ def test_groupby_hist_series_with_legend_raises(self): with pytest.raises(ValueError, match="Cannot use both legend and label"): g.hist(legend=True, label="d") + + def test_groupby_scatter_colors_differ(self): + # GH 59846 - Test that scatter plots use different colors for different groups + # similar to how line plots do + from matplotlib.collections import PathCollection + import matplotlib.pyplot as plt + + # Create test data with distinct groups + df = DataFrame( + { + "x": [1, 2, 3, 4, 5, 6, 7, 8, 9], + "y": [1, 2, 3, 4, 5, 6, 7, 8, 9], + "group": ["A", "A", "A", "B", "B", "B", "C", "C", "C"], + } + ) + + # Set up a figure with both line and scatter plots + fig, (ax1, ax2) = plt.subplots(1, 2) + + # Plot line chart (known to use different colors for different groups) + df.groupby("group").plot(x="x", y="y", ax=ax1, kind="line") + + # Plot scatter chart (should also use different colors for different groups) + df.groupby("group").plot(x="x", y="y", ax=ax2, kind="scatter") + + # Get the colors used in the line plot and scatter plot + line_colors = [line.get_color() for line in ax1.get_lines()] + + # Get scatter colors + scatter_colors = [] + for collection in ax2.collections: + if isinstance(collection, PathCollection): # This is a scatter plot + # Get the face colors (might be array of RGBA values) + face_colors = collection.get_facecolor() + # If multiple points with same color, we get the first one + if face_colors.ndim > 1: + scatter_colors.append(tuple(face_colors[0])) + else: + scatter_colors.append(tuple(face_colors)) + + # Assert that we have the right number of colors (one per group) + assert len(line_colors) == 3 + assert len(scatter_colors) == 3 + + # Assert that the colors are all different + assert len(set(scatter_colors)) == 3 + assert len(line_colors) == 3 + + plt.close(fig) From 526b77f8fc18d528b4bb7fde7353df7a7382c4e6 Mon Sep 17 00:00:00 2001 From: myenugula Date: Tue, 6 May 2025 12:35:28 +0700 Subject: [PATCH 3/4] Remove unnecessary comments in test_groupby_scatter_colors_differ --- pandas/tests/plotting/test_groupby.py | 31 ++++++--------------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py index e274610a1349e..c0aaca10ea264 100644 --- a/pandas/tests/plotting/test_groupby.py +++ b/pandas/tests/plotting/test_groupby.py @@ -1,5 +1,6 @@ """Test cases for GroupBy.plot""" +import matplotlib.pyplot as plt import numpy as np import pytest @@ -156,10 +157,6 @@ def test_groupby_hist_series_with_legend_raises(self): def test_groupby_scatter_colors_differ(self): # GH 59846 - Test that scatter plots use different colors for different groups # similar to how line plots do - from matplotlib.collections import PathCollection - import matplotlib.pyplot as plt - - # Create test data with distinct groups df = DataFrame( { "x": [1, 2, 3, 4, 5, 6, 7, 8, 9], @@ -168,36 +165,20 @@ def test_groupby_scatter_colors_differ(self): } ) - # Set up a figure with both line and scatter plots fig, (ax1, ax2) = plt.subplots(1, 2) - - # Plot line chart (known to use different colors for different groups) df.groupby("group").plot(x="x", y="y", ax=ax1, kind="line") - - # Plot scatter chart (should also use different colors for different groups) df.groupby("group").plot(x="x", y="y", ax=ax2, kind="scatter") - # Get the colors used in the line plot and scatter plot line_colors = [line.get_color() for line in ax1.get_lines()] + scatter_colors = [ + tuple(tuple(fc) for fc in scatter.get_facecolor()) + for scatter in ax2.collections + ] - # Get scatter colors - scatter_colors = [] - for collection in ax2.collections: - if isinstance(collection, PathCollection): # This is a scatter plot - # Get the face colors (might be array of RGBA values) - face_colors = collection.get_facecolor() - # If multiple points with same color, we get the first one - if face_colors.ndim > 1: - scatter_colors.append(tuple(face_colors[0])) - else: - scatter_colors.append(tuple(face_colors)) - - # Assert that we have the right number of colors (one per group) assert len(line_colors) == 3 assert len(scatter_colors) == 3 - # Assert that the colors are all different + assert len(set(line_colors)) == 3 assert len(set(scatter_colors)) == 3 - assert len(line_colors) == 3 plt.close(fig) From 8d273f0deb9b33b5adc818b6618f1f2a7c2eba93 Mon Sep 17 00:00:00 2001 From: myenugula Date: Tue, 6 May 2025 14:38:36 +0700 Subject: [PATCH 4/4] Fix dependency issues in CI workflows --- pandas/tests/plotting/test_groupby.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py index c0aaca10ea264..06327bcad7aa1 100644 --- a/pandas/tests/plotting/test_groupby.py +++ b/pandas/tests/plotting/test_groupby.py @@ -1,6 +1,5 @@ """Test cases for GroupBy.plot""" -import matplotlib.pyplot as plt import numpy as np import pytest @@ -157,6 +156,8 @@ def test_groupby_hist_series_with_legend_raises(self): def test_groupby_scatter_colors_differ(self): # GH 59846 - Test that scatter plots use different colors for different groups # similar to how line plots do + import matplotlib.pyplot as plt + df = DataFrame( { "x": [1, 2, 3, 4, 5, 6, 7, 8, 9],