Skip to content

Commit 5ff0641

Browse files
committed
feat: (Series|DataFrame).plot
1 parent 276f228 commit 5ff0641

File tree

3 files changed

+214
-29
lines changed

3 files changed

+214
-29
lines changed

Diff for: bigframes/operations/plotting.py

+28-29
Original file line numberDiff line numberDiff line change
@@ -23,31 +23,45 @@
2323
class PlotAccessor:
2424
__doc__ = vendordt.PlotAccessor.__doc__
2525

26+
_common_kinds = ("line", "area", "hist")
27+
_dataframe_kinds = ("scatter",)
28+
_all_kinds = _common_kinds + _dataframe_kinds
29+
30+
def __call__(self, **kwargs):
31+
import bigframes.series as series
32+
33+
if kwargs.pop("backend", None) is not None:
34+
raise NotImplementedError(
35+
f"Only support matplotlib backend for now. {constants.FEEDBACK_LINK}"
36+
)
37+
38+
kind = kwargs.pop("kind", "line")
39+
if kind not in self._all_kinds:
40+
raise NotImplementedError(
41+
f"{kind} is not a valid plot kind supported for now. {constants.FEEDBACK_LINK}"
42+
)
43+
44+
data = self._parent.copy()
45+
if kind in self._dataframe_kinds and isinstance(data, series.Series):
46+
raise ValueError(f"plot kind {kind} can only be used for data frames")
47+
48+
return bfplt.plot(data, kind=kind, **kwargs)
49+
2650
def __init__(self, data) -> None:
2751
self._parent = data
2852

2953
def hist(
3054
self, by: typing.Optional[typing.Sequence[str]] = None, bins: int = 10, **kwargs
3155
):
32-
if kwargs.pop("backend", None) is not None:
33-
raise NotImplementedError(
34-
f"Only support matplotlib backend for now. {constants.FEEDBACK_LINK}"
35-
)
36-
return bfplt.plot(self._parent.copy(), kind="hist", by=by, bins=bins, **kwargs)
56+
return self(kind="hist", by=by, bins=bins, **kwargs)
3757

3858
def line(
3959
self,
4060
x: typing.Optional[typing.Hashable] = None,
4161
y: typing.Optional[typing.Hashable] = None,
4262
**kwargs,
4363
):
44-
return bfplt.plot(
45-
self._parent,
46-
kind="line",
47-
x=x,
48-
y=y,
49-
**kwargs,
50-
)
64+
return self(kind="line", x=x, y=y, **kwargs)
5165

5266
def area(
5367
self,
@@ -56,14 +70,7 @@ def area(
5670
stacked: bool = True,
5771
**kwargs,
5872
):
59-
return bfplt.plot(
60-
self._parent.copy(),
61-
kind="area",
62-
x=x,
63-
y=y,
64-
stacked=stacked,
65-
**kwargs,
66-
)
73+
return self(kind="area", x=x, y=y, stacked=stacked, **kwargs)
6774

6875
def scatter(
6976
self,
@@ -73,12 +80,4 @@ def scatter(
7380
c: typing.Union[typing.Hashable, typing.Sequence[typing.Hashable]] = None,
7481
**kwargs,
7582
):
76-
return bfplt.plot(
77-
self._parent.copy(),
78-
kind="scatter",
79-
x=x,
80-
y=y,
81-
s=s,
82-
c=c,
83-
**kwargs,
84-
)
83+
return self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs)

Diff for: tests/system/small/operations/test_plotting.py

+28
Original file line numberDiff line numberDiff line change
@@ -233,3 +233,31 @@ def test_sampling_plot_args_random_state():
233233
msg = "numpy array are different"
234234
with pytest.raises(AssertionError, match=msg):
235235
tm.assert_almost_equal(ax_0.lines[0].get_data()[1], ax_2.lines[0].get_data()[1])
236+
237+
238+
@pytest.mark.parametrize(
239+
("kind", "col_names", "kwargs"),
240+
[
241+
pytest.param("hist", ["int64_col", "int64_too"], {}),
242+
pytest.param("line", ["int64_col", "int64_too"], {}),
243+
pytest.param("area", ["int64_col", "int64_too"], {"stacked": False}),
244+
pytest.param(
245+
"scatter", ["int64_col", "int64_too"], {"x": "int64_col", "y": "int64_too"}
246+
),
247+
pytest.param(
248+
"scatter",
249+
["int64_col"],
250+
{},
251+
marks=pytest.mark.xfail(raises=ValueError),
252+
),
253+
pytest.param(
254+
"uknown",
255+
["int64_col", "int64_too"],
256+
{},
257+
marks=pytest.mark.xfail(raises=NotImplementedError),
258+
),
259+
],
260+
)
261+
def test_plot_call(scalars_dfs, kind, col_names, kwargs):
262+
scalars_df, _ = scalars_dfs
263+
scalars_df[col_names].plot(kind=kind, **kwargs)

Diff for: third_party/bigframes_vendored/pandas/plotting/_core.py

+158
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,166 @@
66
class PlotAccessor:
77
"""
88
Make plots of Series or DataFrame with the `matplotlib` backend.
9+
10+
Parameters
11+
----------
12+
data : Series or DataFrame
13+
The object for which the method is called.
14+
kind : str
15+
The kind of plot to produce:
16+
17+
- 'line' : line plot (default)
18+
- 'hist' : histogram
19+
- 'area' : area plot
20+
- 'scatter' : scatter plot (DataFrame only)
21+
ax : matplotlib axes object, default None
22+
An axes of the current figure.
23+
subplots : bool or sequence of iterables, default False
24+
Whether to group columns into subplots:
25+
26+
- ``False`` : No subplots will be used
27+
- ``True`` : Make separate subplots for each column.
28+
- sequence of iterables of column labels: Create a subplot for each
29+
group of columns. For example `[('a', 'c'), ('b', 'd')]` will
30+
create 2 subplots: one with columns 'a' and 'c', and one
31+
with columns 'b' and 'd'. Remaining columns that aren't specified
32+
will be plotted in additional subplots (one per column).
33+
34+
.. versionadded:: 1.5.0
35+
36+
sharex : bool, default True if ax is None else False
37+
In case ``subplots=True``, share x axis and set some x axis labels
38+
to invisible; defaults to True if ax is None otherwise False if
39+
an ax is passed in. Be aware that passing in both an ax and
40+
``sharex=True`` will alter all x axis labels for all axes in a figure.
41+
sharey : bool, default False
42+
In case ``subplots=True``, share y axis and set some y axis labels to invisible.
43+
layout : tuple, optional
44+
(rows, columns) for the layout of subplots.
45+
figsize : a tuple (width, height) in inches
46+
Size of a figure object.
47+
use_index : bool, default True
48+
Use index as ticks for x axis.
49+
title : str or list
50+
Title to use for the plot. If a string is passed, print the string
51+
at the top of the figure. If a list is passed and `subplots` is
52+
True, print each item in the list above the corresponding subplot.
53+
grid : bool, default None (matlab style default)
54+
Axis grid lines.
55+
legend : bool or {'reverse'}
56+
Place legend on axis subplots.
57+
style : list or dict
58+
The matplotlib line style per column.
59+
logx : bool or 'sym', default False
60+
Use log scaling or symlog scaling on x axis.
61+
62+
logy : bool or 'sym', default False
63+
Use log scaling or symlog scaling on y axis.
64+
65+
loglog : bool or 'sym', default False
66+
Use log scaling or symlog scaling on both x and y axes.
67+
68+
xticks : sequence
69+
Values to use for the xticks.
70+
yticks : sequence
71+
Values to use for the yticks.
72+
xlim : 2-tuple/list
73+
Set the x limits of the current axes.
74+
ylim : 2-tuple/list
75+
Set the y limits of the current axes.
76+
xlabel : label, optional
77+
Name to use for the xlabel on x-axis. Default uses index name as xlabel, or the
78+
x-column name for planar plots.
79+
80+
.. versionchanged:: 1.2.0
81+
82+
Now applicable to planar plots (`scatter`, `hexbin`).
83+
84+
.. versionchanged:: 2.0.0
85+
86+
Now applicable to histograms.
87+
88+
ylabel : label, optional
89+
Name to use for the ylabel on y-axis. Default will show no ylabel, or the
90+
y-column name for planar plots.
91+
92+
.. versionchanged:: 1.2.0
93+
94+
Now applicable to planar plots (`scatter`, `hexbin`).
95+
96+
.. versionchanged:: 2.0.0
97+
98+
Now applicable to histograms.
99+
100+
rot : float, default None
101+
Rotation for ticks (xticks for vertical, yticks for horizontal
102+
plots).
103+
fontsize : float, default None
104+
Font size for xticks and yticks.
105+
colormap : str or matplotlib colormap object, default None
106+
Colormap to select colors from. If string, load colormap with that
107+
name from matplotlib.
108+
colorbar : bool, optional
109+
If True, plot colorbar (only relevant for 'scatter' and 'hexbin'
110+
plots).
111+
position : float
112+
Specify relative alignments for bar plot layout.
113+
From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5
114+
(center).
115+
table : bool, Series or DataFrame, default False
116+
If True, draw a table using the data in the DataFrame and the data
117+
will be transposed to meet matplotlib's default layout.
118+
If a Series or DataFrame is passed, use passed data to draw a
119+
table.
120+
yerr : DataFrame, Series, array-like, dict and str
121+
See :ref:`Plotting with Error Bars <visualization.errorbars>` for
122+
detail.
123+
xerr : DataFrame, Series, array-like, dict and str
124+
Equivalent to yerr.
125+
stacked : bool, default False in line and bar plots, and True in area plot
126+
If True, create stacked plot.
127+
secondary_y : bool or sequence, default False
128+
Whether to plot on the secondary y-axis if a list/tuple, which
129+
columns to plot on secondary y-axis.
130+
mark_right : bool, default True
131+
When using a secondary_y axis, automatically mark the column
132+
labels with "(right)" in the legend.
133+
include_bool : bool, default False
134+
If True, boolean values can be plotted.
135+
**kwargs
136+
Options to pass to matplotlib plotting method.
137+
138+
Returns
139+
-------
140+
:class:`matplotlib.axes.Axes` or numpy.ndarray of them
141+
142+
Notes
143+
-----
144+
- See matplotlib documentation online for more on this subject
145+
146+
Examples
147+
--------
148+
For Series:
149+
150+
.. plot::
151+
:context: close-figs
152+
153+
>>> import bigframes.pandas as bpd
154+
>>> ser = bpd.Series([1, 2, 3, 3])
155+
>>> plot = ser.plot(kind='hist', title="My plot")
156+
157+
For DataFrame:
158+
159+
.. plot::
160+
:context: close-figs
161+
162+
>>> df = bpd.DataFrame({'length': [1.5, 0.5, 1.2, 0.9, 3],
163+
... 'width': [0.7, 0.2, 0.15, 0.2, 1.1]},
164+
... index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])
165+
>>> plot = df.plot(title="DataFrame Plot")
9166
"""
10167

168+
11169
def hist(
12170
self, by: typing.Optional[typing.Sequence[str]] = None, bins: int = 10, **kwargs
13171
):

0 commit comments

Comments
 (0)