df["X"] = pd.Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"])
df
Out[92]:
Col1 Col2 X
0 0.047633 0.150047 A
1 0.296385 0.212826 A
2 0.562141 0.136243 A
3 0.997786 0.224560 A
4 0.585457 0.178914 A
5 0.551201 0.867102 B
6 0.740142 0.003872 B
7 0.959130 0.581506 B
8 0.114489 0.534242 B
9 0.042882 0.314845 B
bp = df.boxplot(by="X")
In [88]: from pandas.plotting import andrews_curves
In [89]: data = pd.read_csv("data/iris.data")
In [90]: plt.figure();
In [91]: andrews_curves(data, "Name");
In [92]: from pandas.plotting import parallel_coordinates
In [93]: data = pd.read_csv("data/iris.data")
In [94]: plt.figure();
In [95]: parallel_coordinates(data, "Name");
滞后图(lag plot)
滞后图是用时间序列和相应的滞后阶数序列做出的散点图。可以用于观测自相关性。
In [96]: from pandas.plotting import lag_plot
In [97]: plt.figure();
In [98]: spacing = np.linspace(-99 * np.pi, 99 * np.pi, num=1000)
In [99]: data = pd.Series(0.1 * np.random.rand(1000) + 0.9 * np.sin(spacing))
In [100]: lag_plot(data);
In [101]: from pandas.plotting import autocorrelation_plot
In [102]: plt.figure();
In [103]: spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000)
In [104]: data = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing))
In [105]: autocorrelation_plot(data);
In [106]: from pandas.plotting import bootstrap_plot
In [107]: data = pd.Series(np.random.rand(1000))
In [108]: bootstrap_plot(data, size=50, samples=500, color="grey");
In [120]: ts = pd.Series(np.random.randn(1000), index=pd.date_range("1/1/2000", periods=1000))
In [121]: ts = np.exp(ts.cumsum())
In [122]: ts.plot(logy=True);
多个Y轴
使用secondary_y参数可以把数据绘制在右侧的第二个Y轴上:对Series传入secondary_y=True,对DataFrame则传入需要使用第二个Y轴的列名列表:
In [125]: plt.figure();
In [126]: ax = df.plot(secondary_y=["A", "B"])
In [127]: ax.set_ylabel("CD scale");
In [128]: ax.right_ax.set_ylabel("AB scale");
绘制在右侧Y轴上的列,在图例中默认会被加上"(right)"标记,想要去掉的话可以设置mark_right=False:
In [129]: plt.figure();
In [130]: df.plot(secondary_y=["A", "B"], mark_right=False);
In [133]: plt.figure();
In [134]: df["A"].plot(x_compat=True);
如果有多个图像需要使用同样的设置,可以在with语句中通过pd.plotting.plot_params.use临时修改绘图参数:
In [135]: plt.figure();
In [136]: with pd.plotting.plot_params.use("x_compat", True):
.....: df["A"].plot(color="r")
.....: df["B"].plot(color="g")
.....: df["C"].plot(color="b")
.....:
In [139]: df.plot(subplots=True, layout=(2, -1), figsize=(6, 6), sharex=False);
一个更复杂的例子:
In [140]: fig, axes = plt.subplots(4, 4, figsize=(9, 9))
In [141]: plt.subplots_adjust(wspace=0.5, hspace=0.5)
In [142]: target1 = [axes[0][0], axes[1][1], axes[2][2], axes[3][3]]
In [143]: target2 = [axes[3][0], axes[2][1], axes[1][2], axes[0][3]]
In [144]: df.plot(subplots=True, ax=target1, legend=False, sharex=False, sharey=False);
In [145]: (-df).plot(subplots=True, ax=target2, legend=False, sharex=False, sharey=False);
画表格
如果设置table=True,可以直接将DataFrame的数据以表格的形式一并显示在图的下方:
In [165]: fig, ax = plt.subplots(1, 1, figsize=(7, 6.5))
In [166]: df = pd.DataFrame(np.random.rand(5, 3), columns=["a", "b", "c"])
In [167]: ax.xaxis.tick_top() # Display x-axis ticks on top.
In [168]: df.plot(table=True, ax=ax)
fig
还可以使用pandas.plotting.table函数,通过loc参数把表格绘制在图像内部的指定位置(例如右上角):
In [172]: from pandas.plotting import table
In [173]: fig, ax = plt.subplots(1, 1)
In [174]: table(ax, np.round(df.describe(), 2), loc="upper right", colWidths=[0.2, 0.2, 0.2]);
In [175]: df.plot(ax=ax, ylim=(0, 2), legend=None);
使用Colormaps
如果要绘制的列(曲线)太多,默认的线条颜色可能不容易分辨。这种情况下可以传入colormap参数。
In [176]: df = pd.DataFrame(np.random.randn(1000, 10), index=ts.index)
In [177]: df = df.cumsum()
In [178]: plt.figure();
In [179]: df.plot(colormap="cubehelix");