Python 如何使用matplotlib正确注释同一图形中的多个绘图?

Python 如何使用matplotlib正确注释同一图形中的多个绘图?,python,pandas,matplotlib,dataframe,plot,Python,Pandas,Matplotlib,Dataframe,Plot,我将两组不同的数据帧垂直添加到一个系列中。我想在我们使用RandomForestRegressionor拟合模型后对图进行注释,并绘制实际值和预测值。我正在考虑的两个数据集在 我的预测解决方案和尝试绘制数值图如下所示 import glob import os import pandas as pd import numpy as np from sklearn.ensemble import RandomForestRegressor from sklearn.metrics impo

我将两组不同的数据帧垂直添加到一个系列中。我想在我们使用
RandomForestRegressor
拟合模型后对图进行注释,并绘制实际值和预测值。我正在考虑的两个数据集在

我的预测解决方案和尝试绘制数值图如下所示

import glob
import os    
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from math import sqrt
from sklearn.cross_validation import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns


# Load every cubic*.csv in the working directory and stack the frames
# vertically. The paths are sorted because glob.glob() returns them in an
# unspecified order, and the row order decides where each dataset ends.
# (For a single dataset use: df = pd.read_csv('cubic31.csv'))
csv_paths = sorted(glob.glob(os.path.join('', "cubic*.csv")))
df = pd.concat(map(pd.read_csv, csv_paths), ignore_index=True)

# Scale time by 100 and store it as a fixed-precision (10 decimals) string.
df['time'] = pd.Series(["{0:.10f}".format(val * 100) for val in df['time']], index=df.index)

# Lag features: X_t1 and X_t2 hold X shifted down by one and two rows.
for i in range(1, 3):
    df['X_t' + str(i)] = df['X'].shift(i)

print(df)

# The shifts leave NaN in the leading rows; drop those rows.
df.dropna(inplace=True)

# Features are every column except the target Y.
X = df.drop('Y', axis=1)
y = df['Y']

# NOTE(review): the file-level `from sklearn.cross_validation import ...`
# line fails on scikit-learn >= 0.20; train_test_split from
# sklearn.model_selection is the supported import.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.40)
# 'time' is a formatted string column, so it is excluded from the model inputs.
X_train = X_train.drop('time', axis=1)
X_test = X_test.drop('time', axis=1)

print(X.shape)
print(df['Y'].shape)

print()
print("Size of X_train:", len(X_train))
print("Size of Y_train:", len(y_train))
print("Size of X_test:", len(X_test))
print("Size of Y_test:", len(y_test))

print(X_train.shape)
print(y_train.shape)

print()

# Fit the regressor before predicting; `reg` was previously used without ever
# being created. Default hyper-parameters are an assumption -- tune as needed.
reg = RandomForestRegressor()
reg.fit(X_train, y_train)

####### to add the trendline
fig, ax = plt.subplots()
ax.plot(df['time'].values, df['Y'].values)

# Second figure: actual test values in original row order.
fig, ax = plt.subplots()
y_test = y_test.sort_index()
X_test = X_test.sort_index()

modelPred_test = reg.predict(X_test)
index_values = range(len(y_test))
ax.plot(pd.Series(index_values), y_test.values)

# Column 0 is the prediction, column 1 the actual value.
plotsInOne = pd.concat([pd.Series(modelPred_test), pd.Series(y_test.values)], axis=1)

# DataFrame.plot() opens its own figure and returns its Axes, so no separate
# plt.figure() call is needed (it would only leave an empty window behind).
pred_ax = plotsInOne.plot()
pred_ax.legend(loc='best')
# The last plotted point sits at x = len - 1 because the index starts at 0.
pred_ax.annotate('annote test!',
                 xy=(len(modelPred_test) - 1, modelPred_test[-1]),
                 xycoords='data',
                 xytext=(-30, 30),
                 textcoords='offset points',
                 arrowprops=dict(arrowstyle="->"))
当我仅将单个数据集(例如:
cubic31.csv
)作为
df=pd.read_csv('cubic31.csv')
并应用绘图命令

    fig, ax = plt.subplots()
    ax.plot(df['time'].values, df['Y'].values)
    fig, ax = plt.subplots()
    fig, ax = plt.subplots()
    ax.plot(df['time'].values, df['Y'].values)
    fig, ax = plt.subplots()
这是我得到的图

当我们将第二个数据集(
cubic32.csv
)作为

df=pd.read_csv('testdata2.csv')
并应用绘图命令

    fig, ax = plt.subplots()
    ax.plot(df['time'].values, df['Y'].values)
    fig, ax = plt.subplots()
    fig, ax = plt.subplots()
    ax.plot(df['time'].values, df['Y'].values)
    fig, ax = plt.subplots()
这是我得到的图

但是,如果我将这两个数据集合并为

df=pd.concat(map(pd.read_csv, glob.glob(os.path.join('', "cubic*.csv")))),这是我得到的图

我想在每个图的结尾处做个标记(如图中的红色箭头所示)。我试过使用下面的方法,但它只指向第二个图的末尾,而不是第一个图,如下所示

import glob
import os    
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from math import sqrt
from sklearn.cross_validation import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns


# Load every cubic*.csv in the working directory and stack the frames
# vertically. The paths are sorted because glob.glob() returns them in an
# unspecified order, and the row order decides where each dataset ends.
# (For a single dataset use: df = pd.read_csv('cubic31.csv'))
csv_paths = sorted(glob.glob(os.path.join('', "cubic*.csv")))
df = pd.concat(map(pd.read_csv, csv_paths), ignore_index=True)

# Scale time by 100 and store it as a fixed-precision (10 decimals) string.
df['time'] = pd.Series(["{0:.10f}".format(val * 100) for val in df['time']], index=df.index)

# Lag features: X_t1 and X_t2 hold X shifted down by one and two rows.
for i in range(1, 3):
    df['X_t' + str(i)] = df['X'].shift(i)

print(df)

# The shifts leave NaN in the leading rows; drop those rows.
df.dropna(inplace=True)

# Features are every column except the target Y.
X = df.drop('Y', axis=1)
y = df['Y']

# NOTE(review): the file-level `from sklearn.cross_validation import ...`
# line fails on scikit-learn >= 0.20; train_test_split from
# sklearn.model_selection is the supported import.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.40)
# 'time' is a formatted string column, so it is excluded from the model inputs.
X_train = X_train.drop('time', axis=1)
X_test = X_test.drop('time', axis=1)

print(X.shape)
print(df['Y'].shape)

print()
print("Size of X_train:", len(X_train))
print("Size of Y_train:", len(y_train))
print("Size of X_test:", len(X_test))
print("Size of Y_test:", len(y_test))

print(X_train.shape)
print(y_train.shape)

print()

# Fit the regressor before predicting; `reg` was previously used without ever
# being created. Default hyper-parameters are an assumption -- tune as needed.
reg = RandomForestRegressor()
reg.fit(X_train, y_train)

####### to add the trendline
fig, ax = plt.subplots()
ax.plot(df['time'].values, df['Y'].values)

# Second figure: actual test values in original row order.
fig, ax = plt.subplots()
y_test = y_test.sort_index()
X_test = X_test.sort_index()

modelPred_test = reg.predict(X_test)
index_values = range(len(y_test))
ax.plot(pd.Series(index_values), y_test.values)

# Column 0 is the prediction, column 1 the actual value.
plotsInOne = pd.concat([pd.Series(modelPred_test), pd.Series(y_test.values)], axis=1)

# DataFrame.plot() opens its own figure and returns its Axes, so no separate
# plt.figure() call is needed (it would only leave an empty window behind).
pred_ax = plotsInOne.plot()
pred_ax.legend(loc='best')
# The last plotted point sits at x = len - 1 because the index starts at 0.
pred_ax.annotate('annote test!',
                 xy=(len(modelPred_test) - 1, modelPred_test[-1]),
                 xycoords='data',
                 xytext=(-30, 30),
                 textcoords='offset points',
                 arrowprops=dict(arrowstyle="->"))

我们如何对plotting命令执行此操作,以便能够自动为绘图添加注释和标签(例如,
这是cubic31.csv结束的地方
这是cubic32.csv结束的地方
,…等等),如下所示


考虑在注释的xy坐标中使用
df.ix[](已在 pandas 1.0 中移除,新版本请改用 df.loc[] 或 df.iloc[])
,您可以通过
df.shape
(行数和列数的元组)获取最后一行的索引。下面用随机数据演示(设置了随机种子以便结果可复现)。第二个数据帧的大小是第一个数据帧的一半。下面还展示了水平合并和垂直追加后带注释的图:

数据

个体注释

def runplot(df, title):
    """Plot ``Y`` against ``time`` for one frame and annotate its last row.

    Args:
        df: DataFrame providing the ``time`` and ``Y`` columns.
        title: Text used as the figure title.
    """
    rows = len(df)

    fig, ax = plt.subplots()
    ax.plot(df['time'].values, df['Y'].values)
    ax.set_title(title)

    # DataFrame.ix was removed in pandas 1.0; read the last Y value by
    # position instead.
    # NOTE(review): x is the row count, which presumably matches the last
    # 'time' value of the demo data -- confirm for other inputs.
    ax.annotate('annote test!',
                xy=(rows, df['Y'].iloc[-1]),
                xycoords='data',
                xytext=(-30, 30),
                textcoords='offset points',
                arrowprops=dict(arrowstyle="->"))


# One annotated figure per input frame.
for frame, label in ((df1, 'Dataframe 1'), (df2, 'Dataframe 2')):
    runplot(frame, label)

水平合并/垂直附加注释

def runplot_merge(left_df, right_df, df, title):
    """Plot a combined frame and annotate where each source frame ends.

    Args:
        left_df: First source DataFrame; must have a ``Y`` column.
        right_df: Second source DataFrame; must have a ``Y`` column.
        df: Combined DataFrame whose ``time`` and ``Y`` columns are plotted.
        title: Text used as the figure title.
    """
    fig, ax = plt.subplots()
    ax.plot(df['time'].values, df['Y'].values)
    ax.set_title(title)

    # One arrow per source frame, placed at (row count, last Y value).
    # DataFrame.ix was removed in pandas 1.0, so the last value is read by
    # position with .iloc.
    # NOTE(review): x is each frame's own row count, which presumably matches
    # its last 'time' value in the demo data -- confirm for other inputs.
    for source, label in ((left_df, 'annote test 1!'),
                          (right_df, 'annote test 2!')):
        ax.annotate(label,
                    xy=(len(source), source['Y'].iloc[-1]),
                    xycoords='data',
                    xytext=(-30, 30),
                    textcoords='offset points',
                    arrowprops=dict(arrowstyle="->"))


# Horizontal merge: df2's columns are placed beside df1's.
frames = [df1, df2]
df_wide = pd.concat(frames, axis=1)
runplot_merge(df1, df2, df_wide, 'Horizontal Merge')

# Vertical append: stack the rows, renumber the index, then order by time.
stacked = pd.concat(frames, ignore_index=True)
df_long = stacked.sort_values('time')
runplot_merge(df1, df2, df_long, 'Vertical Append')

# Display all figures, then release them.
plt.show()
plt.clf()
plt.close()

我的意思不是让您复制我的解决方案,而是让您展示如何从发布的链接数据生成您的版本。我仍然相信所需的绘图使用不同的数据集。你想在一个绘图中添加100个注释?我会将所有数据帧读入一个列表,
pd.concat()
使用列表,然后通过列表迭代,将dfs传递到
plt.annotate(…)
但绘制的是拼接后的 df。这个问题是关于给图添加注释,还是关于机器学习?为什么会有机器学习依赖?嗨,PaulH,当然整个任务都与机器学习领域有关,但这里的问题是给图添加注释。谢谢你的时间。那么你能不能把它简化为一个简单的例子,不需要我安装机器学习工具?我很困惑。您仍在读取我没有的CSV文件。