Python Plotly:如何使用Plotly和Plotly express绘制回归线?

Python Plotly:如何使用Plotly和Plotly express绘制回归线?,python,dataframe,plotly,regression,plotly-python,Python,Dataframe,Plotly,Regression,Plotly Python,我有一个数据帧,df和pm1和pm25列。我想展示一个图表(用Plotly)说明这两个信号的相关性。到目前为止,我已经成功地展示了散点图,但我没有画出信号之间合适的相关线。到目前为止,我已经尝试过: denominator=df.pm1**2-df.pm1.mean()*df.pm1.sum() print('denominator',denominator) m=(df.pm1.dot(df.pm25)-df.pm25.mean()*df.pm1.sum())/denominator b=(d

我有一个数据帧,df和pm1和pm25列。我想展示一个图表(用Plotly)说明这两个信号的相关性。到目前为止,我已经成功地展示了散点图,但我没有画出信号之间合适的相关线。到目前为止,我已经尝试过:

# Closed-form least-squares fit of pm25 on pm1 (the asker's attempt, kept verbatim).
# NOTE(review): `df.pm1**2` is an element-wise Series, not the scalar sum of
# squares. The formula needs `df.pm1.dot(df.pm1)` here (as already used in the
# numerator of `b` below); as written, `denominator`, `m` and `b` all end up
# as Series instead of scalars, so `y_pred` is not a single straight line.
denominator=df.pm1**2-df.pm1.mean()*df.pm1.sum()
print('denominator',denominator)
# Slope: (sum(x*y) - mean(y)*sum(x)) / denominator
m=(df.pm1.dot(df.pm25)-df.pm25.mean()*df.pm1.sum())/denominator
# Intercept: (mean(y)*sum(x*x) - mean(x)*sum(x*y)) / denominator
b=(df.pm25.mean()*df.pm1.dot(df.pm1)-df.pm1.mean()*df.pm1.dot(df.pm25))/denominator
# Predicted pm25 for every observed pm1 value.
y_pred=m*df.pm1+b


# Trace for the fitted values, drawn in red over the observed pm1 values.
lineOfBestFit = go.Scattergl(
    x=df.pm1,
    y=y_pred,
    name='Line of best fit',
    line=dict(
        color='red',
    )
)

# `dataPoints` is defined outside this snippet — presumably the scatter trace
# of the raw (pm1, pm25) observations; verify against the full script.
data = [dataPoints, lineOfBestFit]
figure = go.Figure(data=data)

figure.show()

绘图:

from sklearn.linear_model import LinearRegression
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import datetime

# Sample data: two reproducible random walks of equal length, offset to ~100.
np.random.seed(123)
numdays = 20

# The two randint draws must stay in this order to reproduce the same series.
x_steps = np.random.randint(low=-20, high=20, size=numdays)
X = (x_steps.cumsum() + 100).tolist()
y_steps = np.random.randint(low=-20, high=20, size=numdays)
Y = (y_steps.cumsum() + 100).tolist()
df = pd.DataFrame({'X': X, 'Y': Y})

# Regression: scikit-learn expects a 2-D (n_samples, 1) feature matrix.
features = df[['X']].to_numpy()
reg = LinearRegression()
reg.fit(features, Y)
df['bestfit'] = reg.predict(features)

# Figure: raw observations as markers, fitted values as a line.
points_trace = go.Scatter(
    name='X vs Y', x=df['X'], y=df['Y'].values, mode='markers'
)
fit_trace = go.Scatter(
    name='line of best fit', x=X, y=df['bestfit'], mode='lines'
)
fig = go.Figure(data=[points_trace, fit_trace])

# Axis titles.
fig.update_layout(xaxis_title='X', yaxis_title='Y')

fig.show()
import plotly.graph_objects as go
import statsmodels.api as sm
import pandas as pd
import numpy as np
import datetime

# Sample data: two reproducible random walks of equal length, offset to ~100.
np.random.seed(123)
numdays = 20

# The two randint draws must stay in this order to reproduce the same series.
x_steps = np.random.randint(low=-20, high=20, size=numdays)
X = (x_steps.cumsum() + 100).tolist()
y_steps = np.random.randint(low=-20, high=20, size=numdays)
Y = (y_steps.cumsum() + 100).tolist()

df = pd.DataFrame({'X': X, 'Y': Y})

# Regression: OLS of Y on X; add_constant supplies the intercept column.
design = sm.add_constant(df['X'])
ols_fit = sm.OLS(df['Y'], design).fit()
df['bestfit'] = ols_fit.fittedvalues

# Figure: raw observations as markers, fitted values as a line.
points_trace = go.Scatter(
    name='X vs Y', x=df['X'], y=df['Y'].values, mode='markers'
)
fit_trace = go.Scatter(
    name='line of best fit', x=X, y=df['bestfit'], mode='lines'
)
fig = go.Figure(data=[points_trace, fit_trace])

# Axis titles.
fig.update_layout(xaxis_title='X', yaxis_title='Y')

fig.show()

如何正确绘制最佳线条?

更新1: 现在,plotly express可以轻松地处理长格式和宽格式两种数据(在您的情况下是后者),绘制回归线只需要:

# Scatter plot of Y against X; trendline="ols" asks Plotly Express to add a
# regression line to the figure.
fig = px.scatter(df, x='X', y='Y', trendline="ols")
宽格式数据的完整代码段见本回答的末尾

如果希望回归线突出,可以直接通过以下方式编辑线颜色:

# Recolor the regression line; it is addressed here as the figure's second trace.
fig.data[1].line.color = 'red'

您可以通过以下方式访问回归参数,如 alpha 和 beta:

# Retrieve the fit results behind the figure's trendline(s).
model = px.get_trendline_results(fig)
# NOTE(review): params[0] / params[1] are presumably the intercept and the
# slope of the fitted line — confirm the ordering against the Plotly docs.
alpha = model.iloc[0]["px_fit_results"].params[0]
beta = model.iloc[0]["px_fit_results"].params[1]
您甚至可以通过以下方式请求非线性拟合:

# Same scatter plot, but with a non-linear (LOWESS) trendline instead of OLS.
fig = px.scatter(df, x='X', y='Y', trendline="lowess")

那么长格式的数据呢?这正是plotly express展示其真正威力的地方。以内置数据集 px.data.gapminder 为例,可以通过指定 color="continent" 来为各组国家分别绘制回归线:

长格式的完整代码段 如果你想在模型选择和输出方面有更大的灵活性,你可以参考我对下面这篇文章的原始答案。但首先,在我回答的开头,这里有一个完整的例子片段:

宽数据的完整代码段
原始答复: 对于回归分析,我喜欢使用 statsmodels.api 或 sklearn.linear_model。我还喜欢在一个数据框架中组织数据和回归结果。这里有一种方法可以以干净、有条理的方式完成您想要的任务:

使用sklearn或statsmodels绘图:

from sklearn.linear_model import LinearRegression
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import datetime

# Sample data: two reproducible random walks of equal length, offset to ~100.
np.random.seed(123)
numdays = 20

# The two randint draws must stay in this order to reproduce the same series.
x_steps = np.random.randint(low=-20, high=20, size=numdays)
X = (x_steps.cumsum() + 100).tolist()
y_steps = np.random.randint(low=-20, high=20, size=numdays)
Y = (y_steps.cumsum() + 100).tolist()
df = pd.DataFrame({'X': X, 'Y': Y})

# Regression: scikit-learn expects a 2-D (n_samples, 1) feature matrix.
features = df[['X']].to_numpy()
reg = LinearRegression()
reg.fit(features, Y)
df['bestfit'] = reg.predict(features)

# Figure: raw observations as markers, fitted values as a line.
points_trace = go.Scatter(
    name='X vs Y', x=df['X'], y=df['Y'].values, mode='markers'
)
fit_trace = go.Scatter(
    name='line of best fit', x=X, y=df['bestfit'], mode='lines'
)
fig = go.Figure(data=[points_trace, fit_trace])

# Axis titles.
fig.update_layout(xaxis_title='X', yaxis_title='Y')

fig.show()
import plotly.graph_objects as go
import statsmodels.api as sm
import pandas as pd
import numpy as np
import datetime

# Sample data: two reproducible random walks of equal length, offset to ~100.
np.random.seed(123)
numdays = 20

# The two randint draws must stay in this order to reproduce the same series.
x_steps = np.random.randint(low=-20, high=20, size=numdays)
X = (x_steps.cumsum() + 100).tolist()
y_steps = np.random.randint(low=-20, high=20, size=numdays)
Y = (y_steps.cumsum() + 100).tolist()

df = pd.DataFrame({'X': X, 'Y': Y})

# Regression: OLS of Y on X; add_constant supplies the intercept column.
design = sm.add_constant(df['X'])
ols_fit = sm.OLS(df['Y'], design).fit()
df['bestfit'] = ols_fit.fittedvalues

# Figure: raw observations as markers, fitted values as a line.
points_trace = go.Scatter(
    name='X vs Y', x=df['X'], y=df['Y'].values, mode='markers'
)
fit_trace = go.Scatter(
    name='line of best fit', x=X, y=df['bestfit'], mode='lines'
)
fig = go.Figure(data=[points_trace, fit_trace])

# Axis titles.
fig.update_layout(xaxis_title='X', yaxis_title='Y')

fig.show()

使用sklearn的代码:

from sklearn.linear_model import LinearRegression
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import datetime

# Sample data: two reproducible random walks of equal length, offset to ~100.
np.random.seed(123)
numdays = 20

# The two randint draws must stay in this order to reproduce the same series.
x_steps = np.random.randint(low=-20, high=20, size=numdays)
X = (x_steps.cumsum() + 100).tolist()
y_steps = np.random.randint(low=-20, high=20, size=numdays)
Y = (y_steps.cumsum() + 100).tolist()
df = pd.DataFrame({'X': X, 'Y': Y})

# Regression: scikit-learn expects a 2-D (n_samples, 1) feature matrix.
features = df[['X']].to_numpy()
reg = LinearRegression()
reg.fit(features, Y)
df['bestfit'] = reg.predict(features)

# Figure: raw observations as markers, fitted values as a line.
points_trace = go.Scatter(
    name='X vs Y', x=df['X'], y=df['Y'].values, mode='markers'
)
fit_trace = go.Scatter(
    name='line of best fit', x=X, y=df['bestfit'], mode='lines'
)
fig = go.Figure(data=[points_trace, fit_trace])

# Axis titles.
fig.update_layout(xaxis_title='X', yaxis_title='Y')

fig.show()
import plotly.graph_objects as go
import statsmodels.api as sm
import pandas as pd
import numpy as np
import datetime

# Sample data: two reproducible random walks of equal length, offset to ~100.
np.random.seed(123)
numdays = 20

# The two randint draws must stay in this order to reproduce the same series.
x_steps = np.random.randint(low=-20, high=20, size=numdays)
X = (x_steps.cumsum() + 100).tolist()
y_steps = np.random.randint(low=-20, high=20, size=numdays)
Y = (y_steps.cumsum() + 100).tolist()

df = pd.DataFrame({'X': X, 'Y': Y})

# Regression: OLS of Y on X; add_constant supplies the intercept column.
design = sm.add_constant(df['X'])
ols_fit = sm.OLS(df['Y'], design).fit()
df['bestfit'] = ols_fit.fittedvalues

# Figure: raw observations as markers, fitted values as a line.
points_trace = go.Scatter(
    name='X vs Y', x=df['X'], y=df['Y'].values, mode='markers'
)
fit_trace = go.Scatter(
    name='line of best fit', x=X, y=df['bestfit'], mode='lines'
)
fig = go.Figure(data=[points_trace, fit_trace])

# Axis titles.
fig.update_layout(xaxis_title='X', yaxis_title='Y')

fig.show()
使用statsmodels的代码:

from sklearn.linear_model import LinearRegression
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import datetime

# Sample data: two reproducible random walks of equal length, offset to ~100.
np.random.seed(123)
numdays = 20

# The two randint draws must stay in this order to reproduce the same series.
x_steps = np.random.randint(low=-20, high=20, size=numdays)
X = (x_steps.cumsum() + 100).tolist()
y_steps = np.random.randint(low=-20, high=20, size=numdays)
Y = (y_steps.cumsum() + 100).tolist()
df = pd.DataFrame({'X': X, 'Y': Y})

# Regression: scikit-learn expects a 2-D (n_samples, 1) feature matrix.
features = df[['X']].to_numpy()
reg = LinearRegression()
reg.fit(features, Y)
df['bestfit'] = reg.predict(features)

# Figure: raw observations as markers, fitted values as a line.
points_trace = go.Scatter(
    name='X vs Y', x=df['X'], y=df['Y'].values, mode='markers'
)
fit_trace = go.Scatter(
    name='line of best fit', x=X, y=df['bestfit'], mode='lines'
)
fig = go.Figure(data=[points_trace, fit_trace])

# Axis titles.
fig.update_layout(xaxis_title='X', yaxis_title='Y')

fig.show()
import plotly.graph_objects as go
import statsmodels.api as sm
import pandas as pd
import numpy as np
import datetime

# Sample data: two reproducible random walks of equal length, offset to ~100.
np.random.seed(123)
numdays = 20

# The two randint draws must stay in this order to reproduce the same series.
x_steps = np.random.randint(low=-20, high=20, size=numdays)
X = (x_steps.cumsum() + 100).tolist()
y_steps = np.random.randint(low=-20, high=20, size=numdays)
Y = (y_steps.cumsum() + 100).tolist()

df = pd.DataFrame({'X': X, 'Y': Y})

# Regression: OLS of Y on X; add_constant supplies the intercept column.
design = sm.add_constant(df['X'])
ols_fit = sm.OLS(df['Y'], design).fit()
df['bestfit'] = ols_fit.fittedvalues

# Figure: raw observations as markers, fitted values as a line.
points_trace = go.Scatter(
    name='X vs Y', x=df['X'], y=df['Y'].values, mode='markers'
)
fit_trace = go.Scatter(
    name='line of best fit', x=X, y=df['bestfit'], mode='lines'
)
fig = go.Figure(data=[points_trace, fit_trace])

# Axis titles.
fig.update_layout(xaxis_title='X', yaxis_title='Y')

fig.show()

Plotly还附带了statsmodels的原生封装,用于绘制(非)线性趋势线:

从他们的文件中引用:


import plotly.express as px
df = px.data.tips()
fig = px.scatter(df, x="total_bill", y="tip", trendline="ols")
fig.show()

哇,这是一种非常直观和快速的方法,可以实现问题中的要求