Python 从类向DataFrame添加列
我希望提高我的OOP技能,并编写了一个脚本来提取股票数据并运行一些简单的统计数据。我能够单独运行和调用Evaluation类中的每个函数(下面有注释),但在尝试循环遍历ticker列表并将统计信息附加到初始数据帧时遇到问题Python 从类向DataFrame添加列,python,pandas,oop,Python,Pandas,Oop,我希望提高我的OOP技能,并编写了一个脚本来提取股票数据并运行一些简单的统计数据。我能够单独运行和调用Evaluation类中的每个函数(下面有注释),但在尝试循环遍历ticker列表并将统计信息附加到初始数据帧时遇到问题 import datetime as d import pandas as pd import pandas_datareader.data as web import numpy as np start = d.datetime(2019, 1, 1) end = d.d
import datetime as d
import pandas as pd
import pandas_datareader.data as web
import numpy as np
start = d.datetime(2019, 1, 1)
end = d.datetime(2020, 4, 17)
class Security(object):
def _init__(self, ticker, data_platform, start_date, end_date):
self.ticker = ticker
self.data_platform = data_platform
self.start_date = start_date
self.end_date = end_date
def fetch_stock_data(self, ticker, data_platform, start_date, end_date):
df = web.DataReader(ticker, data_platform, start_date, end_date)
return df
class Evaluation(Security):
def __init__(self, ticker, data_platform, start_date, end_date):
self.df = Security.fetch_stock_data(
self, ticker, data_platform, start_date, end_date)
def simple_moving_average(self, period):
df = self.df
df['SMA-{}'.format(period)] = df['Adj Close'].rolling(period).mean()
return df['SMA-{}'.format(period)]
def exp_moving_average(self, period):
df = self.df
df['EMA_{}'.format(period)] = df['Adj Close'].ewm(span=period).mean()
return df['EMA_{}'.format(period)]
def rsi(self, period):
df = self.df
delta = df['Adj Close'].diff()
up = delta * 0
down = up.copy()
up[delta > 0] = delta[delta > 0]
down[delta < 0] = -delta[delta < 0]
up[up.index[period - 1]] = np.mean(up[:period])
up = up.drop(up.index[:(period - 1)])
down[down.index[period - 1]] = np.mean(down[:period])
down = down.drop(down.index[:(period - 1)])
rs = up.ewm(span=period - 1).mean() / down.ewm(span=period - 1).mean()
rsi_calc = 100 - 100 / (1 + rs)
df['rsi'] = rsi_calc
return df['rsi']
# pypl = Evaluation('PYPL', 'yahoo', start, end)
# print(csgs.df)
# print(csgs.simple_moving_average(50))
# print(csgs.exp_moving_average(26))
# print(csgs.rsi(14))
tickers = ['PYPL', 'TSLA']
for i in tickers:
df = Evaluation(i, 'yahoo', start, end)
df['SMA'] = df.simple_moving_average(50)
df['EMA'] = df.exp_moving_average(26)
df['rsi'] = df.rsi(14)
print(df)
>>>e = Evaluation()
>>>type(e)
__main__.Evaluation
>>>type(e.df)
pandas.core.frame.DataFrame
您混淆了对象方法和数据帧方法。在您的示例中,
df
是一个Evaluation
对象,而不是数据帧
import datetime as d
import pandas as pd
import pandas_datareader.data as web
import numpy as np
start = d.datetime(2019, 1, 1)
end = d.datetime(2020, 4, 17)
class Security(object):
def _init__(self, ticker, data_platform, start_date, end_date):
self.ticker = ticker
self.data_platform = data_platform
self.start_date = start_date
self.end_date = end_date
def fetch_stock_data(self, ticker, data_platform, start_date, end_date):
df = web.DataReader(ticker, data_platform, start_date, end_date)
return df
class Evaluation(Security):
def __init__(self, ticker, data_platform, start_date, end_date):
self.df = Security.fetch_stock_data(
self, ticker, data_platform, start_date, end_date)
def simple_moving_average(self, period):
df = self.df
df['SMA-{}'.format(period)] = df['Adj Close'].rolling(period).mean()
return df['SMA-{}'.format(period)]
def exp_moving_average(self, period):
df = self.df
df['EMA_{}'.format(period)] = df['Adj Close'].ewm(span=period).mean()
return df['EMA_{}'.format(period)]
def rsi(self, period):
df = self.df
delta = df['Adj Close'].diff()
up = delta * 0
down = up.copy()
up[delta > 0] = delta[delta > 0]
down[delta < 0] = -delta[delta < 0]
up[up.index[period - 1]] = np.mean(up[:period])
up = up.drop(up.index[:(period - 1)])
down[down.index[period - 1]] = np.mean(down[:period])
down = down.drop(down.index[:(period - 1)])
rs = up.ewm(span=period - 1).mean() / down.ewm(span=period - 1).mean()
rsi_calc = 100 - 100 / (1 + rs)
df['rsi'] = rsi_calc
return df['rsi']
# pypl = Evaluation('PYPL', 'yahoo', start, end)
# print(csgs.df)
# print(csgs.simple_moving_average(50))
# print(csgs.exp_moving_average(26))
# print(csgs.rsi(14))
tickers = ['PYPL', 'TSLA']
for i in tickers:
df = Evaluation(i, 'yahoo', start, end)
df['SMA'] = df.simple_moving_average(50)
df['EMA'] = df.exp_moving_average(26)
df['rsi'] = df.rsi(14)
print(df)
>>>e = Evaluation()
>>>type(e)
__main__.Evaluation
>>>type(e.df)
pandas.core.frame.DataFrame
行df['SMA']=df.simple\u moving\u average(50)
失败,因为无法向对象添加列。您需要使用df.df['SMA']=df.simple\u moving\u average(50)
正如NomadMonad所指出的,使用df作为求值对象的变量名是令人困惑的,因此最好给它一个不同的名称。但是,eval
是python中的内置函数,因此最好使用e
此外,您应该出于几个原因更改类设计
- 在python 3中,不需要从
对象继承
的Security
方法只有一个前导下划线,而不是两个\uuuuu init\uuuu
- 您不希望
继承自评估
。相反,在安全性
的求值
方法中传递一个初始化
对象安全
- 您不希望在实例化对象时调用刮取网站的方法。对datareader的调用应该是一个单独的方法
- 如果在
方法中设置了这些值,则不需要将参数传递给方法。您可以使用\uuu init\uu
访问它们self
- 您不需要在求值方法中修改基础数据帧。而是返回该方法生成的值
导入日期时间
作为pd进口熊猫
将numpy作为np导入
以web形式导入datareader.data
班级安全:
定义初始日期(self、ticker、数据平台、开始日期、结束日期):
self.ticker=ticker
self.data\u platform=数据平台
self.start\u date=开始日期
self.end\u日期=end\u日期
self.df=无
def fetch_stock_数据(自身):
self.df=web.DataReader(self.ticker、self.data\u平台、self.start\u日期、self.end\u日期)
班级评估:
定义初始化(自我,安全):
自我安全
def简单移动平均值(自身、期间):
df=self.security.df
返回df['Adj Close'].滚动(周期).平均值()
def exp_移动平均值(自身、期间):
df=self.security.df
返回df['Adj Close'].ewm(span=period).mean()
def rsi(自身,周期):
df=self.security.df
delta=df['Adj Close'].diff()
向上=增量*0
down=up.copy()
向上[增量>0]=增量[增量>0]
向下[delta<0]=-delta[delta<0]
上升[上升指数[期间-1]]=np.平均值(上升[:期间])
up=up.drop(up.index[:(句点-1)])
下跌[下跌指数[期间-1]]=np.均值(下跌[:期间])
down=down.drop(down.index[:(句点-1)])
rs=up.ewm(span=period-1.mean()/down.ewm(span=period-1.mean())
返回100-100/(1+rs)
start=datetime.datetime(2019,1,1)
end=datetime.datetime(2019,4,17)
s=安全性(股票代码“yahoo”,开始,结束)
e=评估(安全=s)
Evaluation返回一个存储df
属性而不是df
的对象,因此您应该执行eval=Evaluation(i'yahoo',start,end)
然后eval.df['foo']=data