Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/python/340.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python可以连接不同长度的数据帧_Python_Pandas - Fatal编程技术网

Python可以连接不同长度的数据帧

Python可以连接不同长度的数据帧,python,pandas,Python,Pandas,我试图使用来自两个数据帧的数据,比如说帧a和帧b,它们是通过时间戳索引的。框架a只包含一列,并且比框架b短。框架a的所有索引都包含在框架b的索引中。我的代码结构如下所示 dataframe = DataFrame(index=frame_a.index) dataframe.join(frame_a["a"]) dataframe.join(frame_b["b"]) 现在“dataframe”由frame_a的索引索引,并且应该只从frame_b[b]中获取那些值,其中索引对应于datafr

我试图使用来自两个数据帧的数据,比如说 frame_a 和 frame_b,它们都以时间戳为索引。frame_a 只包含一列,并且比 frame_b 短。frame_a 的所有索引都包含在 frame_b 的索引中。我的代码结构如下所示:

# Build a frame indexed like frame_a that holds column "a" from frame_a and
# the matching values of column "b" from frame_b.
# DataFrame.join does not modify the frame in place; it returns a new frame,
# so each result has to be assigned back or the joined column is lost.
dataframe = DataFrame(index=frame_a.index)
dataframe = dataframe.join(frame_a["a"])
dataframe = dataframe.join(frame_b["b"])
现在 "dataframe" 以 frame_a 的索引为索引,并且应该只从 frame_b["b"] 中取出索引与 dataframe 的索引相对应的那些值。目前,我使用 iterrows 提取数据,并用它创建新的数据帧 "returnframe":

# Walk the joined frame row by row and collect one result per timestamp.
timestamps = []
results = []
# iterrows() already yields (index label, row Series) pairs, so the row's
# timestamp is available directly and no positional counter is needed.
for index, data in dataframe.iterrows():
    # Read the values by column name: positional access into a two-column
    # row must use 0 and 1, and using 1 and 2 raised an IndexError that a
    # bare "except: pass" silently swallowed.
    a = data["a"]
    b = data["b"]
    if a < b:
        b = b + 1
    # Append to both lists together so they always have the same length;
    # the row label replaces the invalid "b.index[row_idx]" lookup on a scalar.
    results.append(b)
    timestamps.append(index)
# One row per row of "dataframe", indexed by the same timestamps.
returnframe = DataFrame(index=timestamps)
returnframe["results"] = results
行为不像我预料的那样。我需要把 frame_b 的数据添加到 returnframe 中两个帧的索引时间戳相等的位置。但 "join" 似乎在中间添加了一些行,这可能是由重复的索引导致的。对我的其他函数来说,后面添加的数据不改变 returnframe 的长度这一点很重要。

为了完整起见,下面是我的整个函数,其中 df_input 和 df_signals 是函数参数。它接收整数信号,并根据信号是"买入"还是"卖出"来调整持股、资金和估值:

def calculatePerformance(self, df_input, df_signals, closei=3, changei=11, printResults=True):
    """Simulate trading on integer buy/sell signals and return the performance.

    Starting funds are eight times the first "close" value of ``df_input``.
    For every row of ``df_signals`` the signal is applied at the "close"
    price found in ``df_input`` under the same index label.

    Args:
        df_input: Frame with "type" and "close" columns.
        df_signals: Frame with a "signal" column (positive = buy that many
            units, negative = sell, 0 = hold).
        closei: Unused; kept for backward compatibility.
        changei: Unused; kept for backward compatibility.
        printResults: Unused; kept for backward compatibility.

    Returns:
        A DataFrame indexed by the labels of ``df_signals`` with the columns
        "type", "signal" (the raw signal), "funds", "holdings", "valuation"
        and "percChange". Rows whose signal is missing are left out.

    Raises:
        IndexError: If ``df_input`` has no rows.
    """
    # .iloc makes the "first row" lookup explicitly positional; plain [0] on a
    # label-indexed Series relies on a deprecated fallback.
    instrument_type = df_input["type"].iloc[0]
    funds = df_input["close"].iloc[0] * 8
    holdings = 0
    valuation = funds
    # A trade at the current price never changes the valuation, so the
    # starting valuation is the baseline. Reading valuation_array[0] instead
    # failed with IndexError whenever the very first row was a trade.
    initial_valuation = valuation
    percChange = 0

    # A left join emits one row per matching right-hand label, so duplicate
    # labels in df_input would insert extra rows. Keep the first close per
    # label so the result has exactly one row per row of df_signals.
    close = df_input["close"]
    close = close[~close.index.duplicated(keep="first")]
    merged = df_signals[["signal"]].join(close)

    records = []
    for timestamp, raw_signal, price in zip(merged.index, merged["signal"], merged["close"]):
        if pd.isna(raw_signal):
            # No usable signal for this timestamp; skip it explicitly rather
            # than through a swallowed exception.
            continue

        signal = int(raw_signal)
        # Without a price no trade can be executed; the state is carried over.
        if signal != 0 and not pd.isna(price):
            if signal > 0 and signal * price > funds:
                # Not enough funds for the requested buy: do nothing.
                signal = 0
            elif signal < 0 and holdings + signal < 0:
                # Cannot sell more than is held: sell everything instead.
                # The sign matters here, a positive value would be a buy.
                signal = -holdings

            funds -= signal * price
            holdings += signal
            valuation = funds + holdings * price
            percChange = (valuation - initial_valuation) / initial_valuation * 100.

        records.append(
            (timestamp, instrument_type, raw_signal, funds, holdings, valuation, percChange)
        )

    # build and return the performance df of this stock
    df_performance = pd.DataFrame(
        [record[1:] for record in records],
        index=[record[0] for record in records],
        columns=["type", "signal", "funds", "holdings", "valuation", "percChange"],
    )
    return df_performance
def calculatePerformance(self, df_input, df_signals, closei=3, changei=11, printResults=True):
    """Simulate trading on integer buy/sell signals and return the performance.

    Starting funds are eight times the first "close" value of ``df_input``.
    For every row of ``df_signals`` the signal is applied at the "close"
    price found in ``df_input`` under the same index label.

    Args:
        df_input: Frame with "type" and "close" columns.
        df_signals: Frame with a "signal" column (positive = buy that many
            units, negative = sell, 0 = hold).
        closei: Unused; kept for backward compatibility.
        changei: Unused; kept for backward compatibility.
        printResults: Unused; kept for backward compatibility.

    Returns:
        A DataFrame indexed by the labels of ``df_signals`` with the columns
        "type", "signal" (the raw signal), "funds", "holdings", "valuation"
        and "percChange". Rows whose signal is missing are left out.

    Raises:
        IndexError: If ``df_input`` has no rows.
    """
    # .iloc makes the "first row" lookup explicitly positional; plain [0] on a
    # label-indexed Series relies on a deprecated fallback.
    instrument_type = df_input["type"].iloc[0]
    funds = df_input["close"].iloc[0] * 8
    holdings = 0
    valuation = funds
    # A trade at the current price never changes the valuation, so the
    # starting valuation is the baseline. Reading valuation_array[0] instead
    # failed with IndexError whenever the very first row was a trade.
    initial_valuation = valuation
    percChange = 0

    # A left join emits one row per matching right-hand label, so duplicate
    # labels in df_input would insert extra rows. Keep the first close per
    # label so the result has exactly one row per row of df_signals.
    close = df_input["close"]
    close = close[~close.index.duplicated(keep="first")]
    merged = df_signals[["signal"]].join(close)

    records = []
    for timestamp, raw_signal, price in zip(merged.index, merged["signal"], merged["close"]):
        if pd.isna(raw_signal):
            # No usable signal for this timestamp; skip it explicitly rather
            # than through a swallowed exception.
            continue

        signal = int(raw_signal)
        # Without a price no trade can be executed; the state is carried over.
        if signal != 0 and not pd.isna(price):
            if signal > 0 and signal * price > funds:
                # Not enough funds for the requested buy: do nothing.
                signal = 0
            elif signal < 0 and holdings + signal < 0:
                # Cannot sell more than is held: sell everything instead.
                # The sign matters here, a positive value would be a buy.
                signal = -holdings

            funds -= signal * price
            holdings += signal
            valuation = funds + holdings * price
            percChange = (valuation - initial_valuation) / initial_valuation * 100.

        records.append(
            (timestamp, instrument_type, raw_signal, funds, holdings, valuation, percChange)
        )

    # build and return the performance df of this stock
    df_performance = pd.DataFrame(
        [record[1:] for record in records],
        index=[record[0] for record in records],
        columns=["type", "signal", "funds", "holdings", "valuation", "percChange"],
    )
    return df_performance