Python 处理数据时间序列
标签:python, pandas, tensorflow, keras, time-series
我正在努力学习如何处理时间序列。这是我现在的代码:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader as web
import datetime as dt
from sklearn.preprocessing import MinMaxScaler
# ***** TRAIN ***** #
# Load data
company = 'CSCO'
start = dt.datetime(2012, 1, 1)
end = dt.datetime(2020, 1, 1)
data = web.DataReader(company, 'yahoo', start, end)

# Scaling data
# The scaler is fitted on the training closes only; the same fitted scaler is
# what later transforms the test inputs and inverts the predictions.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data['Close'].values.reshape(-1, 1))

# Creating the timeseries
# Each sample is a window of `prediction_days` consecutive scaled closes and
# its target is the scaled close immediately after that window.
prediction_days = 60
windows = np.lib.stride_tricks.sliding_window_view(
    scaled_data[:, 0], prediction_days
)
# The last window has no following value to predict, so it is dropped.
# .copy() turns the read-only strided view into an ordinary writable array.
x_train = windows[:-1].copy()
y_train = scaled_data[prediction_days:, 0].copy()

# Adapting data to my model: add a trailing axis of length 1, giving
# (number of windows, prediction_days, 1).
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
# ***** TEST ***** #
# Download the evaluation period: from 2020-01-01 up to the moment of running.
test_start = dt.datetime(2020, 1, 1)
test_end = dt.datetime.now()
test_data = web.DataReader(company, 'yahoo', test_start, test_end)
actual_prices = test_data['Close'].values

# Join both 'Close' series so the first test windows can reach back into the
# last `prediction_days` rows that precede the test period.
total_dataset = pd.concat((data['Close'], test_data['Close']), axis=0)
first_input = len(total_dataset) - len(test_data) - prediction_days
model_inputs = total_dataset[first_input:].values
model_inputs = scaler.transform(model_inputs.reshape(-1, 1))

# One window of `prediction_days` scaled closes per step to predict.
x_test = np.array([
    model_inputs[stop - prediction_days:stop, 0]
    for stop in range(prediction_days, len(model_inputs))
])
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
# RNN MODELS
# Maps a display name to the callable that builds each model being compared.
dic_models = {
    'Vanilla': VanillaLSTM,
    'Stacked': StackedLSTM,
    'Bidirectional': BidirectionalLSTM,
}

# Predictions are collected in the dictionary's iteration order.
all_predicted_prices = []
for build_model in dic_models.values():
    # Build the model
    model = build_model(x_train.shape[1], 1)
    # Fit the model
    model.fit(x_train, y_train, epochs=2, batch_size=32)
    # Predict, then undo the scaling applied to the inputs
    scaled_prediction = model.predict(x_test)
    predicted_prices = scaler.inverse_transform(scaled_prediction)
    all_predicted_prices.append(predicted_prices)
# Plots
# Draw the actual closes and every model's prediction on one shared figure.
plt.figure(figsize=(20, 4))
plt.plot(actual_prices, label='Actual price')
for model, prediction in zip(dic_models, all_predicted_prices):
    # The dictionary key doubles as the legend entry for that model.
    plt.plot(prediction, label='{}'.format(model))
plt.xlabel('time')
plt.ylabel('{} Share Price'.format(company))
plt.title('Comparasió')
plt.legend()
plt.show()
我实现的第一个目标是,我成功地做出了一个有着良好结果的预测
但现在我对数据处理不满意。我的想法是:
Pandas 不允许将数据直接拆分为训练/验证/测试,但是 Sklearn 允许(训练/测试),我可以用一个小技巧来得到验证集:
from sklearn.model_selection import train_test_split

company = 'CSCO'
start = dt.datetime(2012, 1, 1)
end = dt.datetime.now()
df = web.DataReader(company, 'yahoo', start, end)

# Scaling data
# NOTE(review): the scaler is fitted on the whole series, so the val/test
# range influences how the training data is scaled — consider fitting it on
# the training slice only and calling transform() on the other two.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df['Close'].values.reshape(-1, 1))

# Splitting data into train/val/test
# shuffle=False keeps the rows in their original (chronological) order.
# train_test_split shuffles by default, which would mix later rows into the
# training set and break the consecutive windows built from each split.
# The second call carves the validation set off the end of the training part.
train, test = train_test_split(scaled_data, test_size=0.2, shuffle=False)
train, val = train_test_split(train, test_size=0.1, shuffle=False)
但是现在我不知道如何在不使用 for 循环的情况下构造时间序列窗口。也许 tensorflow 或者 Pandas 有相应的功能,但是我找不到。
你通常是怎么做的?
在此之后,我认为只需再做一次重塑(reshape),数据就可以输入到我的模型中进行训练了。