Python 3.x 轻微的LSTM输入变化会导致学习失败
问题如下:我有几个玩具网络,它们读入的句子是由w2v向量(维度300)编码的单词序列。目标是把这些句子分成两类,任务本身很简单,但这两类并不容易区分,我最好的网络也只能达到约60-65%的准确率。后来我得到了一些(理论上)有用的额外信息,这些信息与最终的分类有关,适用于这些句子中的许多单词,并以较小的浮点数形式给出。我把这些浮点数附加到它们所对应的单词的w2v向量末尾,对没有这类信息的单词则附加零。于是输入变成了维度为301的向量……而现在所有网络的准确率都在50%左右。这是怎么回事? 我可以理解,额外的信息实际上可能是无用的,甚至是错误的,但在我看来,它要么与结果相关(或者反相关),要么是噪声(或者关系过于复杂、相关性尚未被发现,因此只能当作噪声)。无论是哪种情况,网络都应该要么学会利用这些额外信息,要么学会忽略它。我不明白为什么我连之前的准确率都达不到,尤其是考虑到这只是对输入数据的一个很小的改动。 应评论中的要求,我附上了相关代码——代码比较粗糙,请见谅,目前这一切都还处于实验阶段。 相关代码Python 3.x 轻微的LSTM输入变化会导致学习失败,python-3.x,neural-network,keras,recurrent-neural-network,Python 3.x,Neural Network,Keras,Recurrent Neural Network,问题如下:我有几个玩具网络,它们读入的句子是由w2v向量(维度300)编码的单词序列。目标是把这些句子分成两类,任务本身很简单,但这两类并不容易区分,我最好的网络也只能达到约60-65%的准确率。后来我得到了一些(理论上)有用的额外信息,这些信息与最终的分类有关,适用于这些句子中的许多单词,并以较小的浮点数形式给出。我把这些浮点数附加到它们所对应的单词的w2v向量末尾,对没有这类信息的单词则附加零。于是输入变成了维度为301的向量……而现在所有网络的准确率都在50%左右。这是怎么回事
# Train a stacked-LSTM binary classifier with K-fold cross-validation.
# NOTE(review): this appears to be the body of a method whose ``def`` line is
# not shown; ``self``, ``corpus_manager`` and ``sentiment_analyser`` are
# expected to come from that enclosing scope.
self.log('Beginning Training')
# fix random seed for reproducibility
numpy.random.seed(7)
# Load the per-sentence feature sequences and their labels.
X, y = self.load_test_train_data(corpus_manager, sentiment_analyser)
# truncate and pad input sequences
max_review_length = 25
self.log('Padding input vectors to length ', max_review_length)
# pad_sequences defaults to dtype='int32', which would truncate the float
# word-vector components (and the appended sentiment value) towards zero.
# Request a float dtype explicitly so the features survive padding.
X = sequence.pad_sequences(X, maxlen=max_review_length, dtype='float32')
self.log('Padding complete.')
# create the model
self.log('Initialising model.')
# Derive the per-word feature width from the data instead of hard-coding 301,
# so the network always matches what the feature extractor produced.
n_features = X.shape[2]
model = Sequential()
model.add(LSTM(50, return_sequences=True,
               input_shape=(max_review_length, n_features)))
model.add(LSTM(50, return_sequences=True))
model.add(LSTM(10))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])
# NOTE(review): Keras' summary() prints the table itself; what is logged here
# is only its return value — confirm that is what is wanted.
self.log(model.summary())
self.log('Training model.')
avg_score = 0
y = numpy.array(y)
# Pre-declare so the names exist even if the splitter yields no folds.
X_train = None
X_test = None
# NOTE(review): the same model instance is trained across all folds, so from
# the second fold onwards the "test" fold has already been seen in training.
# Rebuild the model per fold if an unbiased cross-validation score is needed.
for train_index, test_index in self.kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    self.log('Split into ', len(X_train), ' training and ', len(X_test),' testing events.')
    model.fit(X_train, y_train, epochs=250, batch_size=64, shuffle=True)
    # Final evaluation of the model on the held-out fold;
    # scores[1] is the 'accuracy' metric requested in compile().
    scores = model.evaluate(X_test, y_test, verbose=1)
    avg_score += scores[1]
    self.model = model
    if scores[1] > self.model_score:
        # so we know which set the final model was actually tested and trained over
        self.optimum_X_train, self.optimum_X_test = X_train, X_test
        self.optimum_y_train, self.optimum_y_test = y_train, y_test
        # Record the new best score; previously only the misspelled attribute
        # was written, so this comparison never advanced.
        self.model_score = scores[1]
        # Kept for backward compatibility with any reader of the old name.
        self.model_saccuracy = scores[1]
还有这个函数
def load_test_train_data(self, corpus_manager, sentiment_analyser,
                         input_database='../data/processed_articles/ndaq_articles.csv',
                         company_names_csv='../data/SP_500_company_list.csv',
                         company_history_list='../data/financial_trading_history/sp_trading_history.pkl',
                         previously_transformed_articles='../data/previously_transformed_articles.pkl'
                         ):
    """
    Build the feature sequences and stock-movement labels used for training.

    Article titles are read from ``input_database``, converted to per-word
    feature vectors via ``corpus_manager.word2vec_and_sent`` and paired with
    the result of ``did_it_rise`` for the article's date and symbol.  Both
    the features and the labels are cached on disk as pickles and reused on
    later calls.  Samples labelled ``'Invalid'`` or ``'Static'`` are dropped.

    As a side effect ``self.kf`` is set to a shuffled 5-fold ``KFold``
    splitter with a fixed random state.

    Parameters
    ----------
    corpus_manager: object providing ``word2vec_and_sent(string=..., sentiment_analyser=...)``
    sentiment_analyser: analyser forwarded to ``word2vec_and_sent``; when
        ``None`` the instance's ``self.sentiment_analyser`` is used instead
    input_database: CSV of processed articles (``title``, ``date`` and ``symbol`` columns are read)
    company_names_csv: CSV of company names/symbols (only logged here)
    company_history_list: path handed to ``CompanyHistoryManager.load_history``
    previously_transformed_articles: pickle cache for the feature sequences

    Returns
    -------
    X, Y: list of per-title feature sequences and the matching list of labels

    Raises
    ------
    ValueError: if the features and labels have different lengths (which
        means the two on-disk caches are out of sync with each other)
    """
    # Honour the analyser passed by the caller; fall back to the instance's
    # own analyser only when none was supplied.
    if sentiment_analyser is None:
        sentiment_analyser = self.sentiment_analyser

    article_manager = ArticleManager(input_database=input_database,
                                     verbosity=0)
    data = pandas.read_csv(input_database, encoding="ISO-8859-1")
    # Drop titles containing either tag.  The explicit ``== False`` also
    # drops rows whose title is missing (NaN compares unequal to False).
    for tag in ("UPDATE", "DIARY"):
        data = data[data.title.str.contains(tag) == False]  # noqa: E712

    # --- Features -------------------------------------------------------
    # The cache is keyed only by file name: delete the file whenever the
    # feature extraction changes (e.g. a different vector width), otherwise
    # the stale vectors are silently reused.
    # NOTE: pickle.load can execute arbitrary code; these files are assumed
    # to be local caches written by this method, never untrusted input.
    X = []
    if not os.path.isfile(previously_transformed_articles):
        self.log(" Transforming and storing articles.")
        with tqdm(total=len(data)) as pbar:
            for _, row in data.iterrows():
                X.append(corpus_manager.word2vec_and_sent(
                    string=row['title'],
                    sentiment_analyser=sentiment_analyser))
                pbar.update(1)
        with open(previously_transformed_articles, 'wb') as handle:
            pickle.dump(X, handle, protocol=pickle.HIGHEST_PROTOCOL)
    else:
        with open(previously_transformed_articles, 'rb') as handle:
            X = pickle.load(handle)

    hist = CompanyHistoryManager(article_manager.get_companies())
    hist.load_history(company_history_list)
    company_names_symbols = pandas.read_csv(company_names_csv, encoding="ISO-8859-1")
    self.log(company_names_symbols)
    self.log(company_names_symbols.columns)

    # --- Labels ---------------------------------------------------------
    stock_motion_cache = 'stock_motion.pkl'
    Y = []
    if not os.path.isfile(stock_motion_cache):
        self.log(" Creating data on stock movements.")
        with tqdm(total=len(data.index)) as pbar:
            for _, row in data.iterrows():
                Y.append(hist.did_it_rise(row['date'], row['symbol']))
                pbar.update(1)
        with open(stock_motion_cache, 'wb') as handle:
            pickle.dump(Y, handle, protocol=pickle.HIGHEST_PROTOCOL)
    else:
        with open(stock_motion_cache, 'rb') as handle:
            Y = pickle.load(handle)

    # zip() below would silently truncate to the shorter list, so refuse to
    # continue with inputs that are not the same length.
    if len(X) != len(Y):
        raise ValueError(
            'Feature/label length mismatch ({} vs {}); the cached pickles '
            'are out of sync - delete them and rerun.'.format(len(X), len(Y)))

    # --- Remove samples without a usable label ---------------------------
    self.log('Removing faulty data (Where we could not obtain the stock movement data)')
    total = len(X)
    X_purged = []
    Y_purged = []
    invalid_count = 0
    static_count = 0
    for features, label in tqdm(zip(X, Y), total=total):
        if label == 'Invalid':
            invalid_count += 1
        elif label == 'Static':
            static_count += 1
        else:
            X_purged.append(features)
            Y_purged.append(label)
    # Guard the percentages against an empty data set.
    invalid_pct = 100*(invalid_count/total) if total else 0.0
    static_pct = 100*(static_count/total) if total else 0.0
    self.log(invalid_pct,'%% of the data was removed due to an inability to obtain the data.')
    self.log(static_pct,'%% of the data was removed due to the resultant stock movement being zero.')
    X = X_purged
    Y = Y_purged

    # Fixed random state so that the folds are the same between runs.
    self.kf = KFold(n_splits=5, random_state=42, shuffle=True)
    # The caller unpacks ``X, y = self.load_test_train_data(...)``.
    return X, Y
最后,是可能有问题的函数
def word2vec_and_sent(self, string, sentiment_analyser):
    """
    Encode a sentence as a list of word vectors with a sentiment value appended.

    The sentence is lower-cased and split on whitespace.  Each word found in
    ``self.w2v_model.wv`` contributes its vector (as a list) with one extra
    trailing element: the first element of
    ``sentiment_analyser.evalaute_article(...)`` for words in
    ``self.keywords``, and ``0`` for every other word.  Words missing from
    the word-vector model are skipped.

    Parameters
    ----------
    string: the sentence to encode
    sentiment_analyser: object providing ``evalaute_article``

    Returns
    -------
    vectors: list with one feature list per in-vocabulary word
    """
    w2v = self.w2v_model
    vectors = []
    for word in string.lower().split():
        try:
            word_vec = w2v.wv[word]
        except KeyError:
            # Out-of-vocabulary word: leave it out of the sequence.
            continue
        if word in self.keywords:
            bm25_vec = self.get_text_feautre_vector(word)
            word_sent = sentiment_analyser.evalaute_article(
                np.array(bm25_vec).reshape(1, -1))
            sentiment = word_sent[0]
        else:
            # Reset for every non-keyword.  A misspelled assignment used to
            # leave the previous keyword's sentiment in place, so it leaked
            # onto all following words in the sentence.
            sentiment = 0
        vec = word_vec.tolist()
        vec.append(sentiment)
        vectors.append(vec)
    return vectors
您能否分享一下您的实现细节?这听起来像是由实现上的错误引起的,除非您忽略了对最后一个维度的一些基本缩放/预处理