Python 3.x: a slight change to the LSTM input causes learning to fail

Tags: python-3.x, neural-network, keras, recurrent-neural-network

The problem is as follows: I have several toy networks that read in sentences as sequences of words encoded as w2v vectors (size 300). The aim of the game is to classify these sentences into two classes. Simple enough. The separation is not easy, and my best network manages an accuracy of around 60-65%. I then obtained some (theoretically) useful extra information, relevant to the classification, that applies to many of the words in these sentences; it comes in the form of small floats. I took these floats and appended them to the w2v vectors of the words they apply to, and appended a zero to the vectors of words that have no such information. Vectors of size 301 are then fed in... and now everything sits at around 50%. What happened?
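For illustration, the change described above amounts to something like the following (a minimal sketch with hypothetical names; `w2v_vec` stands for one 300-dimensional word vector and `extra` for the optional float):

    import numpy as np

    # Append the extra float to a word's w2v vector, or 0.0 when the word
    # has no such information, producing the 301-dimensional inputs above.
    def augment_word_vector(w2v_vec, extra=None):
        value = extra if extra is not None else 0.0
        return np.concatenate([w2v_vec, [value]])  # shape (300,) -> (301,)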

I can accept that the extra information may in practice be useless or even wrong, but it seems to me that it is either correlated with the outcome (or anti-correlated), or it is noise (or so complex that the correlation has yet to be found, in which case we may as well treat it as noise). Either way, the network should either learn to exploit the extra information or learn to ignore it. I don't see why I can't at least recover my previous accuracy, especially given that this is such a small tweak to the input data.

Following a request in the comments, I have attached the relevant code; apologies for its hacky nature, this is all fairly experimental at the moment.

The relevant code:

    self.log('Beginning Training')
    # fix random seed for reproducibility
    numpy.random.seed(7)
    # load the feature vectors and labels
    X,y = self.load_test_train_data(corpus_manager,sentiment_analyser)


    # truncate and pad input sequences
    max_review_length = 25
    self.log('Padding input vectors to length ', max_review_length)
    X = sequence.pad_sequences(X, maxlen=max_review_length)
    self.log('Padding complete.')
    # create the model
    embedding_vecor_length = 32  # unused: the inputs are already dense vectors, so no Embedding layer is added



    self.log('Initialising model.')
    model = Sequential()

    # stacked LSTMs over 25 timesteps of 301-dim vectors (300 w2v dims + 1 sentiment float)
    model.add(LSTM(50, return_sequences=True, input_shape=(max_review_length, 301)))
    model.add(LSTM(50, return_sequences=True))
    model.add(LSTM(10))
    model.add(Dense(1, activation='sigmoid'))  # binary classification

    model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])

    self.log(model.summary())
    self.log('Training model.')


    avg_score = 0


    y = numpy.array(y)


    X_train = None
    X_test = None
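    # NB: the same `model` instance is carried across the folds below, so each
    # fold continues training on weights already fitted to earlier splits;
    # re-initialising the model inside the loop would give unbiased CV scores.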

    for train_index, test_index in self.kf.split(X):
        #for train_index, test_index in zip(self.train_indicies, self.test_indicies):

        X_train = X[train_index]
        X_test = X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        self.log('Split into ', len(X_train), ' training and ', len(X_test),' testing events.')


        y_train = numpy.array(y_train)
        X_train = numpy.array(X_train)

        model.fit(X_train, y_train, epochs=250, batch_size=64, shuffle=True)
        # Final evaluation of the model
        scores = model.evaluate(X_test, y_test, verbose=1)

        avg_score += scores[1]

        # keep the best-scoring model seen across the folds
        if scores[1] > self.model_score:
            # so we know which split the final model was actually trained and tested on
            self.optimum_X_train, self.optimum_X_test = X_train, X_test
            self.model_score = scores[1]
            self.model = model
            self.optimum_y_train, self.optimum_y_test = y_train, y_test
And this function:

def load_test_train_data(self, corpus_manager,sentiment_analyser,
        input_database='../data/processed_articles/ndaq_articles.csv',
        company_names_csv='../data/SP_500_company_list.csv',
        company_history_list='../data/financial_trading_history/sp_trading_history.pkl',
        previously_transformed_articles='../data/previously_transformed_articles.pkl'
        ):
    """
    Retrieve a set of training and testing data from the corpus manager
    in prepration for training or validation. 
    Parameters
    ----------
    corpus_manager: A CorpusManager class already fitted with processed articles
    Returns
    -------
        X_train, y_train: Training feature space and target space data points 
        X_test, y_test: Orthogonal test feature space and target space data points to the training set
    """



    article_manager = ArticleManager(input_database=input_database,
                                    verbosity=0)  

    data  = pandas.read_csv(input_database, encoding = "ISO-8859-1") 
    data = data[ data.title.str.contains("UPDATE") == False]
    data = data[ data.title.str.contains("DIARY") == False]

    '''
    plan: for each article title,
        get its text feature vector (get_text_feautre_vector)
        get the sentiment of that vector (sentiment_analyser.evalaute_article)
        append it to the end of the feature vector
    '''
    sent_model_class = sentiment_analyser

    X = []
    if not os.path.isfile(previously_transformed_articles):
        self.log(" Transforming and storing articles.")
        with tqdm(total=len(data.index)) as pbar:
            for index, row in data.iterrows():

                word_and_sent_vecs = corpus_manager.word2vec_and_sent(string = row['title'], sentiment_analyser = self.sentiment_analyser)


                X.append(word_and_sent_vecs)
                pbar.update(1)

        with open(previously_transformed_articles, 'wb') as handle:
            pickle.dump(X, handle, protocol=pickle.HIGHEST_PROTOCOL)
    else:
        with open(previously_transformed_articles, 'rb') as handle: 
            X = pickle.load(handle)


    hist = CompanyHistoryManager(article_manager.get_companies())
    hist.load_history(company_history_list)
    company_names_symbols  = pandas.read_csv(company_names_csv, encoding = "ISO-8859-1") 
    self.log(company_names_symbols)
    self.log(company_names_symbols.columns)

    Y=[] 
    if not os.path.isfile('stock_motion.pkl'):
        self.log(" Creating data on stock movements.")

        with tqdm(total=len( data.index )) as pbar:
            for _, row in data.iterrows():

                #sym = company_names_symbols.loc[company_names_symbols['Symbol'] == row['symbol']]
                stock_rose = hist.did_it_rise(row['date'], row['symbol'])
                #print(stock_rose)
                Y.append(stock_rose)

                pbar.update(1)

        with open('stock_motion.pkl', 'wb') as handle:
            pickle.dump(Y, handle, protocol=pickle.HIGHEST_PROTOCOL)

    else:
        with open('stock_motion.pkl', 'rb') as handle: 
            Y = pickle.load(handle)

    #print(Y)


    # remove all the entries without a usable label (TODO: do this more efficiently)
    self.log('Removing faulty data (Where we could not obtain the stock movement data)')
    X_purged = []
    Y_purged = []
    count = 0
    count1 = 0
    with tqdm(total=len(X)) as pbar:
        for i,j in zip(X,Y):
            if j == 'Invalid':
                count += 1

            elif  j == 'Static':
                count1 += 1  
            else:
                X_purged.append(i)
                Y_purged.append(j)

            pbar.update(1)

    self.log(100 * (count / len(X)), '% of the data was removed because the stock movement data could not be obtained.')
    self.log(100 * (count1 / len(X)), '% of the data was removed because the resulting stock movement was zero.')
    X = X_purged
    Y = Y_purged

    # Separate the articles into test and train - we used a fixed random state
    # so that our results are the same between runs

    #X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=42)

    self.kf = KFold(n_splits=5, random_state=42, shuffle=True)

    return X, Y  # unpacked by the caller as X, y
And finally, the function that may be at fault:

def word2vec_and_sent(self, string, sentiment_analyser):

    w2v = self.w2v_model
    words = string.lower().split()
    vectors = []
    word_sent = [0]  # sentiment value appended to each word vector

    skip = False
    skip_count = 0
    for word in words:
        skip = False
        try:
            word_vec = w2v.wv[word]
        except KeyError:  # word not in the w2v vocabulary
            skip = True
            skip_count = skip_count + 1
        if not skip:

            if word in self.keywords:
                bm25_vec = self.get_text_feautre_vector(word)
                word_sent = sentiment_analyser.evalaute_article(np.array(bm25_vec).reshape(1, -1))
            else:
                a=0
                wordsent = [a]

            vec = word_vec.tolist()
            vec.append(word_sent[0])  # 300 w2v dims + 1 sentiment float = 301 dims
            vectors.append(vec)

    return vectors

Could you share your implementation details? This sounds like it is caused by an implementation bug, unless you have skipped some basic scaling/preprocessing of the last dimension.
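If scaling is the issue, one quick check is to standardise the appended column so that it is on a footing comparable to the w2v dimensions. A minimal sketch, assuming `X` is the padded array of shape `(n_samples, 25, 301)` from the question, with the extra float in the last column:

    import numpy as np

    # z-score the appended feature (column index 300) across the whole dataset;
    # the 300 w2v dimensions are left untouched
    X = np.asarray(X, dtype=np.float32)
    col = X[:, :, 300]
    std = col.std()
    if std > 0:
        X[:, :, 300] = (col - col.mean()) / std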