Python TensorFlow input_fn 速度慢,CPU/GPU 利用率不足

Python TensorFlow input_fn 速度慢,CPU/GPU 利用率不足,python,pandas,numpy,tensorflow,tensorflow-gpu,Python,Pandas,Numpy,Tensorflow,Tensorflow Gpu,我正在按照 TensorFlow wide and deep 教程中的框架开发一个 wide and deep 模型。当以旧的方式(用 pandas 加载整个数据集、转换为张量、再传给 input_fn)构建模型时,模型运行良好,这在 CPU 上运行没有问题。但是,要让它在 GPU 上工作,数据集太大,无法装入 GPU 显存,因此需要分批处理 我尝试使用 pandas_input_fn 把数据分批送到显卡上,结果发现会先出现一个活动高峰,然后在准备下一批数据时出现长时间的停顿。奇怪的是,即使在只有 CPU 的机器上运行,也会出现这种情况。停顿的

我正在按照 TensorFlow wide and deep 教程中的框架开发一个 wide and deep 模型。当以旧的方式(用 pandas 加载整个数据集、转换为张量、再传给 input_fn)构建模型时,模型运行良好,这在 CPU 上运行没有问题。但是,要让它在 GPU 上工作,数据集太大,无法装入 GPU 显存,因此需要分批处理。

我尝试使用 pandas_input_fn 把数据分批送到显卡上,结果发现会先出现一个活动高峰,然后在准备下一批数据时出现长时间的停顿。奇怪的是,即使在只有 CPU 的机器上运行,也会出现这种情况。停顿的长度几乎完全相同,因此这不只是因为显卡处理简单模型的速度比 CPU 快。它似乎总是要等到上一批处理完成之后,才开始加载下一批。我增加了模型的复杂度,以确保它不会太容易计算,但问题依旧。我尝试过增加分配给 pandas_input_fn 的线程数,也尝试过把队列容量增大到远超合理范围(数据集大小的 10 倍),这有一点帮助,但作用不大。我不确定变慢是发生在入队还是出队时,但经过一周的排查,我仍然没能解决这个问题。

我处理的数据有 117 列、40 万行。我写了一个通用脚本,用生成的假数据来复现这个问题。不过,假数据的列数远少于真实数据,因此各步骤之间的停顿虽然没有真实数据那么长,但仍然很明显。代码如下:

import tensorflow as tf
import pandas as pd
import numpy as np
import logging
import time
import datetime
import tempfile
from math import log2
from sqlalchemy import create_engine
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Imputer
from sklearn.externals import joblib
from tensorflow.contrib.learn.python.learn import monitors as monitor_lib


# Root logger: append INFO-and-above records to Classifier.log.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s: %(message)s',
    level=logging.INFO,
    filename='Classifier.log',
    filemode='a',
)

# Mirror the same records to the console using an identical line format.
formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s')
console = logging.StreamHandler()
console.setLevel(logging.INFO)
console.setFormatter(formatter)
logging.getLogger().addHandler(console)

# Issued at DEBUG level, i.e. below the INFO threshold configured above.
logging.debug('Main method started')
tf.logging.set_verbosity(tf.logging.INFO)

# Timestamp captured when the module is loaded, then the current date/time
# is written to the log.
start_time = time.perf_counter()
logging.info(datetime.datetime.today())

# Scratch directory created at import time.
model_dir = tempfile.mkdtemp()

# Name of the target column fed to the estimator as the label.
LABEL_COLUMN = "t"

# Columns turned into hashed categorical features.
CATEGORICAL_COLUMNS = ["a", "b", "c", "d", "e"]

# Columns turned into numeric features.
CONTINUOUS_COLUMNS = ["f", "g", "h", "i"]

# Every feature column handed to the input functions (label excluded).
ALL_COLUMNS = [
    "a", "b", "c", "d", "e", "f", "g", "h",
    "i", "j", "k", "l", "m", "n", "o", "p",
    "q", "r", "s", "u", "v", "x", "y", "z",
]

# Column order of the generated frames: all features followed by the label.
full_set = ALL_COLUMNS + [LABEL_COLUMN]


model_type = ""

def input_fn(num_epochs=None, shuffle=False):
    """Build the training input function over a synthetic 400,000-row frame.

    The frame has one column per entry of ``full_set``; the columns listed in
    ``ALL_COLUMNS`` become the features and ``LABEL_COLUMN`` becomes the label.

    Args:
        num_epochs: Forwarded unchanged to ``pandas_input_fn``.
        shuffle: Forwarded unchanged to ``pandas_input_fn``.

    Returns:
        The callable produced by ``tf.estimator.inputs.pandas_input_fn``.
    """
    # randint's upper bound is exclusive, so the previous (0, 1) range filled
    # the whole frame -- label included -- with zeros. (0, 2) yields 0/1 values.
    df = pd.DataFrame(
        np.random.randint(0, 2, size=(400000, len(full_set))),
        columns=full_set,
    )
    df.fillna(0, inplace=True)

    # round()/astype() return new objects; assign them back so the conversions
    # actually take effect. A freshly built frame already carries a 0..n-1
    # index, so no index reset is needed.
    df[CATEGORICAL_COLUMNS] = df[CATEGORICAL_COLUMNS].round(0).astype(int)
    df[CONTINUOUS_COLUMNS] = df[CONTINUOUS_COLUMNS].astype(float)

    # NOTE(review): the returned callable is invoked later by the estimator;
    # confirm this device scope still applies at that point.
    with tf.device('/CPU:0'):
        return tf.estimator.inputs.pandas_input_fn(
            x=df[ALL_COLUMNS],
            y=df[LABEL_COLUMN],
            batch_size=100000,
            num_epochs=num_epochs,
            shuffle=shuffle,
            num_threads=4,
            queue_capacity=400000,
            target_column=LABEL_COLUMN)

def evaluation_input_fn(num_epochs=None, shuffle=False):
    """Build the evaluation input function over a synthetic 200,000-row frame.

    The frame has one column per entry of ``full_set``; the columns listed in
    ``ALL_COLUMNS`` become the features and ``LABEL_COLUMN`` becomes the label.
    The whole frame is served as a single batch of 200,000 rows.

    Args:
        num_epochs: Forwarded unchanged to ``pandas_input_fn``.
        shuffle: Forwarded unchanged to ``pandas_input_fn``.

    Returns:
        The callable produced by ``tf.estimator.inputs.pandas_input_fn``.
    """
    # randint's upper bound is exclusive, so the previous (0, 1) range filled
    # the whole frame -- label included -- with zeros. (0, 2) yields 0/1 values.
    df = pd.DataFrame(
        np.random.randint(0, 2, size=(200000, len(full_set))),
        columns=full_set,
    )
    df.fillna(0, inplace=True)

    # round()/astype() return new objects; assign them back so the conversions
    # actually take effect. A freshly built frame already carries a 0..n-1
    # index, so no index reset is needed.
    df[CATEGORICAL_COLUMNS] = df[CATEGORICAL_COLUMNS].round(0).astype(int)
    df[CONTINUOUS_COLUMNS] = df[CONTINUOUS_COLUMNS].astype(float)

    return tf.estimator.inputs.pandas_input_fn(
        x=df.loc[:, ALL_COLUMNS],
        y=df.loc[:, LABEL_COLUMN],
        batch_size=200000,
        num_epochs=num_epochs,
        shuffle=shuffle,
        num_threads=1,
        target_column=LABEL_COLUMN)

def classifier(model_dir):
    """Assemble the wide-and-deep estimator used by this script.

    The wide (linear) side receives the hashed categorical columns, one
    bucketized column and the crossed columns; the deep (DNN) side receives
    the numeric columns plus an embedding of each hashed categorical column.

    Args:
        model_dir: Directory passed to the run configuration as ``model_dir``.

    Returns:
        The constructed ``tf.estimator.DNNLinearCombinedClassifier``.
    """
    with tf.device('/CPU:0'):
        logging.info('Parsing continuous columns into tensors')
        with tf.variable_scope("Continuous_Features"):
            continuous_columns = [
                tf.feature_column.numeric_column(name)
                for name in CONTINUOUS_COLUMNS
            ]

        logging.info('Parsing categorical columns into tensors')
        with tf.variable_scope("Categorical_Features"):
            categorical_columns = [
                tf.feature_column.categorical_column_with_hash_bucket(
                    name, hash_bucket_size=1000, dtype=tf.int32)
                for name in CATEGORICAL_COLUMNS
            ]

        logging.info('Creating embedded columns')
        with tf.variable_scope("Embedded_Columns"):
            # Built before the bucketized column is appended below, so only
            # the hashed categorical columns get an embedding.
            embedded_columns = [
                tf.feature_column.embedding_column(column, dimension=8)
                for column in categorical_columns
            ]

        logging.info('Bucketizing age')
        bucketized = tf.feature_column.bucketized_column(
            continuous_columns[0],
            boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75])
        categorical_columns.append(bucketized)

        logging.info('Creating crossed columns')
        # (keys, hash_bucket_size) for every feature cross, in creation order.
        cross_specs = [
            (["a", "b"], int(1e4)),
            (["a", "c"], int(1e4)),
            (["a", "g"], int(1e4)),
            (["b", "n"], int(1e4)),
            (["m", "v"], int(1e4)),
            (["a", "m", "v"], int(1e6)),
        ]
        crossed_columns = [
            tf.feature_column.crossed_column(keys, hash_bucket_size=buckets)
            for keys, buckets in cross_specs
        ]

        wide_columns = categorical_columns + crossed_columns

        deep_columns = continuous_columns + embedded_columns

    logging.info('Creating Classifier')
    with tf.variable_scope('Wide_and_Deep'):
        run_config = tf.contrib.learn.RunConfig(
            log_device_placement=True,
            save_summary_steps=100,
            save_checkpoints_steps=100,
            keep_checkpoint_max=5,
            model_dir=model_dir,
            num_cores=0,
            gpu_memory_fraction=1,
            tf_random_seed=3)
        clf = tf.estimator.DNNLinearCombinedClassifier(
            linear_feature_columns=wide_columns,
            dnn_feature_columns=deep_columns,
            dnn_hidden_units=[400, 200, 300],
            n_classes=2,
            config=run_config)

    return clf


def build_model(model_dir, train_steps):
    """Train the classifier, evaluate it once and print the resulting metrics.

    Args:
        model_dir: Directory for the estimator; a fresh temporary directory is
            created when this value is falsy.
        train_steps: Passed to ``train`` as ``max_steps``.

    Returns:
        The trained estimator.
    """
    if not model_dir:
        model_dir = tempfile.mkdtemp()

    logging.info('Declaring and training classifier')
    estimator = classifier(model_dir=model_dir)
    estimator.train(input_fn=input_fn(), max_steps=train_steps)

    logging.info('Starting model evaluation')
    metrics = estimator.evaluate(input_fn=evaluation_input_fn(), steps=1)
    logging.debug(metrics)
    for metric_name, metric_value in metrics.items():
        print(metric_name, metric_value)

    return estimator


def main():
    """Script entry point: build, train and evaluate the model for 20 steps."""
    # NOTE(review): these session options are constructed but never passed to
    # build_model or anything else in this function.
    session_options = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=True,
    )
    session_options.gpu_options.allow_growth = True

    build_model(train_steps=20, model_dir='C://TFLogs//DWDevGPU')

# Run the train/evaluate workflow only when this file is executed as a script.
if __name__ == '__main__':
    main()
我真的很想得到一些帮助,弄清楚如何将表格型(SQL)数据以足够快的速度分批送入 CPU/GPU,使其跟得上计算速度。不一定非得用 pandas 或 numpy,任何合理的方案我都愿意采用。到目前为止,我还没有尝试序列化为 TFRecord,因为认为从磁盘读取会比从内存读取更快似乎很荒谬,但我愿意尝试任何方法。提前感谢您的帮助。