TensorFlow XLA hangs with GPU and CPU at 0% usage

I wanted to try out the XLA backend of TensorFlow 1.1.0, built from source with XLA support, on Ubuntu 16.04. Without the XLA backend my model runs fine: a single training step takes about 0.8 seconds on my GTX 1080 GPU. However, when I enable the XLA compiler, the program reaches the first session.run call in my model and then just hangs there, with both CPU and GPU at roughly 0% usage.

import random

import tensorflow as tf

import model  # the user's own model definition

# Enable global XLA JIT compilation for every session.run call.
config = tf.ConfigProto()
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
sess = tf.Session(config=config)
m = model.CharacterTranslator(sess, MAX_LENGTH)
m.init_variables()

best_cost = None
m.restore('/media/chase/98d61322-9ea7-473e-b835-8739c77d1e1e/Translator/model.chk')
while True:
    # session.run is called inside of here
    m.train(random.sample(training_data, 40000), 64, False)
    c = m.train(validation_data, 64, True)[0]
    if best_cost is None or c < best_cost:
        count = 0
        best_cost = c
        print('Saving...')
        m.save('/media/chase/98d61322-9ea7-473e-b835-8739c77d1e1e/Translator/model.chk')
    else:
        count += 1
        if count == 10:
            break


...
    def train(self, training_data, batch_size, validate=False, verbose=True):
        total_cost = 0
        total_acc = 0
        total_time = 0
        last_chars = 0
        total_batches = len(training_data) // batch_size
        for i, batch in enumerate(_batch(training_data, batch_size, False)):
            x, y = zip(*batch)
            x, xl = zip(*[self._vectorize_sent(s) for s in x])
            y, yl = zip(*[self._vectorize_sent(s) for s in y])
            start_time = time.time()
            # During validation, run a no-op instead of the optimizer step.
            c, a, g, l, _ = self.session.run((self.cost, self.accuracy, self.global_step, self.learning_rate, self.null_train_step if validate else self.train_step), {
                self.source_text: x,
                self.target_text: y,
                self.target_length: yl,
                self.teacher_forcing: True,
            })
            end_time = time.time()
            total_cost += c
            total_acc += a
            total_time += end_time - start_time
            if verbose:
                # Overwrite the previous status line in place.
                msg = '%s b(%d / %d) g(%d) c(%e) a(%0.4f) lr(%e) dt(%0.2f)' % ('Validating' if validate else 'Training', i, total_batches, g, total_cost / (i + 1), total_acc / (i + 1), l, total_time / (i + 1))
                msg += ' ' * max(0, last_chars - len(msg))
                last_chars = len(msg)
                print(msg, end='\r')
        if verbose:
            print()
        return total_cost / (i + 1), total_acc / (i + 1)
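One way to narrow down which part of the graph triggers the hang (a sketch, assuming the tf.contrib.compiler.jit scope available in XLA-enabled 1.x builds; build_decoder and encoder_state are hypothetical stand-ins for a piece of the model) is to leave global_jit_level unset and JIT-compile only one subgraph at a time:

import tensorflow as tf
from tensorflow.contrib.compiler import jit  # assumption: present in this XLA build

# Compile only the ops created inside the scope; moving the scope around
# the model isolates the cluster whose compilation or execution hangs.
with jit.experimental_jit_scope():
    logits = build_decoder(encoder_state)  # hypothetical subgraph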
When I try to run it, it produces the following TensorFlow output:

2017-04-26 05:15:58.421388: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2017-04-26 05:15:58.421698: I tensorflow/core/common_runtime/gpu/gpu_device.cc:887] Found device 0 with properties: 
name: GeForce GTX 1080
major: 6 minor: 1 memoryClockRate (GHz) 1.7335
pciBusID 0000:01:00.0
Total memory: 7.92GiB
Free memory: 7.33GiB
2017-04-26 05:15:58.421708: I tensorflow/core/common_runtime/gpu/gpu_device.cc:908] DMA: 0 
2017-04-26 05:15:58.421711: I tensorflow/core/common_runtime/gpu/gpu_device.cc:918] 0:   Y 
2017-04-26 05:15:58.421719: I tensorflow/core/common_runtime/gpu/gpu_device.cc:977] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 1080, pci bus id: 0000:01:00.0)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gradients_impl.py:93: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
2017-04-26 05:17:17.107616: I tensorflow/compiler/xla/service/platform_util.cc:58] platform CUDA present with 1 visible devices
2017-04-26 05:17:17.107635: I tensorflow/compiler/xla/service/platform_util.cc:58] platform Host present with 8 visible devices
2017-04-26 05:17:17.108265: I tensorflow/compiler/xla/service/service.cc:183] XLA service 0xa103840 executing computations on platform Host. Devices:
2017-04-26 05:17:17.108274: I tensorflow/compiler/xla/service/service.cc:191]   StreamExecutor device (0): <undefined>, <undefined>
2017-04-26 05:17:17.108393: I tensorflow/compiler/xla/service/platform_util.cc:58] platform CUDA present with 1 visible devices
2017-04-26 05:17:17.108398: I tensorflow/compiler/xla/service/platform_util.cc:58] platform Host present with 8 visible devices
2017-04-26 05:17:17.108602: I tensorflow/compiler/xla/service/service.cc:183] XLA service 0xe383100 executing computations on platform CUDA. Devices:
2017-04-26 05:17:17.108607: I tensorflow/compiler/xla/service/service.cc:191]   StreamExecutor device (0): GeForce GTX 1080, Compute Capability 6.1
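The log shows both the Host and CUDA XLA services starting, and then nothing further. As a sanity check that the XLA devices were registered at all (a sketch using the internal device_lib module, whose output format can vary between builds):

from tensorflow.python.client import device_lib

# An XLA-enabled build should list XLA_CPU / XLA_GPU devices
# alongside the ordinary CPU and GPU entries.
for d in device_lib.list_local_devices():
    print(d.device_type, d.name)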
I then attached gdb to the process to see what it was doing. It looks like it is just sitting in a pthread condition wait:

#0  pthread_cond_wait@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
#1  0x00007f715569291c in std::condition_variable::wait(std::unique_lock<std::mutex>&) ()
   from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#2  0x00007f716d85257b in tensorflow::DirectSession::WaitForNotification(tensorflow::Notification*, long long) ()
   from /usr/local/lib/python3.5/dist-packages/tensorflow/python/_pywrap_tensorflow_internal.so
#3  0x00007f716d85262d in tensorflow::DirectSession::WaitForNotification(tensorflow::DirectSession::RunState*, tensorflow::CancellationManager*, long long) ()
   from /usr/local/lib/python3.5/dist-packages/tensorflow/python/_pywrap_tensorflow_internal.so
#4  0x00007f716d85d287 in tensorflow::DirectSession::Run(tensorflow::RunOptions const&, std::vector<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tensorflow::Tensor>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tensorflow::Tensor> > > const&, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, std::vector<tensorflow::Tensor, std::allocator<tensorflow::Tensor> >*, tensorflow::RunMetadata*) ()
   from /usr/local/lib/python3.5/dist-packages/tensorflow/python/_pywrap_tensorflow_internal.so
#5  0x00007f716c3259d1 in TF_Run_Helper(tensorflow::Session*, char const*, TF_Buffer const*, std::vector<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tensorflow::Tensor>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tensorflow::Tensor> > > const&, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, TF_Tensor**, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, TF_Buffer*, TF_Status*) [clone .constprop.554] ()
   from /usr/local/lib/python3.5/dist-packages/tensorflow/python/_pywrap_tensorflow_internal.so
#6  0x00007f716c32639a in TF_Run ()
   from /usr/local/lib/python3.5/dist-packages/tensorflow/python/_pywrap_tensorflow_internal.so
#7  0x00007f716c0ab351 in tensorflow::TF_Run_wrapper_helper(TF_DeprecatedSession*, char const*, TF_Buffer const*, _object*, tensorflow::gtl::InlinedVector<char const*, 8> const&, tensorflow::gtl::InlinedVector<char const*, 8> const&, TF_Status*, tensorflow::gtl::InlinedVector<_object*, 8>*, TF_Buffer*) ()
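The backtrace shows the main thread parked in DirectSession::WaitForNotification, waiting for a step that never finishes. To confirm the step is truly hung rather than just very slow, one option (a sketch using the standard operation_timeout_in_ms field of ConfigProto) is to give the session a deadline, so the blocked session.run fails instead of waiting forever:

import tensorflow as tf

# Abort any blocking session operation after five minutes; a hung XLA
# compile or step then surfaces as tf.errors.DeadlineExceededError.
config = tf.ConfigProto(operation_timeout_in_ms=300000)
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
sess = tf.Session(config=config)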