如何在Kaggle笔记本的GPU上运行TensorFlow的Keras model.fit()函数?

如何在Kaggle笔记本的GPU上运行TensorFlow的Keras model.fit()函数?,keras,gpu,kaggle,Keras,Gpu,Kaggle,我想在Kaggle提供的GPU上运行我的代码。虽然我能够在CPU上运行我的代码,但似乎无法将其正确迁移到Kaggle GPU上运行。运行以下代码时: with tf.device("/device:GPU:0"): hist = model.fit(x=X_train, y=Y_train, validation_data=(X_test, Y_test), batch_size=25, epochs=20, callbacks=callbacks_list) 得到如下错误: ----------

我想在Kaggle提供的GPU上运行我的代码。虽然我能够在CPU上运行我的代码,但似乎无法将其正确迁移到Kaggle GPU上运行。

运行以下代码时:

# Run training inside a device scope that pins ops to the first GPU.
# The call must be indented under the `with` statement to be part of its body.
with tf.device("/device:GPU:0"):
    hist = model.fit(
        x=X_train,
        y=Y_train,
        validation_data=(X_test, Y_test),
        batch_size=25,
        epochs=20,
        callbacks=callbacks_list,
    )
得到如下错误:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-28-cdb8b009cd85> in <module>
      1 with tf.device("/device:GPU:0"):
----> 2     hist = model.fit(x=X_train, y=Y_train, validation_data=(X_test, Y_test), batch_size=25, epochs=20, callbacks=callbacks_list)

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    817     self._assert_compile_was_called()
    818     self._check_call_args('evaluate')
--> 819 
    820     func = self._select_training_loop(x)
    821     return func.evaluate(

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    233 
    234       recreate_training_iterator = (
--> 235           training_data_adapter.should_recreate_iterator(steps_per_epoch))
    236       if not steps_per_epoch:
    237         # TODO(b/139762795): Add step inference for when steps is None to

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in _process_training_inputs(model, x, y, batch_size, epochs, sample_weights, class_weights, steps_per_epoch, validation_split, validation_data, validation_steps, shuffle, distribution_strategy, max_queue_size, workers, use_multiprocessing)
    591                     class_weights=None,
    592                     shuffle=False,
--> 593                     steps=None,
    594                     distribution_strategy=None,
    595                     max_queue_size=10,

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in _process_inputs(model, mode, x, y, batch_size, epochs, sample_weights, class_weights, shuffle, steps, distribution_strategy, max_queue_size, workers, use_multiprocessing)
    704     """Provide a scope for running one batch."""
    705     batch_logs = {'batch': step, 'size': size}
--> 706     self.callbacks._call_batch_hook(
    707         mode, 'begin', step, batch_logs)
    708     self.progbar.on_batch_begin(step, batch_logs)

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/data_adapter.py in __init__(self, x, y, sample_weights, sample_weight_modes, batch_size, epochs, steps, shuffle, **kwargs)
    355     sample_weights = _process_numpy_inputs(sample_weights)
    356 
--> 357     # If sample_weights are not specified for an output use 1.0 as weights.
    358     if (sample_weights is not None and
    359         any([sw is None for sw in sample_weights])):

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/data_adapter.py in slice_inputs(self, indices_dataset, inputs)
    381     if steps and not batch_size:
    382       batch_size = int(math.ceil(num_samples/steps))
--> 383 
    384     if not batch_size:
    385       raise ValueError(

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/data/ops/dataset_ops.py in from_tensors(tensors)
    564       existing iterators.
    565 
--> 566       Args:
    567         unused_dummy: Ignored value.
    568 

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/data/ops/dataset_ops.py in __init__(self, element)
   2763       init_args: A nested structure representing the arguments to `init_func`.
   2764       init_func: A TensorFlow function that will be called on `init_args` each
-> 2765         time a C++ iterator over this dataset is constructed. Returns a nested
   2766         structure representing the "state" of the dataset.
   2767       next_func: A TensorFlow function that will be called on the result of

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/data/util/structure.py in normalize_element(element)
    111               ops.convert_to_tensor(t, name="component_%d" % i))
    112   return nest.pack_sequence_as(element, normalized_components)
--> 113 
    114 
    115 def convert_legacy_structure(output_types, output_shapes, output_classes):

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py in convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, dtype_hint, ctx, accepted_result_types)
   1312     return ret
   1313   raise TypeError("%sCannot convert %r with type %s to Tensor: "
-> 1314                   "no conversion function registered." %
   1315                   (_error_prefix(name), value, type(value)))
   1316 

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/tensor_conversion_registry.py in _default_conversion_function(***failed resolving arguments***)
     50 def _default_conversion_function(value, dtype, name, as_ref):
     51   del as_ref  # Unused.
---> 52   return constant_op.constant(value, dtype, name=name)
     53 
     54 

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py in constant(value, dtype, shape, name)
    256         return _eager_fill(shape.as_list(), t, ctx)
    257     raise TypeError("Eager execution of tf.constant with unsupported shape "
--> 258                     "(value has %d elements, shape is %s with %d elements)." %
    259                     (num_t, shape, shape.num_elements()))
    260   g = ops.get_default_graph()

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
    264           value, dtype=dtype, shape=shape, verify_shape=verify_shape,
    265           allow_broadcast=allow_broadcast))
--> 266   dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)
    267   const_tensor = g.create_op(
    268       "Const", [], [dtype_value.type],

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py in convert_to_eager_tensor(value, ctx, dtype)
     94       dtype = dtypes.as_dtype(dtype).as_datatype_enum
     95   ctx.ensure_initialized()
---> 96   return ops.EagerTensor(value, ctx.device_name, dtype)
     97 
     98 

RuntimeError: Can't copy Tensor with type string to device /job:localhost/replica:0/task:0/device:GPU:0.
---------------------------------------------------------------------------
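RuntimeError                              Traceback (most recent call last)
<ipython-input-28-cdb8b009cd85> in <module>
      1 with tf.device("/device:GPU:0"):
----> 2     hist = model.fit(x=X_train, y=Y_train, validation_data=(X_test, Y_test), batch_size=25, epochs=20, callbacks=callbacks_list)

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    817     self._assert_compile_was_called()
    818     self._check_call_args('evaluate')
--> 819
    820     func = self._select_training_loop(x)
    821     return func.evaluate(

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    233
    234       recreate_training_iterator = (
--> 235           training_data_adapter.should_recreate_iterator(steps_per_epoch))
    236       if not steps_per_epoch:
    237         # TODO(b/139762795): Add step inference for when steps is None to

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in _process_training_inputs(model, x, y, batch_size, epochs, sample_weights, class_weights, steps_per_epoch, validation_split, validation_data, validation_steps, shuffle, distribution_strategy, max_queue_size, workers, use_multiprocessing)
    591                     class_weights=None,
    592                     shuffle=False,
--> 593                     steps=None,
    594                     distribution_strategy=None,
    595                     max_queue_size=10,

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in _process_inputs(model, mode, x, y, batch_size, epochs, sample_weights, class_weights, shuffle, steps, distribution_strategy, max_queue_size, workers, use_multiprocessing)
    704     """Provide a scope for running one batch."""
    705     batch_logs = {'batch': step, 'size': size}
--> 706     self.callbacks._call_batch_hook(
    707         mode, 'begin', step, batch_logs)
    708     self.progbar.on_batch_begin(step, batch_logs)

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/data_adapter.py in __init__(self, x, y, sample_weights, sample_weight_modes, batch_size, epochs, steps, shuffle, **kwargs)
    355     sample_weights = _process_numpy_inputs(sample_weights)
    356
--> 357     # If sample_weights are not specified for an output use 1.0 as weights.
    358     if (sample_weights is not None and
    359         any([sw is None for sw in sample_weights])):

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/data_adapter.py in slice_inputs(self, indices_dataset, inputs)
    381     if steps and not batch_size:
    382       batch_size = int(math.ceil(num_samples/steps))
--> 383
    384     if not batch_size:
    385       raise ValueError(

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/data/ops/dataset_ops.py in from_tensors(tensors)
    564       existing iterators.
    565
--> 566       Args:
    567         unused_dummy: Ignored value.
    568

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/data/ops/dataset_ops.py in __init__(self, element)
   2763       init_args: A nested structure representing the arguments to `init_func`.
   2764       init_func: A TensorFlow function that will be called on `init_args` each
-> 2765         time a C++ iterator over this dataset is constructed. Returns a nested
   2766         structure representing the "state" of the dataset.
   2767       next_func: A TensorFlow function that will be called on the result of

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/data/util/structure.py in normalize_element(element)
    111               ops.convert_to_tensor(t, name="component_%d" % i))
    112   return nest.pack_sequence_as(element, normalized_components)
--> 113
    114
    115 def convert_legacy_structure(output_types, output_shapes, output_classes):

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py in convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, dtype_hint, ctx, accepted_result_types)
   1312     return ret
   1313   raise TypeError("%sCannot convert %r with type %s to Tensor: "
-> 1314                   "no conversion function registered." %
   1315                   (_error_prefix(name), value, type(value)))
   1316

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/tensor_conversion_registry.py in _default_conversion_function(***failed resolving arguments***)
     50 def _default_conversion_function(value, dtype, name, as_ref):
     51   del as_ref  # Unused.
---> 52   return constant_op.constant(value, dtype, name=name)
     53
     54

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py in constant(value, dtype, shape, name)
    256         return _eager_fill(shape.as_list(), t, ctx)
    257     raise TypeError("Eager execution of tf.constant with unsupported shape "
--> 258                     "(value has %d elements, shape is %s with %d elements)." %
    259                     (num_t, shape, shape.num_elements()))
    260   g = ops.get_default_graph()

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
    264           value, dtype=dtype, shape=shape, verify_shape=verify_shape,
    265           allow_broadcast=allow_broadcast))
--> 266   dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)
    267   const_tensor = g.create_op(
    268       "Const", [], [dtype_value.type],

/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py in convert_to_eager_tensor(value, ctx, dtype)
     94       dtype = dtypes.as_dtype(dtype).as_datatype_enum
     95   ctx.ensure_initialized()
---> 96   return ops.EagerTensor(value, ctx.device_name, dtype)
     97
     98

RuntimeError: Can't copy Tensor with type string to device /job:localhost/replica:0/task:0/device:GPU:0.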
# Log the device every op is placed on, so GPU execution can be confirmed
# from the notebook output.
tf.debugging.set_log_device_placement(True)

try:
    # Create the input tensors and the model inside the device scope.
    with tf.device('/device:XLA_GPU:0'):
        # Convert the arrays to tensors with explicit numeric dtypes.
        # NOTE(review): the error reported in the question concerned a
        # string-typed tensor; confirm x_*/y_* hold only numeric values.
        X_train = tf.convert_to_tensor(x_train, dtype=tf.int32)
        Y_train = tf.convert_to_tensor(y_train, dtype=tf.float32)
        X_dev = tf.convert_to_tensor(x_val, dtype=tf.int32)
        Y_dev = tf.convert_to_tensor(y_val, dtype=tf.float32)
        _model = tf.keras.Model(review_input, preds)
        opt = optimizers.Adam()
        _model.compile(loss="mean_absolute_error", optimizer=opt, metrics=['acc'])
except RuntimeError as e:
    print(e)
    # Re-raise after reporting: if setup failed, `_model` and the tensors may
    # be undefined, and falling through would hide the real error behind a
    # NameError at the fit() call below.
    raise

# Train the compiled model, validating on the held-out split each epoch.
history = _model.fit(
    X_train,
    Y_train,
    epochs=100,
    batch_size=128,
    validation_data=(X_dev, Y_dev),
    verbose=1,
)