如何在Kaggle笔记本的GPU上运行Tensorflow的Keras model.fit()函数?
我想在Kaggle提供的GPU上运行我的代码。虽然我能够在CPU上运行我的代码,但我无法将其正确迁移到Kaggle的GPU上运行。(标签:keras、gpu、kaggle)运行下面这段代码时:
# Ask TensorFlow to place the training ops on the first GPU.
# `model`, the data arrays and `callbacks_list` are defined elsewhere in the notebook.
with tf.device("/device:GPU:0"):
    hist = model.fit(
        x=X_train,
        y=Y_train,
        validation_data=(X_test, Y_test),
        batch_size=25,
        epochs=20,
        callbacks=callbacks_list,
    )
得到这个错误
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-28-cdb8b009cd85> in <module>
1 with tf.device("/device:GPU:0"):
----> 2 hist = model.fit(x=X_train, y=Y_train, validation_data=(X_test, Y_test), batch_size=25, epochs=20, callbacks=callbacks_list)
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
817 self._assert_compile_was_called()
818 self._check_call_args('evaluate')
--> 819
820 func = self._select_training_loop(x)
821 return func.evaluate(
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
233
234 recreate_training_iterator = (
--> 235 training_data_adapter.should_recreate_iterator(steps_per_epoch))
236 if not steps_per_epoch:
237 # TODO(b/139762795): Add step inference for when steps is None to
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in _process_training_inputs(model, x, y, batch_size, epochs, sample_weights, class_weights, steps_per_epoch, validation_split, validation_data, validation_steps, shuffle, distribution_strategy, max_queue_size, workers, use_multiprocessing)
591 class_weights=None,
592 shuffle=False,
--> 593 steps=None,
594 distribution_strategy=None,
595 max_queue_size=10,
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in _process_inputs(model, mode, x, y, batch_size, epochs, sample_weights, class_weights, shuffle, steps, distribution_strategy, max_queue_size, workers, use_multiprocessing)
704 """Provide a scope for running one batch."""
705 batch_logs = {'batch': step, 'size': size}
--> 706 self.callbacks._call_batch_hook(
707 mode, 'begin', step, batch_logs)
708 self.progbar.on_batch_begin(step, batch_logs)
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/data_adapter.py in __init__(self, x, y, sample_weights, sample_weight_modes, batch_size, epochs, steps, shuffle, **kwargs)
355 sample_weights = _process_numpy_inputs(sample_weights)
356
--> 357 # If sample_weights are not specified for an output use 1.0 as weights.
358 if (sample_weights is not None and
359 any([sw is None for sw in sample_weights])):
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/data_adapter.py in slice_inputs(self, indices_dataset, inputs)
381 if steps and not batch_size:
382 batch_size = int(math.ceil(num_samples/steps))
--> 383
384 if not batch_size:
385 raise ValueError(
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/data/ops/dataset_ops.py in from_tensors(tensors)
564 existing iterators.
565
--> 566 Args:
567 unused_dummy: Ignored value.
568
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/data/ops/dataset_ops.py in __init__(self, element)
2763 init_args: A nested structure representing the arguments to `init_func`.
2764 init_func: A TensorFlow function that will be called on `init_args` each
-> 2765 time a C++ iterator over this dataset is constructed. Returns a nested
2766 structure representing the "state" of the dataset.
2767 next_func: A TensorFlow function that will be called on the result of
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/data/util/structure.py in normalize_element(element)
111 ops.convert_to_tensor(t, name="component_%d" % i))
112 return nest.pack_sequence_as(element, normalized_components)
--> 113
114
115 def convert_legacy_structure(output_types, output_shapes, output_classes):
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py in convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, dtype_hint, ctx, accepted_result_types)
1312 return ret
1313 raise TypeError("%sCannot convert %r with type %s to Tensor: "
-> 1314 "no conversion function registered." %
1315 (_error_prefix(name), value, type(value)))
1316
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/tensor_conversion_registry.py in _default_conversion_function(***failed resolving arguments***)
50 def _default_conversion_function(value, dtype, name, as_ref):
51 del as_ref # Unused.
---> 52 return constant_op.constant(value, dtype, name=name)
53
54
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py in constant(value, dtype, shape, name)
256 return _eager_fill(shape.as_list(), t, ctx)
257 raise TypeError("Eager execution of tf.constant with unsupported shape "
--> 258 "(value has %d elements, shape is %s with %d elements)." %
259 (num_t, shape, shape.num_elements()))
260 g = ops.get_default_graph()
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
264 value, dtype=dtype, shape=shape, verify_shape=verify_shape,
265 allow_broadcast=allow_broadcast))
--> 266 dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)
267 const_tensor = g.create_op(
268 "Const", [], [dtype_value.type],
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py in convert_to_eager_tensor(value, ctx, dtype)
94 dtype = dtypes.as_dtype(dtype).as_datatype_enum
95 ctx.ensure_initialized()
---> 96 return ops.EagerTensor(value, ctx.device_name, dtype)
97
98
RuntimeError: Can't copy Tensor with type string to device /job:localhost/replica:0/task:0/device:GPU:0.
---------------------------------------------------------------------------
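RuntimeError Traceback (most recent call last)
<ipython-input-28-cdb8b009cd85> in <module>
1 with tf.device("/device:GPU:0"):
----> 2 hist = model.fit(x=X_train, y=Y_train, validation_data=(X_test, Y_test), batch_size=25, epochs=20, callbacks=callbacks_list)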
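/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
817 self._assert_compile_was_called()
818 self._check_call_args('evaluate')
--> 819
820 func = self._select_training_loop(x)
821 return func.evaluate(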
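/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
233
234 recreate_training_iterator = (
--> 235 training_data_adapter.should_recreate_iterator(steps_per_epoch))
236 if not steps_per_epoch:
237 # TODO(b/139762795): Add step inference for when steps is None to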
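/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in _process_training_inputs(model, x, y, batch_size, epochs, sample_weights, class_weights, steps_per_epoch, validation_split, validation_data, validation_steps, shuffle, distribution_strategy, max_queue_size, workers, use_multiprocessing)
591 class_weights=None,
592 shuffle=False,
--> 593 steps=None,
594 distribution_strategy=None,
595 max_queue_size=10,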
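/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in _process_inputs(model, mode, x, y, batch_size, epochs, sample_weights, class_weights, shuffle, steps, distribution_strategy, max_queue_size, workers, use_multiprocessing)
704 """Provide a scope for running one batch."""
705 batch_logs = {'batch': step, 'size': size}
--> 706 self.callbacks._call_batch_hook(
707 mode, 'begin', step, batch_logs)
708 self.progbar.on_batch_begin(step, batch_logs)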
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/data_adapter.py in __init__(self, x, y, sample_weights, sample_weight_modes, batch_size, epochs, steps, shuffle, **kwargs)
355 sample_weights = _process_numpy_inputs(sample_weights)
356
--> 357 # If sample_weights are not specified for an output use 1.0 as weights.
358 if (sample_weights is not None and
359 any([sw is None for sw in sample_weights])):
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/data_adapter.py in slice_inputs(self, indices_dataset, inputs)
381 if steps and not batch_size:
382 batch_size = int(math.ceil(num_samples/steps))
--> 383
384 if not batch_size:
385 raise ValueError(
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/data/ops/dataset_ops.py in from_tensors(tensors)
564 existing iterators.
565
--> 566 Args:
567 unused_dummy: Ignored value.
568
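/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/data/ops/dataset_ops.py in __init__(self, element)
2763 init_args: A nested structure representing the arguments to `init_func`.
2764 init_func: A TensorFlow function that will be called on `init_args` each
-> 2765 time a C++ iterator over this dataset is constructed. Returns a nested
2766 structure representing the "state" of the dataset.
2767 next_func: A TensorFlow function that will be called on the result of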
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/data/util/structure.py in normalize_element(element)
111 ops.convert_to_tensor(t, name="component_%d" % i))
112 return nest.pack_sequence_as(element, normalized_components)
--> 113
114
115 def convert_legacy_structure(output_types, output_shapes, output_classes):
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py in convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, dtype_hint, ctx, accepted_result_types)
1312 return ret
1313 raise TypeError("%sCannot convert %r with type %s to Tensor: "
-> 1314 "no conversion function registered." %
1315 (_error_prefix(name), value, type(value)))
1316
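/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/tensor_conversion_registry.py in _default_conversion_function(***failed resolving arguments***)
50 def _default_conversion_function(value, dtype, name, as_ref):
51 del as_ref # Unused.
---> 52 return constant_op.constant(value, dtype, name=name)
53
54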
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py in constant(value, dtype, shape, name)
256 return _eager_fill(shape.as_list(), t, ctx)
257 raise TypeError("Eager execution of tf.constant with unsupported shape "
--> 258 "(value has %d elements, shape is %s with %d elements)." %
259 (num_t, shape, shape.num_elements()))
260 g = ops.get_default_graph()
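/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
(回溯在此处被截断;完整的回溯及最终的错误信息见上文的英文原文。)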
# Log the device each op is placed on, so GPU usage can be confirmed in the output.
tf.debugging.set_log_device_placement(True)

try:
    # NOTE(review): the 'XLA_GPU' device name is only registered in some
    # TensorFlow builds -- confirm it against the devices TensorFlow reports.
    with tf.device('/device:XLA_GPU:0'):
        # Convert the inputs to tensors with explicit numeric dtypes
        # inside the device scope.
        X_train = tf.convert_to_tensor(x_train, dtype=tf.int32)
        Y_train = tf.convert_to_tensor(y_train, dtype=tf.float32)
        X_dev = tf.convert_to_tensor(x_val, dtype=tf.int32)
        Y_dev = tf.convert_to_tensor(y_val, dtype=tf.float32)

        # `review_input` and `preds` are the model's input/output tensors,
        # defined elsewhere in the notebook.
        _model = tf.keras.Model(review_input, preds)
        opt = optimizers.Adam()
        _model.compile(loss="mean_absolute_error", optimizer=opt, metrics=['acc'])
except RuntimeError as e:
    # The error is only reported here; if it was raised before `_model` was
    # bound, the fit() call below will fail with a NameError.
    print(e)

# Training is started outside the device scope, as in the original snippet.
history = _model.fit(
    X_train,
    Y_train,
    epochs=100,
    batch_size=128,
    validation_data=(X_dev, Y_dev),
    verbose=1,
)