Tensorflow Colab TPU在每个历元结束时不计算验证数据?
我正在用Colab TPU训练我的模型。但是我遇到了Tensorflow Colab TPU在每个历元结束时不计算验证数据?,tensorflow,google-colaboratory,tpu,Tensorflow,Google Colaboratory,Tpu,我正在用Colab TPU训练我的模型。但是我遇到了model.fit()API的一些令人困惑的行为。我遇到的问题只有在传递validation_data参数时才会出现 情景1 返回错误: --------------------------------------------------------------------------- IndexError Traceback (most recent call last) &
model.fit()
API的一些令人困惑的行为。我遇到的问题只有在传递validation_data
参数时才会出现
情景1
返回错误:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-20-f929303620ea> in <module>()
20 callbacks = [lr_callback],
21 validation_data = get_val_ds(),
---> 22 validation_steps = 100)
23
24 with open("trainHistoryDict", 'wb') as file_pi:
22 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/models.py in _clone_functional_model(model, input_tensors, layer_fn)
178 input_tensors = nest.flatten(input_tensors)
179 for i, input_tensor in enumerate(input_tensors):
--> 180 original_input_layer = model._input_layers[i]
181
182 # Cache input layer. Create a new layer if the tensor is originally not
IndexError: list index out of range
返回错误
KeyError: 'Failed to format this callback filepath: "model_{epoch:03d}-{loss:.8f}.h5". Reason: \'val_acc\'
此外，在GPU上进行训练时，每个历元都会显示验证损失。但是，在TPU上训练时，验证损失从未显示出来
根据3个观察结果,我怀疑Colab TPU上没有评估验证数据?对吗?有没有办法在每个历元结束时计算验证数据
====
更新:
我的数据管道代码如下:
# Pipeline configuration: global batch size and the GCS bucket that
# holds the TFRecord shards.
batch_size = 16
gcs_path = "gs://my-bucket/"

# Resolve the training and validation shard filenames once at import time.
train_pattern = f"{gcs_path}train/*.tfrecords"
train_fns = tf.io.gfile.glob(train_pattern)
val_pattern = f"{gcs_path}val/*.tfrecords"
val_fns = tf.io.gfile.glob(val_pattern)
def get_train_ds():
    """Build the training tf.data pipeline from the TFRecord shards.

    Returns:
        A batched, shuffled dataset of parsed examples. The remainder
        batch is dropped so every batch has a static shape (required
        for TPU execution).
    """
    train_dataset = tf.data.TFRecordDataset(train_fns, num_parallel_reads=AUTO)
    # NOTE(review): buffer_size=len(train_fns) shuffles over as many
    # *records* as there are *files*, which is probably far too small a
    # shuffle buffer — confirm whether a record-count buffer was intended.
    train_dataset = train_dataset.shuffle(
        buffer_size=len(train_fns), reshuffle_each_iteration=True)
    # Parallelize parsing to match the parallel reads above.
    train_dataset = train_dataset.map(parse_func, num_parallel_calls=AUTO)
    # Fix: was prefetch(batch_size) — a hard-coded 16 prefetched batches.
    # Use AUTO so tf.data tunes the buffer, consistent with get_val_ds().
    train_dataset = train_dataset.batch(
        batch_size, drop_remainder=True).prefetch(AUTO)
    return train_dataset
def get_val_ds():
    """Build the validation tf.data pipeline (no shuffling).

    Returns:
        A batched dataset of parsed examples; the remainder batch is
        dropped so every batch has a static shape (required on TPU).
    """
    return (
        tf.data.TFRecordDataset(val_fns, num_parallel_reads=AUTO)
        .map(parse_func)
        .batch(batch_size, drop_remainder=True)
        .prefetch(AUTO)
    )
==更新
# Build the model under the TPU strategy so its variables live on the TPU.
with tpu_strategy.scope():  # creating the model in the TPUStrategy scope means we will train the model on the TPU
    model = create_model()

# Checkpoint the weights once per epoch.
periodic_save = keras.callbacks.ModelCheckpoint('model_{epoch:03d}.h5',
                                                save_weights_only=True, save_freq="epoch")

hist = model.fit(get_train_ds().repeat(),
                 steps_per_epoch = 100,
                 epochs = 5,
                 verbose = 1,
                 # Fix: periodic_save was constructed but never registered,
                 # so no checkpoints were ever written. Register it next to
                 # the learning-rate callback.
                 callbacks = [lr_callback, periodic_save],
                 validation_data = get_val_ds(),
                 validation_steps = 10)
错误消息
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-23-d088df3513e2> in <module>()
20 callbacks = [lr_callback],
21 validation_data = get_val_ds(),
---> 22 validation_steps = 100)
23
24 with open("trainHistoryDict", 'wb') as file_pi:
22 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_v1.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
807 max_queue_size=max_queue_size,
808 workers=workers,
--> 809 use_multiprocessing=use_multiprocessing)
810
811 def evaluate(self,
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
668 steps_per_epoch=steps_per_epoch,
669 validation_steps=validation_steps,
--> 670 validation_freq=validation_freq)
671
672 return training_arrays.fit_loop(
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in experimental_tpu_fit_loop(model, dataset, epochs, verbose, callbacks, initial_epoch, steps_per_epoch, val_dataset, validation_steps, validation_freq)
271 steps=validation_steps,
272 verbose=verbose,
--> 273 callbacks=callbacks)
274 if not isinstance(val_outs, list):
275 val_outs = [val_outs]
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in experimental_tpu_test_loop(model, dataset, verbose, steps, callbacks)
340 test_input_data = iterator.get_next()
341 per_replica_outputs = current_strategy.run(
--> 342 _test_step_fn, args=(test_input_data,))
343 output_tensors = {}
344 for label, output in zip(out_labels, per_replica_outputs):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in run(self, fn, args, kwargs, options)
344 fn = autograph.tf_convert(fn, autograph_ctx.control_status_ctx())
345 options = options or distribute_lib.RunOptions()
--> 346 return self.extended.tpu_run(fn, args, kwargs, options)
347
348 @property
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in tpu_run(self, fn, args, kwargs, options)
1093 def tpu_run(self, fn, args, kwargs, options=None):
1094 func = self._tpu_function_creator(fn, options)
-> 1095 return func(args, kwargs)
1096
1097 def _tpu_function_creator(self, fn, options):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in tpu_function(args, kwargs)
1160 device_assignment=self._device_assignment,
1161 maximum_shapes=maximum_shapes,
-> 1162 padding_spec=padding_spec)
1163
1164 # Remove all no ops that may have been added during 'tpu.replicate()'
/usr/local/lib/python3.7/dist-packages/tensorflow/python/tpu/tpu.py in replicate(computation, inputs, infeed_queue, device_assignment, name, maximum_shapes, padding_spec)
913 name,
914 maximum_shapes=maximum_shapes,
--> 915 padding_spec=padding_spec)[1]
916
917
/usr/local/lib/python3.7/dist-packages/tensorflow/python/tpu/tpu.py in split_compile_and_replicate(***failed resolving arguments***)
1378 vscope.set_custom_getter(custom_getter)
1379
-> 1380 outputs = computation(*computation_inputs)
1381
1382 vscope.set_use_resource(saved_use_resource)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in replicated_fn(replica_id, replica_args, replica_kwargs)
1122 """Wraps user function to provide replica ID and `Tensor` inputs."""
1123 with _TPUReplicaContext(strategy, replica_id_in_sync_group=replica_id):
-> 1124 result[0] = fn(*replica_args, **replica_kwargs)
1125 return result[0]
1126
/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
253 try:
254 with conversion_ctx:
--> 255 return converted_call(f, args, kwargs, options=options)
256 except Exception as e: # pylint:disable=broad-except
257 if hasattr(e, 'ag_error_metadata'):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py in converted_call(f, args, kwargs, caller_fn_scope, options)
530
531 if not options.user_requested and conversion.is_whitelisted(f):
--> 532 return _call_unconverted(f, args, kwargs, options)
533
534 # internal_convert_user_code is for example turned off when issuing a dynamic
/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py in _call_unconverted(f, args, kwargs, options, update_cache)
337
338 if kwargs is not None:
--> 339 return f(*args, **kwargs)
340 return f(*args)
341
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in _test_step_fn(inputs)
331
332 (distribution_strategy_context.get_replica_context().merge_call(
--> 333 _build_model, args=(model, mode, inputs, targets)))
334
335 (_, outputs, updates, _) = _per_replica_execution_function(
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py in merge_call(self, merge_fn, args, kwargs)
2713 merge_fn = autograph.tf_convert(
2714 merge_fn, autograph_ctx.control_status_ctx(), convert_by_default=False)
-> 2715 return self._merge_call(merge_fn, args, kwargs)
2716
2717 def _merge_call(self, merge_fn, args, kwargs):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py in _merge_call(self, merge_fn, args, kwargs)
2720 distribution_strategy_context._CrossReplicaThreadMode(self._strategy)) # pylint: disable=protected-access
2721 try:
-> 2722 return merge_fn(self._strategy, *args, **kwargs)
2723 finally:
2724 _pop_per_thread_mode()
/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
273 def wrapper(*args, **kwargs):
274 with ag_ctx.ControlStatusCtx(status=ag_ctx.Status.UNSPECIFIED):
--> 275 return func(*args, **kwargs)
276
277 if inspect.isfunction(func) or inspect.ismethod(func):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in _build_model(strategy, model, mode, inputs, targets)
55 else:
56 dist_utils._build_distributed_network(model, strategy, mode, inputs,
---> 57 targets)
58
59
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/distribute/distributed_training_utils.py in _build_distributed_network(model, strategy, mode, inputs, targets)
780 distributed_model = strategy.extended.call_for_each_replica(
781 _build_network_on_replica,
--> 782 args=(model, mode, inputs, targets))
783 set_distributed_model(model, mode, distributed_model)
784
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py in call_for_each_replica(self, fn, args, kwargs)
2583 kwargs = {}
2584 with self._container_strategy().scope():
-> 2585 return self._call_for_each_replica(fn, args, kwargs)
2586
2587 def _call_for_each_replica(self, fn, args, kwargs):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in _call_for_each_replica(self, fn, args, kwargs)
741 # we're in a tpu.rewrite(), and update TPUMirroredVariable accordingly.
742 with _TPUReplicaContext(self._container_strategy()):
--> 743 return fn(*args, **kwargs)
744
745 @contextlib.contextmanager
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/distribute/distributed_training_utils.py in _build_network_on_replica(model, mode, inputs, targets)
740 else:
741 updated_model = models._clone_functional_model(
--> 742 model, input_tensors=inputs, layer_fn=models.share_weights)
743 # Callable losses added directly to a functional Model need to be added
744 # here.
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/models.py in _clone_functional_model(model, input_tensors, layer_fn)
178 input_tensors = nest.flatten(input_tensors)
179 for i, input_tensor in enumerate(input_tensors):
--> 180 original_input_layer = model._input_layers[i]
181
182 # Cache input layer. Create a new layer if the tensor is originally not
IndexError: list index out of range
---------------------------------------------------------------------------
索引器回溯(最后一次最近调用)
在()
20次回调=[lr_回调],
21验证\数据=获取\值\数据(),
--->22验证(步骤=100)
23
24打开(“trainHistoryDict”,“wb”)作为文件
22帧
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training\u v1.py in fit(self、x、y、批大小、历元、冗余、回调、验证拆分、验证数据、无序排列、类权重、样本权重、初始历元、每历元步数、验证步骤、验证频率、最大队列大小、工人、使用多处理、**kwargs)
807最大队列大小=最大队列大小,
808名工人=工人,
-->809使用\多处理=使用\多处理)
810
811 def评估(自我,
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training\u distributed.py in fit(self、model、x、y、批大小、epoch、verbose、回调、验证拆分、验证数据、无序、类权重、样本权重、初始epoch、每epoch步数、验证步数、验证频率、**kwargs)
668步/u历元=步/u历元,
669验证步骤=验证步骤,
-->670验证频率=验证频率)
671
672返回训练\u数组.fit\u循环(
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training\u distributed.py在实验性的\u-tpu\u-fit\u循环中(模型、数据集、历元、冗余、回调、初始历元、每历元步骤、val\u数据集、验证步骤、验证频率)
271步骤=验证步骤,
272详细=详细,
-->273回调=回调)
274如果不存在(val_out,列表):
275 val_outs=[val_outs]
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training\u distributed.py在实验性的测试循环中(模型、数据集、详细信息、步骤、回调)
340测试\输入\数据=迭代器。获取\下一步()
341个副本\u输出=当前\u策略.run(
-->342测试步骤fn,args=(测试输入数据)
343输出_张量={}
344对于标签,在zip中输出(输出标签,每个副本输出):
/运行中的usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py(self、fn、args、kwargs、options)
344 fn=自动签名.tf\u转换(fn,自动签名\u ctx.control\u status\u ctx())
345选项=选项或分发库RunOptions()
-->346返回self.extended.tpu_运行(fn、args、kwargs、options)
347
348@property
/tpu运行中的usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py(self、fn、args、kwargs、options)
1093 def tpu_运行(self、fn、args、kwargs、options=None):
1094 func=self.\u tpu\u函数\u创建者(fn,选项)
->1095返回函数(args,kwargs)
1096
1097 def_tpu_函数_创建者(自身、fn、选项):
/tpu_函数中的usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py(args,kwargs)
1160设备分配=自身设备分配,
1161最大_形=最大_形,
->1162填充规格=填充规格)
1163
1164#删除“tpu.replicate()”期间可能添加的所有no ops
/复制中的usr/local/lib/python3.7/dist-packages/tensorflow/python/tpu/tpu.py(计算、输入、馈入队列、设备分配、名称、最大形状、填充规格)
913姓名,
914最大_形=最大_形,
-->915填充规格=填充规格[1]
916
917
/拆分、编译和复制中的usr/local/lib/python3.7/dist-packages/tensorflow/python/tpu/tpu.py(***解析参数失败***)
1378 vscope.set_custom_getter(custom_getter)
1379
->1380输出=计算(*计算输入)
1381
1382 vscope.设置使用资源(已保存使用资源)
/复制的fn中的usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py(副本id、副本参数、副本kwargs)
1122“包装用户函数以提供副本ID和“Tensor”输入。”“”
1123带有_tpureplicationcontext(策略,同步组中的副本id=副本id):
->1124结果[0]=fn(*副本参数,**副本参数)
1125返回结果[0]
1126
/包装器中的usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py(*args,**kwargs)
253尝试:
254与转换_ctx:
-->255返回转换的呼叫(f、args、kwargs、options=options)
256例外情况为e:#pylint:disable=broad except
257如果hasattr(即“ag\u错误\u元数据”):
/转换调用中的usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py(f、args、kwargs、caller\u fn\u范围、选项)
530
531如果不是options.user,则请求转换。是否被列入白名单(f):
-->532返回-调用-未转换(f、ARG、kwargs、选项)
533
534#内部#转换#用户#代码在发出动态
/usr/local/lib/python3.7/dist-packages/tensorflow/python
# (Same snippet, quoted again alongside the reported error.)
with tpu_strategy.scope():  # creating the model in the TPUStrategy scope means we will train the model on the TPU
    model = create_model()

# Checkpoint callback — note it is defined but NOT included in the
# callbacks list passed to fit() below, so it never runs.
periodic_save = keras.callbacks.ModelCheckpoint('model_{epoch:03d}.h5',
                                                save_weights_only=True, save_freq="epoch")

# NOTE(review): validation_data is supplied, but per the report the TPU
# run raises IndexError while cloning the model for evaluation (traceback
# below, in _clone_functional_model) — presumably a TF1-style distributed
# fit-loop issue; confirm the TensorFlow version in use.
hist = model.fit(get_train_ds().repeat(),
                 steps_per_epoch = 100,
                 epochs = 5,
                 verbose = 1,
                 callbacks = [lr_callback],
                 validation_data = get_val_ds(),
                 validation_steps = 10)
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-23-d088df3513e2> in <module>()
20 callbacks = [lr_callback],
21 validation_data = get_val_ds(),
---> 22 validation_steps = 100)
23
24 with open("trainHistoryDict", 'wb') as file_pi:
22 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_v1.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
807 max_queue_size=max_queue_size,
808 workers=workers,
--> 809 use_multiprocessing=use_multiprocessing)
810
811 def evaluate(self,
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
668 steps_per_epoch=steps_per_epoch,
669 validation_steps=validation_steps,
--> 670 validation_freq=validation_freq)
671
672 return training_arrays.fit_loop(
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in experimental_tpu_fit_loop(model, dataset, epochs, verbose, callbacks, initial_epoch, steps_per_epoch, val_dataset, validation_steps, validation_freq)
271 steps=validation_steps,
272 verbose=verbose,
--> 273 callbacks=callbacks)
274 if not isinstance(val_outs, list):
275 val_outs = [val_outs]
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in experimental_tpu_test_loop(model, dataset, verbose, steps, callbacks)
340 test_input_data = iterator.get_next()
341 per_replica_outputs = current_strategy.run(
--> 342 _test_step_fn, args=(test_input_data,))
343 output_tensors = {}
344 for label, output in zip(out_labels, per_replica_outputs):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in run(self, fn, args, kwargs, options)
344 fn = autograph.tf_convert(fn, autograph_ctx.control_status_ctx())
345 options = options or distribute_lib.RunOptions()
--> 346 return self.extended.tpu_run(fn, args, kwargs, options)
347
348 @property
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in tpu_run(self, fn, args, kwargs, options)
1093 def tpu_run(self, fn, args, kwargs, options=None):
1094 func = self._tpu_function_creator(fn, options)
-> 1095 return func(args, kwargs)
1096
1097 def _tpu_function_creator(self, fn, options):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in tpu_function(args, kwargs)
1160 device_assignment=self._device_assignment,
1161 maximum_shapes=maximum_shapes,
-> 1162 padding_spec=padding_spec)
1163
1164 # Remove all no ops that may have been added during 'tpu.replicate()'
/usr/local/lib/python3.7/dist-packages/tensorflow/python/tpu/tpu.py in replicate(computation, inputs, infeed_queue, device_assignment, name, maximum_shapes, padding_spec)
913 name,
914 maximum_shapes=maximum_shapes,
--> 915 padding_spec=padding_spec)[1]
916
917
/usr/local/lib/python3.7/dist-packages/tensorflow/python/tpu/tpu.py in split_compile_and_replicate(***failed resolving arguments***)
1378 vscope.set_custom_getter(custom_getter)
1379
-> 1380 outputs = computation(*computation_inputs)
1381
1382 vscope.set_use_resource(saved_use_resource)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in replicated_fn(replica_id, replica_args, replica_kwargs)
1122 """Wraps user function to provide replica ID and `Tensor` inputs."""
1123 with _TPUReplicaContext(strategy, replica_id_in_sync_group=replica_id):
-> 1124 result[0] = fn(*replica_args, **replica_kwargs)
1125 return result[0]
1126
/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
253 try:
254 with conversion_ctx:
--> 255 return converted_call(f, args, kwargs, options=options)
256 except Exception as e: # pylint:disable=broad-except
257 if hasattr(e, 'ag_error_metadata'):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py in converted_call(f, args, kwargs, caller_fn_scope, options)
530
531 if not options.user_requested and conversion.is_whitelisted(f):
--> 532 return _call_unconverted(f, args, kwargs, options)
533
534 # internal_convert_user_code is for example turned off when issuing a dynamic
/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py in _call_unconverted(f, args, kwargs, options, update_cache)
337
338 if kwargs is not None:
--> 339 return f(*args, **kwargs)
340 return f(*args)
341
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in _test_step_fn(inputs)
331
332 (distribution_strategy_context.get_replica_context().merge_call(
--> 333 _build_model, args=(model, mode, inputs, targets)))
334
335 (_, outputs, updates, _) = _per_replica_execution_function(
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py in merge_call(self, merge_fn, args, kwargs)
2713 merge_fn = autograph.tf_convert(
2714 merge_fn, autograph_ctx.control_status_ctx(), convert_by_default=False)
-> 2715 return self._merge_call(merge_fn, args, kwargs)
2716
2717 def _merge_call(self, merge_fn, args, kwargs):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py in _merge_call(self, merge_fn, args, kwargs)
2720 distribution_strategy_context._CrossReplicaThreadMode(self._strategy)) # pylint: disable=protected-access
2721 try:
-> 2722 return merge_fn(self._strategy, *args, **kwargs)
2723 finally:
2724 _pop_per_thread_mode()
/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
273 def wrapper(*args, **kwargs):
274 with ag_ctx.ControlStatusCtx(status=ag_ctx.Status.UNSPECIFIED):
--> 275 return func(*args, **kwargs)
276
277 if inspect.isfunction(func) or inspect.ismethod(func):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in _build_model(strategy, model, mode, inputs, targets)
55 else:
56 dist_utils._build_distributed_network(model, strategy, mode, inputs,
---> 57 targets)
58
59
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/distribute/distributed_training_utils.py in _build_distributed_network(model, strategy, mode, inputs, targets)
780 distributed_model = strategy.extended.call_for_each_replica(
781 _build_network_on_replica,
--> 782 args=(model, mode, inputs, targets))
783 set_distributed_model(model, mode, distributed_model)
784
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py in call_for_each_replica(self, fn, args, kwargs)
2583 kwargs = {}
2584 with self._container_strategy().scope():
-> 2585 return self._call_for_each_replica(fn, args, kwargs)
2586
2587 def _call_for_each_replica(self, fn, args, kwargs):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in _call_for_each_replica(self, fn, args, kwargs)
741 # we're in a tpu.rewrite(), and update TPUMirroredVariable accordingly.
742 with _TPUReplicaContext(self._container_strategy()):
--> 743 return fn(*args, **kwargs)
744
745 @contextlib.contextmanager
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/distribute/distributed_training_utils.py in _build_network_on_replica(model, mode, inputs, targets)
740 else:
741 updated_model = models._clone_functional_model(
--> 742 model, input_tensors=inputs, layer_fn=models.share_weights)
743 # Callable losses added directly to a functional Model need to be added
744 # here.
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/models.py in _clone_functional_model(model, input_tensors, layer_fn)
178 input_tensors = nest.flatten(input_tensors)
179 for i, input_tensor in enumerate(input_tensors):
--> 180 original_input_layer = model._input_layers[i]
181
182 # Cache input layer. Create a new layer if the tensor is originally not
IndexError: list index out of range