Tensorflow Colab TPU在每个历元结束时不计算验证数据?

Tensorflow Colab TPU在每个历元结束时不计算验证数据?,tensorflow,google-colaboratory,tpu,Tensorflow,Google Colaboratory,Tpu,我正在用Colab TPU训练我的模特。但是我遇到了model.fit()API的一些令人困惑的行为。我遇到的问题只有在传递validation\u data参数时才会出现 情景1 返回错误: --------------------------------------------------------------------------- IndexError Traceback (most recent call last) &

我正在用Colab TPU训练我的模特。但是我遇到了
model.fit()
API的一些令人困惑的行为。我遇到的问题只有在传递
validation_data
参数时才会出现

情景1 返回错误:

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-20-f929303620ea> in <module>()
     20                     callbacks = [lr_callback],
     21                     validation_data = get_val_ds(),
---> 22                     validation_steps = 100)
     23 
     24 with open("trainHistoryDict", 'wb') as file_pi:

22 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/models.py in _clone_functional_model(model, input_tensors, layer_fn)
    178     input_tensors = nest.flatten(input_tensors)
    179     for i, input_tensor in enumerate(input_tensors):
--> 180       original_input_layer = model._input_layers[i]
    181 
    182       # Cache input layer. Create a new layer if the tensor is originally not

IndexError: list index out of range
返回错误

KeyError: 'Failed to format this callback filepath: "model_{epoch:03d}-{loss:.8f}.h5". Reason: \'val_acc\'
此外,在GPU上进行训练时,每个历元都会显示验证损失。但是,在TPU上训练时,验证损失从未显示出来

根据这3个观察结果,我怀疑Colab TPU上没有评估验证数据?对吗?有没有办法在每个历元结束时计算验证数据?

==== 更新:

我的数据管道代码如下:

# Global batch size fed to both pipelines; TPUs need a fixed batch shape,
# hence drop_remainder=True in the dataset builders below.
batch_size = 16
# GCS bucket root — TPUs can only read training data from GCS, not local disk.
gcs_path = "gs://my-bucket/"

# Resolve the sharded TFRecord files for the training split.
train_pattern = gcs_path + "train/*.tfrecords"
train_fns = tf.io.gfile.glob(train_pattern)

# Resolve the sharded TFRecord files for the validation split.
val_pattern = gcs_path + "val/*.tfrecords"
val_fns = tf.io.gfile.glob(val_pattern)


def get_train_ds():
    """Build the training tf.data pipeline from the TFRecord shards.

    Returns a shuffled, parsed, batched dataset. Incomplete final batches
    are dropped (drop_remainder=True) because TPUs require static batch
    shapes.
    """
    train_dataset = tf.data.TFRecordDataset(train_fns, num_parallel_reads=AUTO)
    # NOTE(review): buffer_size is the number of *files*, not records, so
    # records are only shuffled within a very small window — confirm this
    # is intended; a larger buffer usually gives a better shuffle.
    train_dataset = train_dataset.shuffle(buffer_size = len(train_fns), reshuffle_each_iteration = True)
    # Parallelize parsing; AUTO lets tf.data tune the degree of parallelism.
    train_dataset = train_dataset.map(parse_func, num_parallel_calls=AUTO)
    # FIX: prefetch() takes a count of *batches* to buffer, not elements;
    # prefetch(batch_size) was an accident of units and inconsistent with
    # get_val_ds(). Use AUTO (autotuned) as in the validation pipeline.
    train_dataset = train_dataset.batch(batch_size, drop_remainder = True).prefetch(AUTO)

    return train_dataset

def get_val_ds():
    """Build the validation tf.data pipeline from the TFRecord shards.

    No shuffling: a fixed order keeps epoch-to-epoch metrics comparable.
    Incomplete final batches are dropped (drop_remainder=True) since TPUs
    require static batch shapes.
    """
    records = tf.data.TFRecordDataset(val_fns, num_parallel_reads=AUTO)
    parsed = records.map(parse_func)
    batched = parsed.batch(batch_size, drop_remainder = True)
    return batched.prefetch(AUTO)
==== 更新:

with tpu_strategy.scope(): # creating the model in the TPUStrategy scope means we will train the model on the TPU

    # Build the model inside the strategy scope so its variables live on the TPU.
    model = create_model()
    # Save weights at the end of every epoch. The filename template only
    # uses `epoch`, so formatting cannot fail on a missing metric key
    # (unlike templates referencing val_* metrics).
    periodic_save = keras.callbacks.ModelCheckpoint('model_{epoch:03d}.h5',
                                 save_weights_only=True, save_freq="epoch")

    # FIX: periodic_save was constructed but never registered with fit(),
    # so no checkpoints were ever written. It is now passed alongside
    # lr_callback.
    hist = model.fit(get_train_ds().repeat(),
                steps_per_epoch = 100,
                epochs = 5,
                verbose = 1,
                callbacks = [lr_callback, periodic_save],
                validation_data = get_val_ds(),
                validation_steps = 10)
错误消息

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-23-d088df3513e2> in <module>()
     20                     callbacks = [lr_callback],
     21                     validation_data = get_val_ds(),
---> 22                     validation_steps = 100)
     23 
     24 with open("trainHistoryDict", 'wb') as file_pi:

22 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_v1.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    807         max_queue_size=max_queue_size,
    808         workers=workers,
--> 809         use_multiprocessing=use_multiprocessing)
    810 
    811   def evaluate(self,

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
    668             steps_per_epoch=steps_per_epoch,
    669             validation_steps=validation_steps,
--> 670             validation_freq=validation_freq)
    671 
    672     return training_arrays.fit_loop(

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in experimental_tpu_fit_loop(model, dataset, epochs, verbose, callbacks, initial_epoch, steps_per_epoch, val_dataset, validation_steps, validation_freq)
    271           steps=validation_steps,
    272           verbose=verbose,
--> 273           callbacks=callbacks)
    274       if not isinstance(val_outs, list):
    275         val_outs = [val_outs]

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in experimental_tpu_test_loop(model, dataset, verbose, steps, callbacks)
    340   test_input_data = iterator.get_next()
    341   per_replica_outputs = current_strategy.run(
--> 342       _test_step_fn, args=(test_input_data,))
    343   output_tensors = {}
    344   for label, output in zip(out_labels, per_replica_outputs):

/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in run(self, fn, args, kwargs, options)
    344     fn = autograph.tf_convert(fn, autograph_ctx.control_status_ctx())
    345     options = options or distribute_lib.RunOptions()
--> 346     return self.extended.tpu_run(fn, args, kwargs, options)
    347 
    348   @property

/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in tpu_run(self, fn, args, kwargs, options)
   1093   def tpu_run(self, fn, args, kwargs, options=None):
   1094     func = self._tpu_function_creator(fn, options)
-> 1095     return func(args, kwargs)
   1096 
   1097   def _tpu_function_creator(self, fn, options):

/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in tpu_function(args, kwargs)
   1160             device_assignment=self._device_assignment,
   1161             maximum_shapes=maximum_shapes,
-> 1162             padding_spec=padding_spec)
   1163 
   1164       # Remove all no ops that may have been added during 'tpu.replicate()'

/usr/local/lib/python3.7/dist-packages/tensorflow/python/tpu/tpu.py in replicate(computation, inputs, infeed_queue, device_assignment, name, maximum_shapes, padding_spec)
    913       name,
    914       maximum_shapes=maximum_shapes,
--> 915       padding_spec=padding_spec)[1]
    916 
    917 

/usr/local/lib/python3.7/dist-packages/tensorflow/python/tpu/tpu.py in split_compile_and_replicate(***failed resolving arguments***)
   1378       vscope.set_custom_getter(custom_getter)
   1379 
-> 1380       outputs = computation(*computation_inputs)
   1381 
   1382       vscope.set_use_resource(saved_use_resource)

/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in replicated_fn(replica_id, replica_args, replica_kwargs)
   1122         """Wraps user function to provide replica ID and `Tensor` inputs."""
   1123         with _TPUReplicaContext(strategy, replica_id_in_sync_group=replica_id):
-> 1124           result[0] = fn(*replica_args, **replica_kwargs)
   1125         return result[0]
   1126 

/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
    253       try:
    254         with conversion_ctx:
--> 255           return converted_call(f, args, kwargs, options=options)
    256       except Exception as e:  # pylint:disable=broad-except
    257         if hasattr(e, 'ag_error_metadata'):

/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py in converted_call(f, args, kwargs, caller_fn_scope, options)
    530 
    531   if not options.user_requested and conversion.is_whitelisted(f):
--> 532     return _call_unconverted(f, args, kwargs, options)
    533 
    534   # internal_convert_user_code is for example turned off when issuing a dynamic

/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py in _call_unconverted(f, args, kwargs, options, update_cache)
    337 
    338   if kwargs is not None:
--> 339     return f(*args, **kwargs)
    340   return f(*args)
    341 

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in _test_step_fn(inputs)
    331 
    332     (distribution_strategy_context.get_replica_context().merge_call(
--> 333         _build_model, args=(model, mode, inputs, targets)))
    334 
    335     (_, outputs, updates, _) = _per_replica_execution_function(

/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py in merge_call(self, merge_fn, args, kwargs)
   2713     merge_fn = autograph.tf_convert(
   2714         merge_fn, autograph_ctx.control_status_ctx(), convert_by_default=False)
-> 2715     return self._merge_call(merge_fn, args, kwargs)
   2716 
   2717   def _merge_call(self, merge_fn, args, kwargs):

/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py in _merge_call(self, merge_fn, args, kwargs)
   2720         distribution_strategy_context._CrossReplicaThreadMode(self._strategy))  # pylint: disable=protected-access
   2721     try:
-> 2722       return merge_fn(self._strategy, *args, **kwargs)
   2723     finally:
   2724       _pop_per_thread_mode()

/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
    273   def wrapper(*args, **kwargs):
    274     with ag_ctx.ControlStatusCtx(status=ag_ctx.Status.UNSPECIFIED):
--> 275       return func(*args, **kwargs)
    276 
    277   if inspect.isfunction(func) or inspect.ismethod(func):

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in _build_model(strategy, model, mode, inputs, targets)
     55   else:
     56     dist_utils._build_distributed_network(model, strategy, mode, inputs,
---> 57                                           targets)
     58 
     59 

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/distribute/distributed_training_utils.py in _build_distributed_network(model, strategy, mode, inputs, targets)
    780     distributed_model = strategy.extended.call_for_each_replica(
    781         _build_network_on_replica,
--> 782         args=(model, mode, inputs, targets))
    783     set_distributed_model(model, mode, distributed_model)
    784 

/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py in call_for_each_replica(self, fn, args, kwargs)
   2583       kwargs = {}
   2584     with self._container_strategy().scope():
-> 2585       return self._call_for_each_replica(fn, args, kwargs)
   2586 
   2587   def _call_for_each_replica(self, fn, args, kwargs):

/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in _call_for_each_replica(self, fn, args, kwargs)
    741     # we're in a tpu.rewrite(), and update TPUMirroredVariable accordingly.
    742     with _TPUReplicaContext(self._container_strategy()):
--> 743       return fn(*args, **kwargs)
    744 
    745   @contextlib.contextmanager

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/distribute/distributed_training_utils.py in _build_network_on_replica(model, mode, inputs, targets)
    740   else:
    741     updated_model = models._clone_functional_model(
--> 742         model, input_tensors=inputs, layer_fn=models.share_weights)
    743     # Callable losses added directly to a functional Model need to be added
    744     # here.

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/models.py in _clone_functional_model(model, input_tensors, layer_fn)
    178     input_tensors = nest.flatten(input_tensors)
    179     for i, input_tensor in enumerate(input_tensors):
--> 180       original_input_layer = model._input_layers[i]
    181 
    182       # Cache input layer. Create a new layer if the tensor is originally not

IndexError: list index out of range
---------------------------------------------------------------------------
索引器回溯(最后一次最近调用)
在()
20次回调=[lr_回调],
21验证\数据=获取\值\数据(),
--->22验证(步骤=100)
23
24打开(“trainHistoryDict”,“wb”)作为文件
22帧
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training\u v1.py in fit(self、x、y、批大小、历元、冗余、回调、验证拆分、验证数据、无序排列、类权重、样本权重、初始历元、每历元步数、验证步骤、验证频率、最大队列大小、工人、使用多处理、**kwargs)
807最大队列大小=最大队列大小,
808名工人=工人,
-->809使用\多处理=使用\多处理)
810
811 def评估(自我,
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training\u distributed.py in fit(self、model、x、y、批大小、epoch、verbose、回调、验证拆分、验证数据、无序、类权重、样本权重、初始epoch、每epoch步数、验证步数、验证频率、**kwargs)
668步/u历元=步/u历元,
669验证步骤=验证步骤,
-->670验证频率=验证频率)
671
672返回训练\u数组.fit\u循环(
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training\u distributed.py在实验性的\u-tpu\u-fit\u循环中(模型、数据集、历元、冗余、回调、初始历元、每历元步骤、val\u数据集、验证步骤、验证频率)
271步骤=验证步骤,
272详细=详细,
-->273回调=回调)
274如果不存在(val_out,列表):
275 val_outs=[val_outs]
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training\u distributed.py在实验性的测试循环中(模型、数据集、详细信息、步骤、回调)
340测试\输入\数据=迭代器。获取\下一步()
341个副本\u输出=当前\u策略.run(
-->342测试步骤fn,args=(测试输入数据)
343输出_张量={}
344对于标签,在zip中输出(输出标签,每个副本输出):
/运行中的usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py(self、fn、args、kwargs、options)
344 fn=自动签名.tf\u转换(fn,自动签名\u ctx.control\u status\u ctx())
345选项=选项或分发库RunOptions()
-->346返回self.extended.tpu_运行(fn、args、kwargs、options)
347
348@property
/tpu运行中的usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py(self、fn、args、kwargs、options)
1093 def tpu_运行(self、fn、args、kwargs、options=None):
1094 func=self.\u tpu\u函数\u创建者(fn,选项)
->1095返回函数(args,kwargs)
1096
1097 def_tpu_函数_创建者(自身、fn、选项):
/tpu_函数中的usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py(args,kwargs)
1160设备分配=自身设备分配,
1161最大_形=最大_形,
->1162填充规格=填充规格)
1163
1164#删除“tpu.replicate()”期间可能添加的所有no ops
/复制中的usr/local/lib/python3.7/dist-packages/tensorflow/python/tpu/tpu.py(计算、输入、馈入队列、设备分配、名称、最大形状、填充规格)
913姓名,
914最大_形=最大_形,
-->915填充规格=填充规格[1]
916
917
/拆分、编译和复制中的usr/local/lib/python3.7/dist-packages/tensorflow/python/tpu/tpu.py(***解析参数失败***)
1378 vscope.set_custom_getter(custom_getter)
1379
->1380输出=计算(*计算输入)
1381
1382 vscope.设置使用资源(已保存使用资源)
/复制的fn中的usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py(副本id、副本参数、副本kwargs)
1122“包装用户函数以提供副本ID和“Tensor”输入。”“”
1123带有_tpureplicationcontext(策略,同步组中的副本id=副本id):
->1124结果[0]=fn(*副本参数,**副本参数)
1125返回结果[0]
1126
/包装器中的usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py(*args,**kwargs)
253尝试:
254与转换_ctx:
-->255返回转换的呼叫(f、args、kwargs、options=options)
256例外情况为e:#pylint:disable=broad except
257如果hasattr(即“ag\u错误\u元数据”):
/转换调用中的usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py(f、args、kwargs、caller\u fn\u范围、选项)
530
531如果不是options.user,则请求转换。是否被列入白名单(f):
-->532返回-调用-未转换(f、ARG、kwargs、选项)
533
534#内部#转换#用户#代码在发出动态
/usr/local/lib/python3.7/dist-packages/tensorflow/python
with tpu_strategy.scope(): # creating the model in the TPUStrategy scope means we will train the model on the TPU

    # Build the model inside the strategy scope so its variables live on the TPU.
    model = create_model()
    # Per-epoch weight checkpoint. NOTE(review): defined but not passed to
    # fit() below, so it never runs — confirm whether that is intended.
    periodic_save = keras.callbacks.ModelCheckpoint('model_{epoch:03d}.h5', 
                                 save_weights_only=True, save_freq="epoch")


    # Train 5 epochs of 100 steps on the repeated training set; passing
    # validation_data is what triggers the TPU validation code path that
    # raises the IndexError shown in the traceback below.
    hist = model.fit(get_train_ds().repeat(), 
                steps_per_epoch = 100,
                epochs = 5,
                verbose = 1,
                callbacks = [lr_callback],
                validation_data = get_val_ds(),
                validation_steps = 10)
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-23-d088df3513e2> in <module>()
     20                     callbacks = [lr_callback],
     21                     validation_data = get_val_ds(),
---> 22                     validation_steps = 100)
     23 
     24 with open("trainHistoryDict", 'wb') as file_pi:

22 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_v1.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    807         max_queue_size=max_queue_size,
    808         workers=workers,
--> 809         use_multiprocessing=use_multiprocessing)
    810 
    811   def evaluate(self,

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
    668             steps_per_epoch=steps_per_epoch,
    669             validation_steps=validation_steps,
--> 670             validation_freq=validation_freq)
    671 
    672     return training_arrays.fit_loop(

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in experimental_tpu_fit_loop(model, dataset, epochs, verbose, callbacks, initial_epoch, steps_per_epoch, val_dataset, validation_steps, validation_freq)
    271           steps=validation_steps,
    272           verbose=verbose,
--> 273           callbacks=callbacks)
    274       if not isinstance(val_outs, list):
    275         val_outs = [val_outs]

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in experimental_tpu_test_loop(model, dataset, verbose, steps, callbacks)
    340   test_input_data = iterator.get_next()
    341   per_replica_outputs = current_strategy.run(
--> 342       _test_step_fn, args=(test_input_data,))
    343   output_tensors = {}
    344   for label, output in zip(out_labels, per_replica_outputs):

/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in run(self, fn, args, kwargs, options)
    344     fn = autograph.tf_convert(fn, autograph_ctx.control_status_ctx())
    345     options = options or distribute_lib.RunOptions()
--> 346     return self.extended.tpu_run(fn, args, kwargs, options)
    347 
    348   @property

/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in tpu_run(self, fn, args, kwargs, options)
   1093   def tpu_run(self, fn, args, kwargs, options=None):
   1094     func = self._tpu_function_creator(fn, options)
-> 1095     return func(args, kwargs)
   1096 
   1097   def _tpu_function_creator(self, fn, options):

/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in tpu_function(args, kwargs)
   1160             device_assignment=self._device_assignment,
   1161             maximum_shapes=maximum_shapes,
-> 1162             padding_spec=padding_spec)
   1163 
   1164       # Remove all no ops that may have been added during 'tpu.replicate()'

/usr/local/lib/python3.7/dist-packages/tensorflow/python/tpu/tpu.py in replicate(computation, inputs, infeed_queue, device_assignment, name, maximum_shapes, padding_spec)
    913       name,
    914       maximum_shapes=maximum_shapes,
--> 915       padding_spec=padding_spec)[1]
    916 
    917 

/usr/local/lib/python3.7/dist-packages/tensorflow/python/tpu/tpu.py in split_compile_and_replicate(***failed resolving arguments***)
   1378       vscope.set_custom_getter(custom_getter)
   1379 
-> 1380       outputs = computation(*computation_inputs)
   1381 
   1382       vscope.set_use_resource(saved_use_resource)

/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in replicated_fn(replica_id, replica_args, replica_kwargs)
   1122         """Wraps user function to provide replica ID and `Tensor` inputs."""
   1123         with _TPUReplicaContext(strategy, replica_id_in_sync_group=replica_id):
-> 1124           result[0] = fn(*replica_args, **replica_kwargs)
   1125         return result[0]
   1126 

/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
    253       try:
    254         with conversion_ctx:
--> 255           return converted_call(f, args, kwargs, options=options)
    256       except Exception as e:  # pylint:disable=broad-except
    257         if hasattr(e, 'ag_error_metadata'):

/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py in converted_call(f, args, kwargs, caller_fn_scope, options)
    530 
    531   if not options.user_requested and conversion.is_whitelisted(f):
--> 532     return _call_unconverted(f, args, kwargs, options)
    533 
    534   # internal_convert_user_code is for example turned off when issuing a dynamic

/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py in _call_unconverted(f, args, kwargs, options, update_cache)
    337 
    338   if kwargs is not None:
--> 339     return f(*args, **kwargs)
    340   return f(*args)
    341 

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in _test_step_fn(inputs)
    331 
    332     (distribution_strategy_context.get_replica_context().merge_call(
--> 333         _build_model, args=(model, mode, inputs, targets)))
    334 
    335     (_, outputs, updates, _) = _per_replica_execution_function(

/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py in merge_call(self, merge_fn, args, kwargs)
   2713     merge_fn = autograph.tf_convert(
   2714         merge_fn, autograph_ctx.control_status_ctx(), convert_by_default=False)
-> 2715     return self._merge_call(merge_fn, args, kwargs)
   2716 
   2717   def _merge_call(self, merge_fn, args, kwargs):

/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py in _merge_call(self, merge_fn, args, kwargs)
   2720         distribution_strategy_context._CrossReplicaThreadMode(self._strategy))  # pylint: disable=protected-access
   2721     try:
-> 2722       return merge_fn(self._strategy, *args, **kwargs)
   2723     finally:
   2724       _pop_per_thread_mode()

/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
    273   def wrapper(*args, **kwargs):
    274     with ag_ctx.ControlStatusCtx(status=ag_ctx.Status.UNSPECIFIED):
--> 275       return func(*args, **kwargs)
    276 
    277   if inspect.isfunction(func) or inspect.ismethod(func):

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_distributed.py in _build_model(strategy, model, mode, inputs, targets)
     55   else:
     56     dist_utils._build_distributed_network(model, strategy, mode, inputs,
---> 57                                           targets)
     58 
     59 

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/distribute/distributed_training_utils.py in _build_distributed_network(model, strategy, mode, inputs, targets)
    780     distributed_model = strategy.extended.call_for_each_replica(
    781         _build_network_on_replica,
--> 782         args=(model, mode, inputs, targets))
    783     set_distributed_model(model, mode, distributed_model)
    784 

/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py in call_for_each_replica(self, fn, args, kwargs)
   2583       kwargs = {}
   2584     with self._container_strategy().scope():
-> 2585       return self._call_for_each_replica(fn, args, kwargs)
   2586 
   2587   def _call_for_each_replica(self, fn, args, kwargs):

/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py in _call_for_each_replica(self, fn, args, kwargs)
    741     # we're in a tpu.rewrite(), and update TPUMirroredVariable accordingly.
    742     with _TPUReplicaContext(self._container_strategy()):
--> 743       return fn(*args, **kwargs)
    744 
    745   @contextlib.contextmanager

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/distribute/distributed_training_utils.py in _build_network_on_replica(model, mode, inputs, targets)
    740   else:
    741     updated_model = models._clone_functional_model(
--> 742         model, input_tensors=inputs, layer_fn=models.share_weights)
    743     # Callable losses added directly to a functional Model need to be added
    744     # here.

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/models.py in _clone_functional_model(model, input_tensors, layer_fn)
    178     input_tensors = nest.flatten(input_tensors)
    179     for i, input_tensor in enumerate(input_tensors):
--> 180       original_input_layer = model._input_layers[i]
    181 
    182       # Cache input layer. Create a new layer if the tensor is originally not

IndexError: list index out of range