Python Tensorflow RNN:如何推断没有重复项的序列?
我正在使用seq2seq RNN生成给定种子标签的标签输出序列。在推断步骤中,我希望生成只包含唯一标签的序列(即跳过已添加到输出序列中的标签)。为此,我创建了一个sampler对象,它试图记住已添加到输出中的标签,并将它们的logit值减少到 -np.inf。
标签:python, tensorflow, recurrent-neural-network, rnn, sequence-to-sequence
以下是取样器代码:
class InferenceSampler(object):
    """Greedy sampler that penalises ids it has already emitted.

    Decoder outputs are projected to logits, an additive mask is applied and
    the argmax is returned. After each call, ``-inf`` is added to the mask at
    the sampled positions.

    NOTE: ``ids_mask`` is rebound as an ordinary Python attribute. When this
    object is invoked while a ``tf.while_loop`` body is being built (as
    ``dynamic_decode`` does), the rebinding happens once at graph-construction
    time, so the mask read by the loop is the initial all-zeros tensor and is
    not carried from one iteration to the next.
    """

    def __init__(self, out_weights, out_biases):
        # presumably out_weights is (n_tracks, dim) - TODO confirm with caller
        self._n_tracks = out_weights.shape[0]
        # Transposed once so that __call__ can use a plain matmul.
        self._out_weights = tf.transpose(out_weights)
        self._out_biases = out_biases
        # Additive mask over track ids; starts as all zeros (nothing masked).
        self.ids_mask = tf.zeros([self._n_tracks], name="playlist_mask")

    def __call__(self, decoder_outputs):
        """Return the int32 argmax ids of the masked logits."""
        projected = tf.matmul(decoder_outputs, self._out_weights)
        # Apply the current mask on top of the biased logits.
        scores = tf.nn.bias_add(projected, self._out_biases) + self.ids_mask
        chosen = tf.cast(tf.argmax(scores, axis=-1), tf.int32)
        # Record the chosen ids by adding -inf at their positions.
        penalty = tf.sparse_to_dense(chosen, [self._n_tracks], -np.inf)
        self.ids_mask = self.ids_mask + penalty
        return chosen
推理图的代码如下所示:
# Placeholders: maximum number of decoding steps and one start token per row.
self._max_playlist_len = tf.placeholder(dtype=tf.int32, shape=())
self._start_tokens = tf.placeholder(dtype=tf.int32, shape=[None])
sample_fn = InferenceSampler(out_weights, out_biases)

with tf.name_scope("inf_decoder"):

    def _is_finished(sample_ids):
        # A sequence ends as soon as the padding id is sampled.
        return tf.equal(sample_ids, PAD_ITEM_ID)

    def _embed_ids(sample_ids):
        # The next decoder input is the embedding of the id just sampled.
        return tf.nn.embedding_lookup(track_embs, sample_ids)

    _start_inputs = tf.nn.embedding_lookup(track_embs, self._start_tokens)

    helper = tf.contrib.seq2seq.InferenceHelper(
        sample_fn=sample_fn,
        sample_shape=[],
        sample_dtype=tf.int32,
        start_inputs=_start_inputs,
        end_fn=_is_finished,
        next_inputs_fn=_embed_ids,
    )

    # Batch size is taken from the start-token placeholder at run time.
    initial_state = rnn_cell.zero_state(
        tf.shape(self._start_tokens)[0], tf.float32)
    decoder = tf.contrib.seq2seq.BasicDecoder(
        rnn_cell,
        helper,
        initial_state,
        output_layer=projection_layer,
    )

    # Only the first element (the decoder outputs) of the result is used.
    outputs = tf.contrib.seq2seq.dynamic_decode(
        decoder,
        maximum_iterations=self._max_playlist_len,
    )[0]
    self.playlists = outputs.sample_id
# NOTE(review): the original indentation was lost; the post-processing ops
# below are kept inside the name scope - TODO confirm against the real file.
with tf.name_scope("inf_decoder"):

    def _passthrough_sample(decoder_outputs):
        # The "sample" is the projected decoder output itself.
        return decoder_outputs

    def _never_finished(sample_ids):
        # infinite: decoding stops only through maximum_iterations
        return tf.tile([False], [n_seeds])

    _start_inputs = tf.nn.embedding_lookup(track_embs, self._seed_items)

    helper = tf.contrib.seq2seq.InferenceHelper(
        sample_fn=_passthrough_sample,
        sample_shape=[self.emb_size],
        sample_dtype=tf.float32,
        start_inputs=_start_inputs,
        end_fn=_never_finished,
    )

    initial_state = rnn_cell.zero_state(n_seeds, tf.float32)
    decoder = tf.contrib.seq2seq.BasicDecoder(
        rnn_cell,
        helper,
        initial_state,
        output_layer=projection_layer,
    )

    outputs = tf.contrib.seq2seq.dynamic_decode(
        decoder,
        maximum_iterations=self._max_playlist_len,
    )[0]

    # Turn the per-step output vectors into item ids after decoding is done:
    # flatten to rows of emb_size, score against the output weights, argmax.
    flat_rnn_output = tf.reshape(outputs.rnn_output, [-1, self.emb_size])
    flat_logits = tf.nn.bias_add(
        tf.matmul(flat_rnn_output, out_weights, transpose_b=True),
        out_biases,
    )
    item_ids = tf.cast(tf.argmax(flat_logits, axis=-1), tf.int32)
    # One row of ids per seed.
    playlists = tf.reshape(item_ids, [n_seeds, -1])
    self.playlists = playlists
不幸的是,结果中仍然有重复的标签。此外,当我试图访问 sample_fn.ids_mask 时,我收到一条错误消息:ValueError: Operation 'inf_decoder/decoder/while/BasicDecoderStep/add_1' has been marked as not fetchable.
我做错了什么?创建这样的 sample_fn 合法吗?
为了克服这个问题,我更新了推理过程:在每个RNN步骤中输出嵌入向量,而不是项目id;推理完成后,再将嵌入向量转换为 item_id。
首先,此解决方案将运算量降至最低。其次,由于我使用了LSTM/GRU单元,它们使在RNN推理的不同步骤上出现两个完全相同输出的概率降到最低。
新代码如下所示:
# Placeholders: maximum number of decoding steps and one start token per row.
self._max_playlist_len = tf.placeholder(dtype=tf.int32, shape=())
self._start_tokens = tf.placeholder(dtype=tf.int32, shape=[None])
sample_fn = InferenceSampler(out_weights, out_biases)

with tf.name_scope("inf_decoder"):

    def _is_finished(sample_ids):
        # A sequence ends as soon as the padding id is sampled.
        return tf.equal(sample_ids, PAD_ITEM_ID)

    def _embed_ids(sample_ids):
        # The next decoder input is the embedding of the id just sampled.
        return tf.nn.embedding_lookup(track_embs, sample_ids)

    _start_inputs = tf.nn.embedding_lookup(track_embs, self._start_tokens)

    helper = tf.contrib.seq2seq.InferenceHelper(
        sample_fn=sample_fn,
        sample_shape=[],
        sample_dtype=tf.int32,
        start_inputs=_start_inputs,
        end_fn=_is_finished,
        next_inputs_fn=_embed_ids,
    )

    # Batch size is taken from the start-token placeholder at run time.
    initial_state = rnn_cell.zero_state(
        tf.shape(self._start_tokens)[0], tf.float32)
    decoder = tf.contrib.seq2seq.BasicDecoder(
        rnn_cell,
        helper,
        initial_state,
        output_layer=projection_layer,
    )

    # Only the first element (the decoder outputs) of the result is used.
    outputs = tf.contrib.seq2seq.dynamic_decode(
        decoder,
        maximum_iterations=self._max_playlist_len,
    )[0]
    self.playlists = outputs.sample_id
# NOTE(review): the original indentation was lost; the post-processing ops
# below are kept inside the name scope - TODO confirm against the real file.
with tf.name_scope("inf_decoder"):

    def _passthrough_sample(decoder_outputs):
        # The "sample" is the projected decoder output itself.
        return decoder_outputs

    def _never_finished(sample_ids):
        # infinite: decoding stops only through maximum_iterations
        return tf.tile([False], [n_seeds])

    _start_inputs = tf.nn.embedding_lookup(track_embs, self._seed_items)

    helper = tf.contrib.seq2seq.InferenceHelper(
        sample_fn=_passthrough_sample,
        sample_shape=[self.emb_size],
        sample_dtype=tf.float32,
        start_inputs=_start_inputs,
        end_fn=_never_finished,
    )

    initial_state = rnn_cell.zero_state(n_seeds, tf.float32)
    decoder = tf.contrib.seq2seq.BasicDecoder(
        rnn_cell,
        helper,
        initial_state,
        output_layer=projection_layer,
    )

    outputs = tf.contrib.seq2seq.dynamic_decode(
        decoder,
        maximum_iterations=self._max_playlist_len,
    )[0]

    # Turn the per-step output vectors into item ids after decoding is done:
    # flatten to rows of emb_size, score against the output weights, argmax.
    flat_rnn_output = tf.reshape(outputs.rnn_output, [-1, self.emb_size])
    flat_logits = tf.nn.bias_add(
        tf.matmul(flat_rnn_output, out_weights, transpose_b=True),
        out_biases,
    )
    item_ids = tf.cast(tf.argmax(flat_logits, axis=-1), tf.int32)
    # One row of ids per seed.
    playlists = tf.reshape(item_ids, [n_seeds, -1])
    self.playlists = playlists
所以,经过一些调查,我找到了与这个问题相关的所有疑问的答案。主要的问题是:InferenceSampler 中的 self.ids_mask 为什么没有更新?原因在于 dynamic_decode 的内部实现。根据 TensorFlow 问题追踪器中的说法:
“……只有在循环内部定义的张量才会在每次循环迭代中被计算。在循环外部定义的所有张量只会被计算一次。”
在我的例子中,self.ids_mask 是在循环外部定义的。这意味着我需要重写 dynamic_decode,才能得到我想要的效果。下面的代码是针对初始任务稍加修改的版本,但其作用几乎相同。
让我们从新的 dynamic_decode 开始,它应该创建并更新一个掩码,用来收集已经预测出的 sample_id。我省略了未修改的代码;请关注 initial_mask 和 mask 变量。
新的 dynamic_decode:
def dynamic_decode(decoder,
                   output_time_major=False,
                   impute_finished=False,
                   maximum_iterations=None,
                   parallel_iterations=32,
                   swap_memory=False,
                   scope=None):
    """Run `decoder` in a while loop, threading a mask through every step.

    This is an abridged listing: every `...` line stands for code that is not
    shown here (among other things, `condition`, `initial_time`,
    `initial_outputs_ta`, `initial_sequence_lengths` and the `final_*` values
    are defined in those omitted parts).

    Returns:
      `(final_outputs, final_state, final_sequence_lengths)`.
    """
    ...
    # initialize() is unpacked into four values here; the third one is the
    # initial mask that seeds the extra loop variable.
    initial_finished, initial_inputs, initial_mask, initial_state = decoder.initialize()
    ...

    def body(time, outputs_ta, state, inputs, finished, sequence_lengths, mask):
        """Internal while_loop body.

        Args:
          time: scalar int32 tensor.
          outputs_ta: structure of TensorArray.
          state: (structure of) state tensors and TensorArrays.
          inputs: (structure of) input tensors.
          finished: bool tensor (keeping track of what's finished).
          sequence_lengths: int32 tensor (keeping track of time of finish).
          mask: SparseTensor to remove already predicted items.

        Returns:
          `(time + 1, outputs_ta, next_state, next_inputs, next_finished,
            next_sequence_lengths, next_mask)`.
        """
        # The decoder step receives the current mask and hands back the
        # updated one alongside its usual results.
        (next_outputs, decoder_state, next_inputs, next_mask,
         decoder_finished) = decoder.step(time, inputs, state, mask)
        ...
        nest.assert_same_structure(state, decoder_state)
        nest.assert_same_structure(outputs_ta, next_outputs)
        nest.assert_same_structure(inputs, next_inputs)
        # The mask must keep the same structure from one iteration to the next.
        nest.assert_same_structure(mask, next_mask)
        ...
        return (time + 1, outputs_ta, next_state, next_inputs, next_finished,
                next_sequence_lengths, next_mask)

    # The mask is passed as the last loop variable so that each iteration
    # receives the value returned by the previous one.
    res = control_flow_ops.while_loop(
        condition,
        body,
        loop_vars=[
            initial_time, initial_outputs_ta, initial_state, initial_inputs,
            initial_finished, initial_sequence_lengths, initial_mask,
        ],
        parallel_iterations=parallel_iterations,
        swap_memory=swap_memory)
    ...
    return final_outputs, final_state, final_sequence_lengths
下一步,应将 mask 传递给 Decoder 和 Helper。以下是 BasicDecoder 和 InferenceHelper 的更新版本:
class MaskedDecoder(BasicDecoder):
    """BasicDecoder whose step passes a mask to, and gets one back from, the helper."""

    def step(self, time, inputs, state, mask, name=None):
        """Perform one decoding step.

        Args:
          time: current step, forwarded to the helper.
          inputs: inputs fed to the cell for this step.
          state: cell state from the previous step.
          mask: mask forwarded to the helper's `sample` and `next_inputs`.
          name: optional name for the op scope.

        Returns:
          `(outputs, next_state, next_inputs, next_mask, finished)`, where
          `outputs` is a `BasicDecoderOutput` of the (optionally projected)
          cell outputs and the sampled ids.
        """
        with ops.name_scope(name, "MaskedDecoderStep", (time, inputs, state, mask)):
            step_outputs, new_cell_state = self._cell(inputs, state)
            if self._output_layer is not None:
                step_outputs = self._output_layer(step_outputs)

            # Both helper calls take the same step context.
            helper_args = dict(
                time=time,
                outputs=step_outputs,
                state=new_cell_state,
                mask=mask,
            )
            picked_ids = self._helper.sample(**helper_args)
            finished, next_inputs, next_state, next_mask = (
                self._helper.next_inputs(sample_ids=picked_ids, **helper_args))

        result = BasicDecoderOutput(step_outputs, picked_ids)
        return (result, next_state, next_inputs, next_mask, finished)
class MaskedInferenceHelper(Helper):
    """Inference helper that greedily samples ids while masking used ones.

    The mask is a SparseTensor holding `+inf` at every (row, id) position that
    has already been used; `sample` subtracts it from the similarity scores
    so those positions can no longer win the argmax.
    """

    def __init__(self, norm_track_embs, features, start_sample_ids):
        # presumably norm_track_embs is L2-normalised, per its name - TODO confirm
        self._norm_track_embs = norm_track_embs
        self._features = features
        self._start_sample_ids = start_sample_ids
        # Dynamic sizes: one row per start id, one column per embedding row.
        self._batch_size = tf.shape(start_sample_ids)[0]
        self._n_tracks = tf.shape(norm_track_embs)[0]
        # Each sample is a scalar int32 id.
        self._sample_shape = tf.TensorShape([])
        self._sample_dtype = tf.int32

    def _get_sparse_mask(self, sample_ids):
        """Build a [batch_size, n_tracks] SparseTensor with inf at (row, sample_ids[row])."""
        dense_shape = tf.convert_to_tensor([
            tf.cast(self._batch_size, dtype=tf.int64),
            tf.cast(self._n_tracks, dtype=tf.int64),
        ])
        row_ids = tf.range(0, self._batch_size)
        # One (row, column) pair per batch entry.
        coords = tf.stack([row_ids, sample_ids], axis=1)
        indices = tf.cast(coords, dtype=tf.int64)
        values = tf.fill([self._batch_size], np.inf)
        return tf.SparseTensor(indices, values, dense_shape)

    ...

    def initialize(self, name=None):
        """Return `(finished, start_inputs, mask)`; the mask covers the start ids."""
        not_done = tf.tile([False], [self._batch_size])
        seed_embs = tf.nn.embedding_lookup(
            self._norm_track_embs, self._start_sample_ids)
        first_inputs = tf.concat([seed_embs, self._features], axis=1)
        seed_mask = self._get_sparse_mask(self._start_sample_ids)
        return not_done, first_inputs, seed_mask

    def sample(self, time, outputs, state, mask, name=None):
        """Return the int32 argmax id per row, with masked ids excluded."""
        del time, state  # unused by sample
        unit_outputs = tf.nn.l2_normalize(outputs, axis=-1)
        scores = tf.matmul(unit_outputs, self._norm_track_embs, transpose_b=True)
        # Subtracting +inf drives masked positions to -inf.
        masked_scores = scores - tf.sparse_tensor_to_dense(mask)
        return tf.cast(tf.argmax(masked_scores, axis=-1), tf.int32)

    def next_inputs(self, time, outputs, state, sample_ids, mask, name=None):
        """Return `(finished, next_inputs, state, next_mask)` for the next step."""
        del time, outputs  # unused by next_inputs
        # This helper never reports completion.
        not_done = tf.tile([False], [self._batch_size])
        picked_embs = tf.nn.embedding_lookup(self._norm_track_embs, sample_ids)
        following_inputs = tf.concat([picked_embs, self._features], axis=1)
        # Extend the mask with the ids sampled at this step.
        grown_mask = tf.sparse_add(mask, self._get_sparse_mask(sample_ids))
        return not_done, following_inputs, state, grown_mask
MaskedDecoder
:
class MaskedDecoder(BasicDecoder):
    """BasicDecoder whose step passes a mask to, and gets one back from, the helper."""

    def step(self, time, inputs, state, mask, name=None):
        """Perform one decoding step.

        Args:
          time: current step, forwarded to the helper.
          inputs: inputs fed to the cell for this step.
          state: cell state from the previous step.
          mask: mask forwarded to the helper's `sample` and `next_inputs`.
          name: optional name for the op scope.

        Returns:
          `(outputs, next_state, next_inputs, next_mask, finished)`, where
          `outputs` is a `BasicDecoderOutput` of the (optionally projected)
          cell outputs and the sampled ids.
        """
        with ops.name_scope(name, "MaskedDecoderStep", (time, inputs, state, mask)):
            step_outputs, new_cell_state = self._cell(inputs, state)
            if self._output_layer is not None:
                step_outputs = self._output_layer(step_outputs)

            # Both helper calls take the same step context.
            helper_args = dict(
                time=time,
                outputs=step_outputs,
                state=new_cell_state,
                mask=mask,
            )
            picked_ids = self._helper.sample(**helper_args)
            finished, next_inputs, next_state, next_mask = (
                self._helper.next_inputs(sample_ids=picked_ids, **helper_args))

        result = BasicDecoderOutput(step_outputs, picked_ids)
        return (result, next_state, next_inputs, next_mask, finished)
class MaskedInferenceHelper(Helper):
    """Inference helper that greedily samples ids while masking used ones.

    The mask is a SparseTensor holding `+inf` at every (row, id) position that
    has already been used; `sample` subtracts it from the similarity scores
    so those positions can no longer win the argmax.
    """

    def __init__(self, norm_track_embs, features, start_sample_ids):
        # presumably norm_track_embs is L2-normalised, per its name - TODO confirm
        self._norm_track_embs = norm_track_embs
        self._features = features
        self._start_sample_ids = start_sample_ids
        # Dynamic sizes: one row per start id, one column per embedding row.
        self._batch_size = tf.shape(start_sample_ids)[0]
        self._n_tracks = tf.shape(norm_track_embs)[0]
        # Each sample is a scalar int32 id.
        self._sample_shape = tf.TensorShape([])
        self._sample_dtype = tf.int32

    def _get_sparse_mask(self, sample_ids):
        """Build a [batch_size, n_tracks] SparseTensor with inf at (row, sample_ids[row])."""
        dense_shape = tf.convert_to_tensor([
            tf.cast(self._batch_size, dtype=tf.int64),
            tf.cast(self._n_tracks, dtype=tf.int64),
        ])
        row_ids = tf.range(0, self._batch_size)
        # One (row, column) pair per batch entry.
        coords = tf.stack([row_ids, sample_ids], axis=1)
        indices = tf.cast(coords, dtype=tf.int64)
        values = tf.fill([self._batch_size], np.inf)
        return tf.SparseTensor(indices, values, dense_shape)

    ...

    def initialize(self, name=None):
        """Return `(finished, start_inputs, mask)`; the mask covers the start ids."""
        not_done = tf.tile([False], [self._batch_size])
        seed_embs = tf.nn.embedding_lookup(
            self._norm_track_embs, self._start_sample_ids)
        first_inputs = tf.concat([seed_embs, self._features], axis=1)
        seed_mask = self._get_sparse_mask(self._start_sample_ids)
        return not_done, first_inputs, seed_mask

    def sample(self, time, outputs, state, mask, name=None):
        """Return the int32 argmax id per row, with masked ids excluded."""
        del time, state  # unused by sample
        unit_outputs = tf.nn.l2_normalize(outputs, axis=-1)
        scores = tf.matmul(unit_outputs, self._norm_track_embs, transpose_b=True)
        # Subtracting +inf drives masked positions to -inf.
        masked_scores = scores - tf.sparse_tensor_to_dense(mask)
        return tf.cast(tf.argmax(masked_scores, axis=-1), tf.int32)

    def next_inputs(self, time, outputs, state, sample_ids, mask, name=None):
        """Return `(finished, next_inputs, state, next_mask)` for the next step."""
        del time, outputs  # unused by next_inputs
        # This helper never reports completion.
        not_done = tf.tile([False], [self._batch_size])
        picked_embs = tf.nn.embedding_lookup(self._norm_track_embs, sample_ids)
        following_inputs = tf.concat([picked_embs, self._features], axis=1)
        # Extend the mask with the ids sampled at this step.
        grown_mask = tf.sparse_add(mask, self._get_sparse_mask(sample_ids))
        return not_done, following_inputs, state, grown_mask
MaskedInferenceHelper:
class MaskedDecoder(BasicDecoder):
    """BasicDecoder whose step passes a mask to, and gets one back from, the helper."""

    def step(self, time, inputs, state, mask, name=None):
        """Perform one decoding step.

        Args:
          time: current step, forwarded to the helper.
          inputs: inputs fed to the cell for this step.
          state: cell state from the previous step.
          mask: mask forwarded to the helper's `sample` and `next_inputs`.
          name: optional name for the op scope.

        Returns:
          `(outputs, next_state, next_inputs, next_mask, finished)`, where
          `outputs` is a `BasicDecoderOutput` of the (optionally projected)
          cell outputs and the sampled ids.
        """
        with ops.name_scope(name, "MaskedDecoderStep", (time, inputs, state, mask)):
            step_outputs, new_cell_state = self._cell(inputs, state)
            if self._output_layer is not None:
                step_outputs = self._output_layer(step_outputs)

            # Both helper calls take the same step context.
            helper_args = dict(
                time=time,
                outputs=step_outputs,
                state=new_cell_state,
                mask=mask,
            )
            picked_ids = self._helper.sample(**helper_args)
            finished, next_inputs, next_state, next_mask = (
                self._helper.next_inputs(sample_ids=picked_ids, **helper_args))

        result = BasicDecoderOutput(step_outputs, picked_ids)
        return (result, next_state, next_inputs, next_mask, finished)
class MaskedInferenceHelper(Helper):
    """Inference helper that greedily samples ids while masking used ones.

    The mask is a SparseTensor holding `+inf` at every (row, id) position that
    has already been used; `sample` subtracts it from the similarity scores
    so those positions can no longer win the argmax.
    """

    def __init__(self, norm_track_embs, features, start_sample_ids):
        # presumably norm_track_embs is L2-normalised, per its name - TODO confirm
        self._norm_track_embs = norm_track_embs
        self._features = features
        self._start_sample_ids = start_sample_ids
        # Dynamic sizes: one row per start id, one column per embedding row.
        self._batch_size = tf.shape(start_sample_ids)[0]
        self._n_tracks = tf.shape(norm_track_embs)[0]
        # Each sample is a scalar int32 id.
        self._sample_shape = tf.TensorShape([])
        self._sample_dtype = tf.int32

    def _get_sparse_mask(self, sample_ids):
        """Build a [batch_size, n_tracks] SparseTensor with inf at (row, sample_ids[row])."""
        dense_shape = tf.convert_to_tensor([
            tf.cast(self._batch_size, dtype=tf.int64),
            tf.cast(self._n_tracks, dtype=tf.int64),
        ])
        row_ids = tf.range(0, self._batch_size)
        # One (row, column) pair per batch entry.
        coords = tf.stack([row_ids, sample_ids], axis=1)
        indices = tf.cast(coords, dtype=tf.int64)
        values = tf.fill([self._batch_size], np.inf)
        return tf.SparseTensor(indices, values, dense_shape)

    ...

    def initialize(self, name=None):
        """Return `(finished, start_inputs, mask)`; the mask covers the start ids."""
        not_done = tf.tile([False], [self._batch_size])
        seed_embs = tf.nn.embedding_lookup(
            self._norm_track_embs, self._start_sample_ids)
        first_inputs = tf.concat([seed_embs, self._features], axis=1)
        seed_mask = self._get_sparse_mask(self._start_sample_ids)
        return not_done, first_inputs, seed_mask

    def sample(self, time, outputs, state, mask, name=None):
        """Return the int32 argmax id per row, with masked ids excluded."""
        del time, state  # unused by sample
        unit_outputs = tf.nn.l2_normalize(outputs, axis=-1)
        scores = tf.matmul(unit_outputs, self._norm_track_embs, transpose_b=True)
        # Subtracting +inf drives masked positions to -inf.
        masked_scores = scores - tf.sparse_tensor_to_dense(mask)
        return tf.cast(tf.argmax(masked_scores, axis=-1), tf.int32)

    def next_inputs(self, time, outputs, state, sample_ids, mask, name=None):
        """Return `(finished, next_inputs, state, next_mask)` for the next step."""
        del time, outputs  # unused by next_inputs
        # This helper never reports completion.
        not_done = tf.tile([False], [self._batch_size])
        picked_embs = tf.nn.embedding_lookup(self._norm_track_embs, sample_ids)
        following_inputs = tf.concat([picked_embs, self._features], axis=1)
        # Extend the mask with the ids sampled at this step.
        grown_mask = tf.sparse_add(mask, self._get_sparse_mask(sample_ids))
        return not_done, following_inputs, state, grown_mask
因此,现在我可以在不重复已预测项目的情况下生成推断结果。
评论:为什么要在 __call__ 内部重复调用 _sample_ids = tf.cast(tf.argmax(_logits, axis=-1), tf.int32)?您已经使用旧掩码计算了 _sample_ids;我认为如果在最后一次迭代中再次计算,mask 中将全部是 -inf。
评论:Oops……我会再检查一遍。但我想这是复制粘贴到 SO 时出的问题。
评论:我重新检查了代码,确实是复制粘贴的问题。但它仍然不起作用。我已经更新了问题。