Python / TensorFlow 2, subclassed custom model: Cannot convert a Tensor of dtype resource to a NumPy array
I am new to TensorFlow 2 and am using tensorflow 2.3.1, CPU version. I defined my model by subclassing, and when displaying the model's structure I ran into the error "tensorflow.python.framework.errors_impl.InvalidArgumentError: Cannot convert a Tensor of dtype resource to a NumPy array", which points to the following line in BST_DSSM.build_model:

self.item_sequence_embeddings = tf.nn.embedding_lookup(

I have browsed similar questions but could not find a satisfactory solution. Any help would be appreciated :) My full code is below.
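Before the full code: the following much smaller snippet seems to reproduce the same error. It is my own minimal reconstruction, so treat the exact trigger as an assumption on my part.

import tensorflow as tf

# Assumed minimal trigger: a plain tf.Variable used as a lookup table against
# a symbolic tf.keras.Input, outside of any Keras layer.
table = tf.Variable(tf.random.uniform([1000, 8]), name="lut")
idx = tf.keras.Input(shape=(15,), dtype=tf.int32)
emb = tf.nn.embedding_lookup(table, idx)  # InvalidArgumentError on tf 2.3.1
model = tf.keras.Model(inputs=idx, outputs=emb)
model.summary()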
import tensorflow as tf


class MultiHeadAttention(tf.keras.layers.Layer):
    """Multi-head attention layer.

    q, k, v are multiplied by Wq, Wk, Wv respectively -> q', k', v'
    q' * k' -> w, w / sqrt(q'.shape[1]) -> w'
    w' * v' -> z, z * Wz -> z'
    z' is added to v (residual), passed through LeakyReLU, then LayerNorm.
    """
    def __init__(
            self,
            scope_name,
            num_units=8,
            num_heads=1,
            embed_dim=8,
            has_residual=True,
            dropout_keep_prob=1.0):
        super(MultiHeadAttention, self).__init__()
        assert num_units % num_heads == 0
        assert scope_name in ["user", "item"]
        self.num_heads = num_heads
        self.num_units = num_units
        self.embed_dim = embed_dim
        self.dropout_keep_prob = dropout_keep_prob
        self.Wq = tf.keras.layers.Dense(
            units=self.num_units, activation=tf.nn.leaky_relu, name=f"{scope_name}_Wq")
        self.Wk = tf.keras.layers.Dense(
            units=self.num_units, activation=tf.nn.leaky_relu, name=f"{scope_name}_Wk")
        self.Wv = tf.keras.layers.Dense(
            units=self.num_units, activation=tf.nn.leaky_relu, name=f"{scope_name}_Wv")
        self.has_residual = has_residual
        self.Wz = tf.keras.layers.Dense(embed_dim)
        # Sub-layers are created once here rather than inside call(), so their
        # weights are tracked and not re-created on every forward pass.
        self.dropout = tf.keras.layers.AlphaDropout(rate=1 - dropout_keep_prob)
        self.layer_norm = tf.keras.layers.LayerNormalization(
            beta_initializer="zeros", gamma_initializer="ones")
    def call(self, queries, keys_, values):
        """
        :param queries: of shape [batch_size, max_length, embed_dim]
        :param keys_: of shape [batch_size, max_length, embed_dim]
        :param values: of shape [batch_size, max_length, embed_dim]
        :return: tensor of shape [batch_size, max_length, embed_dim]
        """
        assert values.get_shape().as_list()[-1] == self.embed_dim
        assert queries.get_shape().as_list()[-1] == self.embed_dim
        assert keys_.get_shape().as_list()[-1] == self.embed_dim
        # Linear projections
        Q = self.Wq(queries)
        K = self.Wk(keys_)
        V = self.Wv(values)
        # Split heads along the feature axis and stack them on the batch axis
        Q_ = tf.concat(tf.split(Q, self.num_heads, axis=2), axis=0)
        K_ = tf.concat(tf.split(K, self.num_heads, axis=2), axis=0)
        V_ = tf.concat(tf.split(V, self.num_heads, axis=2), axis=0)
        # Attention scores
        weights = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1]))
        # Scale by sqrt of the per-head dimension
        weights = weights / (K_.get_shape().as_list()[-1] ** 0.5)
        # Convert scores to a probability distribution
        weights = tf.nn.softmax(weights)
        # Dropout
        if 0 < self.dropout_keep_prob < 1:
            weights = self.dropout(weights)
        # Weighted sum: [batch_size * num_heads, max_length, num_units / num_heads]
        outputs = tf.matmul(weights, V_)
        # Restore shape to [batch_size, max_length, num_units]
        z = tf.concat(tf.split(outputs, self.num_heads, axis=0), axis=2)
        # Project back to [batch_size, max_length, embed_dim]
        z = self.Wz(z)
        # Residual connection
        if self.has_residual:
            z += values
        z = tf.nn.leaky_relu(z)
        # Normalize
        z = self.layer_norm(z)
        return z
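For what it is worth, the attention layer seems fine in isolation; a quick eager-mode check along these lines (dummy shapes, my own sketch) runs without the error:

# Eager-mode sanity check with dummy data; batch size and shapes are made up.
mha_check = MultiHeadAttention("item", num_units=8, num_heads=1, embed_dim=8)
x = tf.random.uniform([4, 15, 8])  # [batch_size, max_length, embed_dim]
out = mha_check(x, keys_=x, values=x)
print(out.shape)  # expected: (4, 15, 8)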
class BST_DSSM(tf.keras.Model):
    """Define the BST + DSSM model structure."""
    def __init__(self, model_dir,
                 item_embedding=None, user_embedding=None,
                 embedding_size=8,
                 vocab_size=1000,
                 max_length_item=15, max_length_user=6,
                 epoch=10, batch_size=256, blocks=2,
                 learning_rate=0.001, optimizer_type="adam",
                 batch_norm=0, batch_norm_decay=0.995,
                 verbose=False, random_seed=2019,
                 l2_reg=0.0, has_residual=True):
        """
        Initialize model-related parameters and tensors.
        """
        super(BST_DSSM, self).__init__()
        # denoted as K, size of the feature embedding
        self.embedding_size = embedding_size
        self.l2_reg = l2_reg
        self.epoch = epoch
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.optimizer_type = optimizer_type
        self.optimizer = None
        self.blocks = blocks
        self.batch_norm = batch_norm
        self.batch_norm_decay = batch_norm_decay
        self.verbose = verbose
        self.random_seed = random_seed
        self.model_dir = model_dir
        # self._init_graph()
        self.vocab_size = vocab_size
        self.max_length_item = max_length_item
        self.max_length_user = max_length_user
        self.has_residual = has_residual
        self.model = None
        self.item_embedding = item_embedding
        self.user_embedding = user_embedding
        self.mha_user = MultiHeadAttention("user", num_units=embedding_size)
        self.mha_item = MultiHeadAttention("item", num_units=embedding_size)

    def _get_item_embedding_matrix(self):
        if self.item_embedding is None:
            std = 0.1
            minval = -std
            maxval = std
            emb_matrix = tf.Variable(
                tf.random.uniform(
                    [self.vocab_size, self.embedding_size],
                    minval, maxval,
                    seed=self.random_seed,
                    dtype=tf.float32),
                name="item_embedding")
            self.item_embedding = emb_matrix

    def _get_user_embedding_matrix(self):
        if self.user_embedding is None:
            std = 0.1
            minval = -std
            maxval = std
            emb_matrix = tf.Variable(
                tf.random.uniform(
                    [self.vocab_size, self.embedding_size],
                    minval, maxval,
                    seed=self.random_seed,
                    dtype=tf.float32),
                name="user_embedding")
            self.user_embedding = emb_matrix
    def build_model(self):
        # initialize lookup tables
        self._get_item_embedding_matrix()
        self._get_user_embedding_matrix()
        item_inputs = tf.keras.Input(
            shape=(self.max_length_item,),
            dtype=tf.int32,
            name="item_sequence_idx")
        user_inputs = tf.keras.Input(
            shape=(self.max_length_user,),
            dtype=tf.int32,
            name="user_sequence_idx")
        # user and item use different lookup tables, similar to DSSM
        # NOTE: this is the line the InvalidArgumentError points to
        self.item_sequence_embeddings = tf.nn.embedding_lookup(
            self.item_embedding, item_inputs, name="item_sequence_embeddings")
        self.video_sequence_embeddings = tf.nn.embedding_lookup(
            self.user_embedding, user_inputs, name="video_sequence_embeddings")
        # self-attention blocks
        for i in range(self.blocks):
            self.item_sequence_embeddings = self.mha_item(
                queries=self.item_sequence_embeddings,
                keys_=self.item_sequence_embeddings,
                values=self.item_sequence_embeddings)
            self.video_sequence_embeddings = self.mha_user(
                queries=self.video_sequence_embeddings,
                keys_=self.video_sequence_embeddings,
                values=self.video_sequence_embeddings)
        # max pooling over the sequence dimension
        self.item_sequence_embeddings = tf.nn.max_pool(
            self.item_sequence_embeddings,
            [1, self.max_length_item, 1],
            [1 for _ in range(len(self.item_sequence_embeddings.shape))],
            padding="VALID")
        self.video_sequence_embeddings = tf.nn.max_pool(
            self.video_sequence_embeddings,
            [1, self.max_length_user, 1],
            [1 for _ in range(len(self.video_sequence_embeddings.shape))],
            padding="VALID")
        # cosine similarity
        self.item_sequence_embeddings = tf.nn.l2_normalize(
            self.item_sequence_embeddings, axis=2)
        self.video_sequence_embeddings = tf.nn.l2_normalize(
            self.video_sequence_embeddings, axis=2)
        outputs = tf.matmul(
            self.item_sequence_embeddings,
            tf.transpose(self.video_sequence_embeddings, [0, 2, 1]))
        outputs = tf.reshape(outputs, [-1, 1])
        # optimizer
        if self.optimizer_type == "adam":
            self.optimizer = tf.keras.optimizers.Adam(
                learning_rate=self.learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
        elif self.optimizer_type == "adagrad":
            self.optimizer = tf.keras.optimizers.Adagrad(
                learning_rate=self.learning_rate,
                initial_accumulator_value=1e-8)
        elif self.optimizer_type == "gd":
            self.optimizer = tf.keras.optimizers.SGD(
                learning_rate=self.learning_rate)
        elif self.optimizer_type == "momentum":
            self.optimizer = tf.keras.optimizers.SGD(
                learning_rate=self.learning_rate, momentum=0.95)
        self.model = tf.keras.Model(
            inputs={
                "item_sequence_idx": item_inputs,
                "user_sequence_idx": user_inputs
            },
            outputs=outputs)
        self.model.compile(
            optimizer=self.optimizer,
            loss=self.loss_fn,  # loss_fn is defined elsewhere, not shown here
            metrics=[
                tf.keras.metrics.AUC(),
                tf.keras.metrics.BinaryAccuracy()])
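Finally, this is roughly the driver code I use to build and display the model; the error is raised during this step (model_dir is a placeholder path):

# Hypothetical driver: builds the model and prints its structure; this is
# where the InvalidArgumentError surfaces.
bst_dssm = BST_DSSM(model_dir="./model_dir")
bst_dssm.build_model()
bst_dssm.model.summary()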