Subclassing a custom model in Python TensorFlow 2: Cannot convert a Tensor of dtype resource to a NumPy array


I am new to TensorFlow 2 and am using tensorflow 2.3.1, CPU version.

I defined the model by subclassing, and when I display the model's structure I get the error "tensorflow.python.framework.errors_impl.InvalidArgumentError: Cannot convert a Tensor of dtype resource to a NumPy array", which points to the following line in BST_DSSM.build_model: "self.item_sequence_embeddings = tf.nn.embedding_lookup(".

I have looked through similar questions but could not find a satisfying solution. Any help would be appreciated :)
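If I strip everything else away, the pattern the traceback points at is a raw tf.Variable being looked up on a symbolic Keras input inside the functional API. A minimal sketch of just that pattern (names and sizes here are placeholders, not my real config):

import tensorflow as tf

emb_matrix = tf.Variable(                                 # lookup table built as a plain tf.Variable
    tf.random.uniform([1000, 8], -0.1, 0.1),
    name="item_embedding")
idx = tf.keras.Input(shape=(15,), dtype=tf.int32)         # symbolic input from the functional API
seq = tf.nn.embedding_lookup(emb_matrix, idx)             # the kind of call the error points to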

My full code is below.

import tensorflow as tf


class MultiHeadAttention(tf.keras.layers.Layer):
    """ def multi head attention layer

    q, k, v multiplied by Wq, Wk, Wv respectively -> q', k', v'
    q' * k' -> w, w / sqrt(q'.shape[1]) -> w'
    w' * v' -> z, z * Wz -> z'
    z' add v (residual), then goes through LRelu, do a LN at last
    """

    def __init__(
            self,
            scope_name,
            num_units=8,
            num_heads=1,
            embed_dim=8,
            has_residual=True,
            dropout_keep_prob=1.0):
        super(MultiHeadAttention, self).__init__()
        assert num_units % num_heads == 0
        assert scope_name in ["user", "item"]
        self.num_heads = num_heads
        self.num_units = num_units
        self.embed_dim = embed_dim
        self.dropout_keep_prob = dropout_keep_prob

        self.Wq = tf.keras.layers.Dense(
            units=self.num_units, activation=tf.nn.leaky_relu, name=f"{scope_name}_Wq")
        self.Wk = tf.keras.layers.Dense(
            units=self.num_units, activation=tf.nn.leaky_relu, name=f"{scope_name}_Wk")
        self.Wv = tf.keras.layers.Dense(
            units=self.num_units, activation=tf.nn.leaky_relu, name=f"{scope_name}_Wv")

        self.has_residual = has_residual
        self.Wz = tf.keras.layers.Dense(embed_dim)

    def call(self, queries, keys_, values):
        """

        :param queries: of shape [batch_size, max_length, emb_dim]
        :param keys_:  of shape [batch_size, max_length, emb_dim]
        :param values: of shape [batch_size, max_length, emb_dim]
        :return:
        """
        assert values.get_shape().as_list()[-1] == self.embed_dim
        assert queries.get_shape().as_list()[-1] == self.embed_dim
        assert keys_.get_shape().as_list()[-1] == self.embed_dim
        # Linear projections
        Q = self.Wq(queries)
        K = self.Wk(keys_)
        V = self.Wv(values)

        # Split and concat
        Q_ = tf.concat(tf.split(Q, self.num_heads, axis=2), axis=0)
        K_ = tf.concat(tf.split(K, self.num_heads, axis=2), axis=0)
        V_ = tf.concat(tf.split(V, self.num_heads, axis=2), axis=0)

        # Multiplication
        weights = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1]))

        # Scale
        weights = weights / (K_.get_shape().as_list()[-1] ** 0.5)

        # convert to prob vector
        weights = tf.nn.softmax(weights)

        # Dropouts
        if 0 < self.dropout_keep_prob < 1:
            weights = tf.keras.layers.AlphaDropout(
                rate=1 - self.dropout_keep_prob)(weights)

        # Weighted sum
        # [batch_size * num_heads, max_length, num_units / num_heads]
        outputs = tf.matmul(weights, V_)

        # Restore shape to [batch_size, max_length, num_units]
        z = tf.concat(tf.split(outputs, self.num_heads, axis=0), axis=2)

        # Restore shape to [batch_size, max_length, embed_dim]
        z = self.Wz(z)

        # Residual connection
        if self.has_residual:
            z += values

        z = tf.nn.leaky_relu(z)

        # Normalize
        z = tf.keras.layers.LayerNormalization(
            beta_initializer="zeros", gamma_initializer="ones")(z)

        return z


class BST_DSSM(tf.keras.Model):
    """define BST+DSSM model stucture
    """
    def __init__(self, model_dir,
                 item_embedding=None, user_embedding=None,
                 embedding_size=8,
                 vocab_size=1000,
                 max_length_item=15, max_length_user=6,
                 epoch=10, batch_size=256, blocks=2,
                 learning_rate=0.001, optimizer_type="adam",
                 batch_norm=0, batch_norm_decay=0.995,
                 verbose=False, random_seed=2019,
                 l2_reg=0.0, has_residual=True):
        """
        initial model related parms and tensors
        """
        super(BST_DSSM, self).__init__()
        # denote as K, size of the feature embedding
        self.embedding_size = embedding_size

        self.l2_reg = l2_reg

        self.epoch = epoch
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.optimizer_type = optimizer_type
        self.optimizer = None

        self.blocks = blocks
        self.batch_norm = batch_norm
        self.batch_norm_decay = batch_norm_decay

        self.verbose = verbose
        self.random_seed = random_seed
        self.model_dir = model_dir

        # self._init_graph()
        self.vocab_size = vocab_size
        self.max_length_item = max_length_item
        self.max_length_user = max_length_user
        self.has_residual = has_residual
        self.model = None

        self.item_embedding = item_embedding
        self.user_embedding = user_embedding

        self.mha_user = MultiHeadAttention("user", num_units=embedding_size)
        self.mha_item = MultiHeadAttention("item", num_units=embedding_size)

    def _get_item_embedding_matrix(self):
        if self.item_embedding is None:
            std = 0.1
            minval = -std
            maxval = std
            emb_matrix = tf.Variable(
                tf.random.uniform(
                    [self.vocab_size, self.embedding_size],
                    minval, maxval,
                    seed=self.random_seed,
                    dtype=tf.float32),
                name="item_embedding")
            self.item_embedding = emb_matrix

    def _get_user_embedding_matrix(self):
        if self.user_embedding is None:
            std = 0.1
            minval = -std
            maxval = std
            emb_matrix = tf.Variable(
                tf.random.uniform(
                    [self.vocab_size, self.embedding_size],
                    minval, maxval,
                    seed=self.random_seed,
                    dtype=tf.float32),
                name="user_embedding")
            self.user_embedding = emb_matrix

    def build_model(self):
        # initialize lut
        self._get_item_embedding_matrix()
        self._get_user_embedding_matrix()

        item_inputs = tf.keras.Input(
            shape=(self.max_length_item,),
            dtype=tf.int32,
            name="item_sequence_idx")
        user_inputs = tf.keras.Input(
            shape=(self.max_length_user,),
            dtype=tf.int32,
            name="user_sequence_idx")

        # user and item use different lut, similarly to DSSM
        self.item_sequence_embeddings = tf.nn.embedding_lookup(
            self.item_embedding, item_inputs, name="item_sequence_embeddings")
        self.video_sequence_embeddings = tf.nn.embedding_lookup(
            self.user_embedding, user_inputs, name="video_sequence_embeddings")

        # self attn part
        for i in range(self.blocks):
            self.item_sequence_embeddings = self.mha_item(
                queries=self.item_sequence_embeddings,
                keys_=self.item_sequence_embeddings,  # MultiHeadAttention.call expects keys_
                values=self.item_sequence_embeddings)

            self.video_sequence_embeddings = self.mha_user(
                queries=self.video_sequence_embeddings,
                keys_=self.video_sequence_embeddings,
                values=self.video_sequence_embeddings)

        # max pooling
        self.item_sequence_embeddings = tf.nn.max_pool(
            self.item_sequence_embeddings,
            [1, self.max_length_item, 1],
            [1 for _ in range(len(self.item_sequence_embeddings.shape))],
            padding="VALID")
        self.video_sequence_embeddings = tf.nn.max_pool(
            self.video_sequence_embeddings,
            [1, self.max_length_user, 1],
            [1 for _ in range(len(self.video_sequence_embeddings.shape))],
            padding="VALID")

        # cosine similarity
        self.item_sequence_embeddings = tf.nn.l2_normalize(
            self.item_sequence_embeddings, axis=2)
        self.video_sequence_embeddings = tf.nn.l2_normalize(
            self.video_sequence_embeddings, axis=2)

        outputs = tf.matmul(
            self.item_sequence_embeddings,
            tf.transpose(self.video_sequence_embeddings, [0, 2, 1]))
        outputs = tf.reshape(outputs, [-1, 1])

        # optimizer
        if self.optimizer_type == "adam":
            self.optimizer = tf.keras.optimizers.Adam(
                learning_rate=self.learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
        elif self.optimizer_type == "adagrad":
            self.optimizer = tf.keras.optimizers.Adagrad(
                learning_rate=self.learning_rate,
                initial_accumulator_value=1e-8)
        elif self.optimizer_type == "gd":
            self.optimizer = tf.keras.optimizers.SGD(
                learning_rate=self.learning_rate)
        elif self.optimizer_type == "momentum":
            self.optimizer = tf.keras.optimizers.SGD(
                learning_rate=self.learning_rate, momentum=0.95)

        self.model = tf.keras.Model(
            inputs={
                "item_sequence_idx": item_inputs,
                "user_sequence_idx": user_inputs
            },
            outputs=outputs)

        self.model.compile(
            optimizer=self.optimizer,
            loss=self.loss_fn,
            metrics=[
                tf.keras.metrics.AUC(),
                tf.keras.metrics.BinaryAccuracy()])
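In case it helps frame the question: the only workaround I can think of so far is to replace the raw variable plus tf.nn.embedding_lookup with a tf.keras.layers.Embedding layer, which is designed to accept symbolic Keras inputs directly. This is just a sketch using my default vocab_size/embedding_size; I have not verified that it keeps the rest of the model unchanged:

item_inputs = tf.keras.Input(shape=(15,), dtype=tf.int32, name="item_sequence_idx")
item_lut = tf.keras.layers.Embedding(
    input_dim=1000,         # vocab_size
    output_dim=8,           # embedding_size
    embeddings_initializer=tf.keras.initializers.RandomUniform(-0.1, 0.1),
    name="item_embedding")
item_sequence_embeddings = item_lut(item_inputs)   # shape [batch_size, 15, 8]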