Python:使用三元组损失(triplet loss)预测图像
标签:python, tensorflow, keras, neural-network
我是这方面的新手。我使用三元组损失(triplet loss)方法建立了一个用于图像理解的神经网络,但我认为自己缺少一些关于如何用这种方法预测图像标签的基本知识。模型建好之后,我应该如何对一张样本图像进行预测?我的模型输入是一个三元组——预测时这个三元组应该用什么来构造?从理论上讲,我认为应该先以某种方式得到测试图像的嵌入向量,然后用 k=1 的 KNN 找到最近的嵌入,但我不知道在实践中该如何做到这一点。下面是我的代码,它可以运行并生成模型:
import numpy as np
import random
import os
import imageio
import matplotlib.pyplot as plt
import pandas as pd
from time import time
import tensorflow as tf
tf.set_random_seed(1)
from PIL import Image
from keras.models import Model
from keras.layers import Input, Lambda, concatenate
from keras.optimizers import Adam
from keras import backend as K
from keras.layers import Conv2D, PReLU, Flatten, Dense
ALPHA = 0.2 # Triplet Loss Parameter
def get_triplets(features):
    """Build one (anchor, positive, negative) image triplet per usable sample.

    ``features`` is anything ``pandas.DataFrame`` accepts whose first column
    holds the image arrays and whose last column holds the tag of each image.
    For every sample, a positive (different sample, same tag) and a negative
    (sample with a different tag) are drawn with ``random.choice``.

    Rows are addressed by position throughout, so the result does not depend
    on the index labels of ``features``. Samples that have no positive or no
    negative candidate cannot form a triplet and are skipped.

    Returns:
        ``np.array`` of the collected triplets. Each image is reshaped to a
        batch of one, so when all images share the shape ``(H, W, C)`` the
        result has shape ``(n_triplets, 3, 1, H, W, C)``. An empty input
        yields an empty array.
    """
    df_features = pd.DataFrame(features)
    if df_features.empty:
        return np.array([])

    # Pull both columns out once instead of re-filtering the frame per row.
    images = df_features.iloc[:, 0].tolist()
    tags = df_features.iloc[:, -1].tolist()

    def as_batch(image):
        # Prepend a batch axis of size one: (H, W, C) -> (1, H, W, C).
        return image.reshape(-1, image.shape[0], image.shape[1], image.shape[2])

    triplets = []
    for position, tag in enumerate(tags):
        same_tag_positions = [
            p for p, other in enumerate(tags) if p != position and other == tag
        ]
        diff_tag_positions = [
            p for p, other in enumerate(tags) if p != position and other != tag
        ]
        if not same_tag_positions or not diff_tag_positions:
            # random.choice would raise on an empty candidate list.
            continue
        positive = images[random.choice(same_tag_positions)]
        negative = images[random.choice(diff_tag_positions)]
        triplets.append(
            [as_batch(images[position]), as_batch(positive), as_batch(negative)]
        )
    return np.array(triplets)
def triplet_loss(x):
    """Compute the margin-based triplet loss for concatenated embeddings.

    ``x`` is split into three equal parts along axis 1, taken in the order
    anchor, positive, negative. The loss is the mean over axis 0 of
    ``max(d(anchor, positive) - d(anchor, negative) + ALPHA, 0)``, where
    ``d`` is the squared difference summed over axis 1.
    """
    anchor, positive, negative = tf.split(x, 3, axis=1)

    def squared_distance_to_anchor(other):
        # Sum of squared differences along axis 1.
        return tf.reduce_sum(tf.square(tf.subtract(anchor, other)), 1)

    positive_distance = squared_distance_to_anchor(positive)
    negative_distance = squared_distance_to_anchor(negative)
    margin_violation = tf.add(
        tf.subtract(positive_distance, negative_distance), ALPHA
    )
    # Only triplets that violate the margin contribute to the loss.
    return tf.reduce_mean(tf.maximum(margin_violation, 0.0), 0)
# When fitting the model (i.e., model.fit()); use as an input [anchor_example,
# positive_example, negative_example] in that order and as an output zero.
# The reason to use the output as zero is that you are trying to minimize the
# triplet loss as much as possible and the minimum value of the loss is zero.
def create_embedding_network(input_shape):
    """Build the network that maps one image to its embedding.

    Two 3x3 convolutions (32 then 64 filters), each followed by PReLU, are
    flattened and fed to a 10-unit softmax Dense layer whose output is the
    model output.
    """
    image_input = Input(input_shape)

    features = image_input
    for n_filters in (32, 64):
        features = Conv2D(n_filters, (3, 3))(features)
        features = PReLU()(features)

    flattened = Flatten()(features)
    embedding = Dense(10, activation='softmax')(flattened)
    return Model(inputs=image_input, outputs=embedding)
# Set by build_model() to the embedding tensor of the anchor input.
anchor_embedding = None


def build_model(input_shape):
    """Assemble and compile the three-input triplet training model.

    One embedding network is shared by the positive, negative and anchor
    inputs. The three embeddings are concatenated in the order
    (anchor, positive, negative) and reduced to the triplet loss by a Lambda
    layer; that loss value is the model output. The model is compiled with
    mean absolute error and Adam. Model inputs are ordered
    [anchor, positive, negative].

    Side effects: sets the Keras image data format to 'channels_last' and
    stores the anchor embedding tensor in the module-level
    ``anchor_embedding``.
    """
    global anchor_embedding

    # Standardize the input shape order.
    K.set_image_data_format('channels_last')

    positive_in, negative_in, anchor_in = (
        Input(shape=input_shape) for _ in range(3)
    )

    # A single network instance, so all three branches share weights.
    shared_network = create_embedding_network(input_shape)
    positive_out = shared_network(positive_in)
    negative_out = shared_network(negative_in)
    anchor_embedding = shared_network(anchor_in)

    stacked = concatenate([anchor_embedding, positive_out, negative_out])
    loss_output = Lambda(triplet_loss, (1,))(stacked)

    triplet_model = Model(
        inputs=[anchor_in, positive_in, negative_in],
        outputs=loss_output,
    )
    triplet_model.compile(loss='mean_absolute_error', optimizer=Adam())
    return triplet_model
# Script section: load the tagged images, build triplets and train the model.
numOfPhotosPerTag = 10
# Change this line to your own drive path
baseDir = "C:/Intelligent systems/DNN/images/"
imagesHashtags = ["beer", "bigcity"]
imagesDir = [baseDir + str(x) for x in imagesHashtags]
images = ["/" + str(x) + ".jpg" for x in range(1, numOfPhotosPerTag + 1)]
allImages = []
for x in imagesDir:
    allImages += [x + loc for loc in images]

# Bare expression: only displays the shape in an interactive session.
imageio.imread(allImages[0], pilmode="RGB").shape

data = []
for x in allImages:
    image = imageio.imread(x, pilmode="RGB")
    # The tag is the name of the directory that contains the image.
    tag = x.split('/')[-2]
    data.append((image, tag))
# Each row pairs an image array with a string, so the array must be built
# with dtype=object; recent NumPy versions reject ragged input otherwise.
data = np.array(data, dtype=object)

triplets = get_triplets(data)
model = build_model((256, 256, 3))

# Train on every triplet in turn, one triplet per fit() call. The target is
# zero because the model output is the triplet loss itself.
for triplet in triplets:
    model.fit(list(triplet), y=np.zeros(1), batch_size=1, verbose=10)
如果你已经正确地训练了嵌入网络(embedding_network),现在就不再需要使用三元组了。三元组损失的全部意义在于学习一个与预定义度量(通常就是欧几里得距离)相容的嵌入,然后如你所说,用这个嵌入做简单的 KNN 分类。因此,先把带标签的数据全部通过嵌入网络(embedding_network)。
这样你就在一个(低维?)空间中得到了一组点,其中“距离相近的点”属于同一类。当然,这取决于数据、训练是否成功等因素。
接下来自然要做的,就是把测试点通过同一个嵌入网络(embedding_network),并将它与嵌入空间中带标签的点比较距离。
KNN 是一个可行的分类方案,但真正的要点在于:你的数据已经被高度非线性地变换到一个“舒适”的空间中,许多经典而简单的方法在其中都更容易奏效——聚类、分类,等等。
希望这有帮助,祝你好运!
如果用 name= 给模型中“正常”部分的层命名,就可以提取所需的层。为此,我们使用以下代码:
def triplet2normal(model, keep_str='pos', out='score'):
    """Cut a single-branch model out of a triplet model.

    The new output is the output of the first layer of ``model`` whose name
    contains both ``keep_str`` and ``out``. The new inputs are those entries
    of ``model.input`` whose name contains ``keep_str``.

    Raises StopIteration if no layer name matches.
    """
    kept_output_name = next(
        layer.name
        for layer in model.layers
        if keep_str in layer.name and out in layer.name
    )
    kept_inputs = [tensor for tensor in model.input if keep_str in tensor.name]
    kept_output = model.get_layer(kept_output_name).output
    return Model(inputs=kept_inputs, outputs=kept_output)
其中 model 可以是任意三元组模型——下面的示例是一个用于推荐的模型,例如用于 MovieLens 数据集:
# One id per input: a user, an item to score high, and an item to score low.
positive_item_input = Input(shape=(1,), name='pos_item_input')
negative_item_input = Input(shape=(1,), name='neg_item_input')
user_input = Input(shape=(1,), name='pos_neg_user_input')

# A single item embedding is shared by the positive and negative branches.
item_embedding_layer = Embedding(
    input_dim=num_items,
    output_dim=latent_dim,
    name='pos_neg_item_embedding',
    input_length=1,
)
user_embedding_layer = Embedding(
    input_dim=num_users,
    output_dim=latent_dim,
    name='pos_neg_user_embedding',
    input_length=1,
)

# Flatten each looked-up embedding.
positive_item_embedding = Flatten(name='pos_item_embedded')(
    item_embedding_layer(positive_item_input)
)
negative_item_embedding = Flatten(name='neg_item_embedded')(
    item_embedding_layer(negative_item_input)
)
user_embedding = Flatten(name='pos_neg_user_embedded')(
    user_embedding_layer(user_input)
)

# Score = dot product of user and item embeddings (matrix factorization).
positive_scores = Dot(axes=1, name='positive_scores')(
    [user_embedding, positive_item_embedding]
)
negative_scores = Dot(axes=1, name='negative_scores')(
    [user_embedding, negative_item_embedding]
)

# The model output is sigmoid(negative score - positive score).
delta_scores_1 = Subtract(name='delta_scores')(
    [negative_scores, positive_scores]
)
loss = Activation('sigmoid')(delta_scores_1)

# Triplet model: inputs are ordered user, positive item, negative item.
model = Model(
    inputs=[user_input, positive_item_input, negative_item_input],
    outputs=loss,
)