Image processing 将图像匹配到图像采集
我收集了大量的卡片图片,还有一张特定卡片的照片。我可以使用什么工具来查找与我最相似的收藏图像?以下是采集样本:
(标签: image-processing, cbir)

以下是我想要找到的:
- 感谢您发布一些照片
我编写了一个名为“感知哈希”的算法,这是Neal Krawetz博士发现的。通过将您的图像与卡片进行比较,我得到了以下相似度百分比:
Card vs. Abundance 79%
Card vs. Aggressive 83%
Card vs. Demystify 85%
所以,对于你的图像类型来说,它不是一个理想的鉴别器,但在某种程度上是可行的。您可能希望使用它来为您的用例定制它
我会为你收集的每个图像计算一个哈希值,一次一个,然后只为每个图像存储一次哈希值。然后,当你得到一张新卡时,计算它的散列并将其与存储的卡进行比较
#!/bin/bash
################################################################################
# Similarity
# Mark Setchell
#
# Calculate percentage similarity of two images using Perceptual Hashing
# See article by Dr Neal Krawetz entitled "Looks Like It" - www.hackerfactor.com
#
# Method:
# 1) Resize image to black and white 8x8 pixel square regardless
# 2) Calculate mean brightness of those 64 pixels
# 3) For each pixel, store "1" if pixel>mean else store "0" if less than mean
# 4) Convert resulting 64bit string of 1's and 0's, 16 hex digit "Perceptual Hash"
#
# If finding difference between Perceptual Hashes, simply total up number of bits
# that differ between the two strings - this is the Hamming distance.
#
# Requires ImageMagick - www.imagemagick.org
#
# Usage:
#
# Similarity image|imageHash [image|imageHash]
# If you pass one image filename, it will tell you the Perceptual hash as a 16
# character hex string that you may want to store in an alternate stream or as
# an attribute or tag in filesystems that support such things. Do this in order
# to just calculate the hash once for each image.
#
# If you pass in two images, or two hashes, or an image and a hash, it will try
# to compare them and give a percentage similarity between them.
################################################################################
function PerceptualHash(){
   # Compute the 16-hex-digit Perceptual Hash of image $1 and print it.
   # Method: shrink to greyscale 8x8, threshold each pixel against the mean
   # brightness, pack the 64 resulting bits into a hex string.
   TEMP="tmp$$.png"
   # Force image to 8x8 pixels and greyscale
   convert "$1" -colorspace gray -quality 80 -resize 8x8! PNG8:"$TEMP"
   # Calculate mean brightness and correct to range 0..255
   MEAN=$(convert "$TEMP" -format "%[fx:int(mean*255)]" info:)
   # Extract all 64 pixels; append "1" where pixel > mean, else "0"
   hash=""
   for i in {0..7}; do
      for j in {0..7}; do
         # NOTE: \d is not valid POSIX ERE - use [0-9] so any grep works
         pixel=$(convert "${TEMP}"[1x1+${i}+${j}] -colorspace gray text: | grep -Eo "\([0-9]+," | tr -d '(,' )
         bit="0"
         [ "$pixel" -gt "$MEAN" ] && bit="1"
         hash="$hash$bit"
      done
   done
   hex=$(echo "obase=16;ibase=2;$hash" | bc)
   # bash printf %016s pads with SPACES, so convert the padding to zeros -
   # otherwise hashes with leading zero digits are printed malformed
   printf "%016s\n" $hex | tr ' ' '0'
   # Remove the temp file (was commented out before: one leaked file per call)
   rm "$TEMP" > /dev/null 2>&1
}
function HammingDistance(){
# Convert input hex strings to upper case like bc requires
STR1=$(tr '[a-z]' '[A-Z]' <<< $1)
STR2=$(tr '[a-z]' '[A-Z]' <<< $2)
# Convert hex to binary and zero left pad to 64 binary digits
STR1=$(printf "%064s" $(echo "obase=2;ibase=16;$STR1" | bc))
STR2=$(printf "%064s" $(echo "obase=2;ibase=16;$STR2" | bc))
# Calculate Hamming distance between two strings, each differing bit adds 1
hamming=0
for i in {0..63};do
a=${STR1:i:1}
b=${STR2:i:1}
[ $a != $b ] && ((hamming++))
done
# Hamming distance is in range 0..64 and small means more similar
# We want percentage similarity, so we do a little maths
similarity=$((100-(hamming*100/64)))
echo $similarity
}
function Usage(){
   # Print the invocation synopsis on stderr and abort with status 1.
   printf '%s\n' "Usage: Similarity image|imageHash [image|imageHash]" >&2
   exit 1
}
################################################################################
# Main
################################################################################
case $# in
   1)
      # One argument: an image file whose Perceptual Hash we print
      if [ ! -f "$1" ]; then
         echo "ERROR: File $1 does not exist" >&2
         exit 1
      fi
      PerceptualHash "$1"
      exit 0
      ;;
   2)
      # Two arguments: each may be an image file or a precomputed hash;
      # derive a hash for whichever is a file, then compare the pair
      if [ -f "$1" ]; then hash1=$(PerceptualHash "$1"); else hash1=$1; fi
      if [ -f "$2" ]; then hash2=$(PerceptualHash "$2"); else hash2=$2; fi
      HammingDistance $hash1 $hash2
      exit 0
      ;;
esac
Usage
#!/bin/bash
################################################################################
#相似性
#马克·塞切尔
#
#使用感知哈希计算两幅图像的相似度百分比
#见Neal Krawetz博士题为“看起来像”的文章——www.hackerfactor.com
#
#方法:
#1)将图像大小调整为黑白8x8像素正方形
#2)计算这64个像素的平均亮度
#3)对于每个像素,如果像素>平均值,则存储“1”,否则如果小于平均值,则存储“0”
#4)转换生成的1和0的64位字符串,16个十六进制数字“感知哈希”
#
#若要找出感知散列之间的差异,只需将位数相加即可
#这两个字符串之间的差异-这是汉明距离。
#
#需要ImageMagick-www.ImageMagick.org
#
#用法:
#
#相似性图像| imageHash[图像| imageHash]
#如果您传递一个图像文件名,它将告诉您感知哈希为16
#您可能希望存储在备用流中的十六进制字符串,或作为
#文件系统中支持这类功能的属性或标记。按顺序做这件事
#仅为每个图像计算一次哈希。
#
#如果传入两个图像,或两个哈希,或一个图像和一个哈希,它将尝试
#比较它们并给出它们之间的相似性百分比。
################################################################################
函数PerceptualHash(){
TEMP=“tmp$$.png”
#强制图像为8x8像素和灰度
转换“$1”-颜色空间灰色-质量80-调整大小8x8!PNG8:$TEMP”
#计算平均亮度并校正至0..255范围
平均值=$(转换“$TEMP”-格式“%[fx:int(平均值*255)]”信息:)
#现在提取所有64个像素并构建包含“1”的字符串,其中像素>平均值为“0”
hash=“”
因为{0..7}中的i;do
对于{0..7}中的j;do
像素=$(转换“${TEMP}”[1x1+${i}+${j}]-颜色空间灰色文本:| grep-Eo“\(\d+,“| tr-d'(,'))
bit=“0”
[$pixel-gt$MEAN]&&bit=“1”
hash=“$hash$bit”
完成
完成
十六进制=$(echo“obase=16;ibase=2;$hash”| bc)
printf“%016s\n”$hex
#rm“$TEMP”>/dev/null 2>&1
}
函数HammingDistance(){
#将输入十六进制字符串转换为大写,如bc所需
STR1=$(tr '[a-z]' '[A-Z]' ...)

我还尝试了将每张图像与卡进行归一化互相关,如下所示:
#!/bin/bash
# Rank every JPEG in the current directory by Normalised Cross Correlation
# (NCC) against card.png; higher scores mean better matches. All images are
# forced to the same geometry and normalised so contrast differences matter
# as little as possible.
size="300x400!"
convert card.png -colorspace RGB -normalize -resize "$size" card.jpg
for i in *.jpg
do
   # Quote "$i" so filenames containing spaces or globs do not break the pipe
   cc=$(convert "$i" -colorspace RGB -normalize -resize "$size" JPG:- | \
   compare - card.jpg -metric NCC null: 2>&1)
   echo "$cc:$i"
done | sort -n
我得到了这个输出(按匹配质量排序):
这表明该卡与demystify.jpg
的相关性最好
请注意,我将所有图像的大小调整为相同的大小,并将其对比度标准化,以便易于比较,并将对比度差异产生的影响降至最低。将它们缩小也可以减少相关性所需的时间。新方法!
似乎下面的ImageMagick命令,或者它的一个变体,取决于查看更多的图像选择,将提取卡顶部的文字
convert aggressiveurge.jpg -crop 80%x10%+10%+10% crop.png
它获取图像的顶部10%和宽度的80%(从左上角的10%开始,并将其存储在crop.png
中,如下所示:
tesseract crop.png agg
如果您通过tesseract
OCR运行该命令,如下所示:
tesseract crop.png agg
您会得到一个名为agg.txt
的文件,其中包含:
E‘ Aggressive Urge \L® E
您可以通过grep
进行清理,只查找彼此相邻的大写和小写字母:
grep -Eo "\<[A-Za-z]+\>" agg.txt
:-)我尝试将图像数据排列为一个向量,并在收集的图像向量和搜索的图像向量之间取内积。最相似的向量将得到最高的内积。我将所有图像调整为相同大小,以获得相等长度的向量,以便取内积。此调整大小将增加合理地减少内积计算成本,并给出实际图像的粗略近似值
你可以用Matlab或Octave快速检查。下面是Matlab/Octave脚本。我在那里添加了注释。我尝试将变量mult从1变为8(你可以尝试任何整数值),对于所有这些情况,image Demystify与卡片图像的内积最高。对于mult=8,我在Matlab中得到以下ip向量:
ip =
683007892
558305537
604013365
如您所见,它为图像去神秘化提供了最高的内积683007892
% load images
imCardPhoto = imread('0.png');
imDemystify = imread('1.jpg');
imAggressiveUrge = imread('2.jpg');
imAbundance = imread('3.jpg');

% you can experiment with the size by varying mult
mult = 8;
% NOTE: renamed from "size" - assigning to "size" shadows MATLAB's built-in
% size() function for the rest of the session (Code Analyzer warning)
targetSize = [17 12]*mult;

% resize with nearest neighbor interpolation
smallCardPhoto = imresize(imCardPhoto, targetSize);
smallDemystify = imresize(imDemystify, targetSize);
smallAggressiveUrge = imresize(imAggressiveUrge, targetSize);
smallAbundance = imresize(imAbundance, targetSize);

% image collection: each image is vectorized. if we have n images, this
% will be a (size_rows*size_columns*channels) x n matrix
collection = [double(smallDemystify(:)) ...
double(smallAggressiveUrge(:)) ...
double(smallAbundance(:))];

% vectorize searched image. this will be a (size_rows*size_columns*channels) x 1
% vector
x = double(smallCardPhoto(:));

% take the inner product of x and each image vector in collection. this
% will result in a n x 1 vector. the higher the inner product is, more similar the
% image and searched image(that is x)
ip = collection' * x;
编辑
我尝试了另一种方法,基本上是采用参考图像和卡片图像之间的欧几里德距离(l2范数),这给了我一个非常好的结果,我在这里找到了大量的参考图像(383张图像),用于测试卡片图像
在这里,我没有拍摄整个图像,而是提取了包含图像的上半部分,并将其用于比较
在以下步骤中,在进行任何处理之前,将所有训练图像和测试图像的大小调整为预定义大小
- 从训练图像中提取图像区域
- 在上执行形态学关闭
% Read the photo of the card and the three reference scans
imCardPhoto = imread('0.png');
imDemystify = imread('1.jpg');
imAggressiveUrge = imread('2.jpg');
imAbundance = imread('3.jpg');

% Common working resolution; try other integer multipliers as well
mult = 8;
size = [17 12]*mult;

% Bring every image to the same resolution (nearest-neighbour interpolation)
smallCardPhoto = imresize(imCardPhoto, size);
smallDemystify = imresize(imDemystify, size);
smallAggressiveUrge = imresize(imAggressiveUrge, size);
smallAbundance = imresize(imAbundance, size);

% Stack each reference image as one column vector, giving a
% (rows*cols*channels) x n matrix for the n reference images
collection = [reshape(double(smallDemystify), [], 1) ...
reshape(double(smallAggressiveUrge), [], 1) ...
reshape(double(smallAbundance), [], 1)];

% The searched image as a single (rows*cols*channels) x 1 column vector
x = reshape(double(smallCardPhoto), [], 1);

% One inner product per reference image; the largest entry of ip marks
% the reference most similar to the searched image x
ip = collection' * x;
#include <opencv2/opencv.hpp>
#include <iostream>
#include <algorithm>
#include <string.h>
#include <windows.h>
using namespace cv;
using namespace std;
#define INPUT_FOLDER_PATH string("Your test image folder path")
#define TRAIN_IMG_FOLDER_PATH string("Your training image folder path")
void search()
{
WIN32_FIND_DATA ffd;
HANDLE hFind = INVALID_HANDLE_VALUE;
vector<Mat> images;
vector<string> labelNames;
int label = 0;
double scale = .2; // you can experiment with scale
Size imgSize(200*scale, 285*scale); // training sample images are all 200 x 285 (width x height)
Mat kernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
// get all training samples in the directory
hFind = FindFirstFile((TRAIN_IMG_FOLDER_PATH + string("*")).c_str(), &ffd);
if (INVALID_HANDLE_VALUE == hFind)
{
cout << "INVALID_HANDLE_VALUE: " << GetLastError() << endl;
return;
}
do
{
if (!(ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY))
{
Mat im = imread(TRAIN_IMG_FOLDER_PATH+string(ffd.cFileName));
Mat re;
resize(im, re, imgSize, 0, 0); // resize the image
// extract only the upper part that contains the image
Mat roi = re(Rect(re.cols*.1, re.rows*35/285.0, re.cols*.8, re.rows*125/285.0));
// get a coarse approximation
morphologyEx(roi, roi, MORPH_CLOSE, kernel);
images.push_back(roi.reshape(1)); // vectorize the roi
labelNames.push_back(string(ffd.cFileName));
}
}
while (FindNextFile(hFind, &ffd) != 0);
// load the test image, apply the same preprocessing done for training images
Mat test = imread(INPUT_FOLDER_PATH+string("0.png"));
Mat re;
resize(test, re, imgSize, 0, 0);
Mat roi = re(Rect(re.cols*.1, re.rows*35/285.0, re.cols*.8, re.rows*125/285.0));
morphologyEx(roi, roi, MORPH_CLOSE, kernel);
Mat testre = roi.reshape(1);
struct imgnorm2_t
{
string name;
double norm2;
};
vector<imgnorm2_t> imgnorm;
for (size_t i = 0; i < images.size(); i++)
{
imgnorm2_t data = {labelNames[i],
norm(images[i], testre) /* take the l2-norm (euclidean distance) */};
imgnorm.push_back(data); // store data
}
// sort stored data based on euclidean-distance in the ascending order
sort(imgnorm.begin(), imgnorm.end(),
[] (imgnorm2_t& first, imgnorm2_t& second) { return (first.norm2 < second.norm2); });
for (size_t i = 0; i < imgnorm.size(); i++)
{
cout << imgnorm[i].name << " : " << imgnorm[i].norm2 << endl;
}
}