基于CUDA的直线检测
我正在尝试使用CUDA进行实时直线检测(标签:cuda, hough-transform)。我已经计算出霍夫(Hough)变换,以及每个累加单元(bin)中直线的最小、最大坐标。为了得到线段,我使用Bresenham直线算法,在每个bin上从最小点到最大点进行追踪。当霍夫阈值较低且图像中存在大量直线时,线段追踪需要花费大量时间才能完成。在GTX 660上,霍夫变换(hough_line_transform)处理每帧(1280x720)大约需要5-10ms(观察到比CPU实现快10倍),但从最小点到最大点追踪线段需要1ms-15ms。我有两个关于直线检测的问题:是否有
#ifndef _HOUGH_LINES_H_
#define _HOUGH_LINES_H_
#include <cuda_gl_interop.h>
#include <thrust/device_vector.h>
union Pos;
struct Line;
// Image geometry handed to the kernels through a one-element device vector.
struct Hough_params
{
    int w;  // image width, as passed to the Hough_lines constructor
    int h;  // image height, as passed to the Hough_lines constructor
    int r;  // number of rho bins per angle (derived from w and h by the constructor)
};
// Hough-transform based line-segment detector operating on an OpenGL edge
// texture and writing the resulting segments into an OpenGL buffer object.
class Hough_lines
{
public:
    // Requested output element type. It is forwarded through detect_lines()
    // and get_lines(); the visible implementation does not consult it.
    enum Type {INT, SHORT_INT, FLOAT};

    // _w / _h: dimensions of the edge image in pixels.
    Hough_lines(int _w, int _h);
    ~Hough_lines();

    // Uploads the sine/cosine tables on first use; always returns true.
    bool init();

    // tex_edge   : GL texture holding the edge image (texels equal to 255 are edges).
    // threshold  : minimum number of votes a Hough bin needs to be traced.
    // min_length : minimum |dx|+|dy| of a reported segment.
    // min_gap    : number of consecutive non-edge samples that ends a segment.
    // line       : GL buffer object receiving four ints per segment.
    // count      : receives the segment count produced by the tracing stage.
    bool detect_lines(GLuint tex_edge, int threshold, int min_length, int min_gap, GLuint line, Type type, int& count);

protected:
    static void compute_trig_funcs();

    void get_edges(thrust::device_vector<Pos>& d_coords, int& size);
    void get_hough_lines(int threshold, thrust::device_vector<Line>& d_lines, int& size);
    void get_lines(int threshold, int min_length, int min_gap, GLuint line, Hough_lines::Type type, int& count);
    void trace_all_lines(int min_len, int min_gap, thrust::device_vector<Line>& d_lines, int size, int* d_line_coord, int& count);

    Hough_params params;                          // host copy of the geometry
    thrust::device_vector<Hough_params> d_param;  // device copy passed to kernels
    static bool trig_init;                        // true once init() has run
};
#endif
\ifndef\u HOUGH\u行_
#定义线_
#包括
#包括
联合销售点;
结构线;
结构Hough_参数
{
int w;
int-h;
INTR;
};
类Hough_线
{
公众:
枚举类型{INT,SHORT_INT,FLOAT};
Hough_线(int_w,int_h);
~Hough_line();
公众:
bool init();
布尔检测线(胶合纹理边缘、整数阈值、整数最小长度、整数最小间隙、胶合线、类型、整数和计数);
受保护的:
无效获取边(推力::设备向量和坐标、整数和大小);
void get_hough_线(int阈值,推力::设备向量和d_线,int和大小);
void get_行(int threshold、int minu length、int minu gap、GLuint行、Hough_行::Type Type、int&count);
无效跟踪所有线(int-min-len、int-min-gap、推力::设备向量和d线、int-size、int*d线坐标、int&count);
静态void compute_trig_funcs();
受保护的:
霍夫参数;
推力:设备矢量d参数;
静态布尔触发器初始化;
};
#恩迪夫
身体
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#定义角度大小360
#定义每个线程的最大线10
联合销售点
{
结构
{
uint16_t x;
uint16_t y;
};
uint32_t值;
};
结构Hough_信息
{
Pos端;
Pos启动;
整数计数;
};
结构线
{
Pos启动;
Pos端;
};
结构行信息
{
整数行计数;
线-线[每螺纹的最大线];
};
__恒定浮动偏差[角度大小];
__固定浮动开发成本[角度大小];
纹理纹理纹理;
bool-Hough_线::trig_init=false;
__全局无效标记边(常量Hough参数*参数,整数*边)
{
intx=(blockIdx.x*blockDim.x+threadIdx.x);
int y=(blockIdx.y*blockDim.y+threadIdx.y);
int pos=x+(参数->w*y);
边[pos]=(255==tex2D(luma_-tex,x,y))?1:0;
}
__全局无效获取坐标(常数Hough参数*参数,整数*边,位置*坐标)
{
整数指数;
intx=(blockIdx.x*blockDim.x+threadIdx.x);
int y=(blockIdx.y*blockDim.y+threadIdx.y);
int pos=x+(参数->w*y);
if(255==tex2D(luma_-tex,x,y))
{
索引=边[pos];
坐标[index].y=y;
坐标[index].x=x;
}
}
__全局无效hough线变换(常数hough参数*参数,整数大小,常数位置*坐标,整数阈值,整数*标记,hough信息*输出)
{
int i;
内角;
int rdata;
__共享的Hough信息sh_rho_数据[1001];
i=threadIdx.x;
而(ir)
{
sh_rho_data[i].end.value=0x0;
sh_rho_data[i].start.value=0xFFFFFFFF;
sh_rho_数据[i].计数=0;
i+=blockDim.x;
}
__同步线程();
i=threadIdx.x;
角度=块IDx.x;
常数浮动cos_角度=偏差成本[角度];
常数浮点正弦角=偏差正弦角;
而(iw>>1))*cos_角)+(浮点)((参数->h>>1)-coord[i].y)*sin_角);
如果(rdata>=0)
{
atomicMax(&sh_rho_data[rdata].end.value,coord[i].value);
atomicMin(&sh_rho_data[rdata].start.value,coord[i].value);
原子添加(&sh_rho_数据[rdata]。计数,1);
}
i+=blockDim.x;
}
__同步线程();
i=threadIdx.x;
rdata=(角度*参数->r);
而(ir)
{
memcpy(&out[rdata+i],&sh_rho_数据[i],sizeof(Hough_信息));
标记[rdata+i]=(sh_rho_数据[i]。计数>=阈值)?1:0;
i+=blockDim.x;
}
}
__全局无效获取行(常量Hough参数*param,int阈值,Hough信息*hdata,int*标记,行*Line)
{
int pos;
int i=threadIdx.x;
int offset=(blockIdx.x*param->r);
而(ir)
{
如果(hdata[offset+i]。计数>=阈值)
{
pos=标记[偏移量+i];
行[pos].start.value=hdata[offset+i].start.value;
行[pos].end.value=hdata[offset+i].end.value;
}
i+=blockDim.x;
}
}
__设备无效添加线(int-xs、int-ys、int-xe、int-ye、int-min\u-len、line\u-info*line)
{
intd=abs(xexs)+abs(yeys);
如果((d>=最小长度)和&(线->线计数<每线最大线数))
{
行->行[行->行计数].start.x=xs;
行->行[行->行计数].start.y=ys;
行->行[行->行计数].end.x=xe;
行->行[行->行计数].end.y=ye;
++行->行计数;
//printf(“\n(%d%d)(%d%d)%d”,xs,ys,xe,ye,d);
}
}
__全局无效跟踪线(常量线*输入,整数输入大小,整数最小长度,整数最小间距,线信息*线信息,整数*标记)
{
int d;
int dsub;
int dstep;
int-xstep;
int ystep;
int xs,ys,xe,ye;
inti=(blockIdx.x*blockDim.x+threadIdx.x);
如果(i>=inp\u尺寸)
{
返回;
}
xs=输入[i].start.x;
ys=输入[i]。开始。y;
xe=输入[i].end.x;
ye=输入[i]。结束。y;
行信息[i]。行计数=0;
int dx=abs(xe-xs);
int-dy=abs(ye-ys);
int
#include <hough_lines.h>
#include <math.h>
#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_gl_interop.h>
#include <thrust/host_vector.h>
#include <thrust/copy.h>
#include <thrust/scan.h>
// Number of entries in the sine/cosine lookup tables (one per degree).
#define ANGLE_SIZE 360
// Capacity of Line_info::line, i.e. the most segments one traced Hough line can report.
#define MAX_LINE_PER_THREAD 10
// Pixel coordinate that can also be accessed as a single 32-bit word, which
// lets the kernels update a whole coordinate with one atomicMin/atomicMax.
union Pos
{
    struct
    {
        uint16_t x;  // column
        uint16_t y;  // row
    };
    uint32_t value;  // x and y viewed as one packed word
};
// Accumulator cell for one (angle, rho) pair.
struct Hough_info
{
    Pos end;     // largest packed coordinate that voted for this cell
    Pos start;   // smallest packed coordinate that voted for this cell
    int count;   // number of votes
};
// A segment described by its two end points in image coordinates.
struct Line
{
    Pos start;  // first end point
    Pos end;    // second end point
};
// Segments found while tracing a single Hough line.
struct Line_info
{
    int line_count;                  // number of valid entries in line[]
    Line line[MAX_LINE_PER_THREAD];  // fixed-capacity segment storage
};
// sin(i degrees) for i in [0, ANGLE_SIZE); uploaded by Hough_lines::compute_trig_funcs().
__constant__ float dev_sint[ANGLE_SIZE];
// cos(i degrees) for i in [0, ANGLE_SIZE); uploaded by Hough_lines::compute_trig_funcs().
__constant__ float dev_cost[ANGLE_SIZE];
// Texture reference for the edge image; bound and unbound in Hough_lines::detect_lines().
texture<uint8_t, 2, cudaReadModeElementType> luma_tex;
// Set by Hough_lines::init() once the trig tables have been requested.
bool Hough_lines::trig_init = false;
// Writes 1 into edge[x + w*y] when the texel at (x, y) equals 255, else 0.
//
// Expects a 2D launch in which each thread handles one pixel. Threads that
// fall outside the w x h image return without touching memory, so the grid
// may safely be rounded up past the image size.
//
// param : device pointer to the image geometry.
// edge  : device array with at least w*h ints.
__global__ void mark_edges(const Hough_params* param, int* edge)
{
    const int x = (blockIdx.x*blockDim.x+threadIdx.x);
    const int y = (blockIdx.y*blockDim.y+threadIdx.y);
    if ((x >= param->w) || (y >= param->h))
    {
        return;
    }
    edge[x+(param->w*y)] = (255 == tex2D(luma_tex, x, y)) ? 1 : 0;
}
// Stores the coordinate of every edge pixel into coord[], using edge[] as
// the destination index for that pixel.
//
// Expects a 2D launch in which each thread handles one pixel. Threads that
// fall outside the w x h image return without touching memory.
//
// param : device pointer to the image geometry.
// edge  : per-pixel destination index (the host fills it with an exclusive
//         scan of the per-pixel edge flags).
// coord : output array of edge coordinates.
__global__ void get_coords(const Hough_params* param, int* edge, Pos* coord)
{
    const int x = (blockIdx.x*blockDim.x+threadIdx.x);
    const int y = (blockIdx.y*blockDim.y+threadIdx.y);
    if ((x >= param->w) || (y >= param->h))
    {
        return;
    }
    if (255 == tex2D(luma_tex, x, y))
    {
        const int index = edge[x+(param->w*y)];
        coord[index].y = y;
        coord[index].x = x;
    }
}
// Accumulates Hough votes for one angle per block.
//
// Launch layout: blockIdx.x selects the angle (index into dev_cost/dev_sint);
// the threads of the block stride over the rho bins and over the edge list.
// Uses a static shared array of 1001 Hough_info cells.
//
// param     : device pointer to the image geometry (r = rho bins per angle).
// size      : number of entries in coord.
// coord     : edge pixel coordinates.
// threshold : minimum vote count for a bin to be flagged in mark.
// mark      : output flags, 1 when the bin reached threshold, indexed angle*r + rho.
// out       : output accumulator cells, indexed angle*r + rho.
__global__ void hough_line_transform(const Hough_params* param, int size, const Pos* coord, int threshold, int *mark, Hough_info* out)
{
    __shared__ Hough_info sh_rho_data[1001];

    const int angle = blockIdx.x;
    // The condition is uniform across the block, so returning here cannot
    // split the block around the barriers below.
    if (angle >= ANGLE_SIZE)
    {
        return;
    }

    const int capacity = (int)(sizeof(sh_rho_data)/sizeof(sh_rho_data[0]));
    const int rho_count = param->r;
    // Never address more bins than the shared array can hold.
    const int bins = (rho_count < capacity) ? rho_count : capacity;

    for (int i = threadIdx.x; i < bins; i += blockDim.x)
    {
        sh_rho_data[i].end.value = 0x0;
        sh_rho_data[i].start.value = 0xFFFFFFFF;
        sh_rho_data[i].count = 0;
    }
    __syncthreads();

    const float cos_angle = dev_cost[angle];
    const float sin_angle = dev_sint[angle];
    const int half_w = (param->w>>1);
    const int half_h = (param->h>>1);
    for (int i = threadIdx.x; i < size; i += blockDim.x)
    {
        // Rho is measured from the image centre with the y axis pointing up.
        const int rdata = (int)ceilf(((float)(coord[i].x-half_w)*cos_angle)+((float)(half_h-coord[i].y)*sin_angle));
        // Negative rho is skipped; a rho at or beyond the bin count has no
        // output slot and would otherwise index past the shared array.
        if ((rdata >= 0) && (rdata < bins))
        {
            atomicMax(&sh_rho_data[rdata].end.value, coord[i].value);
            atomicMin(&sh_rho_data[rdata].start.value, coord[i].value);
            atomicAdd(&sh_rho_data[rdata].count, 1);
        }
    }
    __syncthreads();

    const int base = (angle*rho_count);
    for (int i = threadIdx.x; i < rho_count; i += blockDim.x)
    {
        Hough_info bin;
        if (i < bins)
        {
            bin = sh_rho_data[i];
        }
        else
        {
            // Bins that did not fit in shared memory are reported as empty.
            bin.end.value = 0x0;
            bin.start.value = 0xFFFFFFFF;
            bin.count = 0;
        }
        out[base+i] = bin;
        mark[base+i] = (bin.count >= threshold) ? 1 : 0;
    }
}
// Compacts the Hough bins that reached the threshold into lines[].
//
// Launch layout: blockIdx.x selects the angle; threads stride over that
// angle's rho bins.
//
// param     : device pointer to the image geometry (r = rho bins per angle).
// threshold : minimum vote count for a bin to be emitted.
// hdata     : accumulator cells, indexed angle*r + rho.
// mark      : destination index for each bin (scanned flags).
// lines     : output array of start/end point pairs.
__global__ void get_lines(const Hough_params* param, int threshold, Hough_info* hdata, int* mark, Line* lines)
{
    const int base = (blockIdx.x*param->r);
    for (int bin = threadIdx.x; bin < param->r; bin += blockDim.x)
    {
        const Hough_info& cell = hdata[base+bin];
        if (cell.count < threshold)
        {
            continue;
        }
        Line& dst = lines[mark[base+bin]];
        dst.start.value = cell.start.value;
        dst.end.value = cell.end.value;
    }
}
// Appends the segment (xs, ys)-(xe, ye) to *line when its |dx|+|dy| length is
// at least min_len and the fixed-size segment array still has room;
// otherwise leaves *line unchanged.
__device__ void add_line(int xs, int ys, int xe, int ye, int min_len, Line_info* line)
{
    const int length = abs(xe-xs)+abs(ye-ys);
    if ((length < min_len) || (line->line_count >= MAX_LINE_PER_THREAD))
    {
        return;
    }
    Line& slot = line->line[line->line_count];
    slot.start.x = xs;
    slot.start.y = ys;
    slot.end.x = xe;
    slot.end.y = ye;
    ++line->line_count;
}
// Walks each candidate line from its start point towards its end point with
// an integer error-accumulating stepper, sampling the edge texture and
// splitting the walk into segments wherever min_gap consecutive non-edge
// samples occur.
//
// Launch layout: 1D, one thread per entry of input; threads with an index
// at or beyond inp_size return immediately.
//
// input     : candidate lines (start/end points).
// inp_size  : number of entries in input.
// min_len   : minimum |dx|+|dy| length for a segment to be stored.
// min_gap   : number of consecutive non-edge samples that closes a segment.
// line_info : per-line output segments.
// mark      : per-line number of stored segments.
__global__ void trace_lines(const Line* input, int inp_size, int min_len, int min_gap, Line_info* line_info, int* mark)
{
    const int tid = (blockIdx.x*blockDim.x+threadIdx.x);
    if (tid >= inp_size)
    {
        return;
    }

    Line_info* const result = &line_info[tid];
    result->line_count = 0;

    const int xs = input[tid].start.x;
    const int ys = input[tid].start.y;
    const int xe = input[tid].end.x;
    const int ye = input[tid].end.y;

    const int dx = abs(xe-xs);
    const int dy = abs(ye-ys);
    const int x_dir = (xe > xs) ? 1 : -1;
    const int y_dir = (ye > ys) ? 1 : -1;

    // The axis with the larger extent advances on every iteration; the
    // other axis advances only when the error term becomes positive.
    const bool x_major = (dx > dy);
    const int major_len = x_major ? dx : dy;
    const int minor_len = x_major ? dy : dx;
    const int major_dx = x_major ? x_dir : 0;
    const int major_dy = x_major ? 0 : y_dir;
    const int minor_dx = x_major ? 0 : x_dir;
    const int minor_dy = x_major ? y_dir : 0;
    const int err_drop = (major_len<<1);
    const int err_rise = (minor_len<<1);
    int err = err_rise-major_len;

    bool tracking = true;      // a segment is currently open
    int gap = 0;               // consecutive non-edge samples seen
    int seg_sx = xs, seg_sy = ys;
    int seg_ex = xs, seg_ey = ys;

    // The position is advanced before it is sampled, so the start pixel is
    // not sampled itself and the last iteration samples one step past the
    // position that still satisfied the loop condition.
    for (int px = xs, py = ys; (abs(px-xs) <= dx) && (abs(py-ys) <= dy); )
    {
        px += major_dx;
        py += major_dy;
        if (err > 0)
        {
            px += minor_dx;
            py += minor_dy;
            err -= err_drop;
        }
        err += err_rise;

        if (255 == tex2D(luma_tex, px, py))
        {
            seg_ex = px;
            seg_ey = py;
            gap = 0;
            if (!tracking)
            {
                seg_sx = px;
                seg_sy = py;
                tracking = true;
            }
        }
        else if (tracking)
        {
            ++gap;
            if (gap >= min_gap)
            {
                tracking = false;
                add_line(seg_sx, seg_sy, seg_ex, seg_ey, min_len, result);
            }
        }
    }

    // A segment still open at the end is closed at the line's end point.
    if (tracking)
    {
        add_line(seg_sx, seg_sy, xe, ye, min_len, result);
    }
    mark[tid] = result->line_count;
}
// Flattens the per-line segment lists into coords[] as four ints per segment
// (start x, start y, end x, end y), converted to a coordinate system whose
// origin is the image centre and whose y axis points up.
//
// Launch layout: 1D, one thread per entry of line; threads with an index at
// or beyond size return immediately.
//
// param  : device pointer to the image geometry.
// line   : per-line segment lists.
// size   : number of entries in line and mark.
// mark   : index of each entry's first segment in the flattened output
//          (exclusive prefix sum of the per-entry segment counts).
// coords : output, four ints per segment.
// count  : receives the total number of segments; written by the thread
//          that handles the last entry.
__global__ void copy_line_coords(const Hough_params* param, Line_info* line, int size, int* mark, int* coords, int* count)
{
    const int index = (blockIdx.x*blockDim.x+threadIdx.x);
    if (index >= size)
    {
        return;
    }

    const int half_w = (param->w>>1);
    const int half_h = (param->h>>1);
    const int first = mark[index];
    const int segments = line[index].line_count;
    const Line* line_data = &line[index].line[0];

    for (int i = 0; i < segments; i++)
    {
        const int pos = 4*(first+i);
        coords[pos] = line_data[i].start.x-half_w;
        coords[pos+1] = half_h-line_data[i].start.y;
        coords[pos+2] = line_data[i].end.x-half_w;
        coords[pos+3] = half_h-line_data[i].end.y;
    }

    if ((index+1) == size)
    {
        // mark[] is an exclusive scan, so the last entry's own segments must
        // be added to obtain the total.
        *count = first+segments;
    }
}
// Stores the image geometry and mirrors it into a one-element device vector.
//
// _w, _h : image dimensions in pixels.
//
// The number of rho bins is half the image diagonal, rounded up.
Hough_lines::Hough_lines(int _w, int _h)
:d_param(1)
{
    params.w = _w;
    params.h = _h;
    // Work in double so the squares cannot overflow int and the sqrt
    // overload is unambiguous.
    const double width = _w;
    const double height = _h;
    params.r = (int)ceil(0.5*sqrt((width*width)+(height*height)));
    thrust::copy_n(&params, 1, d_param.begin());
}
// Nothing to release explicitly; the members clean up after themselves.
Hough_lines::~Hough_lines() {}
// Triggers the one-time upload of the trig tables. The flag is shared by all
// instances, so only the first call does any work. Always returns true.
bool Hough_lines::init()
{
    if (trig_init)
    {
        return true;
    }
    trig_init = true;
    compute_trig_funcs();
    return true;
}
// Fills host tables with sin/cos of 0..ANGLE_SIZE-1 degrees and copies them
// into the dev_sint / dev_cost constant-memory arrays. A failed copy is
// reported on stdout; the function has no other way to signal it.
void Hough_lines::compute_trig_funcs()
{
    static float sint[ANGLE_SIZE];
    static float cost[ANGLE_SIZE];
    for (int i = 0; i < ANGLE_SIZE; i++)
    {
        const float theta = (M_PI*(float)i)/180.0;
        sint[i] = sin(theta);
        cost[i] = cos(theta);
    }

    cudaError_t err = cudaMemcpyToSymbol(dev_sint, sint, ANGLE_SIZE*sizeof(float));
    if (cudaSuccess == err)
    {
        err = cudaMemcpyToSymbol(dev_cost, cost, ANGLE_SIZE*sizeof(float));
    }
    if (cudaSuccess != err)
    {
        // Report the code that was actually returned rather than whatever
        // the runtime's last-error slot currently holds.
        printf("\n%s", cudaGetErrorString(err));
    }
}
// Collects the coordinates of all edge pixels of the bound edge texture.
//
// d_coords : device vector receiving the edge coordinates; the caller must
//            size it for the worst case (one entry per pixel).
// size     : receives the number of edge pixels found.
//
// The grid uses truncating division, so rows/columns beyond the last full
// 16x16 tile are not scanned. This is kept because the launch must never
// cover pixels outside the image.
void Hough_lines::get_edges(thrust::device_vector<Pos>& d_coords, int& size)
{
    size = 0;
    const dim3 bsize(16, 16);
    const dim3 gsize(params.w/bsize.x, params.h/bsize.y);
    if ((0 == gsize.x) || (0 == gsize.y))
    {
        // Image smaller than one tile: a zero-sized grid is not launchable.
        return;
    }

    // One extra, always-zero element: after the exclusive scan the last
    // element then holds the total number of edges, including an edge at
    // the very last pixel.
    const size_t pixel_count = (size_t)params.w*(size_t)params.h;
    thrust::device_vector<int> d_mark(pixel_count+1);

    mark_edges<<<gsize, bsize>>>(thrust::raw_pointer_cast(d_param.data()),
                                 thrust::raw_pointer_cast(d_mark.data()));
    thrust::exclusive_scan(d_mark.begin(), d_mark.end(), d_mark.begin());
    get_coords<<<gsize, bsize>>>(thrust::raw_pointer_cast(d_param.data()),
                                 thrust::raw_pointer_cast(d_mark.data()),
                                 thrust::raw_pointer_cast(d_coords.data()));
    const cudaError_t err = cudaGetLastError();
    if (cudaSuccess != err)
    {
        printf("\nget_edges launch failed: %s", cudaGetErrorString(err));
    }
    thrust::copy_n(d_mark.end()-1, 1, &size);
}
void Hough_lines::get_hough_lines(int threshold, thrust::device_vector<Line>& d_lines, int& size)
{
int edge_count = 0;
thrust::device_vector<Pos> d_coords(params.w*params.h);
get_edges(d_coords, edge_count);
thrust::device_vector<int> d_mark(params.r*360);
thrust::device_vector<Hough_info> d_hough_data(params.r*360);
hough_line_transform<<<360, 256>>>(thrust::raw_pointer_cast(d_param.data()),
edge_count,
thrust::raw_pointer_cast(d_coords.data()), threshold,
thrust::raw_pointer_cast(d_mark.data()),
thrust::raw_pointer_cast(d_hough_data.data()));
thrust::exclusive_scan(d_mark.begin(), d_mark.end(), d_mark.begin());
::get_lines<<<360, 256>>>(thrust::raw_pointer_cast(d_param.data()),
threshold,
thrust::raw_pointer_cast(d_hough_data.data()),
thrust::raw_pointer_cast(d_mark.data()),
thrust::raw_pointer_cast(d_lines.data()));
thrust::copy_n(d_mark.begin()+d_mark.size()-1, 1, &size);
}
void Hough_lines::trace_all_lines(int min_len, int min_gap, thrust::device_vector<Line>& d_lines, int size, int* d_line_coord, int& count)
{
thrust::device_vector<int> d_mark_line(size);
thrust::device_vector<Line_info> d_nlines(size);
trace_lines<<<(1+(size/512)), 512>>>(thrust::raw_pointer_cast(d_lines.data()),
size, min_len, min_gap, thrust::raw_pointer_cast(d_nlines.data()),
thrust::raw_pointer_cast(d_mark_line.data()));
thrust::exclusive_scan(d_mark_line.begin(), d_mark_line.end(), d_mark_line.begin());
thrust::device_vector<int> d_count(1);
copy_line_coords<<<(1+(size/512)), 512>>>(thrust::raw_pointer_cast(d_param.data()),
thrust::raw_pointer_cast(d_nlines.data()), size,
thrust::raw_pointer_cast(d_mark_line.data()), d_line_coord,
thrust::raw_pointer_cast(d_count.data()));
thrust::copy(d_count.begin(), d_count.end(), &count);
//printf("\nLine count: %d", count);
}
void Hough_lines::get_lines(int threshold, int min_len, int min_gap, GLuint line, Hough_lines::Type type, int& count)
{
int* d_line_coord = 0;
cudaGLRegisterBufferObject(line);
cudaGLMapBufferObject((void **)&d_line_coord, line);
int size = 0;
thrust::device_vector<Line> d_lines(params.r*360);
get_hough_lines(threshold, d_lines, size);
//printf("\nget_hough_lines: %d", size);
trace_all_lines(min_len, min_gap, d_lines, size, d_line_coord, count);
cudaGLUnmapBufferObject(line);
cudaGLUnregisterBufferObject(line);
}
// Detects line segments in an OpenGL edge texture.
//
// tex_edge   : GL_TEXTURE_2D holding the edge image; it is registered with
//              CUDA, mapped and bound to luma_tex for the duration of the call.
// threshold  : minimum number of votes for a Hough bin to be traced.
// min_length : minimum reported segment length.
// min_gap    : gap length that closes a segment.
// line       : OpenGL buffer object receiving the segments.
// type       : forwarded to get_lines.
// count      : receives the number of segments; 0 on failure.
//
// Returns true when the pipeline ran, false when the texture could not be
// registered, mapped or bound. Failures are reported on stdout and every
// resource acquired up to that point is released (earlier revisions called
// exit(0) instead). The elapsed GPU time is printed on every successful call.
bool Hough_lines::detect_lines(GLuint tex_edge, int threshold, int min_length, int min_gap, GLuint line, Hough_lines::Type type, int& count)
{
    count = 0;

    cudaGraphicsResource* res_edge = 0;
    cudaError_t err = cudaGraphicsGLRegisterImage(&res_edge, tex_edge, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsReadOnly);
    if (err != cudaSuccess)
    {
        printf("cudaGraphicsGLRegisterImage Failed: %s", cudaGetErrorString(err));
        return false;
    }

    err = cudaGraphicsMapResources(1, &res_edge);
    if (err != cudaSuccess)
    {
        printf("cudaGraphicsMapResources Failed: %s", cudaGetErrorString(err));
        cudaGraphicsUnregisterResource(res_edge);
        return false;
    }

    bool ok = false;
    cudaArray* array_edge = 0;
    cudaChannelFormatDesc chan_desc = cudaCreateChannelDesc<uint8_t>();
    err = cudaGraphicsSubResourceGetMappedArray(&array_edge, res_edge, 0, 0);
    if (err != cudaSuccess)
    {
        printf("cudaGraphicsSubResourceGetMappedArray Failed: %s", cudaGetErrorString(err));
    }
    else if ((err = cudaBindTextureToArray(&luma_tex, array_edge, &chan_desc)) != cudaSuccess)
    {
        printf("Failed to bind texture - %s\n", cudaGetErrorString(err));
    }
    else
    {
        float time = 0.0;
        cudaEvent_t start, stop;
        cudaEventCreate(&start);
        cudaEventCreate(&stop);
        cudaEventRecord(start);
        get_lines(threshold, min_length, min_gap, line, type, count);
        cudaEventRecord(stop);
        cudaEventSynchronize(stop);
        cudaEventElapsedTime(&time, start, stop);
        printf("\nElpased time: %f ms", time);
        cudaEventDestroy(start);
        cudaEventDestroy(stop);
        cudaUnbindTexture(luma_tex);
        ok = true;
    }

    // Released on success and on late failures alike.
    cudaGraphicsUnmapResources(1, &res_edge);
    cudaGraphicsUnregisterResource(res_edge);
    return ok;
}