C/C++ 和 CUDA 中的双线性插值_C++_Cuda

C/C++ 和 CUDA 中的双线性插值

c++ cuda

C/C++ 和 CUDA 中的双线性插值,c++,cuda,C++,Cuda,我想在CPU上模拟CUDA双线性插值的行为，但我发现tex2D的返回值似乎与双线性插值公式不符。我猜将插值系数从float转换为带8位小数的9位定点格式会产生不同的值。根据转换公式，当系数为1/2^n，且n=0,1，…，8时，转换结果将与输入的float相同，但我仍然（并非总是）收到奇怪的值。下面我报告一个奇怪值的例子。在这种情况下，id=2*n+1时总是会出现奇怪的值，有人能告诉我为什么吗？ Src数组： Src[0][0] = 38; Src[1][0] = 39; Src[0]

我想在CPU上模拟CUDA双线性插值的行为，但我发现

tex2D

的返回值似乎与双线性插值公式不符。

我猜将插值系数从

float

转换为

9位定点格式，使用

8位的小数值会产生不同的值

根据转换公式，当系数为

1/2^n

，且

n=0,1，…，8

时，转换结果将与输入的

float

相同，但我仍然（并非总是）收到奇怪的值

下面我报告一个奇怪值的例子。在这种情况下，id=2*n+1
时总是会出现奇怪的值，有人能告诉我为什么吗
Src数组：

// 2x2 source values sampled by the example. The tabulated results are
// consistent with the first index being x and the second y: at
// pnt = (0, 0.0625) the fetch returns 43 = 38 + 0.0625 * (118 - 38).
Src[0][0] = 38; Src[1][0] = 39; Src[0][1] = 118; Src[1][1] = 13;

// 2D texture reference of float4 texels, read with cudaReadModeElementType.
static texture<float4, 2, cudaReadModeElementType> texElnt;
// Both coordinate axes use clamp addressing.
texElnt.addressMode[0] = cudaAddressModeClamp;
texElnt.addressMode[1] = cudaAddressModeClamp;
// Linear filtering: this is the hardware interpolation the example examines.
texElnt.filterMode = cudaFilterModeLinear;
// Fetch coordinates are unnormalized.
texElnt.normalized = false;

// Fetches texElnt at a row of sub-texel positions and stores, per thread, the
// sample position together with the x component of the filtered texel.
//
// Launch layout: 2D grid of 2D blocks, one thread per (gx, gy). Threads with
// gx >= w or gy >= h return without writing.
//
// pdata  : output, three floats per thread: { pnt.x, pnt.y, result.x }.
//          The record index is gy * (gridDim.x * blockDim.x) + gx, so the row
//          pitch is the launched grid width, not w.
//          NOTE(review): pdata must be large enough for the highest index
//          produced this way; confirm against the host-side allocation.
// w, h   : number of threads along x / y that produce output.
// c      : not used by this kernel.
// stride : x distance between the sample positions of neighbouring threads.
static __global__ void kernel_texElnt(float* pdata, int w, int h, int c, float stride/*0.03125f*/)
{
    const int gx = blockIdx.x * blockDim.x + threadIdx.x;
    const int gy = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard the grid tail before doing any further work.
    if (gx >= w || gy >= h) {
        return;
    }

    const int gw  = gridDim.x * blockDim.x;   // launched grid width in threads
    const int gid = gy * gw + gx;             // flat record index

    float2 pnt;
    pnt.x = gx * stride;   /* multiples of 1/32 in the example */
    pnt.y = 0.0625f;       /* 1/16 */

    // 0.5f (not 0.5) keeps the addition in single precision. With
    // unnormalized coordinates and linear filtering, the +0.5f offset makes
    // pnt the offset from the centre of texel (0,0): pnt = (0, 0.0625)
    // yields 43 in the tabulated results.
    const float4 result = tex2D(texElnt, pnt.x + 0.5f, pnt.y + 0.5f);

    float* record = pdata + gid * 3;
    record[0] = pnt.x;
    record[1] = pnt.y;
    record[2] = result.x;
}
纹理定义：

// 2x2 source values sampled by the example. The tabulated results are
// consistent with the first index being x and the second y: at
// pnt = (0, 0.0625) the fetch returns 43 = 38 + 0.0625 * (118 - 38).
Src[0][0] = 38; Src[1][0] = 39; Src[0][1] = 118; Src[1][1] = 13;

// 2D texture reference of float4 texels, read with cudaReadModeElementType.
static texture<float4, 2, cudaReadModeElementType> texElnt;
// Both coordinate axes use clamp addressing.
texElnt.addressMode[0] = cudaAddressModeClamp;
texElnt.addressMode[1] = cudaAddressModeClamp;
// Linear filtering: this is the hardware interpolation the example examines.
texElnt.filterMode = cudaFilterModeLinear;
// Fetch coordinates are unnormalized.
texElnt.normalized = false;

// Fetches texElnt at a row of sub-texel positions and stores, per thread, the
// sample position together with the x component of the filtered texel.
//
// Launch layout: 2D grid of 2D blocks, one thread per (gx, gy). Threads with
// gx >= w or gy >= h return without writing.
//
// pdata  : output, three floats per thread: { pnt.x, pnt.y, result.x }.
//          The record index is gy * (gridDim.x * blockDim.x) + gx, so the row
//          pitch is the launched grid width, not w.
//          NOTE(review): pdata must be large enough for the highest index
//          produced this way; confirm against the host-side allocation.
// w, h   : number of threads along x / y that produce output.
// c      : not used by this kernel.
// stride : x distance between the sample positions of neighbouring threads.
static __global__ void kernel_texElnt(float* pdata, int w, int h, int c, float stride/*0.03125f*/)
{
    const int gx = blockIdx.x * blockDim.x + threadIdx.x;
    const int gy = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard the grid tail before doing any further work.
    if (gx >= w || gy >= h) {
        return;
    }

    const int gw  = gridDim.x * blockDim.x;   // launched grid width in threads
    const int gid = gy * gw + gx;             // flat record index

    float2 pnt;
    pnt.x = gx * stride;   /* multiples of 1/32 in the example */
    pnt.y = 0.0625f;       /* 1/16 */

    // 0.5f (not 0.5) keeps the addition in single precision. With
    // unnormalized coordinates and linear filtering, the +0.5f offset makes
    // pnt the offset from the centre of texel (0,0): pnt = (0, 0.0625)
    // yields 43 in the tabulated results.
    const float4 result = tex2D(texElnt, pnt.x + 0.5f, pnt.y + 0.5f);

    float* record = pdata + gid * 3;
    record[0] = pnt.x;
    record[1] = pnt.y;
    record[2] = result.x;
}
CUDA的双线性结果

id pnt.x pnt.y tex2D 0 0.00000 0.0625 43.0000000 1 0.03125 0.0625 42.6171875 2 0.06250 0.0625 42.6484375 3 0.09375 0.0625 42.2656250 4 0.12500 0.0625 42.2968750 5 0.15625 0.0625 41.9140625 6 0.18750 0.0625 41.9453125 7 0.21875 0.0625 41.5625000 8 0.25000 0.0625 41.5937500 9 0.28125 0.0625 41.2109375 0 0.31250 0.0625 41.2421875 10 0.34375 0.0625 40.8593750 11 0.37500 0.0625 40.8906250 12 0.40625 0.0625 40.5078125 13 0.43750 0.0625 40.5390625 14 0.46875 0.0625 40.1562500 15 0.50000 0.0625 40.1875000 16 0.53125 0.0625 39.8046875 17 0.56250 0.0625 39.8359375 18 0.59375 0.0625 39.4531250 19 0.62500 0.0625 39.4843750 20 0.65625 0.0625 39.1015625 21 0.68750 0.0625 39.1328125 22 0.71875 0.0625 38.7500000 23 0.75000 0.0625 38.7812500 24 0.78125 0.0625 38.3984375 25 0.81250 0.0625 38.4296875 26 0.84375 0.0625 38.0468750 27 0.87500 0.0625 38.0781250 28 0.90625 0.0625 37.6953125 29 0.93750 0.0625 37.7265625 30 0.96875 0.0625 37.3437500 31 1.00000 0.0625 37.3750000

// convert coefficient ((1-α)*(1-β)), (α*(1-β)), ((1-α)*β), (α*β) to fixed point format id pnt.x pnt.y tex2D 0 0.00000 0.0625 43.00000000 1 0.03125 0.0625 43.23046875 2 0.06250 0.0625 42.64843750 3 0.09375 0.0625 42.87890625 4 0.12500 0.0625 42.29687500 5 0.15625 0.0625 42.52734375 6 0.18750 0.0625 41.94531250 7 0.21875 0.0625 42.17578125 8 0.25000 0.0625 41.59375000 9 0.28125 0.0625 41.82421875 0 0.31250 0.0625 41.24218750 10 0.34375 0.0625 41.47265625 11 0.37500 0.0625 40.89062500 12 0.40625 0.0625 41.12109375 13 0.43750 0.0625 40.53906250 14 0.46875 0.0625 40.76953125 15 0.50000 0.0625 40.18750000 16 0.53125 0.0625 40.41796875 17 0.56250 0.0625 39.83593750 18 0.59375 0.0625 40.06640625 19 0.62500 0.0625 39.48437500 20 0.65625 0.0625 39.71484375 21 0.68750 0.0625 39.13281250 22 0.71875 0.0625 39.36328125 23 0.75000 0.0625 38.78125000 24 0.78125 0.0625 39.01171875 25 0.81250 0.0625 38.42968750 26 0.84375 0.0625 38.66015625 27 0.87500 0.0625 38.07812500 28 0.90625 0.0625 38.30859375 29 0.93750 0.0625 37.72656250 30 0.96875 0.0625 37.95703125 31 1.00000 0.0625 37.37500000

// Bilinear interpolation with the weights quantized by frac():
//
//   tex(x,y) = T[i,j] + frac(α)(T[i+1,j]-T[i,j]) + frac(β)(T[i,j+1]-T[i,j])
//            + frac(αβ)(T[i,j]+T[i+1,j+1] - T[i+1,j]-T[i,j+1])
//
// frac(x) turns a float into 9-bit fixed point format with 8 bits of
// fraction: it returns x - floor(x) rounded to the nearest multiple of 1/256.
// The result lies in [0, 1]; 1.0 is returned when the fractional part rounds
// up to 256/256.
//
// x must be representable as an int after truncation (the whole part is
// obtained with an int cast).
float frac( float x )
{
    // floor(x) via an int cast: the cast truncates toward zero, so step down
    // by one when truncation moved a negative value up. Without this a
    // negative x would produce a negative "fraction", which the unsigned
    // fixed-point format cannot represent.
    float whole = (float)(int)( x );
    if ( whole > x ) {
        whole -= 1.0f;
    }
    const float remainder = x - whole;

    // Scale to 1/256 steps and round to nearest (add 0.5, then truncate).
    const float steps = (float)(int)( remainder*256.0f + 0.5f );
    return steps / 256.0f;
}
CPU结果：

id pnt.x pnt.y tex2D 0 0.00000 0.0625 43.0000000 1 0.03125 0.0625 42.6171875 2 0.06250 0.0625 42.6484375 3 0.09375 0.0625 42.2656250 4 0.12500 0.0625 42.2968750 5 0.15625 0.0625 41.9140625 6 0.18750 0.0625 41.9453125 7 0.21875 0.0625 41.5625000 8 0.25000 0.0625 41.5937500 9 0.28125 0.0625 41.2109375 0 0.31250 0.0625 41.2421875 10 0.34375 0.0625 40.8593750 11 0.37500 0.0625 40.8906250 12 0.40625 0.0625 40.5078125 13 0.43750 0.0625 40.5390625 14 0.46875 0.0625 40.1562500 15 0.50000 0.0625 40.1875000 16 0.53125 0.0625 39.8046875 17 0.56250 0.0625 39.8359375 18 0.59375 0.0625 39.4531250 19 0.62500 0.0625 39.4843750 20 0.65625 0.0625 39.1015625 21 0.68750 0.0625 39.1328125 22 0.71875 0.0625 38.7500000 23 0.75000 0.0625 38.7812500 24 0.78125 0.0625 38.3984375 25 0.81250 0.0625 38.4296875 26 0.84375 0.0625 38.0468750 27 0.87500 0.0625 38.0781250 28 0.90625 0.0625 37.6953125 29 0.93750 0.0625 37.7265625 30 0.96875 0.0625 37.3437500 31 1.00000 0.0625 37.3750000

// convert coefficient ((1-α)*(1-β)), (α*(1-β)), ((1-α)*β), (α*β) to fixed point format id pnt.x pnt.y tex2D 0 0.00000 0.0625 43.00000000 1 0.03125 0.0625 43.23046875 2 0.06250 0.0625 42.64843750 3 0.09375 0.0625 42.87890625 4 0.12500 0.0625 42.29687500 5 0.15625 0.0625 42.52734375 6 0.18750 0.0625 41.94531250 7 0.21875 0.0625 42.17578125 8 0.25000 0.0625 41.59375000 9 0.28125 0.0625 41.82421875 0 0.31250 0.0625 41.24218750 10 0.34375 0.0625 41.47265625 11 0.37500 0.0625 40.89062500 12 0.40625 0.0625 41.12109375 13 0.43750 0.0625 40.53906250 14 0.46875 0.0625 40.76953125 15 0.50000 0.0625 40.18750000 16 0.53125 0.0625 40.41796875 17 0.56250 0.0625 39.83593750 18 0.59375 0.0625 40.06640625 19 0.62500 0.0625 39.48437500 20 0.65625 0.0625 39.71484375 21 0.68750 0.0625 39.13281250 22 0.71875 0.0625 39.36328125 23 0.75000 0.0625 38.78125000 24 0.78125 0.0625 39.01171875 25 0.81250 0.0625 38.42968750 26 0.84375 0.0625 38.66015625 27 0.87500 0.0625 38.07812500 28 0.90625 0.0625 38.30859375 29 0.93750 0.0625 37.72656250 30 0.96875 0.0625 37.95703125 31 1.00000 0.0625 37.37500000

// Bilinear interpolation with the weights quantized by frac():
//
//   tex(x,y) = T[i,j] + frac(α)(T[i+1,j]-T[i,j]) + frac(β)(T[i,j+1]-T[i,j])
//            + frac(αβ)(T[i,j]+T[i+1,j+1] - T[i+1,j]-T[i,j+1])
//
// frac(x) turns a float into 9-bit fixed point format with 8 bits of
// fraction: it returns x - floor(x) rounded to the nearest multiple of 1/256.
// The result lies in [0, 1]; 1.0 is returned when the fractional part rounds
// up to 256/256.
//
// x must be representable as an int after truncation (the whole part is
// obtained with an int cast).
float frac( float x )
{
    // floor(x) via an int cast: the cast truncates toward zero, so step down
    // by one when truncation moved a negative value up. Without this a
    // negative x would produce a negative "fraction", which the unsigned
    // fixed-point format cannot represent.
    float whole = (float)(int)( x );
    if ( whole > x ) {
        whole -= 1.0f;
    }
    const float remainder = x - whole;

    // Scale to 1/256 steps and round to nearest (add 0.5, then truncate).
    const float steps = (float)(int)( remainder*256.0f + 0.5f );
    return steps / 256.0f;
}
我保留了一个简单的代码，运行程序后，您将在
D:\
中获得两个文件
编辑2014/01/20

我以不同的增量运行程序，发现
tex2D
“当
alpha
乘以
beta
小于
0.00390625（即 1/256）
时，
tex2D
的返回与双线性插值公式不匹配”。

被截断为9位的是UV插值系数，而不是参与插值的texel值。在《CUDA手册》（The CUDA Handbook）的第10章（纹理）中，对1D情况进行了详细描述（包括CPU仿真代码）。这段代码是开源的，可以在
中找到。

错误的双线性插值公式使得纹理提取的结果很奇怪。

id pnt.x pnt.y tex2D 0 0.00000 0.0625 43.0000000 1 0.03125 0.0625 42.6171875 2 0.06250 0.0625 42.6484375 3 0.09375 0.0625 42.2656250 4 0.12500 0.0625 42.2968750 5 0.15625 0.0625 41.9140625 6 0.18750 0.0625 41.9453125 7 0.21875 0.0625 41.5625000 8 0.25000 0.0625 41.5937500 9 0.28125 0.0625 41.2109375 0 0.31250 0.0625 41.2421875 10 0.34375 0.0625 40.8593750 11 0.37500 0.0625 40.8906250 12 0.40625 0.0625 40.5078125 13 0.43750 0.0625 40.5390625 14 0.46875 0.0625 40.1562500 15 0.50000 0.0625 40.1875000 16 0.53125 0.0625 39.8046875 17 0.56250 0.0625 39.8359375 18 0.59375 0.0625 39.4531250 19 0.62500 0.0625 39.4843750 20 0.65625 0.0625 39.1015625 21 0.68750 0.0625 39.1328125 22 0.71875 0.0625 38.7500000 23 0.75000 0.0625 38.7812500 24 0.78125 0.0625 38.3984375 25 0.81250 0.0625 38.4296875 26 0.84375 0.0625 38.0468750 27 0.87500 0.0625 38.0781250 28 0.90625 0.0625 37.6953125 29 0.93750 0.0625 37.7265625 30 0.96875 0.0625 37.3437500 31 1.00000 0.0625 37.3750000

// convert coefficient ((1-α)*(1-β)), (α*(1-β)), ((1-α)*β), (α*β) to fixed point format id pnt.x pnt.y tex2D 0 0.00000 0.0625 43.00000000 1 0.03125 0.0625 43.23046875 2 0.06250 0.0625 42.64843750 3 0.09375 0.0625 42.87890625 4 0.12500 0.0625 42.29687500 5 0.15625 0.0625 42.52734375 6 0.18750 0.0625 41.94531250 7 0.21875 0.0625 42.17578125 8 0.25000 0.0625 41.59375000 9 0.28125 0.0625 41.82421875 0 0.31250 0.0625 41.24218750 10 0.34375 0.0625 41.47265625 11 0.37500 0.0625 40.89062500 12 0.40625 0.0625 41.12109375 13 0.43750 0.0625 40.53906250 14 0.46875 0.0625 40.76953125 15 0.50000 0.0625 40.18750000 16 0.53125 0.0625 40.41796875 17 0.56250 0.0625 39.83593750 18 0.59375 0.0625 40.06640625 19 0.62500 0.0625 39.48437500 20 0.65625 0.0625 39.71484375 21 0.68750 0.0625 39.13281250 22 0.71875 0.0625 39.36328125 23 0.75000 0.0625 38.78125000 24 0.78125 0.0625 39.01171875 25 0.81250 0.0625 38.42968750 26 0.84375 0.0625 38.66015625 27 0.87500 0.0625 38.07812500 28 0.90625 0.0625 38.30859375 29 0.93750 0.0625 37.72656250 30 0.96875 0.0625 37.95703125 31 1.00000 0.0625 37.37500000

// Bilinear interpolation with the weights quantized by frac():
//
//   tex(x,y) = T[i,j] + frac(α)(T[i+1,j]-T[i,j]) + frac(β)(T[i,j+1]-T[i,j])
//            + frac(αβ)(T[i,j]+T[i+1,j+1] - T[i+1,j]-T[i,j+1])
//
// frac(x) turns a float into 9-bit fixed point format with 8 bits of
// fraction: it returns x - floor(x) rounded to the nearest multiple of 1/256.
// The result lies in [0, 1]; 1.0 is returned when the fractional part rounds
// up to 256/256.
//
// x must be representable as an int after truncation (the whole part is
// obtained with an int cast).
float frac( float x )
{
    // floor(x) via an int cast: the cast truncates toward zero, so step down
    // by one when truncation moved a negative value up. Without this a
    // negative x would produce a negative "fraction", which the unsigned
    // fixed-point format cannot represent.
    float whole = (float)(int)( x );
    if ( whole > x ) {
        whole -= 1.0f;
    }
    const float remainder = x - whole;

    // Scale to 1/256 steps and round to nearest (add 0.5, then truncate).
    const float steps = (float)(int)( remainder*256.0f + 0.5f );
    return steps / 256.0f;
}
公式-1：您可以在cuda附录或wiki中轻松找到它

tex(x,y)=(1−α)(1−β)T[i,j] + α(1−β)T[i+1,j] + (1−α)βT[i,j+1] + αβT[i+1,j+1]
公式-2：减少乘法次数

tex(x,y)=T[i,j] + α(T[i+1,j]-T[i,j]) + β(T[i,j+1]-T[i,j]) + αβ(T[i,j]+T[i+1,j+1] - T[i+1, j]-T[i,j+1])
如果对公式1使用9位定点格式，将得到纹理获取的不匹配结果，但公式2可以正常工作
结论：
如果要模拟cuda纹理实现的双线性插值，应使用公式3。试试看
公式-3:

id pnt.x pnt.y tex2D 0 0.00000 0.0625 43.0000000 1 0.03125 0.0625 42.6171875 2 0.06250 0.0625 42.6484375 3 0.09375 0.0625 42.2656250 4 0.12500 0.0625 42.2968750 5 0.15625 0.0625 41.9140625 6 0.18750 0.0625 41.9453125 7 0.21875 0.0625 41.5625000 8 0.25000 0.0625 41.5937500 9 0.28125 0.0625 41.2109375 0 0.31250 0.0625 41.2421875 10 0.34375 0.0625 40.8593750 11 0.37500 0.0625 40.8906250 12 0.40625 0.0625 40.5078125 13 0.43750 0.0625 40.5390625 14 0.46875 0.0625 40.1562500 15 0.50000 0.0625 40.1875000 16 0.53125 0.0625 39.8046875 17 0.56250 0.0625 39.8359375 18 0.59375 0.0625 39.4531250 19 0.62500 0.0625 39.4843750 20 0.65625 0.0625 39.1015625 21 0.68750 0.0625 39.1328125 22 0.71875 0.0625 38.7500000 23 0.75000 0.0625 38.7812500 24 0.78125 0.0625 38.3984375 25 0.81250 0.0625 38.4296875 26 0.84375 0.0625 38.0468750 27 0.87500 0.0625 38.0781250 28 0.90625 0.0625 37.6953125 29 0.93750 0.0625 37.7265625 30 0.96875 0.0625 37.3437500 31 1.00000 0.0625 37.3750000

// convert coefficient ((1-α)*(1-β)), (α*(1-β)), ((1-α)*β), (α*β) to fixed point format id pnt.x pnt.y tex2D 0 0.00000 0.0625 43.00000000 1 0.03125 0.0625 43.23046875 2 0.06250 0.0625 42.64843750 3 0.09375 0.0625 42.87890625 4 0.12500 0.0625 42.29687500 5 0.15625 0.0625 42.52734375 6 0.18750 0.0625 41.94531250 7 0.21875 0.0625 42.17578125 8 0.25000 0.0625 41.59375000 9 0.28125 0.0625 41.82421875 0 0.31250 0.0625 41.24218750 10 0.34375 0.0625 41.47265625 11 0.37500 0.0625 40.89062500 12 0.40625 0.0625 41.12109375 13 0.43750 0.0625 40.53906250 14 0.46875 0.0625 40.76953125 15 0.50000 0.0625 40.18750000 16 0.53125 0.0625 40.41796875 17 0.56250 0.0625 39.83593750 18 0.59375 0.0625 40.06640625 19 0.62500 0.0625 39.48437500 20 0.65625 0.0625 39.71484375 21 0.68750 0.0625 39.13281250 22 0.71875 0.0625 39.36328125 23 0.75000 0.0625 38.78125000 24 0.78125 0.0625 39.01171875 25 0.81250 0.0625 38.42968750 26 0.84375 0.0625 38.66015625 27 0.87500 0.0625 38.07812500 28 0.90625 0.0625 38.30859375 29 0.93750 0.0625 37.72656250 30 0.96875 0.0625 37.95703125 31 1.00000 0.0625 37.37500000

// Bilinear interpolation with the weights quantized by frac():
//
//   tex(x,y) = T[i,j] + frac(α)(T[i+1,j]-T[i,j]) + frac(β)(T[i,j+1]-T[i,j])
//            + frac(αβ)(T[i,j]+T[i+1,j+1] - T[i+1,j]-T[i,j+1])
//
// frac(x) turns a float into 9-bit fixed point format with 8 bits of
// fraction: it returns x - floor(x) rounded to the nearest multiple of 1/256.
// The result lies in [0, 1]; 1.0 is returned when the fractional part rounds
// up to 256/256.
//
// x must be representable as an int after truncation (the whole part is
// obtained with an int cast).
float frac( float x )
{
    // floor(x) via an int cast: the cast truncates toward zero, so step down
    // by one when truncation moved a negative value up. Without this a
    // negative x would produce a negative "fraction", which the unsigned
    // fixed-point format cannot represent.
    float whole = (float)(int)( x );
    if ( whole > x ) {
        whole -= 1.0f;
    }
    const float remainder = x - whole;

    // Scale to 1/256 steps and round to nearest (add 0.5, then truncate).
    const float steps = (float)(int)( remainder*256.0f + 0.5f );
    return steps / 256.0f;
}

已经有了令人满意的答案，所以这里我只想概述一些关于双线性插值的有用信息：如何用C++实现它，以及在CUDA中实现它的几种不同方式。

双线性插值背后的数学

假设原始函数
T（x，y）
在点的笛卡尔规则网格
（i，j）
中使用
0进行采样，你能添加其他人可以编译和运行的最短完整示例吗？谢谢你的建议@Talonmes，我提供了示例代码的链接。谢谢你的回复，我以前也读过代码（与文章中的链接2相同）。根据链接，如果x=1/2^n（n=1，2，…8），那么frac应该等于x。我得到了与tex2D不匹配的结果，所以我在这里发布了我的问题。