C/C++ 和 CUDA 中的双线性插值

C/C++ 和 CUDA 中的双线性插值,c++,cuda,C++,Cuda,我想在CPU上模拟CUDA双线性插值的行为,但我发现tex2D的返回值似乎与双线性插值公式不符 我猜测,将插值系数从float转换为9位定点格式(其中8位为小数位)会导致结果不同 根据转换公式,当系数为1/2^n,且n=0,1,…,8时,转换结果将与输入的float相同,但我仍然(并非总是)得到奇怪的值 下面我给出一个奇怪值的例子。在这种情况下,id=2*n+1时总是会出现奇怪的值,有人能告诉我为什么吗 Src数组: Src[0][0] = 38; Src[1][0] = 39; Src[0]

我想在CPU上模拟CUDA双线性插值的行为,但我发现
tex2D
的返回值似乎与双线性插值公式不符

我猜测,将插值系数从
float
转换为
9
位定点格式(其中
8
位为小数位)会导致结果不同

根据转换公式,当系数为
1/2^n
,且
n=0,1,…,8
时,转换结果将与输入的
float
相同,但我仍然(并非总是)收到奇怪的值

下面我报告一个奇怪值的例子。在这种情况下,
id=2*n+1
时总是会出现奇怪的值,有人能告诉我为什么吗

Src数组:

// 2x2 source texel values used by the example.
// NOTE(review): the first index appears to be x and the second y -- this is
// consistent with the reported tex2D value 43 at (x=0, y=0.0625), i.e.
// 38 + 0.0625*(118-38). Confirm against the host code that fills the texture.
Src[0][0] =  38;  
Src[1][0] =  39;  
Src[0][1] = 118;  
Src[1][1] =  13;  
// Legacy texture reference: 2D texture of float4 texels, read back as stored
// (cudaReadModeElementType, i.e. no conversion of the fetched value).
static texture<float4, 2, cudaReadModeElementType> texElnt;
// Out-of-range coordinates are clamped on both axes.
texElnt.addressMode[0] = cudaAddressModeClamp;
texElnt.addressMode[1] = cudaAddressModeClamp;
// Fetches are linearly filtered between neighbouring texels.
texElnt.filterMode = cudaFilterModeLinear;
// Coordinates are not normalized; the kernel passes texel-unit coordinates.
texElnt.normalized = false;
/**
 * Fetches one filtered sample from the texture reference `texElnt` per thread
 * and stores the sample position and the fetched value.
 *
 * Launch layout: 2D grid of 2D blocks; thread (gx, gy) handles one sample.
 * Threads with gx >= w or gy >= h exit without writing.
 *
 * @param pdata   Output buffer; each thread writes three consecutive floats
 *                {pnt.x, pnt.y, result.x} at offset gid*3. The row pitch is the
 *                launch width (gridDim.x * blockDim.x), not `w`, so the buffer
 *                must be sized for the whole launch grid.
 * @param w       Number of samples along x that are written.
 * @param h       Number of samples along y that are written.
 * @param c       Unused by this kernel.
 * @param stride  Spacing between consecutive x sample positions
 *                (the original example uses 0.03125f = 1/32).
 */
static __global__ void kernel_texElnt(float* pdata, int w, int h, int c, float stride/*0.03125f*/) {
    const int gx = blockIdx.x*blockDim.x + threadIdx.x;
    const int gy = blockIdx.y*blockDim.y + threadIdx.y;
    if (gx >= w || gy >= h) {
        return;
    }

    // Flat output index, pitched by the full launch width.
    const int gw = gridDim.x * blockDim.x;
    const int gid = gy*gw + gx;

    // Sample position: x advances with the thread index, y is fixed at 1/16.
    float2 pnt;
    pnt.x = (gx)*(stride)/*1/32*/;
    pnt.y = 0.0625f/*1/16*/;

    // Use float literals for both offsets: a bare 0.5 is a double and would
    // promote the addition to double precision before converting back.
    // NOTE(review): the +0.5f offsets presumably compensate for the half-texel
    // shift applied by linear filtering -- confirm against the CUDA guide.
    const float4 result = tex2D( texElnt, pnt.x + 0.5f, pnt.y + 0.5f);

    float* out = pdata + gid*3;
    out[0] = pnt.x;
    out[1] = pnt.y;
    out[2] = result.x;
}
纹理定义:

// 2x2 source texel values used by the example.
// NOTE(review): the first index appears to be x and the second y -- this is
// consistent with the reported tex2D value 43 at (x=0, y=0.0625), i.e.
// 38 + 0.0625*(118-38). Confirm against the host code that fills the texture.
Src[0][0] =  38;  
Src[1][0] =  39;  
Src[0][1] = 118;  
Src[1][1] =  13;  
// Legacy texture reference: 2D texture of float4 texels, read back as stored
// (cudaReadModeElementType, i.e. no conversion of the fetched value).
static texture<float4, 2, cudaReadModeElementType> texElnt;
// Out-of-range coordinates are clamped on both axes.
texElnt.addressMode[0] = cudaAddressModeClamp;
texElnt.addressMode[1] = cudaAddressModeClamp;
// Fetches are linearly filtered between neighbouring texels.
texElnt.filterMode = cudaFilterModeLinear;
// Coordinates are not normalized; the kernel passes texel-unit coordinates.
texElnt.normalized = false;
/**
 * Fetches one filtered sample from the texture reference `texElnt` per thread
 * and stores the sample position and the fetched value.
 *
 * Launch layout: 2D grid of 2D blocks; thread (gx, gy) handles one sample.
 * Threads with gx >= w or gy >= h exit without writing.
 *
 * @param pdata   Output buffer; each thread writes three consecutive floats
 *                {pnt.x, pnt.y, result.x} at offset gid*3. The row pitch is the
 *                launch width (gridDim.x * blockDim.x), not `w`, so the buffer
 *                must be sized for the whole launch grid.
 * @param w       Number of samples along x that are written.
 * @param h       Number of samples along y that are written.
 * @param c       Unused by this kernel.
 * @param stride  Spacing between consecutive x sample positions
 *                (the original example uses 0.03125f = 1/32).
 */
static __global__ void kernel_texElnt(float* pdata, int w, int h, int c, float stride/*0.03125f*/) {
    const int gx = blockIdx.x*blockDim.x + threadIdx.x;
    const int gy = blockIdx.y*blockDim.y + threadIdx.y;
    if (gx >= w || gy >= h) {
        return;
    }

    // Flat output index, pitched by the full launch width.
    const int gw = gridDim.x * blockDim.x;
    const int gid = gy*gw + gx;

    // Sample position: x advances with the thread index, y is fixed at 1/16.
    float2 pnt;
    pnt.x = (gx)*(stride)/*1/32*/;
    pnt.y = 0.0625f/*1/16*/;

    // Use float literals for both offsets: a bare 0.5 is a double and would
    // promote the addition to double precision before converting back.
    // NOTE(review): the +0.5f offsets presumably compensate for the half-texel
    // shift applied by linear filtering -- confirm against the CUDA guide.
    const float4 result = tex2D( texElnt, pnt.x + 0.5f, pnt.y + 0.5f);

    float* out = pdata + gid*3;
    out[0] = pnt.x;
    out[1] = pnt.y;
    out[2] = result.x;
}
CUDA的双线性结果

id  pnt.x   pnt.y   tex2D
 0  0.00000 0.0625  43.0000000  
 1  0.03125 0.0625  42.6171875  
 2  0.06250 0.0625  42.6484375  
 3  0.09375 0.0625  42.2656250  
 4  0.12500 0.0625  42.2968750  
 5  0.15625 0.0625  41.9140625  
 6  0.18750 0.0625  41.9453125  
 7  0.21875 0.0625  41.5625000  
 8  0.25000 0.0625  41.5937500  
 9  0.28125 0.0625  41.2109375  
 0  0.31250 0.0625  41.2421875  
10  0.34375 0.0625  40.8593750  
11  0.37500 0.0625  40.8906250  
12  0.40625 0.0625  40.5078125  
13  0.43750 0.0625  40.5390625  
14  0.46875 0.0625  40.1562500  
15  0.50000 0.0625  40.1875000  
16  0.53125 0.0625  39.8046875  
17  0.56250 0.0625  39.8359375  
18  0.59375 0.0625  39.4531250  
19  0.62500 0.0625  39.4843750  
20  0.65625 0.0625  39.1015625  
21  0.68750 0.0625  39.1328125  
22  0.71875 0.0625  38.7500000  
23  0.75000 0.0625  38.7812500  
24  0.78125 0.0625  38.3984375  
25  0.81250 0.0625  38.4296875  
26  0.84375 0.0625  38.0468750  
27  0.87500 0.0625  38.0781250  
28  0.90625 0.0625  37.6953125  
29  0.93750 0.0625  37.7265625  
30  0.96875 0.0625  37.3437500  
31  1.00000 0.0625  37.3750000
// convert coefficient ((1-α)*(1-β)), (α*(1-β)), ((1-α)*β), (α*β) to fixed point format  

id  pnt.x   pnt.y   tex2D
 0  0.00000 0.0625 43.00000000  
 1  0.03125 0.0625 43.23046875  
 2  0.06250 0.0625 42.64843750  
 3  0.09375 0.0625 42.87890625  
 4  0.12500 0.0625 42.29687500  
 5  0.15625 0.0625 42.52734375  
 6  0.18750 0.0625 41.94531250  
 7  0.21875 0.0625 42.17578125  
 8  0.25000 0.0625 41.59375000  
 9  0.28125 0.0625 41.82421875  
 0  0.31250 0.0625 41.24218750  
10  0.34375 0.0625 41.47265625  
11  0.37500 0.0625 40.89062500  
12  0.40625 0.0625 41.12109375  
13  0.43750 0.0625 40.53906250  
14  0.46875 0.0625 40.76953125  
15  0.50000 0.0625 40.18750000  
16  0.53125 0.0625 40.41796875  
17  0.56250 0.0625 39.83593750  
18  0.59375 0.0625 40.06640625  
19  0.62500 0.0625 39.48437500  
20  0.65625 0.0625 39.71484375  
21  0.68750 0.0625 39.13281250  
22  0.71875 0.0625 39.36328125  
23  0.75000 0.0625 38.78125000  
24  0.78125 0.0625 39.01171875  
25  0.81250 0.0625 38.42968750  
26  0.84375 0.0625 38.66015625  
27  0.87500 0.0625 38.07812500  
28  0.90625 0.0625 38.30859375  
29  0.93750 0.0625 37.72656250  
30  0.96875 0.0625 37.95703125  
31  1.00000 0.0625 37.37500000
tex(x,y)=T[i,j] + frac(α)(T[i+1,j]-T[i,j]) + frac(β)(T[i,j+1]-T[i,j]) + frac(αβ)(T[i,j]+T[i+1,j+1] - T[i+1, j]-T[i,j+1])  

// frac(x): returns the fractional part of x quantised to 9-bit fixed point
// with 8 fractional bits, i.e. rounded to the nearest multiple of 1/256.
// The result lies in [0, 1]; 1.0 is reachable when the fraction rounds up
// past 255/256.
// Negative inputs are measured from floor(x) (frac(-0.25f) == 0.75f) instead
// of from the truncated integer, which previously gave a negative fraction.
// x must fit in an int once truncated.
float frac( float x ) {
    // Distance from the integer obtained by truncation toward zero.
    float fraction = x - (float)(int)(x);
    // Truncation rounds negative values up; wrap back into [0, 1).
    if (fraction < 0.0f) {
        fraction += 1.0f;
    }
    // Round to the nearest 1/256 step (fraction is non-negative here, so the
    // truncating cast after +0.5f is round-to-nearest).
    const float steps = (float)(int)( fraction*256.0f + 0.5f );
    return steps / 256.0f;
}
CPU结果:

id  pnt.x   pnt.y   tex2D
 0  0.00000 0.0625  43.0000000  
 1  0.03125 0.0625  42.6171875  
 2  0.06250 0.0625  42.6484375  
 3  0.09375 0.0625  42.2656250  
 4  0.12500 0.0625  42.2968750  
 5  0.15625 0.0625  41.9140625  
 6  0.18750 0.0625  41.9453125  
 7  0.21875 0.0625  41.5625000  
 8  0.25000 0.0625  41.5937500  
 9  0.28125 0.0625  41.2109375  
 0  0.31250 0.0625  41.2421875  
10  0.34375 0.0625  40.8593750  
11  0.37500 0.0625  40.8906250  
12  0.40625 0.0625  40.5078125  
13  0.43750 0.0625  40.5390625  
14  0.46875 0.0625  40.1562500  
15  0.50000 0.0625  40.1875000  
16  0.53125 0.0625  39.8046875  
17  0.56250 0.0625  39.8359375  
18  0.59375 0.0625  39.4531250  
19  0.62500 0.0625  39.4843750  
20  0.65625 0.0625  39.1015625  
21  0.68750 0.0625  39.1328125  
22  0.71875 0.0625  38.7500000  
23  0.75000 0.0625  38.7812500  
24  0.78125 0.0625  38.3984375  
25  0.81250 0.0625  38.4296875  
26  0.84375 0.0625  38.0468750  
27  0.87500 0.0625  38.0781250  
28  0.90625 0.0625  37.6953125  
29  0.93750 0.0625  37.7265625  
30  0.96875 0.0625  37.3437500  
31  1.00000 0.0625  37.3750000
// convert coefficient ((1-α)*(1-β)), (α*(1-β)), ((1-α)*β), (α*β) to fixed point format  

id  pnt.x   pnt.y   tex2D
 0  0.00000 0.0625 43.00000000  
 1  0.03125 0.0625 43.23046875  
 2  0.06250 0.0625 42.64843750  
 3  0.09375 0.0625 42.87890625  
 4  0.12500 0.0625 42.29687500  
 5  0.15625 0.0625 42.52734375  
 6  0.18750 0.0625 41.94531250  
 7  0.21875 0.0625 42.17578125  
 8  0.25000 0.0625 41.59375000  
 9  0.28125 0.0625 41.82421875  
 0  0.31250 0.0625 41.24218750  
10  0.34375 0.0625 41.47265625  
11  0.37500 0.0625 40.89062500  
12  0.40625 0.0625 41.12109375  
13  0.43750 0.0625 40.53906250  
14  0.46875 0.0625 40.76953125  
15  0.50000 0.0625 40.18750000  
16  0.53125 0.0625 40.41796875  
17  0.56250 0.0625 39.83593750  
18  0.59375 0.0625 40.06640625  
19  0.62500 0.0625 39.48437500  
20  0.65625 0.0625 39.71484375  
21  0.68750 0.0625 39.13281250  
22  0.71875 0.0625 39.36328125  
23  0.75000 0.0625 38.78125000  
24  0.78125 0.0625 39.01171875  
25  0.81250 0.0625 38.42968750  
26  0.84375 0.0625 38.66015625  
27  0.87500 0.0625 38.07812500  
28  0.90625 0.0625 38.30859375  
29  0.93750 0.0625 37.72656250  
30  0.96875 0.0625 37.95703125  
31  1.00000 0.0625 37.37500000
tex(x,y)=T[i,j] + frac(α)(T[i+1,j]-T[i,j]) + frac(β)(T[i,j+1]-T[i,j]) + frac(αβ)(T[i,j]+T[i+1,j+1] - T[i+1, j]-T[i,j+1])  

// frac(x): returns the fractional part of x quantised to 9-bit fixed point
// with 8 fractional bits, i.e. rounded to the nearest multiple of 1/256.
// The result lies in [0, 1]; 1.0 is reachable when the fraction rounds up
// past 255/256.
// Negative inputs are measured from floor(x) (frac(-0.25f) == 0.75f) instead
// of from the truncated integer, which previously gave a negative fraction.
// x must fit in an int once truncated.
float frac( float x ) {
    // Distance from the integer obtained by truncation toward zero.
    float fraction = x - (float)(int)(x);
    // Truncation rounds negative values up; wrap back into [0, 1).
    if (fraction < 0.0f) {
        fraction += 1.0f;
    }
    // Round to the nearest 1/256 step (fraction is non-negative here, so the
    // truncating cast after +0.5f is round-to-nearest).
    const float steps = (float)(int)( fraction*256.0f + 0.5f );
    return steps / 256.0f;
}
我提供了一份简单的示例代码,运行程序后,您将在
D:\
中获得两个文件

编辑2014/01/20


我以不同的增量运行程序,发现
tex2D
“当
alpha
乘以
beta
小于
0.00390625
时,
tex2D
的返回与双线性插值公式不匹配”
被截断为9位的是UV插值系数,而不是参与运算的纹素(texel)值。《The CUDA Handbook》第10章(纹理)对一维情况有详细描述(包括CPU仿真代码)。这段代码是开源的,可以在 https://github.com/ArchaeaSoftware/cudahandbook 中找到(原链接在抓取时丢失,此处为推断)。

(另一个回答)使用了错误的双线性插值公式,才使得纹理提取的结果看起来很奇怪

id  pnt.x   pnt.y   tex2D
 0  0.00000 0.0625  43.0000000  
 1  0.03125 0.0625  42.6171875  
 2  0.06250 0.0625  42.6484375  
 3  0.09375 0.0625  42.2656250  
 4  0.12500 0.0625  42.2968750  
 5  0.15625 0.0625  41.9140625  
 6  0.18750 0.0625  41.9453125  
 7  0.21875 0.0625  41.5625000  
 8  0.25000 0.0625  41.5937500  
 9  0.28125 0.0625  41.2109375  
 0  0.31250 0.0625  41.2421875  
10  0.34375 0.0625  40.8593750  
11  0.37500 0.0625  40.8906250  
12  0.40625 0.0625  40.5078125  
13  0.43750 0.0625  40.5390625  
14  0.46875 0.0625  40.1562500  
15  0.50000 0.0625  40.1875000  
16  0.53125 0.0625  39.8046875  
17  0.56250 0.0625  39.8359375  
18  0.59375 0.0625  39.4531250  
19  0.62500 0.0625  39.4843750  
20  0.65625 0.0625  39.1015625  
21  0.68750 0.0625  39.1328125  
22  0.71875 0.0625  38.7500000  
23  0.75000 0.0625  38.7812500  
24  0.78125 0.0625  38.3984375  
25  0.81250 0.0625  38.4296875  
26  0.84375 0.0625  38.0468750  
27  0.87500 0.0625  38.0781250  
28  0.90625 0.0625  37.6953125  
29  0.93750 0.0625  37.7265625  
30  0.96875 0.0625  37.3437500  
31  1.00000 0.0625  37.3750000
// convert coefficient ((1-α)*(1-β)), (α*(1-β)), ((1-α)*β), (α*β) to fixed point format  

id  pnt.x   pnt.y   tex2D
 0  0.00000 0.0625 43.00000000  
 1  0.03125 0.0625 43.23046875  
 2  0.06250 0.0625 42.64843750  
 3  0.09375 0.0625 42.87890625  
 4  0.12500 0.0625 42.29687500  
 5  0.15625 0.0625 42.52734375  
 6  0.18750 0.0625 41.94531250  
 7  0.21875 0.0625 42.17578125  
 8  0.25000 0.0625 41.59375000  
 9  0.28125 0.0625 41.82421875  
 0  0.31250 0.0625 41.24218750  
10  0.34375 0.0625 41.47265625  
11  0.37500 0.0625 40.89062500  
12  0.40625 0.0625 41.12109375  
13  0.43750 0.0625 40.53906250  
14  0.46875 0.0625 40.76953125  
15  0.50000 0.0625 40.18750000  
16  0.53125 0.0625 40.41796875  
17  0.56250 0.0625 39.83593750  
18  0.59375 0.0625 40.06640625  
19  0.62500 0.0625 39.48437500  
20  0.65625 0.0625 39.71484375  
21  0.68750 0.0625 39.13281250  
22  0.71875 0.0625 39.36328125  
23  0.75000 0.0625 38.78125000  
24  0.78125 0.0625 39.01171875  
25  0.81250 0.0625 38.42968750  
26  0.84375 0.0625 38.66015625  
27  0.87500 0.0625 38.07812500  
28  0.90625 0.0625 38.30859375  
29  0.93750 0.0625 37.72656250  
30  0.96875 0.0625 37.95703125  
31  1.00000 0.0625 37.37500000
tex(x,y)=T[i,j] + frac(α)(T[i+1,j]-T[i,j]) + frac(β)(T[i,j+1]-T[i,j]) + frac(αβ)(T[i,j]+T[i+1,j+1] - T[i+1, j]-T[i,j+1])  

// frac(x): returns the fractional part of x quantised to 9-bit fixed point
// with 8 fractional bits, i.e. rounded to the nearest multiple of 1/256.
// The result lies in [0, 1]; 1.0 is reachable when the fraction rounds up
// past 255/256.
// Negative inputs are measured from floor(x) (frac(-0.25f) == 0.75f) instead
// of from the truncated integer, which previously gave a negative fraction.
// x must fit in an int once truncated.
float frac( float x ) {
    // Distance from the integer obtained by truncation toward zero.
    float fraction = x - (float)(int)(x);
    // Truncation rounds negative values up; wrap back into [0, 1).
    if (fraction < 0.0f) {
        fraction += 1.0f;
    }
    // Round to the nearest 1/256 step (fraction is non-negative here, so the
    // truncating cast after +0.5f is round-to-nearest).
    const float steps = (float)(int)( fraction*256.0f + 0.5f );
    return steps / 256.0f;
}
公式-1:您可以在cuda附录或wiki中轻松找到它

tex(x,y)=(1−α)(1−β)T[i,j] + α(1−β)T[i+1,j] + (1−α)βT[i,j+1] + αβT[i+1,j+1]
公式-2:减少乘法次数

tex(x,y)=T[i,j] + α(T[i+1,j]-T[i,j]) + β(T[i,j+1]-T[i,j]) + αβ(T[i,j]+T[i+1,j+1] - T[i+1, j]-T[i,j+1])
如果对公式1使用9位定点格式,将得到纹理获取的不匹配结果,但公式2可以正常工作

结论:
如果要模拟cuda纹理实现的双线性插值,应使用公式3。试试看

公式-3:

id  pnt.x   pnt.y   tex2D
 0  0.00000 0.0625  43.0000000  
 1  0.03125 0.0625  42.6171875  
 2  0.06250 0.0625  42.6484375  
 3  0.09375 0.0625  42.2656250  
 4  0.12500 0.0625  42.2968750  
 5  0.15625 0.0625  41.9140625  
 6  0.18750 0.0625  41.9453125  
 7  0.21875 0.0625  41.5625000  
 8  0.25000 0.0625  41.5937500  
 9  0.28125 0.0625  41.2109375  
 0  0.31250 0.0625  41.2421875  
10  0.34375 0.0625  40.8593750  
11  0.37500 0.0625  40.8906250  
12  0.40625 0.0625  40.5078125  
13  0.43750 0.0625  40.5390625  
14  0.46875 0.0625  40.1562500  
15  0.50000 0.0625  40.1875000  
16  0.53125 0.0625  39.8046875  
17  0.56250 0.0625  39.8359375  
18  0.59375 0.0625  39.4531250  
19  0.62500 0.0625  39.4843750  
20  0.65625 0.0625  39.1015625  
21  0.68750 0.0625  39.1328125  
22  0.71875 0.0625  38.7500000  
23  0.75000 0.0625  38.7812500  
24  0.78125 0.0625  38.3984375  
25  0.81250 0.0625  38.4296875  
26  0.84375 0.0625  38.0468750  
27  0.87500 0.0625  38.0781250  
28  0.90625 0.0625  37.6953125  
29  0.93750 0.0625  37.7265625  
30  0.96875 0.0625  37.3437500  
31  1.00000 0.0625  37.3750000
// convert coefficient ((1-α)*(1-β)), (α*(1-β)), ((1-α)*β), (α*β) to fixed point format  

id  pnt.x   pnt.y   tex2D
 0  0.00000 0.0625 43.00000000  
 1  0.03125 0.0625 43.23046875  
 2  0.06250 0.0625 42.64843750  
 3  0.09375 0.0625 42.87890625  
 4  0.12500 0.0625 42.29687500  
 5  0.15625 0.0625 42.52734375  
 6  0.18750 0.0625 41.94531250  
 7  0.21875 0.0625 42.17578125  
 8  0.25000 0.0625 41.59375000  
 9  0.28125 0.0625 41.82421875  
 0  0.31250 0.0625 41.24218750  
10  0.34375 0.0625 41.47265625  
11  0.37500 0.0625 40.89062500  
12  0.40625 0.0625 41.12109375  
13  0.43750 0.0625 40.53906250  
14  0.46875 0.0625 40.76953125  
15  0.50000 0.0625 40.18750000  
16  0.53125 0.0625 40.41796875  
17  0.56250 0.0625 39.83593750  
18  0.59375 0.0625 40.06640625  
19  0.62500 0.0625 39.48437500  
20  0.65625 0.0625 39.71484375  
21  0.68750 0.0625 39.13281250  
22  0.71875 0.0625 39.36328125  
23  0.75000 0.0625 38.78125000  
24  0.78125 0.0625 39.01171875  
25  0.81250 0.0625 38.42968750  
26  0.84375 0.0625 38.66015625  
27  0.87500 0.0625 38.07812500  
28  0.90625 0.0625 38.30859375  
29  0.93750 0.0625 37.72656250  
30  0.96875 0.0625 37.95703125  
31  1.00000 0.0625 37.37500000
tex(x,y)=T[i,j] + frac(α)(T[i+1,j]-T[i,j]) + frac(β)(T[i,j+1]-T[i,j]) + frac(αβ)(T[i,j]+T[i+1,j+1] - T[i+1, j]-T[i,j+1])  

// frac(x): returns the fractional part of x quantised to 9-bit fixed point
// with 8 fractional bits, i.e. rounded to the nearest multiple of 1/256.
// The result lies in [0, 1]; 1.0 is reachable when the fraction rounds up
// past 255/256.
// Negative inputs are measured from floor(x) (frac(-0.25f) == 0.75f) instead
// of from the truncated integer, which previously gave a negative fraction.
// x must fit in an int once truncated.
float frac( float x ) {
    // Distance from the integer obtained by truncation toward zero.
    float fraction = x - (float)(int)(x);
    // Truncation rounds negative values up; wrap back into [0, 1).
    if (fraction < 0.0f) {
        fraction += 1.0f;
    }
    // Round to the nearest 1/256 step (fraction is non-negative here, so the
    // truncating cast after +0.5f is round-to-nearest).
    const float steps = (float)(int)( fraction*256.0f + 0.5f );
    return steps / 256.0f;
}

已经有了令人满意的答案,所以现在我只想概述一下关于双线性插值的有用信息:它如何在C++中实现,以及在CUDA中实现它的几种不同方式。

双线性插值背后的数学


假设原始函数
T(x,y)
在笛卡尔规则网格的点
(i,j)
上采样,其中
0 ≤ i < M、0 ≤ j < N,且 i、j 为整数。(此处原文在抓取时被截断,取值范围为推断。)

评论:
- 你能添加其他人可以编译和运行的最短完整示例吗?
- 谢谢你的建议 @talonmies,我提供了示例代码的链接。
- 谢谢你的回复,我以前也读过这段代码(与文章中的链接2相同)。根据链接,如果 x=1/2^n(n=1,2,…,8),那么 frac 应该等于 x。但我得到了与 tex2D 不匹配的结果,所以我在这里发布了我的问题。