图像处理(Image processing):双三次插值给出错误的结果

Image processing 给出错误结果的双三次插值,image-processing,bicubic,Image Processing,Bicubic,我正在尝试为图像变换(如缩放和倾斜)实现双三次插值,但图像输出似乎不准确。插值像素有时溢出超过255 代码如下: #include <algorithm> #include <sal.h> #define ASSERT _ASSERTE template<typename T> class bicubic_sampler { // use to offset into int 4x4 array to get individual channel

我正在尝试为图像变换(如缩放和倾斜)实现双三次插值,但输出图像似乎不准确:插值后的像素值有时会溢出,超过 255。

代码如下:

#include <algorithm>
#include <sal.h>
#define ASSERT _ASSERTE 
template<typename T>
class bicubic_sampler
{
    // use to offset into int 4x4 array to get individual channel 
    #pragma region constants
    static unsigned constexpr x00 = 0;
    static unsigned constexpr x01 = 4;
    static unsigned constexpr x02 = 8;
    static unsigned constexpr x03 = 12;
    static unsigned constexpr x10 = 16;
    static unsigned constexpr x11 = 20;
    static unsigned constexpr x12 = 24;
    static unsigned constexpr x13 = 28;
    static unsigned constexpr x20 = 32;
    static unsigned constexpr x21 = 36;
    static unsigned constexpr x22 = 40;
    static unsigned constexpr x23 = 44;
    static unsigned constexpr x30 = 48;
    static unsigned constexpr x31 = 52;
    static unsigned constexpr x32 = 56;
    static unsigned constexpr x33 = 60;
    #pragma endregion

    T   a00, a01, a02, a03,
        a10, a11, a12, a13,
        a20, a21, a22, a23,
        a30, a31, a32, a33;

public:

    void sample(_In_ UINT32(&pix)[4][4], _In_ T x, _In_ T y, _Inout_ BYTE(&output)[4])
    {
        auto channel_count = 3u; // skip alpha
        for (auto c = 0; c != channel_count; ++c)
        {
            auto p = reinterpret_cast<BYTE*>(pix) + c;

            a00 = p[x11];
            a01 = -.5*p[x10] + .5*p[x12];
            a02 = p[x10] - 2.5*p[x11] + 2 * p[x12] - .5*p[x13];
            a03 = -.5*p[x10] + 1.5*p[x11] - 1.5*p[x12] + .5*p[x13];
            a10 = -.5*p[x01] + .5*p[x21];
            a11 = .25*p[x00] - .25*p[x02] - .25*p[x20] + .25*p[x22];
            a12 = -.5*p[x00] + 1.25*p[x01] - p[x02] + .25*p[x03] + .5*p[x20] - 1.25*p[x21] + p[x22] - .25*p[x23];
            a13 = .25*p[x00] - .75*p[x01] + .75*p[x02] - .25*p[x03] - .25*p[x20] + .75*p[x21] - .75*p[x22] + .25*p[x23];
            a20 = p[x01] - 2.5*p[x11] + 2 * p[x21] - .5*p[x31];
            a21 = -.5*p[x00] + .5*p[x02] + 1.25*p[x10] - 1.25*p[x12] - p[x20] + p[x22] + .25*p[x30] - .25*p[x32];
            a22 = p[x00] - 2.5*p[x01] + 2 * p[x02] - .5*p[x03] - 2.5*p[x10] + 6.25*p[x11] - 5 * p[x12] + 1.25*p[x13] + 2 * p[x20] - 5 * p[x21] + 4 * p[x22] - p[x23] - .5*p[x30] + 1.25*p[x31] - p[x32] + .25*p[x33];
            a23 = -.5*p[x00] + 1.5*p[x01] - 1.5*p[x02] + .5*p[x03] + 1.25*p[x10] - 3.75*p[x11] + 3.75*p[x12] - 1.25*p[x13] - p[x20] + 3 * p[x21] - 3 * p[x22] + p[x23] + .25*p[x30] - .75*p[x31] + .75*p[x32] - .25*p[x33];
            a30 = -.5*p[x01] + 1.5*p[x11] - 1.5*p[x21] + .5*p[x31];
            a31 = .25*p[x00] - .25*p[x02] - .75*p[x10] + .75*p[x12] + .75*p[x20] - .75*p[x22] - .25*p[x30] + .25*p[x32];
            a32 = -.5*p[x00] + 1.25*p[x01] - p[x02] + .25*p[x03] + 1.5*p[x10] - 3.75*p[x11] + 3 * p[x12] - .75*p[x13] - 1.5*p[x20] + 3.75*p[x21] - 3 * p[x22] + .75*p[x23] + .5*p[x30] - 1.25*p[x31] + p[x32] - .25*p[x33];
            a33 = .25*p[x00] - .75*p[x01] + .75*p[x02] - .25*p[x03] - .75*p[x10] + 2.25*p[x11] - 2.25*p[x12] + .75*p[x13] + .75*p[x20] - 2.25*p[x21] + 2.25*p[x22] - .75*p[x23] - .25*p[x30] + .75*p[x31] - .75*p[x32] + .25*p[x33];

            auto x2 = x * x;
            auto x3 = x2 * x;
            auto y2 = y * y;
            auto y3 = y2 * y;

            auto dd = (a00 + a01 * y + a02 * y2 + a03 * y3) +
                (a10 + a11 * y + a12 * y2 + a13 * y3) * x +
                (a20 + a21 * y + a22 * y2 + a23 * y3) * x2 +
                (a30 + a31 * y + a32 * y2 + a33 * y3) * x3;
            //ASSERT(dd <= 0xff);  // this is overflowing beyond 255

            auto finalValue = (std::min)(255.0, dd);
            output[c] = static_cast<BYTE>(finalValue);
        }
    }
};

// Fills `dest` by mapping every destination pixel (x, y) through `mat` to a
// source position and sampling `src` there.
//
// src, dest - width * height pixels of 4 bytes each, tightly packed row by
//             row. T must be a one-byte type because all offsets below are
//             computed in bytes.
// mat       - transform passed to transform_point(); the resulting point is
//             used as the position to read from in `src`.
//
// Destination pixels whose mapped position falls outside the source image
// are left untouched. Pixels too close to the border for a full 4x4
// neighbourhood receive a copy of the source pixel at the floored position;
// all others are bicubically interpolated.
template<typename T, typename Matrix>
void transform_pixels(_In_  T* src, _Inout_ T* dest, _In_ const int width, _In_ const int height, _In_ const Matrix & mat)
{
    static_assert(sizeof(T) == 1, "transform_pixels addresses pixels in bytes; T must be a one-byte type");

    auto bc_sampler = bicubic_sampler<double>{};

    const ptrdiff_t channelCount = 4;
    const ptrdiff_t stride = width * channelCount;   // bytes per image row

    for (auto y = 0; y != height; ++y)
    {
        for (auto x = 0; x != width; ++x)
        {
            auto p0 = point<double>(x, y);   // destination pixel
            auto p = transform_point(mat, p0);  // position to sample in the source (scale, skew, rotation, ...)

            auto pf = point < std::ptrdiff_t >(pt_floor(p));
            auto frac = point < double >{ p.x - pf.x, p.y - pf.y };

            if (pf.x < 0 || pf.y < 0 || pf.x >= width || pf.y >= height)
            {
                continue;
            }

            // Source pixel at the floored position and the destination pixel.
            // The row index is widened before multiplying so large images do
            // not overflow int.
            auto loc = src + (pf.y * width + pf.x) * channelCount;
            auto dst = dest + (static_cast<ptrdiff_t>(y) * width + x) * channelCount;

            // The 4x4 neighbourhood spans columns pf.x-1 .. pf.x+2 and rows
            // pf.y-1 .. pf.y+2; the row limit must be checked against height.
            if (pf.x - 1 >= 0 && pf.y - 1 >= 0 && pf.x + 2 < width && pf.y + 2 < height)
            {
                UINT32 neig4x4[4][4] = {};

                // Gather the 16 neighbours: neig4x4[row][col] is the pixel at
                // (pf.x + col - 1, pf.y + row - 1), so neig4x4[1][1] is `loc`.
                // Copying bytes avoids unaligned / type-punned UINT32 reads.
                for (ptrdiff_t row = 0; row != 4; ++row)
                {
                    for (ptrdiff_t col = 0; col != 4; ++col)
                    {
                        auto texel = loc + (row - 1) * stride + (col - 1) * channelCount;
                        memcpy_s(&neig4x4[row][col], sizeof(UINT32), texel, sizeof(UINT32));
                    }
                }

                // One interpolated output pixel. The sampler only writes the
                // three colour channels, so start from the source pixel to
                // carry its alpha through instead of emitting alpha 0.
                BYTE mp[4]{};
                memcpy_s(mp, sizeof(mp), loc, channelCount);

                // The sampler pairs its first coordinate with the FIRST array
                // index. That index is the row here, so the vertical fraction
                // goes first and the horizontal fraction second.
                bc_sampler.sample(neig4x4, frac.y, frac.x, mp);

                memcpy_s(dst, channelCount, mp, channelCount);
            }
            else
            {
                // Not enough neighbours for bicubic: copy the source pixel as is.
                memcpy_s(dst, channelCount, loc, channelCount);
            }

        } // for x
    } // for y
}
#包括
#包括
#定义断言_ASSERTE
模板
双三次类采样器
{
//用于偏移到int 4x4阵列以获得单个通道
#布拉格区域常数
静态无符号constexpr x00=0;
静态无符号constexpr x01=4;
静态无符号constexpr x02=8;
静态无符号constexpr x03=12;
静态无符号constexpr x10=16;
静态无符号constexpr x11=20;
静态无符号constexpr x12=24;
静态无符号constexpr x13=28;
静态无符号constexpr x20=32;
静态无符号constexpr x21=36;
静态无符号constexpr x22=40;
静态无符号constexpr x23=44;
静态无符号constexpr x30=48;
静态无符号constexpr x31=52;
静态无符号constexpr x32=56;
静态无符号constexpr x33=60;
#布拉格端区
T a00,a01,a02,a03,
a10,a11,a12,a13,
a20,a21,a22,a23,
a30、a31、a32、a33;
公众:
无效样本(_-In-UINT32(&pix)[4][4]、_-In-tx、_-In-y、_-Inout_uuuuux字节(&output)[4])
{
自动通道计数=3u;//跳过alpha
用于(自动c=0;c!=通道计数;++c)
{
自动p=重新解释铸件(pix)+c;
a00=p[x11];
a01=-.5*p[x10]+.5*p[x12];
a02=p[x10]-2.5*p[x11]+2*p[x12]-0.5*p[x13];
a03=-.5*p[x10]+1.5*p[x11]-1.5*p[x12]+0.5*p[x13];
a10=-.5*p[x01]+.5*p[x21];
a11=.25*p[x00]-.25*p[x02]-.25*p[x20]+.25*p[x22];
a12=-.5*p[x00]+1.25*p[x01]-p[x02]+0.25*p[x03]+0.5*p[x20]-1.25*p[x21]+p[x22]-0.25*p[x23];
a13=.25*p[x00]-.75*p[x01]+.75*p[x02]-.25*p[x03]-.25*p[x20]+.75*p[x21]-.75*p[x22]+.25*p[x23];
a20=p[x01]-2.5*p[x11]+2*p[x21]-0.5*p[x31];
a21=-.5*p[x00]+.5*p[x02]+1.25*p[x10]-1.25*p[x12]-p[x20]+p[x22]+.25*p[x30]-1.25*p[x32];
a22=p[x00]-2.5*p[x01]+2*p[x02]-5*p[x03]-2.5*p[x10]+6.25*p[x11]-5*p[x12]+1.25*p[x13]+2*p[x20]-5*p[x21]+4*p[x22]-p[x23]-5*p[x30]+1.25*p[x31]-p[x32]-25*p[x33];
a23=-.5*p[x00]+1.5*p[x01]-1.5*p[x02]+.5*p[x03]+1.25*p[x10]-3.75*p[x11]+3.75*p[x12]-1.25*p[x13]-p[x20]+3*p[x21]-3*p[x22]+p[x23]+.25*p[x30]-75*p[x31]+75*p[x32]-25*p[x33];
a30=-.5*p[x01]+1.5*p[x11]-1.5*p[x21]+0.5*p[x31];
a31=.25*p[x00]-.25*p[x02]-.75*p[x10]+.75*p[x12]+.75*p[x20]-.75*p[x22]-.25*p[x30]+.25*p[x32];
a32=-.5*p[x00]+1.25*p[x01]-p[x02]+.25*p[x03]+1.5*p[x10]-3.75*p[x11]+3*p[x12]-75*p[x13]-1.5*p[x20]+3.75*p[x21]-3*p[x22]+0.75*p[x23]+.5*p[x30]-1.25*p[x31]-1.25*p[x33];
a33=.25*p[x00]-.75*p[x01]+.75*p[x02]-.25*p[x03]-.75*p[x10]+2.25*p[x11]-.25*p[x12]+.75*p[x13]+.75*p[x20]-.25*p[x22]-.75*p[x23]-.25*p[x30]+.75*p[x31]-.75*p[x32].25*p[x33];
自动x2=x*x;
自动x3=x2*x;
自动y2=y*y;
自动y3=y2*y;
自动dd=(a00+a01*y+a02*y2+a03*y3)+
(a10+a11*y+a12*y2+a13*y3)*x+
(a20+a21*y+a22*y2+a23*y3)*x2+
(a30+a31*y+a32*y2+a33*y3)*x3;
//断言(dd(pt_楼层(p));
自动分形=点{p.x-pf.x,p.y-pf.y};
if(pf.x<0 | | pf.y<0 | | pf.x>=宽度| | pf.y>=高度)
{
继续;
}
字节mp[4]{};//一个像素转换输出
自动定位=(src+(pf.y*宽度+pf.x)*通道计数);
自动步幅=宽度*通道计数;
如果(pf.x-1>=0&&pf.y-1>=0&&pf.x+2