C++ 由C++ AMP引发的DXGI_ERROR_DEVICE_HUNG错误_C++_Directx_Gpgpu_C++ Amp

C++ 由C++ AMP引发的DXGI_ERROR_DEVICE_HUNG错误

c++ directx

C++ DXGI_错误_设备_挂起由C++；安培法,c++,directx,gpgpu,c++-amp,C++,Directx,Gpgpu,C++ Amp,我正在尝试实现一个函数，该函数使用C++AMP计算高斯-拉盖尔数值积分法的权重和横坐标，以并行化该过程，运行该函数时，我得到一个DXGI\u ERROR\u DEVICE\u HUNG错误这是我在GPU上计算gamma函数对数的助手方法： template <typename T> T gammaln_fast( T tArg ) restrict( amp ) { const T tCoefficients[] = { T( 57.1562356658629235f ),

我正在尝试用C++ AMP实现一个函数，并行计算高斯-拉盖尔数值积分法的权重和横坐标。运行该函数时，我得到一个

DXGI_ERROR_DEVICE_HUNG

错误。

这是我在GPU上计算gamma函数对数的辅助函数：

// Computes the natural logarithm of the gamma function for use inside
// C++ AMP kernels (the function is restrict(amp), so it runs on the accelerator).
//
// tArg   : the argument. No range check is performed; tArg is used as a divisor
//          and inside log(), so it is presumably expected to be strictly
//          positive -- TODO confirm against callers.
// return : tTemp + log( 2.5066282746310005 * series / tArg ), where the series is
//          the 14-term sum built below.
//
// Notes:
//  * All constants are written as float literals and the logarithm comes from
//    concurrency::fast_math, so the result carries single precision even when
//    T is double.
//  * The loop counter is a plain int. restrict(amp) code may only declare
//    int / unsigned int / float / double (and bool) variables, so a std::size_t
//    counter is rejected when std::size_t is a 64-bit type (x64 builds).
template <typename T>
T gammaln_fast( T tArg ) restrict( amp )
{
    // Number of series coefficients; must match the initializer list below.
    const int kNumCoefficients = 14;

    const T tCoefficients[kNumCoefficients] = { T( 57.1562356658629235f ), T( -59.5979603554754912f ),
        T( 14.1360979747417471f ), T( -0.491913816097620199f ), T( 0.339946499848118887E-4f ),
        T( 0.465236289270485756E-4f ), T( -0.983744753048795646E-4f ), T( 0.158088703224912494E-3f ),
        T( -0.210264441724104883E-3f ), T( 0.217439618115212643E-3f ), T( -0.164318106536763890E-3f ),
        T( 0.844182239838527433E-4f ), T( -0.261908384015814087E-4f ), T( 0.386991826595316234E-5f ) };

    // y is the running denominator (tArg + 1, tArg + 2, ...) of the series terms.
    T y = tArg;

    // (tArg + 0.5) * log(tArg + 5.2421875) - (tArg + 5.2421875)
    T tTemp = tArg + T( 5.2421875f );
    tTemp = (tArg + T( 0.5f )) * concurrency::fast_math::log( tTemp ) - tTemp;

    // Accumulate the series in the same order as the coefficient table.
    T tSer = T( 0.999999999999997092f );
    for( int i = 0; i < kNumCoefficients; ++i )
    {
        tSer += tCoefficients[i] / ++y;
    }

    return tTemp + concurrency::fast_math::log( T( 2.5066282746310005f ) * tSer / tArg );
}

模板
T gammaln_快速（T目标）限制（安培）
{
常数T T T效率[]={T（57.156235658629235f），T（-59.5979603554754912f），
T（14.13609797417471F）、T（-0.491913816097620199f）、T（0.339946499848118887E-4f），
T（0.465236289270485756E-4f）、T（-0.983744753048795646E-4f）、T（0.15808703224912494E-3f），
T（-0.21026441724104883E-3f）、T（0.217439618115212643E-3f）、T（-0.164318106536763890E-3f），
T（0.844182298983527433E-4f）、T（-0.261908384015814087E-4f）、T（0.386991826595316234E-5f）}；
TY=tArg，tTemp=tArg+T（5.2421875f）；
tTemp=（tArg+T（0.5f））*并发：：快速数学：：日志（tTemp）-tTemp；
T tSer=T（0.9999999997092f）；
对于（std:：size_t s=0；s<（sizeof（t效率）/sizeof（t））；++s）
{
tSer+=t效率[s]/++y；
}
返回tTemp+concurrency:：fast_math:：log（T（2.506628746310005f）*tSer/tArg）；
}

这是我计算权重和横坐标的函数：

// Computes the abscissas and weights of an sNumPoints-point Gauss-Laguerre
// quadrature rule on the accelerator with C++ AMP.
//
// tExponent  : exponent parameter of the rule (used in the initial guesses, the
//              polynomial recurrence and the weight formula).
// sNumPoints : number of quadrature points; one accelerator thread refines one
//              abscissa.
// tEps       : absolute tolerance on the Newton step; defaults to the machine
//              epsilon of T.
// return     : std::make_pair( abscissas, weights ), both concurrency::array<T>
//              moved into the result (ArrayPair<T> is declared elsewhere).
//
// A concurrency::runtime_exception thrown by the kernel launch is reported on
// std::cerr and the arrays are returned as they are at that point.
//
// Behaviour change versus the previous revision: the Newton refinement is now
// bounded. The old loop condition was `step > tEps || iterations < 10`, i.e. it
// always ran at least 10 iterations and then kept going for as long as the step
// stayed above tEps. With single-precision fast_math arithmetic the step may
// never drop below the tolerance, so the kernel could spin forever and trip the
// GPU timeout detection (DXGI_ERROR_DEVICE_HUNG). The loop now stops as soon as
// the step is within tEps or after at most 10 iterations.
template <typename T>
ArrayPair<T> CalculateGaussLaguerreWeights_fast( const T tExponent, const std::size_t sNumPoints, T tEps = std::numeric_limits<T>::epsilon() )
{
    static_assert(std::is_floating_point<T>::value, "You can only instantiate this function with a floating point data type");
    static_assert(!std::is_same<T, long double>::value, "You can not instantiate this function with long double type"); // The long double type is not currently supported by C++AMP

    // Host side: build one starting value per abscissa. Each guess is derived
    // from the previous guesses (not from refined roots), which is what makes
    // the refinement below independent per point and therefore parallelizable.
    T tCurrentGuess, tFatherGuess, tGrandFatherGuess;
    std::vector<T> vecInitialGuesses( sNumPoints );
    for( std::size_t s = 0; s < sNumPoints; ++s )
    {
        if( s == 0 )
        {
            tCurrentGuess = (T( 1.0f ) + tExponent) * (T( 3.0f ) + T( 0.92f ) * tExponent) / (T( 1.0f ) + T( 2.4f ) * sNumPoints + T( 1.8f ) * tExponent);
        }
        else if( s == 1 )
        {
            tFatherGuess = tCurrentGuess;
            tCurrentGuess += (T( 15.0f ) + T( 6.25f ) * tExponent) / (T( 1.0f ) + T( 0.9f ) * tExponent + T( 2.5f ) * sNumPoints);
        }
        else
        {
            // Extrapolate from the guess two positions back.
            tGrandFatherGuess = tFatherGuess;
            tFatherGuess = tCurrentGuess;
            std::size_t sDec = s - 1U;
            tCurrentGuess += ((T( 1.0f ) + T( 2.55f ) * sDec) / (T( 1.9f ) * sDec) + T( 1.26f ) * sDec * tExponent
                / (T( 1.0f ) + T( 3.5f ) * sDec)) * (tCurrentGuess - tGrandFatherGuess) / (T( 1.0f ) + T( 0.3f ) * tExponent);
        }
        vecInitialGuesses[s] = tCurrentGuess;
    }

    concurrency::array<T> arrWeights( sNumPoints ), arrAbsciasses( sNumPoints, std::begin(vecInitialGuesses) );

    // restrict(amp) code may only use int / unsigned int / float / double (and
    // bool) variables, so the kernel works on a 32-bit copy of the point count
    // instead of capturing the std::size_t parameter.
    const unsigned int uNumPoints = static_cast<unsigned int>( sNumPoints );

    // Upper bound on Newton iterations per abscissa; keeps the kernel finite.
    const unsigned int uMaxIterations = 10U;

    try {
        concurrency::parallel_for_each( arrAbsciasses.extent, [=, &arrAbsciasses, &arrWeights]( concurrency::index<1> index ) restrict( amp ) {
            T tVal = arrAbsciasses[index], tIntermediate;
            T tPolynomial1 = T( 1.0f ), tPolynomial2 = T( 0.0f ), tPolynomial3, tDerivative;
            unsigned int uIterationNum = 0U;
            do {
                tPolynomial1 = T( 1.0f ), tPolynomial2 = T( 0.0f );

                // Three-term recurrence: after the loop tPolynomial1 holds the
                // degree-uNumPoints polynomial at tVal and tPolynomial2 the
                // polynomial of one degree lower.
                for( unsigned int s = 0U; s < uNumPoints; ++s )
                {
                    tPolynomial3 = tPolynomial2;
                    tPolynomial2 = tPolynomial1;
                    tPolynomial1 = ((2 * s + 1 + tExponent - tVal) * tPolynomial2 - (s + tExponent) * tPolynomial3) / (s + 1);
                }

                // Newton step: tVal <- tVal - p(tVal) / p'(tVal).
                tDerivative = (uNumPoints * tPolynomial1 - (uNumPoints + tExponent) * tPolynomial2) / tVal;
                tIntermediate = tVal;
                tVal = tIntermediate - tPolynomial1 / tDerivative;
                ++uIterationNum;

                // Continue only while NOT converged AND the iteration budget is
                // not exhausted (this used to be `||`, which never terminated
                // when the tolerance could not be met).
            } while( concurrency::fast_math::fabs( tVal - tIntermediate ) > tEps && uIterationNum < uMaxIterations );

            arrAbsciasses[index] = tVal;
            arrWeights[index] = -concurrency::fast_math::exp( gammaln_fast( tExponent + uNumPoints ) - gammaln_fast( T( uNumPoints ) ) ) / (tDerivative * uNumPoints * tPolynomial2);
        } );
    }
    catch( const concurrency::runtime_exception& e )
    {
        std::cerr << "Runtime error, code: " << e.get_error_code() << "; message: " << e.what() << std::endl;
    }

    return std::make_pair( std::move( arrAbsciasses ), std::move( arrWeights ) );
}

模板
ArrayPair CalculateGausslaguerrewweights\u fast（常数T tExponent，常数std:：size\u T snimpoints，T tEps=std:：numeric\u limits:：epsilon（））
{
static_assert（std:：is_floating_point:：value，“只能用浮点数据类型实例化此函数”）；
static_assert（！std:：is_same:：value，“不能用长双精度类型实例化此函数”）；//长双精度类型当前不受C++AMP支持
T当前猜测，T推理，T父亲猜测；
向量向量初始猜测（sNumPoints）；
对于（std:：size_t s=0；stEps | | sitererationnum<10）；
ARRABSCIASES[指数]=tVal；
arrWeights[index]=-concurrency:：fast\u math:：exp（gammaln\u fast（tExponent+sNumPoints）-gammaln\u fast（T（sNumPoints））/（T派生*sNumPoints*tpolymonial2）；
} );
}
捕获（并发：：运行时\异常&e）
{
std:：cerr
使用DirectCompute时，主要的挑战是编写不会触发Direct3D自动“GPU挂起”检测超时的计算。默认情况下，如果着色器的运行时间超过几秒钟，系统就会认为GPU实际上已经挂起。这种启发式方法适用于图形着色器，但您很容易写出需要很长时间才能完成的DirectCompute着色器。
解决方案是禁用超时检测。您可以通过使用D3D11_CREATE_DEVICE_DISABLE_GPU_TIMEOUT标志创建Direct3D 11设备来实现这一点。请参阅相关博客文章。
需要记住的主要一点是，D3D11_CREATE_DEVICE_DISABLE_GPU_TIMEOUT需要DirectX 11.1或更高版本的运行时；该运行时随Windows 8.x提供，也可以通过KB2670838安装在Windows 7 Service Pack 1上。有关使用KB2670838的一些注意事项，请参阅相关文章。