Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/visual-studio-2010/4.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Visual studio 2010 SSE绝对差和等效代码_Visual Studio 2010_H.264_Sse_Encoder_Decoder - Fatal编程技术网

Visual studio 2010 SSE绝对差和等效代码

Visual studio 2010 SSE绝对差和等效代码,visual-studio-2010,h.264,sse,encoder,decoder,Visual Studio 2010,H.264,Sse,Encoder,Decoder,我在H264AVC编码器/解码器中有这个函数,它被反复调用: UInt XDistortion::xGetSAD16x( XDistSearchStruct* pcDSS ) { XPel* pucCur = pcDSS->pYSearch; XPel* pucOrg = pcDSS->pYOrg; Int iStride = pcDSS->iYStride; Int iRows = pcDSS->iRows; UInt uiSum

我在H264AVC编码器/解码器中有这个函数,它被反复调用:

UInt XDistortion::xGetSAD16x( XDistSearchStruct* pcDSS )
{
  XPel* pucCur  = pcDSS->pYSearch;
  XPel* pucOrg  = pcDSS->pYOrg;
  Int   iStride = pcDSS->iYStride;
  Int   iRows   = pcDSS->iRows;

  UInt uiSum = 0;

  for( ; iRows != 0; iRows-- )
  {
    uiSum += Abs( pucOrg[0x0] - pucCur[0x0] );
    uiSum += Abs( pucOrg[0x1] - pucCur[0x1] );
    uiSum += Abs( pucOrg[0x2] - pucCur[0x2] );
    uiSum += Abs( pucOrg[0x3] - pucCur[0x3] );
    uiSum += Abs( pucOrg[0x4] - pucCur[0x4] );
    uiSum += Abs( pucOrg[0x5] - pucCur[0x5] );
    uiSum += Abs( pucOrg[0x6] - pucCur[0x6] );
    uiSum += Abs( pucOrg[0x7] - pucCur[0x7] );
    uiSum += Abs( pucOrg[0x8] - pucCur[0x8] );
    uiSum += Abs( pucOrg[0x9] - pucCur[0x9] );
    uiSum += Abs( pucOrg[0xa] - pucCur[0xa] );
    uiSum += Abs( pucOrg[0xb] - pucCur[0xb] );
    uiSum += Abs( pucOrg[0xc] - pucCur[0xc] );
    uiSum += Abs( pucOrg[0xd] - pucCur[0xd] );
    uiSum += Abs( pucOrg[0xe] - pucCur[0xe] );
    uiSum += Abs( pucOrg[0xf] - pucCur[0xf] );
    pucOrg += MB_BUFFER_WIDTH;
    pucCur += iStride;
  }
  return uiSum;
}
我已将其替换为以下SSE代码,但它不起作用:

UInt XDistortion::xGetSAD16x( XDistSearchStruct* pcDSS )
{
    XPel* pucCur  = pcDSS->pYSearch;
    XPel* pucOrg  = pcDSS->pYOrg;
    Int   iStride = pcDSS->iYStride;
    Int   iRows   = pcDSS->iRows;

    __m128i uiSum = _mm_set1_epi32(0);
    __m128i x1 = _mm_set1_epi8(MB_BUFFER_WIDTH);
    __m128i x2 = _mm_set1_epi8(iStride);

    for( ; iRows > 0; iRows -= 4)
        {
            _mm_store_si128(&uiSum, _mm_add_epi32(_mm_load_si128(&uiSum), _mm_abs_epi32(_mm_sub_epi32(_mm_load_si128((__m128i*)&pucOrg[0x0]), _mm_load_si128((__m128i*)&pucCur[0x0])))));
            _mm_store_si128(&uiSum, _mm_add_epi32(_mm_load_si128(&uiSum), _mm_abs_epi32(_mm_sub_epi32(_mm_load_si128((__m128i*)&pucOrg[0x1]), _mm_load_si128((__m128i*)&pucCur[0x1])))));
            _mm_store_si128(&uiSum, _mm_add_epi32(_mm_load_si128(&uiSum), _mm_abs_epi32(_mm_sub_epi32(_mm_load_si128((__m128i*)&pucOrg[0x2]), _mm_load_si128((__m128i*)&pucCur[0x2])))));
            _mm_store_si128(&uiSum, _mm_add_epi32(_mm_load_si128(&uiSum), _mm_abs_epi32(_mm_sub_epi32(_mm_load_si128((__m128i*)&pucOrg[0x3]), _mm_load_si128((__m128i*)&pucCur[0x3])))));
            _mm_store_si128(&uiSum, _mm_add_epi32(_mm_load_si128(&uiSum), _mm_abs_epi32(_mm_sub_epi32(_mm_load_si128((__m128i*)&pucOrg[0x4]), _mm_load_si128((__m128i*)&pucCur[0x4])))));
            _mm_store_si128(&uiSum, _mm_add_epi32(_mm_load_si128(&uiSum), _mm_abs_epi32(_mm_sub_epi32(_mm_load_si128((__m128i*)&pucOrg[0x5]), _mm_load_si128((__m128i*)&pucCur[0x5])))));
            _mm_store_si128(&uiSum, _mm_add_epi32(_mm_load_si128(&uiSum), _mm_abs_epi32(_mm_sub_epi32(_mm_load_si128((__m128i*)&pucOrg[0x6]), _mm_load_si128((__m128i*)&pucCur[0x6])))));
            _mm_store_si128(&uiSum, _mm_add_epi32(_mm_load_si128(&uiSum), _mm_abs_epi32(_mm_sub_epi32(_mm_load_si128((__m128i*)&pucOrg[0x7]), _mm_load_si128((__m128i*)&pucCur[0x7])))));
            _mm_store_si128(&uiSum, _mm_add_epi32(_mm_load_si128(&uiSum), _mm_abs_epi32(_mm_sub_epi32(_mm_load_si128((__m128i*)&pucOrg[0x8]), _mm_load_si128((__m128i*)&pucCur[0x8])))));
            _mm_store_si128(&uiSum, _mm_add_epi32(_mm_load_si128(&uiSum), _mm_abs_epi32(_mm_sub_epi32(_mm_load_si128((__m128i*)&pucOrg[0x9]), _mm_load_si128((__m128i*)&pucCur[0x9])))));
            _mm_store_si128(&uiSum, _mm_add_epi32(_mm_load_si128(&uiSum), _mm_abs_epi32(_mm_sub_epi32(_mm_load_si128((__m128i*)&pucOrg[0xa]), _mm_load_si128((__m128i*)&pucCur[0xa])))));
            _mm_store_si128(&uiSum, _mm_add_epi32(_mm_load_si128(&uiSum), _mm_abs_epi32(_mm_sub_epi32(_mm_load_si128((__m128i*)&pucOrg[0xb]), _mm_load_si128((__m128i*)&pucCur[0xb])))));
            _mm_store_si128(&uiSum, _mm_add_epi32(_mm_load_si128(&uiSum), _mm_abs_epi32(_mm_sub_epi32(_mm_load_si128((__m128i*)&pucOrg[0xc]), _mm_load_si128((__m128i*)&pucCur[0xc])))));
            _mm_store_si128(&uiSum, _mm_add_epi32(_mm_load_si128(&uiSum), _mm_abs_epi32(_mm_sub_epi32(_mm_load_si128((__m128i*)&pucOrg[0xd]), _mm_load_si128((__m128i*)&pucCur[0xd])))));
            _mm_store_si128(&uiSum, _mm_add_epi32(_mm_load_si128(&uiSum), _mm_abs_epi32(_mm_sub_epi32(_mm_load_si128((__m128i*)&pucOrg[0xe]), _mm_load_si128((__m128i*)&pucCur[0xe])))));
            _mm_store_si128(&uiSum, _mm_add_epi32(_mm_load_si128(&uiSum), _mm_abs_epi32(_mm_sub_epi32(_mm_load_si128((__m128i*)&pucOrg[0xf]), _mm_load_si128((__m128i*)&pucCur[0xf])))));

            _mm_store_si128((__m128i*)pucOrg, _mm_add_epi32(_mm_load_si128((__m128i*)pucOrg), _mm_load_si128(&x1)));
            _mm_store_si128((__m128i*)pucCur, _mm_add_epi32(_mm_load_si128((__m128i*)pucCur), _mm_load_si128(&x2)));
        }
        return _mm_extract_epi32(uiSum, 0);
}
我不知道我在转换过程中是否有任何错误,因为我不是SSE方面的专家。
有什么帮助吗?

如果你能使用SSE,那就容易多了。我用过SSE,但它不起作用,我不知道为什么@Paul RYou需要更具体一些。哪一部分坏了?(看起来您试图将pucOrg视为128位的值,但结果不太理想。)