Copying an array in parallel for each context

I'm new to C++ AMP. If I use memcpy inside the parallel_for_each lambda, everything works fine, but I know that's not best practice. I tried using copy_to instead, but it raises an exception. Below is simplified code that focuses on the problem I'm running into. Thanks in advance.

typedef std::vector<DWORD> CArrDwData;

class CdataMatrix
{
public:
    CdataMatrix(int nChCount) : m_ChCount(nChCount)
    {
    }

    void SetSize(UINT uSize)
    {
        // MUST be multiple of m_ChCount*DWORD
        ASSERT(uSize%sizeof(DWORD) == 0);
        m_PackedLength = uSize/sizeof(DWORD);
        m_arrChannels.resize(m_ChCount*m_PackedLength);
    }

    UINT GetChannelPackedLen() const
    {
        return m_PackedLength;
    }

    const LPBYTE GetChannelBuffer(UINT uChannel) const
    {
        CArrDwData::const_pointer cPtr = m_arrChannels.data() + m_PackedLength*uChannel;
        return (const LPBYTE)cPtr;
    }

public:
    CArrDwData m_arrChannels;

protected:
    UINT m_ChCount;
    UINT m_PackedLength;
};

void CtypDiskHeader::ParalelProcess()
{
    const int nJobs = 6;
    const int nChannelCount = 3;
    UINT uAmount = 250000;
    int vch;
    CArrDwData arrCompData;

    // Check buffer sizes
    ASSERT((~uAmount & 0x00000003) == 3);            // DWORD aligned
    const UINT uInDWSize = uAmount/sizeof(DWORD);    // input size given in DWORDs

    CdataMatrix arrChData(nJobs);
    arrCompData.resize(nJobs*uInDWSize);

    vector<int> a(nJobs);
    for(vch = 0; vch < nJobs; vch++)
        a[vch] = vch;

    arrChData.SetSize(uAmount+16);    // note: 16 bytes or 4 DWORDs larger than uInDWSize

    accelerator_view acc_view = accelerator().default_view;

    Concurrency::extent<2> eIn(nJobs, uInDWSize);
    Concurrency::extent<2> eOut(nJobs, arrChData.GetChannelPackedLen());

    array_view<DWORD, 2> viewOut(eOut, arrChData.m_arrChannels);
    array_view<DWORD, 2> viewIn(eIn, arrCompData);

    concurrency::parallel_for_each(begin(a), end(a), [&](int vch)
    {
        vector<DWORD>::pointer ptr = (LPDWORD)viewIn(vch).data();
        LPDWORD bufCompIn = (LPDWORD)ptr;
        ptr = viewOut(vch).data();
        LPDWORD bufExpandedIn = (LPDWORD)ptr;

        if(ConditionNotOk())
        {
            // Copy raw data bufCompIn to bufExpandedIn
            // Works fine, but not the best way, I suppose:
            memcpy(bufExpandedIn, bufCompIn, uAmount);
            // Raises exception:
            //viewIn(vch).copy_to(viewOut(vch));
        }
        else
        {
            // Some data processing here
        }
    });
}

This isn't related to parallel_for_each. It looks like a known bug with array_view::copy_to. See the following post:

You can use an explicit view_as() to work around it instead. I believe your code should look like this:

viewIn(vch).copy_to(viewOut(vch));

// Becomes...

viewIn[vch].view_as<1>(concurrency::extent<1>(uInDWSize)).copy_to(viewOut(vch));

My mistake. In the original code, the extent of viewOut(vch) is slightly larger than the extent of viewIn(vch). Written this way, it raises a runtime_exception; when caught, xcp.what() reports that the copy failed because the extents do not match.

I fixed it by replacing the original line with: viewIn(vch).copy_to(viewOut(vch).section(viewIn(vch).extent));
It copies only the source region, which is exactly what I need. But it only compiles without restrict(amp).
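For reference, a minimal self-contained sketch of that section-based, host-side copy (sizes and variable names here are illustrative, not from the original code, and it assumes the copy runs outside any restrict(amp) context):

#include <amp.h>
#include <vector>

using namespace concurrency;

int main()
{
    std::vector<unsigned int> src(100, 1);    // smaller source buffer
    std::vector<unsigned int> dst(120, 0);    // larger destination buffer

    array_view<unsigned int, 1> viewIn(static_cast<int>(src.size()), src);
    array_view<unsigned int, 1> viewOut(static_cast<int>(dst.size()), dst);

    // Take a section of the destination with the same extent as the source,
    // so copy_to sees matching extents and only the source region is copied.
    viewIn.copy_to(viewOut.section(viewIn.extent));

    viewOut.synchronize();                    // write the results back to dst
    return 0;
}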

I made a major mistake in my original post, which I have just fixed: the extents of the source and destination arrays are not the same. Sorry for the inconvenience, and thank you for your reply. Please check my answer.

There aren't any restrict specifiers in the code above. Besides, you can't use memcpy or copy_to inside restrict(amp) code, because neither of them is restrict(amp) itself.

Yes, I know my code isn't restrict(amp); that's why I wrote "but it only compiles without restrict(amp)". If you know equivalent code that performs a vector copy (with the input extent smaller than the output extent) and is compatible with the restrict(amp) specifier, I would appreciate it. I have searched MSDN, but unfortunately haven't found documentation with a vector-copy example. Rgds.
array<int, 1> source(1000);
array<int, 1> dest(500);

parallel_for_each(source.extent, [=, &source, &dest](index<1> idx) restrict(amp)
{
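    // Skip indices that fall outside the smaller destination extent.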
    if (dest.extent.contains(idx))
        dest[idx] = source[idx];
});
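
For the case in the question (input extent smaller than output extent), a hedged adaptation of the same pattern to the question's viewIn/viewOut array_views might look like the sketch below; vch and uInDWSize are the variables from the simplified example, and every column index of a packed input row is assumed to be valid in the larger output row:

concurrency::parallel_for_each(concurrency::extent<1>(uInDWSize),
    [=](concurrency::index<1> idx) restrict(amp)
{
    // Copy one DWORD of channel 'vch' from the packed input row
    // into the (larger) expanded output row.
    viewOut(vch, idx[0]) = viewIn(vch, idx[0]);
});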