cuda-directx 12纹理2D(在1D阵列中)互操作
标签:cuda, directx-12
我正在尝试在CUDA中更新DirectX 12所使用的纹理。我可能遗漏了什么,但目前找不到任何线索。
1. 图像的右上角区域有一块“始终为黑色”的区域。
2. 只有当所有像素的RGB值都相同时,我才能得到预期的结果(第一个问题除外);否则会出现意外的伪影,就好像数组的内存结构与预期不符一样。
我遗漏了什么?
以下是纹理的创建过程:
{
    // Creates a single-mip 2D texture on a shareable default heap, creates an SRV
    // for it, imports the resource into CUDA as external memory, maps it as a
    // device buffer and runs the CUDA kernel on it once.
    TextureWidth = m_width;
    TextureHeight = m_height;
    auto nPixels = TextureWidth * TextureHeight * 3;   // three floats per texel
    auto pixelBufferSize = sizeof(float) * nPixels;

    D3D12_RESOURCE_DESC textureDesc{};
    textureDesc.MipLevels = 1;
    textureDesc.Format = DXGI_FORMAT_R32G32B32_FLOAT;
    textureDesc.Width = TextureWidth;
    textureDesc.Height = TextureHeight;
    textureDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
    textureDesc.DepthOrArraySize = 1;
    textureDesc.SampleDesc.Count = 1;
    textureDesc.SampleDesc.Quality = 0;
    textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;

    // Use a named local: taking the address of a temporary is a non-standard
    // compiler extension.
    const CD3DX12_HEAP_PROPERTIES defaultHeapProps(D3D12_HEAP_TYPE_DEFAULT);
    ThrowIfFailed(m_device->CreateCommittedResource(&defaultHeapProps, D3D12_HEAP_FLAG_SHARED,
        &textureDesc, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, nullptr, IID_PPV_ARGS(&m_textureBuffer)));
    NAME_D3D12_OBJECT(m_textureBuffer);

    // Describe and create a SRV for the texture.
    {
        D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc{};
        srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
        srvDesc.Format = textureDesc.Format;
        srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
        srvDesc.Texture2D.MipLevels = 1;
        m_device->CreateShaderResourceView(m_textureBuffer.Get(), &srvDesc, m_srvHeap->GetCPUDescriptorHandleForHeapStart());
        NAME_D3D12_OBJECT(m_srvHeap);
    }

    // Share m_textureBuffer with cuda
    {
        HANDLE sharedHandle{};
        WindowsSecurityAttributes windowsSecurityAttributes{};
        LPCWSTR name{};   // null name: the shared handle is created unnamed
        ThrowIfFailed(m_device->CreateSharedHandle(m_textureBuffer.Get(), &windowsSecurityAttributes, GENERIC_ALL, name, &sharedHandle));

        // Query the allocation size of the resource that is actually imported
        // (the texture), not of a linear buffer of pixelBufferSize bytes.
        D3D12_RESOURCE_ALLOCATION_INFO d3d12ResourceAllocationInfo;
        d3d12ResourceAllocationInfo = m_device->GetResourceAllocationInfo(m_nodeMask, 1, &textureDesc);
        auto actualSize = d3d12ResourceAllocationInfo.SizeInBytes;

        cudaExternalMemoryHandleDesc externalMemoryHandleDesc;
        memset(&externalMemoryHandleDesc, 0, sizeof(externalMemoryHandleDesc));
        externalMemoryHandleDesc.type = cudaExternalMemoryHandleTypeD3D12Resource;
        externalMemoryHandleDesc.handle.win32.handle = sharedHandle;
        externalMemoryHandleDesc.size = actualSize;
        externalMemoryHandleDesc.flags = cudaExternalMemoryDedicated;
        checkCudaErrors(cudaImportExternalMemory(&m_externalMemory, &externalMemoryHandleDesc));

        // Ownership of the NT handle is not transferred to CUDA by the import,
        // so release it here to avoid leaking it.
        CloseHandle(sharedHandle);

        // NOTE(review): the texture is mapped below as a linear float buffer.
        // Texture memory layout is generally implementation-defined
        // (tiled/padded), so the kernel's row-linear indexing is an assumption
        // - confirm, or map the memory as a CUDA mipmapped array and write
        // through a surface object instead.
        cudaExternalMemoryBufferDesc externalMemoryBufferDesc;
        memset(&externalMemoryBufferDesc, 0, sizeof(externalMemoryBufferDesc));
        externalMemoryBufferDesc.offset = 0;
        externalMemoryBufferDesc.size = pixelBufferSize;
        externalMemoryBufferDesc.flags = 0;
        checkCudaErrors(cudaExternalMemoryGetMappedBuffer(&m_cudaDevVertptr, m_externalMemory, &externalMemoryBufferDesc));

        // Fill the mapped memory once and wait for the kernel to finish.
        RunKernel(TextureWidth, TextureHeight, (float*)m_cudaDevVertptr, m_streamToRun, 1.0f);
        checkCudaErrors(cudaStreamSynchronize(m_streamToRun));
    }
}
这里是更新此纹理的cuda代码:
// Returns a / b, plus one when the division leaves a non-zero remainder
// (i.e. rounds the quotient up for non-negative a and positive b).
int iDivUp(int a, int b)
{
    const int quotient = a / b;
    const int remainder = a % b;
    if (remainder == 0)
        return quotient;
    return quotient + 1;
}
// Writes a striped test pattern into a buffer holding three floats per pixel.
//
// Launch layout: 2D grid of 2D blocks; one thread per pixel. Threads whose
// (x, y) falls outside width x height do nothing.
//   pixels - destination buffer, indexed as (y * width + x) * 3 + channel
//   width  - number of pixels per row
//   height - number of rows
//   time   - animation parameter fed to __sinf
__global__ void TextureKernel(float *pixels, unsigned int width, unsigned int height, float time)
{
    const unsigned int col = threadIdx.x + blockDim.x * blockIdx.x;
    const unsigned int row = threadIdx.y + blockDim.y * blockIdx.y;

    // Guard against the partially filled blocks at the right/bottom edges.
    if (row >= height || col >= width)
        return;

    const unsigned int base = (row * width + col) * 3;

    // Alternate 32-pixel-wide vertical bands: full red on even bands, an
    // animated dim red on odd bands.
    const float animated = __sinf(time) * 0.1f + 0.10f;
    const bool evenBand = ((col / 32) % 2) == 0;

    float red;
    if (evenBand)
        red = 1.0f;
    else
        red = animated;

    pixels[base + 0] = red;   // red channel
    pixels[base + 1] = 0;     // green channel
    pixels[base + 2] = 0;     // blue channel
}
// Launches TextureKernel on streamToRun with 32 x 32 thread blocks and enough
// blocks to cover meshWidth x meshHeight pixels, then checks for a launch error.
//   meshWidth / meshHeight - size of the pixel grid
//   texture_dev            - device pointer handed to the kernel
//   streamToRun            - stream the kernel is enqueued on (no sync here)
//   animTime               - animation parameter forwarded to the kernel
void RunKernel(size_t meshWidth, size_t meshHeight, float *texture_dev, cudaStream_t streamToRun, float animTime)
{
    auto tileEdge = 32;
    dim3 blockShape(tileEdge, tileEdge);

    // Round the block counts up so partially covered edges still get a block.
    const int blocksAcross = iDivUp(meshWidth, tileEdge);
    const int blocksDown = iDivUp(meshHeight, tileEdge);
    dim3 gridShape(blocksAcross, blocksDown);

    TextureKernel<<<gridShape, blockShape, 0, streamToRun>>>(texture_dev, meshWidth, meshHeight, animTime);
    getLastCudaError("TextureKernel execution failed.\n");
}
您假设具有三个float通道的2D纹理图像采用简单的行线性(row-major)内存布局。正如您的结果所表明的,这通常并不成立。
纹理针对空间局部性访问进行了优化:其内存布局的设计目标,是让在n维纹理空间中相邻的元素在内存中也保持相邻。对于多于一个维度的对象,简单的行主序内存布局无法做到这一点。特定纹理图像的确切内存布局通常不是您可以预先知道或依赖的,它取决于您所使用的GPU(通常,数据会以分块(tiling)、填充(padding)等方式存储,以保持内容对齐)。
正如您自己所注意到的,您需要做的是把一个CUDA数组(CUDA数组相当于CUDA中的纹理图像)映射到来自D3D12的外部内存上。该CUDA数组的格式必须与在D3D12中创建的纹理的格式一致。然后,您就可以使用CUDA运行时API的纹理函数或表面(surface)函数来访问该CUDA数组所表示的纹理图像。
正确的做法是:先将纹理作为外部内存导入,再将其映射为mipmapped数组,然后用该数组创建CUDA表面对象(surface object),最后在CUDA内核中通过该表面修改纹理。
导入和映射是通过以下方式完成的:
// Map the imported external memory as a one-level mipmapped CUDA array and
// wrap its level 0 in a surface object that kernels can write to.
// NOTE(review): the float4 channel format presumably requires the D3D12 texture
// to use a matching four-channel 32-bit float format - confirm against texDesc.
cudaExternalMemoryMipmappedArrayDesc cuExtmemMipDesc{};
cuExtmemMipDesc.extent = make_cudaExtent(texDesc.Width, texDesc.Height, 0);   // depth 0: 2D array
cuExtmemMipDesc.formatDesc = cudaCreateChannelDesc<float4>();
cuExtmemMipDesc.numLevels = 1;
cuExtmemMipDesc.flags = cudaArraySurfaceLoadStore;   // allow surface load/store on the array

cudaMipmappedArray_t cuMipArray{};
checkCudaErrors(cudaExternalMemoryGetMappedMipmappedArray(&cuMipArray, m_externalMemory, &cuExtmemMipDesc));

// Only mip level 0 exists; fetch it as a plain CUDA array.
cudaArray_t cuArray{};
checkCudaErrors(cudaGetMipmappedArrayLevel(&cuArray, cuMipArray, 0));

cudaResourceDesc cuResDesc{};
cuResDesc.resType = cudaResourceTypeArray;
cuResDesc.res.array.array = cuArray;
checkCudaErrors(cudaCreateSurfaceObject(&cuSurface, &cuResDesc));
// where cudaSurfaceObject_t cuSurface{};
cudaExternalMemoryMipmappedArrayDesc cuExtmemMipDesc{};
cuExtmemMipDesc.extent = make_cudaExtent(texDesc.Width, texDesc.Height, 0);
cuExtmemMipDesc.formatDesc = cudaCreateChannelDesc<float4>();
cuExtmemMipDesc.numLevels = 1;
cuExtmemMipDesc.flags = cudaArraySurfaceLoadStore;
cudaMipmappedArray_t cuMipArray{};
CheckCudaErrors(cudaExternalMemoryGetMappedMipmappedArray(&cuMipArray, m_externalMemory, &cuExtmemMipDesc));
cudaArray_t cuArray{};
CheckCudaErrors(cudaGetMipmappedArrayLevel(&cuArray, cuMipArray, 0));
cudaResourceDesc cuResDesc{};
cuResDesc.resType = cudaResourceTypeArray;
cuResDesc.res.array.array = cuArray;
checkCudaErrors(cudaCreateSurfaceObject(&cuSurface, &cuResDesc));
// where cudaSurfaceObject_t cuSurface{};
cuda部分如下所示:
// Divides a by b and adds one when the division is not exact, so the result
// is rounded up for non-negative a and positive b.
int iDivUp(int a, int b)
{
    const bool inexact = (a % b) != 0;
    return (a / b) + (inexact ? 1 : 0);
}
// Paints an animated test pattern into a 2D surface, one float4 texel per thread.
//
// Launch layout: 2D grid of 2D blocks covering at least width x height threads;
// threads outside that range return without writing.
//   surf   - surface object written with surf2Dwrite as float4 texels
//            (NOTE(review): presumably backed by a float4-format array - confirm)
//   width  - texels per row
//   height - number of rows
//   time   - animation parameter fed to __cosf
__global__ void UpdateSurface(cudaSurfaceObject_t surf, unsigned int width, unsigned int height, float time)
{
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (y >= height || x >= width) return;

    // Normalised coordinates.
    const float xVar = (float)x / (float)width;
    const float yVar = (float)y / (float)height;

    // Animated terms; all arithmetic stays in single precision (float literals)
    // so the kernel never promotes to double.
    const float costx = __cosf(time) * 0.5f + xVar;
    const float costxMany = __cosf(y * time) * 0.5f + yVar;
    const float costyMany = __cosf((float)x / 100 * time) * 0.5f + xVar;
    const unsigned int margin = 1;

    float4 pixel{};
    if (y == 0) // paint the first row
        pixel = make_float4(costyMany * 0.3f, costyMany, costyMany * 0.4f, 1.0f);
    else if (y == height - 1) // paint the last row
        pixel = make_float4(costyMany * 0.6f, costyMany * 0.7f, costyMany, 1.0f);
    else if (x % 5 == 0) // paint a column of 1 pixel wide every 5 pixels
    {
        if (x > width / 2) // a certain color for the right half
            pixel = make_float4(0.1f, 0.5f, costx, 1.0f);
        else // another color for the left half
            pixel = make_float4(costx, 0.1f, 0.2f, 1.0f);
    }
    else if (x > width - margin - 1 || x <= margin) // columns within the margin at the left/right edges
        pixel = make_float4(costxMany, costxMany * 0.9f, costxMany * 0.6f, 1.0f);
    else // all the rest of the texture
        pixel = make_float4(costx * 0.3f, costx * 0.4f, costx * 0.6f, 1.0f);

    // surf2Dwrite takes the x coordinate in bytes, hence the texel-size scale.
    surf2Dwrite(pixel, surf, x * sizeof(float4), y);
}
// Launches UpdateSurface on streamToRun with enough blocks to cover
// textureW x textureH texels, then checks for a launch error.
//   textureW / textureH - surface size in texels
//   surfaceObject       - surface handed to the kernel
//   streamToRun         - stream the kernel is enqueued on (no sync here)
//   animTime            - animation parameter forwarded to the kernel
void RunKernel(size_t textureW, size_t textureH, cudaSurfaceObject_t surfaceObject, cudaStream_t streamToRun, float animTime)
{
    // 16 x 16 = 256 threads per block, a whole number of 32-thread warps.
    // The kernel bounds-checks every thread, so any block edge is valid.
    const int unit = 16;
    const dim3 threads(unit, unit);

    // Explicit narrowing: iDivUp and the kernel take 32-bit sizes.
    const int widthTexels = static_cast<int>(textureW);
    const int heightTexels = static_cast<int>(textureH);
    const dim3 grid(iDivUp(widthTexels, unit), iDivUp(heightTexels, unit));

    UpdateSurface<<<grid, threads, 0, streamToRun>>>(surfaceObject,
                                                     static_cast<unsigned int>(textureW),
                                                     static_cast<unsigned int>(textureH),
                                                     animTime);
    getLastCudaError("UpdateSurface execution failed.\n");
}
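int iDivUp(int a, int b) { return a % b != 0 ? a / b + 1 : a / b; }
__global__ void UpdateSurface(cudaSurfaceObject_t surf, unsigned int width, unsigned int height, float time)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (y >= height | x >= width) return;
    auto xVar = (float)x / (float)width;
    auto yVar = (float)y / (float)height;
    auto cost = __cosf(time) * 0.5f + 0.5f;
    auto costx = __cosf(time) * 0.5f + xVar;
    auto costy = __cosf(time) * 0.5f + yVar;
    auto costxx = (__cosf(time) * 0.5f + 0.5f) * width;
    auto costyy = (__cosf(time) * 0.5f + 0.5f) * height;
    auto costxMany = __cosf(y * time) * 0.5f + yVar;
    auto costyMany = __cosf((float)x/100 * time) * 0.5f + xVar;
    auto margin = 1;
    float4 pixel{};
    if (y == 0) // paint the first row
        pixel = make_float4(costyMany * 0.3, costyMany * 1, costyMany * 0.4, 1);
    else if (y == height - 1) // paint the last row
        pixel = make_float4(costyMany * 0.6, costyMany * 0.7, costyMany * 1, 1);
    else if (x % 5 == 0) // paint a column of 1 pixel wide every 5 pixels
    {
        if (x > width / 2) // a certain color for the right half
            pixel = make_float4(0.1, 0.5, costx * 1, 1);
        else // another color for the left half
            pixel = make_float4(costx * 1, 0.1, 0.2, 1);
    }
    else if (x > width - margin - 1 | x <= margin) // first and last columns
        pixel = make_float4(costxMany, costxMany * 0.9, costxMany * 0.6, 1);
    else // all the rest of the texture
        pixel = make_float4(costx * 0.3, costx * 0.4, costx * 0.6, 1);
    surf2Dwrite(pixel, surf, x * 16, y);
}
void RunKernel(size_t textureW, size_t textureH, cudaSurfaceObject_t surfaceObject, cudaStream_t streamToRun, float animTime)
{
    auto unit = 10;
    dim3 threads(unit, unit);
    dim3 grid(iDivUp(textureW, unit), iDivUp(textureH, unit));
    UpdateSurface <<<grid, threads, 0, streamToRun >>> (surfaceObject, textureW, textureH, animTime);
    getLastCudaError("UpdateSurface execution failed.\n");
}
正确的做法是:先将纹理作为外部内存导入,再将其映射为mipmapped数组,然后用该数组创建CUDA表面对象(surface object),最后在CUDA内核中通过该表面修改纹理。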
导入和映射是通过以下方式完成的:
// Describe the imported memory as a single-level 2D mipmapped array of float4
// texels that supports surface load/store, map it, and create a surface object
// over mip level 0.
// NOTE(review): assumes the D3D12 texture format matches the float4 channel
// descriptor used here - verify against texDesc.Format.
cudaExternalMemoryMipmappedArrayDesc cuExtmemMipDesc{};
cuExtmemMipDesc.extent = make_cudaExtent(texDesc.Width, texDesc.Height, 0);
cuExtmemMipDesc.formatDesc = cudaCreateChannelDesc<float4>();
cuExtmemMipDesc.numLevels = 1;
cuExtmemMipDesc.flags = cudaArraySurfaceLoadStore;

cudaMipmappedArray_t cuMipArray{};
checkCudaErrors(cudaExternalMemoryGetMappedMipmappedArray(&cuMipArray, m_externalMemory, &cuExtmemMipDesc));

// Level 0 is the only level described above.
cudaArray_t cuArray{};
checkCudaErrors(cudaGetMipmappedArrayLevel(&cuArray, cuMipArray, 0));

// Surface objects are created from a resource descriptor of array type.
cudaResourceDesc cuResDesc{};
cuResDesc.resType = cudaResourceTypeArray;
cuResDesc.res.array.array = cuArray;
checkCudaErrors(cudaCreateSurfaceObject(&cuSurface, &cuResDesc));
// where cudaSurfaceObject_t cuSurface{};
cudaexternalemorymipmpappedarraydesc cuExtmemMipDesc{};
cuExtmemMipDesc.extent=make_cudaExtent(texDesc.Width,texDesc.Height,0);
cuExtmemMipDesc.formatDesc=cudaCreateChannelDesc();
cuExtmemMipDesc.numLevels=1;
cuExtmemMipDesc.flags=cudaArraySurfaceLoadStore;
cudaMipmappedArray_u t cuMipArray{};
检查CUDAERRORS(CUDAEXTERNALMEMORYGETMAPPEDMIPPEDRARRAY(&cuMipArray,m_外部内存,