Ios 由于向数组值添加数字时执行过程中出错,命令缓冲区的Swift Metal Shader执行被中止
我正在尝试一些非常简单的算法,使用金属GPU加速来计算数组中的一些值。着色器在某些情况下会抛出错误,我将对此进行解释。 错误:由于执行期间出错,命令缓冲区的执行被中止。因导致先前/过多GPU错误IOAF代码4而忽略 着色器仅在向数组索引处的现有值添加值时才会引发此错误。例如: 这不会导致错误:Ios 由于向数组值添加数字时执行过程中出错,命令缓冲区的Swift Metal Shader执行被中止,ios,swift,shader,metal,Ios,Swift,Shader,Metal,我正在尝试一些非常简单的算法,使用金属GPU加速来计算数组中的一些值。着色器在某些情况下会抛出错误,我将对此进行解释。 错误:由于执行期间出错,命令缓冲区的执行被中止。因导致先前/过多GPU错误IOAF代码4而忽略 着色器仅在向数组索引处的现有值添加值时才会引发此错误。例如: 这不会导致错误: kernel void shader (device int *wPointsIntensity [[buffer(0)]], const device uint
kernel void shader (device int *wPointsIntensity [[buffer(0)]],
const device uint *wPointsXCoord [[buffer(1)]],
const device uint *wPointsYCoord [[buffer(2)]],
device float *pixelSignalIntensity [[buffer(3)]],
device float *pixelDistance [[buffer(4)]],
const device uint& noOfPoints [[ buffer(5) ]],
const device uint& width [[ buffer(6) ]],
const device uint& height [[ buffer(7) ]],
uint id [[ thread_position_in_grid ]]) {
//this does not throw error
for (uint wpIndex = 0; wpIndex < noOfPoints; wpIndex++) {
for (uint heightIndex = 0; heightIndex < height; heightIndex++) {
for (uint widthIndex = 0; widthIndex < width; widthIndex++) {
uint pixelIndex = heightIndex * width + widthIndex;
pixelDistance[pixelIndex] = float(pixelIndex);
pixelSignalIntensity[pixelIndex] = float(pixelIndex);
}}}}
而如果你改变
pixelDistance[pixelIndex]=浮动pixelIndex;
与
pixelDistance[pixelIndex]+=floatpixelIndex
它将抛出一个错误
以下是swift代码:
var wPointsValues = [Int32](repeating:0, count: wPoints.count)
var wPointsXLocations = [Int32](repeating:0, count: wPoints.count)
var wPointsYLocations = [Int32](repeating:0, count: wPoints.count)
for i in 0..<wPoints.count {
wPointsValues[i] = Int32(wPoints[i].signalIntensity)
wPointsXLocations[i] = Int32(wPoints[i].location.x)
wPointsYLocations[i] = Int32(wPoints[i].location.y)
}
var numberOfWPoints:Int32 = Int32(wPoints.count)
var int32Width = Int32(width)
var int32Height = Int32(height)
//output arrays
let numberOfResults = wPoints.count * Int(width) * Int(height)
var wPointsSignalIntensity = [Float32](repeating:0.0, count: numberOfResults)
var wPointsDistance = [Float32](repeating:0.0, count: numberOfResults)
//local variables
var signalDensity:[Float32] = [Float32](repeating:0.0, count: numberOfResults)
var signalDistance:[Float32] = [Float32](repeating:0.0, count: numberOfResults)
//create input buffers
let inWPointSignalValues = device.makeBuffer(bytes: wPointsValues, length: (MemoryLayout<Int32>.stride * wPoints.count), options: [])
let inWPointXCoordBuffer = device.makeBuffer(bytes: wPointsXLocations, length: (MemoryLayout<Int32>.stride * wPoints.count), options: [])
let inWPointYCoordBuffer = device.makeBuffer(bytes: wPointsYLocations, length: (MemoryLayout<Int32>.stride * wPoints.count), options: [])
//create putput buffers
let outPixelSignalIntensityBuffer = device.makeBuffer(bytes: wPointsSignalIntensity, length: (MemoryLayout<Float32>.stride * numberOfResults), options: [])
let outPixelDistanceBuffer = device.makeBuffer(bytes: wPointsDistance, length: (MemoryLayout<Float32>.stride * numberOfResults), options: [])
let commandBuffer = (mtlCommmandQueue?.makeCommandBuffer())!
let computeCommandEncoder = (commandBuffer.makeComputeCommandEncoder())!
computeCommandEncoder.setComputePipelineState(mtlComputePipelineFilter!)
//set input buffers
computeCommandEncoder.setBuffer(inWPointSignalValues, offset: 0, index: 0)
computeCommandEncoder.setBuffer(inWPointXCoordBuffer, offset: 0, index: 1)
computeCommandEncoder.setBuffer(inWPointYCoordBuffer, offset: 0, index: 2)
//set output buffers
computeCommandEncoder.setBuffer(outPixelSignalIntensityBuffer, offset: 0, index: 3)
computeCommandEncoder.setBuffer(outPixelDistanceBuffer, offset: 0, index: 4)
//set constants
computeCommandEncoder.setBytes(&numberOfWPoints, length: MemoryLayout<Int32>.stride, index: 5)
computeCommandEncoder.setBytes(&int32Width, length: MemoryLayout<Int32>.stride, index: 6)
computeCommandEncoder.setBytes(&int32Height, length: MemoryLayout<Int32>.stride, index: 7)
let threadsPerGroup = MTLSize(width:2,height:2,depth:2)
let numThreadgroups = MTLSize(width:2, height:2, depth:2)
computeCommandEncoder.dispatchThreadgroups(numThreadgroups, threadsPerThreadgroup: threadsPerGroup)
let endBufferAllocation = mach_absolute_time()
print("time for creating and setting buffert: time: \(Double(endBufferAllocation - start) / Double(NSEC_PER_SEC))")
computeCommandEncoder.endEncoding()
commandBuffer.commit()
commandBuffer.waitUntilCompleted()
let allComplete = mach_absolute_time()
self.signalDistance = (outPixelDistanceBuffer?.contents())!
self.signalDensity = (outPixelSignalIntensityBuffer?.contents())!
我有这个问题很久了,程序断断续续地崩溃了。事实证明,我正在访问内核中没有被缓冲区分配的内存。在内核中,我做了一个for循环0..Hmm。您使用的缓冲区超出了需要。您可以将输入和制服合并到结构中。此外,在这种情况下,const device one可以是常量。在任何情况下,都会导致所有计算线程执行相同的工作。他们很可能是步调一致。因此,所有线程都同时执行pixelDistance[pixelIndex]的增量,这至少会产生未定义的结果。GPU加速的主要好处是并行性,使线程独立处理问题。如果您根据线程ID分割工作,您将获得更好的性能,还可能避免错误