如何规范iOS中的视差数据?
在WWDC会议“深度图像编辑”中,他们提到了几次如何规范iOS中的视差数据?,ios,swift,depth,Ios,Swift,Depth,在WWDC会议“深度图像编辑”中,他们提到了几次normalizedDisparity和normalizedDisparityImage: “基本的想法是,我们将绘制我们的标准化差异图 将值转换为介于0和1之间的值” “因此,一旦知道了最小值和最大值,就可以将深度或视差标准化为0和1之间的值。” 我试着先得到这样的不平等图像: let disparityImage = depthImage.applyingFilter( "CIDepthToDisparity", withInputPa
normalizedDisparity
和normalizedDisparityImage
:
“基本的想法是,我们会把标准化视差图中的值
映射为介于0和1之间的值”
“因此,一旦知道了最小值和最大值,就可以将深度或视差标准化为0和1之间的值。”
我试着先得到这样的视差(disparity)图像:
let disparityImage = depthImage.applyingFilter(
"CIDepthToDisparity", withInputParameters: nil)
然后我试着去做标准化,但没有成功。我在正确的轨道上吗?如果您能给我一些建议,我将不胜感激
编辑:
这是我的测试代码,抱歉质量问题。我得到min
和max
,然后我尝试循环数据以对其进行规范化(让normalizedPoint=(point-min)/(max-min)
)
raywenderlich.com上有一篇名为“”的新博文,其中包含示例应用程序和与深度相关的详细信息。示例代码显示了如何使用
CVPixelBuffer
扩展规范化深度数据:
extension CVPixelBuffer {
    /// Normalizes this buffer in place so every pixel falls in 0...1
    /// using (pixel - min) / (max - min).
    ///
    /// NOTE(review): assumes a single-plane buffer holding one 32-bit Float
    /// per pixel with no row padding — confirm the pixel format before calling.
    func normalize() {
        CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
        // defer guarantees the unlock on every exit path below.
        defer { CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0)) }

        let width = CVPixelBufferGetWidth(self)
        let height = CVPixelBufferGetHeight(self)
        let count = width * height
        guard let baseAddress = CVPixelBufferGetBaseAddress(self) else { return }
        let floatBuffer = baseAddress.assumingMemoryBound(to: Float.self)

        // Seed the extrema from the full Float range instead of 1.0 / 0.0 so
        // the scan stays correct even when every value lies outside 0...1.
        var minPixel = Float.greatestFiniteMagnitude
        var maxPixel = -Float.greatestFiniteMagnitude
        for index in 0 ..< count {
            let pixel = floatBuffer[index]
            minPixel = min(pixel, minPixel)
            maxPixel = max(pixel, maxPixel)
        }

        let range = maxPixel - minPixel
        // A flat buffer (max == min) would divide by zero and fill with NaN;
        // leave it untouched instead.
        guard range > 0 else { return }
        for index in 0 ..< count {
            floatBuffer[index] = (floatBuffer[index] - minPixel) / range
        }
    }
}
扩展CVPixelBuffer{
func正规化(){
let width=CVPixelBufferGetWidth(自)
let height=CVPixelBufferGetHeight(自)
CVPixelBufferLockBaseAddress(自身,CVPixelBufferLockFlags(原始值:0))
让floatBuffer=unsafeBitCast(CVPixelBufferGetBaseAddress(self),to:UnsafeMutablePointer.self)
var minPixel:Float=1.0
var maxPixel:Float=0.0
对于y,0..<高度{
对于0中的x..<宽度{
设像素=浮动缓冲区[y*宽度+x]
最小像素=最小(像素,最小像素)
maxPixel=max(像素,maxPixel)
}
}
让范围=最大像素-最小像素
对于y,0..<高度{
对于0中的x..<宽度{
设像素=浮动缓冲区[y*宽度+x]
浮动缓冲区[y*宽度+x]=(像素-最小像素)/范围
}
}
CVPixelBufferUnlockBaseAddress(self,CVPixelBufferLockFlags(rawValue:0))
}
}
在处理深度数据时要记住,深度数据的分辨率低于实际图像,因此需要放大(更多信息请参见博客中的相关内容)。可以尝试使用Accelerate框架的vDSP向量函数。下面用两个函数把CVPixelBuffer规格化到0..1的标准范围:
myCVPixelBuffer.setUpNormalize()
import Accelerate
extension CVPixelBuffer {
    /// Returns `targetVector` rescaled to 0...1 with vDSP:
    /// result[i] = (v[i] - min) / (max - min).
    ///
    /// See "Using vDSP for Vector-based Arithmetic" and the
    /// "Vector extrema calculation" section of the Accelerate documentation.
    func vectorNormalize(targetVector: UnsafeMutableBufferPointer<Float>) -> [Float] {
        let maxValue = vDSP.maximum(targetVector)
        let minValue = vDSP.minimum(targetVector)
        let range = maxValue - minValue
        // A constant vector (range == 0) would produce 0/0 = NaN; map it to zeros.
        guard range > 0 else {
            return [Float](repeating: 0, count: targetVector.count)
        }
        // Adding the negated minimum is the vDSP idiom for subtracting it.
        let shifted = vDSP.add(-minValue, targetVector)
        return vDSP.divide(shifted, range)
    }

    /// Normalizes plane 0 of this grayscale Float32 buffer to 0...1 in place
    /// and returns `self` so calls can be chained or ignored.
    @discardableResult
    func setUpNormalize() -> CVPixelBuffer {
        CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
        // defer guarantees the unlock on every exit path below.
        defer { CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0)) }

        let width = CVPixelBufferGetWidthOfPlane(self, 0)
        let height = CVPixelBufferGetHeightOfPlane(self, 0)
        let count = width * height
        guard let baseAddress = CVPixelBufferGetBaseAddressOfPlane(self, 0) else {
            return self
        }
        let pixels = baseAddress.assumingMemoryBound(to: Float.self)

        // Work on a copy so vDSP reads stable input while we write back.
        let copy = UnsafeMutablePointer<Float>.allocate(capacity: count)
        defer { copy.deallocate() }
        copy.initialize(from: pixels, count: count)
        let copyBuffer = UnsafeMutableBufferPointer<Float>(start: copy, count: count)

        let normalizedDisparity = vectorNormalize(targetVector: copyBuffer)
        // Float is trivial, so re-initializing over live values is safe here.
        pixels.initialize(from: normalizedDisparity, count: count)
        return self
    }
}
上面Will的回答很好,但可以按如下方式改进。我把它用在照片的深度数据上;如上面提到的,如果深度数据不是16位格式,它就可能不起作用(不过我还没找到这样的照片)。我很惊讶Core Image中没有一个现成的滤镜来处理这个问题
extension CVPixelBuffer {
/// Normalizes plane 0 of this grayscale Float32 buffer to 0...1 in place
/// using vDSP: pixel = (pixel - min) / (max - min).
func normalize() {
    CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
    // defer guarantees the unlock on every exit path below.
    defer { CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0)) }

    let width = CVPixelBufferGetWidthOfPlane(self, 0)
    let height = CVPixelBufferGetHeightOfPlane(self, 0)
    let count = width * height
    guard let baseAddress = CVPixelBufferGetBaseAddressOfPlane(self, 0) else { return }
    let pixels = baseAddress.assumingMemoryBound(to: Float.self)
    let pixelBuffer = UnsafeMutableBufferPointer<Float>(start: pixels, count: count)

    let maxValue = vDSP.maximum(pixelBuffer)
    let minValue = vDSP.minimum(pixelBuffer)
    let range = maxValue - minValue
    // A flat image (range == 0) would yield NaNs from 0/0; leave it untouched.
    guard range > 0 else { return }

    // Adding the negated minimum is the vDSP idiom for subtracting it.
    let shifted = vDSP.add(-minValue, pixelBuffer)
    let normalizedDisparity = vDSP.divide(shifted, range)
    // Float is trivial, so re-initializing over live values is safe here.
    pixels.initialize(from: normalizedDisparity, count: count)
}
扩展CVPixelBuffer{
func正规化(){
CVPixelBufferLockBaseAddress(自身,CVPixelBufferLockFlags(原始值:0))
let width=CVPixelBufferGetWidthOfPlane(self,0)
let height=CVPixelBufferGetHeightof平面(自,0)
让计数=宽度*高度
设pixelBufferBase=unsafeBitCast(CVPixelBufferGetBaseAddressOfPlane(self,0),to:UnsafemeutablePointer.self)
让depthCopyBuffer=UnsafemeutableBufferPointer(开始:pixelBufferBase,计数:计数)
设maxValue=vDSP.maximum(depthCopyBuffer)
设minValue=vDSP.minimum(depthCopyBuffer)
让范围=最大值-最小值
设negMinValue=-minValue
让subtractVector=vDSP.add(negMinValue,depthCopyBuffer)
让normalizedDisparity=vDSP.divide(减法向量,范围)
pixelBufferBase.initialize(起始:normalizedDisparity,计数:计数)
CVPixelBufferUnlockBaseAddress(self,CVPixelBufferLockFlags(rawValue:0))
}
}“它不起作用”。。。你的代码是什么,你期望什么,你得到了什么?@jcaron请看一看我添加了一些测试代码。嘿,吉米,也在深度数据中挖掘。你知道如何将实际背景和前景图像作为UIImages获取吗?嘿@RoiMulia,使用深度数据,你可以创建一个遮罩并区分前景/背景。WWDC“深度图像编辑”课程有很多相关信息,我现在可能已经看了100遍了。嘿@Jimmy,我尝试了几天来完成这项任务,但都没有成功。我知道你可能非常忙,但是如果你以前做过,你能分享代码吗?如果需要的话,我愿意为它付费,这让我很沮丧,因为我花了这么多天时间在它上面。我过去已经成功地使用过它,但突然我发现它在中途失败了,抱怨它无法访问浮点数组中的数据。好的,请记住,此函数不考虑像素缓冲区的像素格式。如果你的组件不是每像素16位,这是行不通的。我有一个32位的单组件缓冲区。我还建议您使用
Float.greatestFiniteMagnitude
作为初始最小大小,以防您的所有值都大于1,并且出于类似原因,此值的负值作为初始最大值。
myCVPixelBuffer.setUpNormalize()
import Accelerate
extension CVPixelBuffer {
    /// Returns `targetVector` rescaled to 0...1 with vDSP:
    /// result[i] = (v[i] - min) / (max - min).
    ///
    /// See "Using vDSP for Vector-based Arithmetic" and the
    /// "Vector extrema calculation" section of the Accelerate documentation.
    func vectorNormalize(targetVector: UnsafeMutableBufferPointer<Float>) -> [Float] {
        let maxValue = vDSP.maximum(targetVector)
        let minValue = vDSP.minimum(targetVector)
        let range = maxValue - minValue
        // A constant vector (range == 0) would produce 0/0 = NaN; map it to zeros.
        guard range > 0 else {
            return [Float](repeating: 0, count: targetVector.count)
        }
        // Adding the negated minimum is the vDSP idiom for subtracting it.
        let shifted = vDSP.add(-minValue, targetVector)
        return vDSP.divide(shifted, range)
    }

    /// Normalizes plane 0 of this grayscale Float32 buffer to 0...1 in place
    /// and returns `self` so calls can be chained or ignored.
    @discardableResult
    func setUpNormalize() -> CVPixelBuffer {
        CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
        // defer guarantees the unlock on every exit path below.
        defer { CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0)) }

        let width = CVPixelBufferGetWidthOfPlane(self, 0)
        let height = CVPixelBufferGetHeightOfPlane(self, 0)
        let count = width * height
        guard let baseAddress = CVPixelBufferGetBaseAddressOfPlane(self, 0) else {
            return self
        }
        let pixels = baseAddress.assumingMemoryBound(to: Float.self)

        // Work on a copy so vDSP reads stable input while we write back.
        let copy = UnsafeMutablePointer<Float>.allocate(capacity: count)
        defer { copy.deallocate() }
        copy.initialize(from: pixels, count: count)
        let copyBuffer = UnsafeMutableBufferPointer<Float>(start: copy, count: count)

        let normalizedDisparity = vectorNormalize(targetVector: copyBuffer)
        // Float is trivial, so re-initializing over live values is safe here.
        pixels.initialize(from: normalizedDisparity, count: count)
        return self
    }
}
https://github.com/racewalkWill/PhotoBrowseModified
extension CVPixelBuffer {
/// Normalizes plane 0 of this grayscale Float32 buffer to 0...1 in place
/// using vDSP: pixel = (pixel - min) / (max - min).
func normalize() {
    CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
    // defer guarantees the unlock on every exit path below.
    defer { CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0)) }

    let width = CVPixelBufferGetWidthOfPlane(self, 0)
    let height = CVPixelBufferGetHeightOfPlane(self, 0)
    let count = width * height
    guard let baseAddress = CVPixelBufferGetBaseAddressOfPlane(self, 0) else { return }
    let pixels = baseAddress.assumingMemoryBound(to: Float.self)
    let pixelBuffer = UnsafeMutableBufferPointer<Float>(start: pixels, count: count)

    let maxValue = vDSP.maximum(pixelBuffer)
    let minValue = vDSP.minimum(pixelBuffer)
    let range = maxValue - minValue
    // A flat image (range == 0) would yield NaNs from 0/0; leave it untouched.
    guard range > 0 else { return }

    // Adding the negated minimum is the vDSP idiom for subtracting it.
    let shifted = vDSP.add(-minValue, pixelBuffer)
    let normalizedDisparity = vDSP.divide(shifted, range)
    // Float is trivial, so re-initializing over live values is safe here.
    pixels.initialize(from: normalizedDisparity, count: count)
}