如何规范iOS中的视差数据?
在WWDC会议“深度图像编辑”中,他们提到了几次如何规范iOS中的视差数据?,ios,swift,depth,Ios,Swift,Depth,在WWDC会议“深度图像编辑”中,他们提到了几次normalizedDisparity和normalizedDisparityImage: “基本的想法是,我们将绘制我们的标准化差异图 将值转换为介于0和1之间的值” “因此,一旦知道了最小值和最大值,就可以将深度或视差标准化为0和1之间的值。” 我试着先得到这样的不平等图像: let disparityImage = depthImage.applyingFilter( "CIDepthToDisparity", withInputPa
normalizedDisparity
和normalizedDisparityImage
:
“基本的想法是,我们会把标准化视差图中的值
映射为介于0和1之间的值”
“因此,一旦知道了最小值和最大值,就可以将深度或视差标准化为0和1之间的值。”
我试着先得到这样的视差(disparity)图像:
let disparityImage = depthImage.applyingFilter(
"CIDepthToDisparity", withInputParameters: nil)
然后我试着去做标准化,但没有成功。我在正确的轨道上吗?如果您能给我一些建议,我将不胜感激
编辑:
这是我的测试代码,抱歉质量问题。我得到min
和max
,然后我尝试循环数据以对其进行规范化(让normalizedPoint=(point-min)/(max-min)
)
raywenderlich.com上有一篇名为“”的新博文,其中包含示例应用程序和与深度相关的详细信息。示例代码显示了如何使用
CVPixelBuffer
扩展规范化深度数据:
extension CVPixelBuffer {
    /// Normalizes this buffer in place so every pixel falls in 0...1
    /// using (pixel - min) / (max - min).
    ///
    /// NOTE(review): assumes a single-plane buffer holding one 32-bit Float
    /// per pixel with no row padding — confirm the pixel format before calling.
    func normalize() {
        CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
        // defer guarantees the unlock on every exit path below.
        defer { CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0)) }

        let width = CVPixelBufferGetWidth(self)
        let height = CVPixelBufferGetHeight(self)
        let count = width * height
        guard let baseAddress = CVPixelBufferGetBaseAddress(self) else { return }
        let floatBuffer = baseAddress.assumingMemoryBound(to: Float.self)

        // Seed the extrema from the full Float range instead of 1.0 / 0.0 so
        // the scan stays correct even when every value lies outside 0...1.
        var minPixel = Float.greatestFiniteMagnitude
        var maxPixel = -Float.greatestFiniteMagnitude
        for index in 0 ..< count {
            let pixel = floatBuffer[index]
            minPixel = min(pixel, minPixel)
            maxPixel = max(pixel, maxPixel)
        }

        let range = maxPixel - minPixel
        // A flat buffer (max == min) would divide by zero and fill with NaN;
        // leave it untouched instead.
        guard range > 0 else { return }
        for index in 0 ..< count {
            floatBuffer[index] = (floatBuffer[index] - minPixel) / range
        }
    }
}
扩展CVPixelBuffer{
func正规化(){
let width=CVPixelBufferGetWidth(自)
let height=CVPixelBufferGetHeight(自)
CVPixelBufferLockBaseAddress(自身,CVPixelBufferLockFlags(原始值:0))
让floatBuffer=unsafeBitCast(CVPixelBufferGetBaseAddress(self),to:UnsafeMutablePointer.self)
var minPixel:Float=1.0
var maxPixel:Float=0.0
对于y,0..<高度{
对于0中的x..<宽度{
设像素=浮动缓冲区[y*宽度+x]
最小像素=最小(像素,最小像素)
maxPixel=max(像素,maxPixel)
}
}
让范围=最大像素-最小像素
对于y,0..<高度{
对于0中的x..<宽度{
设像素=浮动缓冲区[y*宽度+x]
浮动缓冲区[y*宽度+x]=(像素-最小像素)/范围
}
}
CVPixelBufferUnlockBaseAddress(self,CVPixelBufferLockFlags(rawValue:0))
}
}
在处理深度数据时要记住,深度数据的分辨率低于实际图像,因此需要放大(更多信息请参见博客中的相关内容)。可以尝试使用Accelerate框架的vDSP向量函数。下面用两个函数把CVPixelBuffer规格化到0..1的标准范围:
myCVPixelBuffer.setUpNormalize()
import Accelerate
extension CVPixelBuffer {
    /// Returns `targetVector` rescaled to 0...1 with vDSP:
    /// result[i] = (v[i] - min) / (max - min).
    ///
    /// See "Using vDSP for Vector-based Arithmetic" and the
    /// "Vector extrema calculation" section of the Accelerate documentation.
    func vectorNormalize(targetVector: UnsafeMutableBufferPointer<Float>) -> [Float] {
        let maxValue = vDSP.maximum(targetVector)
        let minValue = vDSP.minimum(targetVector)
        let range = maxValue - minValue
        // A constant vector (range == 0) would produce 0/0 = NaN; map it to zeros.
        guard range > 0 else {
            return [Float](repeating: 0, count: targetVector.count)
        }
        // Adding the negated minimum is the vDSP idiom for subtracting it.
        let shifted = vDSP.add(-minValue, targetVector)
        return vDSP.divide(shifted, range)
    }

    /// Normalizes plane 0 of this grayscale Float32 buffer to 0...1 in place
    /// and returns `self` so calls can be chained or ignored.
    @discardableResult
    func setUpNormalize() -> CVPixelBuffer {
        CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
        // defer guarantees the unlock on every exit path below.
        defer { CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0)) }

        let width = CVPixelBufferGetWidthOfPlane(self, 0)
        let height = CVPixelBufferGetHeightOfPlane(self, 0)
        let count = width * height
        guard let baseAddress = CVPixelBufferGetBaseAddressOfPlane(self, 0) else {
            return self
        }
        let pixels = baseAddress.assumingMemoryBound(to: Float.self)

        // Work on a copy so vDSP reads stable input while we write back.
        let copy = UnsafeMutablePointer<Float>.allocate(capacity: count)
        defer { copy.deallocate() }
        copy.initialize(from: pixels, count: count)
        let copyBuffer = UnsafeMutableBufferPointer<Float>(start: copy, count: count)

        let normalizedDisparity = vectorNormalize(targetVector: copyBuffer)
        // Float is trivial, so re-initializing over live values is safe here.
        pixels.initialize(from: normalizedDisparity, count: count)
        return self
    }
}
上面Will的回答很好,但可以按如下方式改进。我把它用在照片的深度数据上;如上面提到的,如果深度数据不是16位格式,它就可能不起作用(不过我还没找到这样的照片)。我很惊讶Core Image中没有一个现成的滤镜来处理这个问题
extension CVPixelBuffer {
/// Normalizes plane 0 of this grayscale Float32 buffer to 0...1 in place
/// using vDSP: pixel = (pixel - min) / (max - min).
func normalize() {
    CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
    // defer guarantees the unlock on every exit path below.
    defer { CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0)) }

    let width = CVPixelBufferGetWidthOfPlane(self, 0)
    let height = CVPixelBufferGetHeightOfPlane(self, 0)
    let count = width * height
    guard let baseAddress = CVPixelBufferGetBaseAddressOfPlane(self, 0) else { return }
    let pixels = baseAddress.assumingMemoryBound(to: Float.self)
    let pixelBuffer = UnsafeMutableBufferPointer<Float>(start: pixels, count: count)

    let maxValue = vDSP.maximum(pixelBuffer)
    let minValue = vDSP.minimum(pixelBuffer)
    let range = maxValue - minValue
    // A flat image (range == 0) would yield NaNs from 0/0; leave it untouched.
    guard range > 0 else { return }

    // Adding the negated minimum is the vDSP idiom for subtracting it.
    let shifted = vDSP.add(-minValue, pixelBuffer)
    let normalizedDisparity = vDSP.divide(shifted, range)
    // Float is trivial, so re-initializing over live values is safe here.
    pixels.initialize(from: normalizedDisparity, count: count)
}
扩展CVPixelBuffer{
func正规化(){
CVPixelBufferLockBaseAddress(自身,CVPixelBufferLockFlags(原始值:0))
let width=CVPixelBufferGetWidthOfPlane(self,0)
let height=CVPixelBufferGetHeightof平面(自,0)
让计数=宽度*高度
设pixelBufferBase=unsafeBitCast(CVPixelBufferGetBaseAddressOfPlane(self,0),to:UnsafemeutablePointer.self)
让depthCopyBuffer=UnsafemeutableBufferPointer(开始:pixelBufferBase,计数:计数)
设maxValue=vDSP.maximum(depthCopyBuffer)
设minValue=vDSP.minimum(depthCopyBuffer)
让范围=最大值-最小值
设negMinValue=-minValue
让subtractVector=vDSP.add(negMinValue,depthCopyBuffer)
让normalizedDisparity=vDSP.divide(减法向量,范围)
pixelBufferBase.initialize(起始:normalizedDisparity,计数:计数)
CVPixelBufferUnlockBaseAddress(self,CVPixelBufferLockFlags(rawValue:0))
}
}“它不起作用”。。。你的代码是什么,你期望什么,你得到了什么?@jcaron请看一看我添加了一些测试代码。嘿,吉米,也在深度数据中挖掘。你知道如何将实际背景和前景图像作为UIImages获取吗?嘿@RoiMulia,使用深度数据,你可以创建一个遮罩并区分前景/背景。WWDC“深度图像编辑”课程有很多相关信息,我现在可能已经看了100遍了。嘿@Jimmy,我尝试了几天来完成这项任务,但都没有成功。我知道你可能非常忙,但是如果你以前做过,你能分享代码吗?如果需要的话,我愿意为它付费,这让我很沮丧,因为我花了这么多天时间在它上面。我过去已经成功地使用过它,但突然我发现它在中途失败了,抱怨它无法访问浮点数组中的数据。好的,请记住,此函数不考虑像素缓冲区的像素格式。如果你的组件不是每像素16位,这是行不通的。我有一个32位的单组件缓冲区。我还建议您使用
Float.greatestFiniteMagnitude
作为初始最小大小,以防您的所有值都大于1,并且出于类似原因,此值的负值作为初始最大值。
myCVPixelBuffer.setUpNormalize()
import Accelerate
extension CVPixelBuffer {
    /// Returns `targetVector` rescaled to 0...1 with vDSP:
    /// result[i] = (v[i] - min) / (max - min).
    ///
    /// See "Using vDSP for Vector-based Arithmetic" and the
    /// "Vector extrema calculation" section of the Accelerate documentation.
    func vectorNormalize(targetVector: UnsafeMutableBufferPointer<Float>) -> [Float] {
        let maxValue = vDSP.maximum(targetVector)
        let minValue = vDSP.minimum(targetVector)
        let range = maxValue - minValue
        // A constant vector (range == 0) would produce 0/0 = NaN; map it to zeros.
        guard range > 0 else {
            return [Float](repeating: 0, count: targetVector.count)
        }
        // Adding the negated minimum is the vDSP idiom for subtracting it.
        let shifted = vDSP.add(-minValue, targetVector)
        return vDSP.divide(shifted, range)
    }

    /// Normalizes plane 0 of this grayscale Float32 buffer to 0...1 in place
    /// and returns `self` so calls can be chained or ignored.
    @discardableResult
    func setUpNormalize() -> CVPixelBuffer {
        CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
        // defer guarantees the unlock on every exit path below.
        defer { CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0)) }

        let width = CVPixelBufferGetWidthOfPlane(self, 0)
        let height = CVPixelBufferGetHeightOfPlane(self, 0)
        let count = width * height
        guard let baseAddress = CVPixelBufferGetBaseAddressOfPlane(self, 0) else {
            return self
        }
        let pixels = baseAddress.assumingMemoryBound(to: Float.self)

        // Work on a copy so vDSP reads stable input while we write back.
        let copy = UnsafeMutablePointer<Float>.allocate(capacity: count)
        defer { copy.deallocate() }
        copy.initialize(from: pixels, count: count)
        let copyBuffer = UnsafeMutableBufferPointer<Float>(start: copy, count: count)

        let normalizedDisparity = vectorNormalize(targetVector: copyBuffer)
        // Float is trivial, so re-initializing over live values is safe here.
        pixels.initialize(from: normalizedDisparity, count: count)
        return self
    }
}
https://github.com/racewalkWill/PhotoBrowseModified
extension CVPixelBuffer {
/// Normalizes plane 0 of this grayscale Float32 buffer to 0...1 in place
/// using vDSP: pixel = (pixel - min) / (max - min).
func normalize() {
    CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
    // defer guarantees the unlock on every exit path below.
    defer { CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0)) }

    let width = CVPixelBufferGetWidthOfPlane(self, 0)
    let height = CVPixelBufferGetHeightOfPlane(self, 0)
    let count = width * height
    guard let baseAddress = CVPixelBufferGetBaseAddressOfPlane(self, 0) else { return }
    let pixels = baseAddress.assumingMemoryBound(to: Float.self)
    let pixelBuffer = UnsafeMutableBufferPointer<Float>(start: pixels, count: count)

    let maxValue = vDSP.maximum(pixelBuffer)
    let minValue = vDSP.minimum(pixelBuffer)
    let range = maxValue - minValue
    // A flat image (range == 0) would yield NaNs from 0/0; leave it untouched.
    guard range > 0 else { return }

    // Adding the negated minimum is the vDSP idiom for subtracting it.
    let shifted = vDSP.add(-minValue, pixelBuffer)
    let normalizedDisparity = vDSP.divide(shifted, range)
    // Float is trivial, so re-initializing over live values is safe here.
    pixels.initialize(from: normalizedDisparity, count: count)
}