Go 整数双线性插值优化_Go_Optimization_Integer_Bilinear Interpolation

Go 整数双线性插值优化

go optimization

Go 整数双线性插值优化,go,optimization,integer,bilinear-interpolation,Go,Optimization,Integer,Bilinear Interpolation,我的代码受到双线性插值的限制，所以我编写了一个不使用浮点数学的版本（ScaleBlerpI）。这已经快了1.5倍1.85倍，但我想知道如何使它更快如有任何提示，我们将不胜感激 func ScaleBlerpI（src，dst*ValueFieldI）{ mx:=uint64（（src.Width-1）*math.maxint32/dst.Width） my:=uint64（（src.Height-1）*math.MaxUint32/dst.Height）对于y:=uint64（0）；y32/

我的代码的性能瓶颈在于双线性插值，所以我编写了一个不使用浮点运算的版本（

ScaleBlerpI

）。这已经快了 1.85 倍（最初为 1.5 倍），但我想知道如何让它更快。

如有任何建议，我将不胜感激。

// ScaleBlerpI resamples src into dst with bilinear interpolation performed
// entirely in integer arithmetic (no floating point).
//
// Source coordinates are tracked as 64-bit fixed-point values: the upper 32
// bits select the source cell and the lower 32 bits are the fractional
// position inside that cell. Only the first three components of every source
// element are interpolated and written to dst.
func ScaleBlerpI(src, dst *ValueFieldI) {
	// Step through the source grid per destination pixel, scaled by
	// math.MaxUint32 so that x*mx and y*my are fixed-point coordinates.
	mx := uint64((src.Width - 1) * math.MaxUint32 / dst.Width)
	my := uint64((src.Height - 1) * math.MaxUint32 / dst.Height)

	for y := uint64(0); y < uint64(dst.Height); y++ {
		for x := uint64(0); x < uint64(dst.Width); x++ {
			gx := (x * mx) >> 32            // eq. / math.MaxUint32
			tx := (x * mx) & math.MaxUint32 // eq. % (math.MaxUint32 + 1) or % 2^32
			gy := (y * my) >> 32
			ty := (y * my) & math.MaxUint32
			srcX, srcY := int(gx), int(gy)

			// The four source elements surrounding the sample position.
			rgba00 := src.GetComponent(srcX, srcY)
			rgba10 := src.GetComponent(srcX+1, srcY)
			rgba01 := src.GetComponent(srcX, srcY+1)
			rgba11 := src.GetComponent(srcX+1, srcY+1)

			result := []uint32{
				blerpI(rgba00[0], rgba10[0], rgba01[0], rgba11[0], tx, ty),
				blerpI(rgba00[1], rgba10[1], rgba01[1], rgba11[1], tx, ty),
				blerpI(rgba00[2], rgba10[2], rgba01[2], rgba11[2], tx, ty),
			}
			dst.SetComponent(int(x), int(y), result)
		}
	}
}

// lerpI linearly interpolates between s and e. The weight f is a fixed-point
// fraction in which 0 selects s and math.MaxUint32 selects e.
func lerpI(s, e uint32, f uint64) uint32 {
	// basically s*(1-f) + e*f
	return uint32(
		(uint64(s)*(math.MaxUint32-f) + uint64(e)*f) /
			math.MaxUint32)
}

// blerpI bilinearly interpolates between four corner values: first along x
// (c00 to c10 and c01 to c11, weighted by tx), then along y (weighted by ty).
func blerpI(c00, c10, c01, c11 uint32, tx, ty uint64) uint32 {
	return lerpI(
		lerpI(c00, c10, tx),
		lerpI(c01, c11, tx),
		ty,
	)
}

// ValueFieldI is a Width x Height grid stored row by row in Values, with
// ComponentSize consecutive uint32 values per grid element.
type ValueFieldI struct {
	Width, Height int
	ComponentSize int
	Values        []uint32
}

// GetComponent returns the ComponentSize values of the element at (x, y) as a
// sub-slice of vf.Values, so writes through the result modify the field.
func (vf *ValueFieldI) GetComponent(x, y int) []uint32 {
	componentIdx := x + y*vf.Width
	return vf.Values[componentIdx*vf.ComponentSize : componentIdx*vf.ComponentSize+vf.ComponentSize]
}

// SetComponent copies c into the storage of the element at (x, y).
func (vf *ValueFieldI) SetComponent(x, y int, c []uint32) {
	copy(vf.GetComponent(x, y), c)
}

分析表明，在

blerpI

、

src.GetComponent

和

dst.SetComponent

上耗费的时间最多。

编辑 1：将

// 基本上是 s*(1-f) + e*f
return uint32(
    (uint64(s)*(math.MaxUint32-f) + uint64(e)*f) /
        math.MaxUint32)

替换为

// 基本上是 s + f*(e-s)
return s + uint32((f*(uint64(e)-uint64(s)))>>32)

整数版本现在快了1.85倍

编辑2 基准：

// BenchmarkBlerpIRand times ScaleBlerpI while upscaling a 37x37 field of
// random three-component values to eight times its width and height.
func BenchmarkBlerpIRand(b *testing.B) {
	const (
		side       = 37
		scale      = 8
		components = 3
	)

	source := &ValueFieldI{
		Width:         side,
		Height:        side,
		ComponentSize: components,
		Values:        make([]uint32, side*side*components),
	}
	for idx := range source.Values {
		source.Values[idx] = rand.Uint32()
	}

	target := &ValueFieldI{
		Width:         side * scale,
		Height:        side * scale,
		ComponentSize: components,
		Values:        make([]uint32, side*scale*side*scale*components),
	}

	// Keep the setup above out of the measured time.
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		ScaleBlerpI(source, target)
	}
}

func BenchmarkBlerpIRand（b*testing.b）{
src:=&ValueFieldI{
宽度：37，
身高：37，
组件大小：3，
值：make（[]uint32，37*37*3），
}
对于i:=范围src.值{
src.Values[i]=rand.Uint32（）
}
dst:=&ValueFieldI{
宽度：37*8，
身高：37*8，
组件大小：3，
值：make（[]uint32，37*8*37*8*3），
}
b、 重置计时器（）
对于i:=0；i

你能提供你正在使用的测试数据和基准函数吗？ @HymnsForDisco 我已经添加了基准测试。 如果去掉嵌套的 lerpI 调用，你可以让它更快：复合方程可以化简，这可能会有所帮助，但我自己对定点运算不够熟悉，无法推导出来。:/（注意：编译器会内联

blerpI

调用）我最终得到了一个快 4.8 倍的 Go 版本和一个快 10 倍的 C SIMD 版本。