Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/algorithm/10.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python 3.x Go strings.Contains()比Python3慢2倍?_Python 3.x_Algorithm_Performance_Go_Yara - Fatal编程技术网

Python 3.x Go strings.Contains()比Python3慢2倍?

Python 3.x Go strings.Contains()比Python3慢2倍?,python-3.x,algorithm,performance,go,yara,Python 3.x,Algorithm,Performance,Go,Yara,我正在将一个文本模式扫描仪从Python3转换为Go1.10,但我很惊讶它实际上慢了两倍。分析后,罪魁祸首出现在strings.Contains()中。请参见下面的简单基准测试。我错过什么了吗?你能推荐一种更快的Go模式搜索算法,在这种情况下,它的性能会更好吗?我不担心启动时间,同样的模式将用于扫描数百万个文件 Py3基准: import time import re RUNS = 10000 if __name__ == '__main__': with open('data.ph

我正在将一个文本模式扫描仪从Python3转换为Go1.10,但我很惊讶它实际上慢了两倍。分析后,罪魁祸首出现在
strings.Contains()
中。请参见下面的简单基准测试。我错过什么了吗?你能推荐一种更快的Go模式搜索算法,在这种情况下,它的性能会更好吗?我不担心启动时间,同样的模式将用于扫描数百万个文件

Py3基准:

import time
import re

# Number of times the substring search is repeated inside the timed region.
RUNS = 10000

if __name__ == '__main__':
    # Read the haystack once, before timing starts, so file I/O is not measured.
    with open('data.php') as fh:
        testString = fh.read()

    def do():
        """Return True if the fixed 32-character needle occurs in testString."""
        return "576ad4f370014dfb1d0f17b0e6855f22" in testString

    # perf_counter() is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump if the system time is adjusted during the run.
    start = time.perf_counter()
    for _ in range(RUNS):
        do()
    duration = time.perf_counter() - start
    print("Python: %.2fs" % duration)
package main

import (
    "fmt"
    "io/ioutil"
    "log"
    "strings"
    "time"
)

// runs is the number of times the substring search is repeated inside the
// timed loop.
const runs = 10000

// main loads data.php, runs strings.Contains for a fixed 32-byte needle
// `runs` times, and prints the total elapsed time in seconds.
func main() {
    haystack := readFile("data.php")
    needle := "576ad4f370014dfb1d0f17b0e6855f22"

    began := time.Now()
    for n := 0; n < runs; n++ {
        // The result is discarded on purpose; only the search time matters.
        _ = strings.Contains(haystack, needle)
    }
    elapsed := time.Since(began)

    fmt.Printf("Go: %.2fs\n", elapsed.Seconds())
}

// readFile returns the entire contents of the file named fname as a string.
// A read error is fatal: it is passed to log.Fatal, which logs it and exits
// the process.
func readFile(fname string) string {
    contents, readErr := ioutil.ReadFile(fname)
    if readErr == nil {
        return string(contents)
    }
    log.Fatal(readErr)
    return "" // not reached; log.Fatal exits, but the compiler needs a return
}
Go1.10基准:

import time
import re

# Number of times the substring search is repeated inside the timed region.
RUNS = 10000

if __name__ == '__main__':
    # Read the haystack once, before timing starts, so file I/O is not measured.
    with open('data.php') as fh:
        testString = fh.read()

    def do():
        """Return True if the fixed 32-character needle occurs in testString."""
        return "576ad4f370014dfb1d0f17b0e6855f22" in testString

    # perf_counter() is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump if the system time is adjusted during the run.
    start = time.perf_counter()
    for _ in range(RUNS):
        do()
    duration = time.perf_counter() - start
    print("Python: %.2fs" % duration)
package main

import (
    "fmt"
    "io/ioutil"
    "log"
    "strings"
    "time"
)

// runs is the number of times the substring search is repeated inside the
// timed loop.
const runs = 10000

// main loads data.php, runs strings.Contains for a fixed 32-byte needle
// `runs` times, and prints the total elapsed time in seconds.
func main() {
    haystack := readFile("data.php")
    needle := "576ad4f370014dfb1d0f17b0e6855f22"

    began := time.Now()
    for n := 0; n < runs; n++ {
        // The result is discarded on purpose; only the search time matters.
        _ = strings.Contains(haystack, needle)
    }
    elapsed := time.Since(began)

    fmt.Printf("Go: %.2fs\n", elapsed.Seconds())
}

// readFile returns the entire contents of the file named fname as a string.
// A read error is fatal: it is passed to log.Fatal, which logs it and exits
// the process.
func readFile(fname string) string {
    contents, readErr := ioutil.ReadFile(fname)
    if readErr == nil {
        return string(contents)
    }
    log.Fatal(readErr)
    return "" // not reached; log.Fatal exits, but the compiler needs a return
}
为什么Python3(24.79s)比Go(5.47s)慢4.5倍?你得到了什么结果?

Python

$ cat contains.py
import time
import re

# Number of times the substring search is repeated inside the timed region.
RUNS = 10000

if __name__ == '__main__':
    # The Complete Works of William Shakespeare by William Shakespeare
    # http://www.gutenberg.org/files/100/100-0.txt
    file = '/home/peter/shakespeare.100-0.txt' # 'data.php'
    # Read the haystack once, before timing starts, so file I/O is not measured.
    with open(file) as fh:
        testString = fh.read()

    def do():
        """Return True if the needle sentence occurs in testString."""
        return "Means to immure herself and not be seen." in testString

    # perf_counter() is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump if the system time is adjusted during the run.
    start = time.perf_counter()
    for _ in range(RUNS):
        do()
    duration = time.perf_counter() - start
    print("Python: %.2fs" % duration)
    # Print one result after timing to show whether the needle was found.
    print(do())
$ python3 --version
Python 3.6.5
$ python3 contains.py
Python: 24.79s
True
$ 
$ cat contains.go
package main

import (
    "fmt"
    "io/ioutil"
    "log"
    "strings"
    "time"
)

// runs is the number of times the substring search is repeated inside the
// timed loop.
const runs = 10000

// main times `runs` repeated strings.Contains searches for one sentence in a
// local copy of Shakespeare's complete works, prints the elapsed seconds, and
// then prints whether and at which byte offset the sentence occurs.
func main() {
    // Haystack: The Complete Works of William Shakespeare by William Shakespeare
    // http://www.gutenberg.org/files/100/100-0.txt
    haystack := readFile(`/home/peter/shakespeare.100-0.txt`) // "data.php"
    needle := "Means to immure herself and not be seen."

    began := time.Now()
    for n := 0; n < runs; n++ {
        // The result is discarded on purpose; only the search time matters.
        _ = strings.Contains(haystack, needle)
    }
    elapsed := time.Since(began)
    fmt.Printf("Go: %.2fs\n", elapsed.Seconds())

    // One search yields both lines of output: Contains is Index >= 0.
    offset := strings.Index(haystack, needle)
    fmt.Println(offset >= 0)
    fmt.Println(offset)
}

// readFile returns the entire contents of the file named fname as a string.
// A read error is fatal: it is passed to log.Fatal, which logs it and exits
// the process.
func readFile(fname string) string {
    contents, readErr := ioutil.ReadFile(fname)
    if readErr == nil {
        return string(contents)
    }
    log.Fatal(readErr)
    return "" // not reached; log.Fatal exits, but the compiler needs a return
}
$ go version
go version devel +5332b5e75a Tue Jul 31 15:44:37 2018 +0000 linux/amd64
$ go run contains.go
Go: 5.47s
true
5837178
$ 
Go

$ cat contains.py
import time
import re

# Number of times the substring search is repeated inside the timed region.
RUNS = 10000

if __name__ == '__main__':
    # The Complete Works of William Shakespeare by William Shakespeare
    # http://www.gutenberg.org/files/100/100-0.txt
    file = '/home/peter/shakespeare.100-0.txt' # 'data.php'
    # Read the haystack once, before timing starts, so file I/O is not measured.
    with open(file) as fh:
        testString = fh.read()

    def do():
        """Return True if the needle sentence occurs in testString."""
        return "Means to immure herself and not be seen." in testString

    # perf_counter() is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump if the system time is adjusted during the run.
    start = time.perf_counter()
    for _ in range(RUNS):
        do()
    duration = time.perf_counter() - start
    print("Python: %.2fs" % duration)
    # Print one result after timing to show whether the needle was found.
    print(do())
$ python3 --version
Python 3.6.5
$ python3 contains.py
Python: 24.79s
True
$ 
$ cat contains.go
package main

import (
    "fmt"
    "io/ioutil"
    "log"
    "strings"
    "time"
)

// runs is the number of times the substring search is repeated inside the
// timed loop.
const runs = 10000

// main times `runs` repeated strings.Contains searches for one sentence in a
// local copy of Shakespeare's complete works, prints the elapsed seconds, and
// then prints whether and at which byte offset the sentence occurs.
func main() {
    // Haystack: The Complete Works of William Shakespeare by William Shakespeare
    // http://www.gutenberg.org/files/100/100-0.txt
    haystack := readFile(`/home/peter/shakespeare.100-0.txt`) // "data.php"
    needle := "Means to immure herself and not be seen."

    began := time.Now()
    for n := 0; n < runs; n++ {
        // The result is discarded on purpose; only the search time matters.
        _ = strings.Contains(haystack, needle)
    }
    elapsed := time.Since(began)
    fmt.Printf("Go: %.2fs\n", elapsed.Seconds())

    // One search yields both lines of output: Contains is Index >= 0.
    offset := strings.Index(haystack, needle)
    fmt.Println(offset >= 0)
    fmt.Println(offset)
}

// readFile returns the entire contents of the file named fname as a string.
// A read error is fatal: it is passed to log.Fatal, which logs it and exits
// the process.
func readFile(fname string) string {
    contents, readErr := ioutil.ReadFile(fname)
    if readErr == nil {
        return string(contents)
    }
    log.Fatal(readErr)
    return "" // not reached; log.Fatal exits, but the compiler needs a return
}
$ go version
go version devel +5332b5e75a Tue Jul 31 15:44:37 2018 +0000 linux/amd64
$ go run contains.go
Go: 5.47s
true
5837178
$ 
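$ cat contains.go
package main

import (
    "fmt"
    "io/ioutil"
    "log"
    "strings"
    "time"
)

const (
    runs = 10000
)

func main() {
    // The Complete Works of William Shakespeare by William Shakespeare
    // http://www.gutenberg.org/files/100/100-0.txt
    fname := `/home/peter/shakespeare.100-0.txt` // "data.php"
    testdata := readFile(fname)
    needle := "Means to immure herself and not be seen."
    start := time.Now()

    for i := 0; i < runs; i++ {
        _ = strings.Contains(testdata, needle)

    }
    duration := time.Now().Sub(start)
    fmt.Printf("Go: %.2fs\n", duration.Seconds())

    fmt.Println(strings.Contains(testdata, needle))
    fmt.Println(strings.Index(testdata, needle))

}

func readFile(fname string) string {
    data, err := ioutil.ReadFile(fname)
    if err != nil {
        log.Fatal(err)
    }
    return string(data)
}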
为什么Python 3(24.79s)比Go(5.47s)慢4.5倍?你得到了什么结果?

Python

$ cat contains.py
import time
import re

# Number of times the substring search is repeated inside the timed region.
RUNS = 10000

if __name__ == '__main__':
    # The Complete Works of William Shakespeare by William Shakespeare
    # http://www.gutenberg.org/files/100/100-0.txt
    file = '/home/peter/shakespeare.100-0.txt' # 'data.php'
    # Read the haystack once, before timing starts, so file I/O is not measured.
    with open(file) as fh:
        testString = fh.read()

    def do():
        """Return True if the needle sentence occurs in testString."""
        return "Means to immure herself and not be seen." in testString

    # perf_counter() is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump if the system time is adjusted during the run.
    start = time.perf_counter()
    for _ in range(RUNS):
        do()
    duration = time.perf_counter() - start
    print("Python: %.2fs" % duration)
    # Print one result after timing to show whether the needle was found.
    print(do())
$ python3 --version
Python 3.6.5
$ python3 contains.py
Python: 24.79s
True
$ 
$ cat contains.go
package main

import (
    "fmt"
    "io/ioutil"
    "log"
    "strings"
    "time"
)

// runs is the number of times the substring search is repeated inside the
// timed loop.
const runs = 10000

// main times `runs` repeated strings.Contains searches for one sentence in a
// local copy of Shakespeare's complete works, prints the elapsed seconds, and
// then prints whether and at which byte offset the sentence occurs.
func main() {
    // Haystack: The Complete Works of William Shakespeare by William Shakespeare
    // http://www.gutenberg.org/files/100/100-0.txt
    haystack := readFile(`/home/peter/shakespeare.100-0.txt`) // "data.php"
    needle := "Means to immure herself and not be seen."

    began := time.Now()
    for n := 0; n < runs; n++ {
        // The result is discarded on purpose; only the search time matters.
        _ = strings.Contains(haystack, needle)
    }
    elapsed := time.Since(began)
    fmt.Printf("Go: %.2fs\n", elapsed.Seconds())

    // One search yields both lines of output: Contains is Index >= 0.
    offset := strings.Index(haystack, needle)
    fmt.Println(offset >= 0)
    fmt.Println(offset)
}

// readFile returns the entire contents of the file named fname as a string.
// A read error is fatal: it is passed to log.Fatal, which logs it and exits
// the process.
func readFile(fname string) string {
    contents, readErr := ioutil.ReadFile(fname)
    if readErr == nil {
        return string(contents)
    }
    log.Fatal(readErr)
    return "" // not reached; log.Fatal exits, but the compiler needs a return
}
$ go version
go version devel +5332b5e75a Tue Jul 31 15:44:37 2018 +0000 linux/amd64
$ go run contains.go
Go: 5.47s
true
5837178
$ 
Go

$ cat contains.py
import time
import re

# Number of times the substring search is repeated inside the timed region.
RUNS = 10000

if __name__ == '__main__':
    # The Complete Works of William Shakespeare by William Shakespeare
    # http://www.gutenberg.org/files/100/100-0.txt
    file = '/home/peter/shakespeare.100-0.txt' # 'data.php'
    # Read the haystack once, before timing starts, so file I/O is not measured.
    with open(file) as fh:
        testString = fh.read()

    def do():
        """Return True if the needle sentence occurs in testString."""
        return "Means to immure herself and not be seen." in testString

    # perf_counter() is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump if the system time is adjusted during the run.
    start = time.perf_counter()
    for _ in range(RUNS):
        do()
    duration = time.perf_counter() - start
    print("Python: %.2fs" % duration)
    # Print one result after timing to show whether the needle was found.
    print(do())
$ python3 --version
Python 3.6.5
$ python3 contains.py
Python: 24.79s
True
$ 
$ cat contains.go
package main

import (
    "fmt"
    "io/ioutil"
    "log"
    "strings"
    "time"
)

// runs is the number of times the substring search is repeated inside the
// timed loop.
const runs = 10000

// main times `runs` repeated strings.Contains searches for one sentence in a
// local copy of Shakespeare's complete works, prints the elapsed seconds, and
// then prints whether and at which byte offset the sentence occurs.
func main() {
    // Haystack: The Complete Works of William Shakespeare by William Shakespeare
    // http://www.gutenberg.org/files/100/100-0.txt
    haystack := readFile(`/home/peter/shakespeare.100-0.txt`) // "data.php"
    needle := "Means to immure herself and not be seen."

    began := time.Now()
    for n := 0; n < runs; n++ {
        // The result is discarded on purpose; only the search time matters.
        _ = strings.Contains(haystack, needle)
    }
    elapsed := time.Since(began)
    fmt.Printf("Go: %.2fs\n", elapsed.Seconds())

    // One search yields both lines of output: Contains is Index >= 0.
    offset := strings.Index(haystack, needle)
    fmt.Println(offset >= 0)
    fmt.Println(offset)
}

// readFile returns the entire contents of the file named fname as a string.
// A read error is fatal: it is passed to log.Fatal, which logs it and exits
// the process.
func readFile(fname string) string {
    contents, readErr := ioutil.ReadFile(fname)
    if readErr == nil {
        return string(contents)
    }
    log.Fatal(readErr)
    return "" // not reached; log.Fatal exits, but the compiler needs a return
}
$ go version
go version devel +5332b5e75a Tue Jul 31 15:44:37 2018 +0000 linux/amd64
$ go run contains.go
Go: 5.47s
true
5837178
$ 
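$ cat contains.go
package main

import (
    "fmt"
    "io/ioutil"
    "log"
    "strings"
    "time"
)

const (
    runs = 10000
)

func main() {
    // The Complete Works of William Shakespeare by William Shakespeare
    // http://www.gutenberg.org/files/100/100-0.txt
    fname := `/home/peter/shakespeare.100-0.txt` // "data.php"
    testdata := readFile(fname)
    needle := "Means to immure herself and not be seen."
    start := time.Now()

    for i := 0; i < runs; i++ {
        _ = strings.Contains(testdata, needle)

    }
    duration := time.Now().Sub(start)
    fmt.Printf("Go: %.2fs\n", duration.Seconds())

    fmt.Println(strings.Contains(testdata, needle))
    fmt.Println(strings.Index(testdata, needle))

}

func readFile(fname string) string {
    data, err := ioutil.ReadFile(fname)
    if err != nil {
        log.Fatal(err)
    }
    return string(data)
}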
我对我在上找到的各种字符串搜索实现进行了更多的基准测试,例如:

  • (Yara似乎在底层实现了Aho-Corasick算法)
基准结果():

然后,针对Go原生实现(strings.Contains 和 regexp)与基于C的Yara实现,我测试了一个实际用例:用1100个签名(100个正则表达式,1000个字面量)扫描一个500KB的不匹配文件:

BenchmarkScanNative-4              2     824328504 ns/op
BenchmarkScanYara-4              300       5338861 ns/op

尽管Go中的C调用被认为开销很大,但对于这类“重量级”操作,收益仍然非常可观。另外:Yara匹配1100个签名所需的CPU时间,仅为匹配1个签名时的5倍。

我对我在上找到的各种字符串搜索实现进行了更多的基准测试,例如:

  • (Yara似乎在底层实现了Aho-Corasick算法)
基准结果():

然后,针对Go原生实现(strings.Contains 和 regexp)与基于C的Yara实现,我测试了一个实际用例:用1100个签名(100个正则表达式,1000个字面量)扫描一个500KB的不匹配文件:

BenchmarkScanNative-4              2     824328504 ns/op
BenchmarkScanYara-4              300       5338861 ns/op

尽管Go中的C调用被认为开销很大,但对于这类“重量级”操作,收益仍然非常可观。另外:Yara匹配1100个签名所需的CPU时间,仅为匹配1个签名时的5倍。

请使用Go中
testing
包提供的基准测试框架。(顺便说一句,我不认为你能找到比strings.Contains或bytes.Contains更优化的实现。)needle(待查找的字符串)的长度正好是32字节,这意味着它在64位机器上可以走汇编优化路径,而在32位机器上则不行。不过Python3的速度确实让我感到惊讶。请使用Go中的package
testing
提供的基准测试框架。(顺便说一句,我不认为你能找到比strings.Contains或bytes.Contains更优化的实现。)needle(待查找的字符串)的长度正好是32字节,这意味着它在64位机器上可以走汇编优化路径,而在32位机器上则不行。不过Python3的速度确实让我感到惊讶。谢谢你展示了一个反例!我这里也得到了类似的结果。我仍然不清楚到底是什么造成了差异,所以我会做更多的测试。感谢您展示了一个反例!我这里也得到了类似的结果。我仍然不明白是什么造成了差异,所以我会做更多的测试。