Go 检查字节片是否为数字的最有效方法_Go

Go 检查字节片是否为数字的最有效方法

Go 检查字节片是否为数字的最有效方法,go,Go,我正在寻找判断字节片是否为浮点的最有效方法这是在巨大的数据集上完成的，所以性能是关键尝试过的方法： strconv.ParseFloat regexp.Match CheckNumber-使用IsNumber+查看字节片是否包含的自转函数 func CheckNumber(p []byte) bool { r := string(p) sep := 0 for _, b := range r { if unicode.IsNumber(b) {

我正在寻找判断字节片是否为浮点的最有效方法

这是在巨大的数据集上完成的，所以性能是关键

尝试过的方法：

```
strconv.ParseFloat
```
```
regexp.Match
```

CheckNumber —— 自己编写的函数，使用 IsNumber 并检查字节片中是否包含 `.`：

// CheckNumber reports whether p is an unsigned decimal number written with
// ASCII digits and at most one '.' separator, for example "15", "15.", ".34"
// or "15.34".
//
// It returns false for empty input, for a lone ".", for input containing more
// than one '.', and for any other byte (signs, exponents, spaces, non-ASCII
// text).
func CheckNumber(p []byte) bool {
    digits := 0
    seenPoint := false

    // Walk the bytes directly instead of converting p to a string first.
    for _, c := range p {
        switch {
        case unicode.IsDigit(rune(c)):
            // A single byte is at most U+00FF, and within that range IsDigit
            // is true only for '0'..'9', so multi-byte numerics such as "٣"
            // or "½" are rejected rather than treated as digits.
            digits++
        case c == '.' && !seenPoint:
            seenPoint = true
        default:
            return false
        }
    }

    // Require at least one digit; otherwise "" and "." would pass.
    return digits > 0
}

基准代码：

// BenchmarkFloatStrconv times strconv.ParseFloat used as a float validator.
// The []byte-to-string conversion stays inside the loop so that it is part of
// the measured work.
func BenchmarkFloatStrconv(b *testing.B) {
    input := []byte("15.34234234234")

    for remaining := b.N; remaining > 0; remaining-- {
        if _, err := strconv.ParseFloat(string(input), 64); err != nil {
            log.Fatalf("NaN")
        }
    }
}

// BenchmarkFloatRegex times validation of a decimal number with a regular
// expression.
func BenchmarkFloatRegex(b *testing.B) {
    p := []byte("15.34234234234")
    // The pattern is anchored with ^...$ so the whole input has to be a
    // number; left unanchored, Match succeeds on any input that merely
    // contains a digit. MustCompile surfaces a bad pattern instead of
    // silently discarding the compile error.
    c := regexp.MustCompile(`^[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)$`)

    // Compiling the pattern is setup, not the operation being measured.
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        if !c.Match(p) {
            log.Fatalf("NaN")
        }
    }
}

// BenchmarkCheckNumber times CheckNumber on a fixed decimal input and aborts
// if the input is ever rejected.
func BenchmarkCheckNumber(b *testing.B) {
    input := []byte("15.34234234234")

    for remaining := b.N; remaining > 0; remaining-- {
        if CheckNumber(input) {
            continue
        }
        log.Fatalf("NaN")
    }
}

// BenchmarkValidate times Validate on a fixed decimal input and aborts if the
// input is ever rejected.
func BenchmarkValidate(b *testing.B) {
    input := []byte("15.1234567890")

    for remaining := b.N; remaining > 0; remaining-- {
        if Validate(input) {
            continue
        }
        log.Fatalf("problem")
    }
}

我对这些不同方案的基准测试公平吗？
有更好的解决方案吗？

编辑：多亏了 Adrian 和 icza 的提示，下面的版本避免了转换为字符串：
// CheckNumberNoStringConvert reports whether r is an unsigned decimal number
// made of ASCII digits and at most one '.' separator, for example "15",
// "15.", ".34" or "15.34". It inspects the bytes in place without converting
// them to a string.
//
// It returns false for empty input, for a lone ".", for input containing more
// than one '.', and for any other byte.
func CheckNumberNoStringConvert(r []byte) bool {
    digits := 0
    seenPoint := false

    for _, c := range r {
        switch {
        case '0' <= c && c <= '9':
            digits++
        case c == '.' && !seenPoint:
            seenPoint = true
        default:
            return false
        }
    }

    // Require at least one digit; otherwise "" and "." would pass.
    return digits > 0
}

对于简单的实数（浮点数，不含科学或工程计数法格式，也不含数字分组分隔符），`real_test.go`：
package main

import (
    "log"
    "regexp"
    "strconv"
    "testing"
    "unicode"
)

// IsReal reports whether n is a plain decimal real number: an optional
// leading '-', followed by ASCII digits with at most one '.' among them.
// Input that is empty, only a sign, or only a '.' is rejected, as is anything
// containing other bytes (no exponent, '+' sign or group separators).
func IsReal(n []byte) bool {
    body := n
    if len(body) != 0 && body[0] == '-' {
        body = body[1:]
    }
    if len(body) == 0 {
        return false
    }

    seenPoint := false
    for i := 0; i < len(body); i++ {
        switch ch := body[i]; {
        case ch >= '0' && ch <= '9':
            // Digits are always acceptable.
        case ch == '.' && !seenPoint && len(body) > 1:
            // The length guard rules out a body that is just ".".
            seenPoint = true
        default:
            return false
        }
    }
    return true
}

// BenchmarkIsReal times IsReal on a fixed decimal input and aborts if the
// input is ever rejected.
func BenchmarkIsReal(b *testing.B) {
    input := []byte("15.34234234234")

    for remaining := b.N; remaining > 0; remaining-- {
        if IsReal(input) {
            continue
        }
        log.Fatalf("NaN")
    }
}

// CheckNumber reports whether p is an unsigned decimal number written with
// ASCII digits and at most one '.' separator, for example "15", "15.", ".34"
// or "15.34".
//
// It returns false for empty input, for a lone ".", for input containing more
// than one '.', and for any other byte (signs, exponents, spaces, non-ASCII
// text).
func CheckNumber(p []byte) bool {
    digits := 0
    seenPoint := false

    // Walk the bytes directly instead of converting p to a string first.
    for _, c := range p {
        switch {
        case unicode.IsDigit(rune(c)):
            // A single byte is at most U+00FF, and within that range IsDigit
            // is true only for '0'..'9', so multi-byte numerics such as "٣"
            // or "½" are rejected rather than treated as digits.
            digits++
        case c == '.' && !seenPoint:
            seenPoint = true
        default:
            return false
        }
    }

    // Require at least one digit; otherwise "" and "." would pass.
    return digits > 0
}

// BenchmarkFloatStrconv times strconv.ParseFloat used as a float validator.
// The []byte-to-string conversion stays inside the loop so that it is part of
// the measured work.
func BenchmarkFloatStrconv(b *testing.B) {
    input := []byte("15.34234234234")

    for remaining := b.N; remaining > 0; remaining-- {
        if _, err := strconv.ParseFloat(string(input), 64); err != nil {
            log.Fatalf("NaN")
        }
    }
}

// BenchmarkFloatRegex times validation of a decimal number with a regular
// expression.
func BenchmarkFloatRegex(b *testing.B) {
    p := []byte("15.34234234234")
    // The pattern is anchored with ^...$ so the whole input has to be a
    // number; left unanchored, Match succeeds on any input that merely
    // contains a digit. MustCompile surfaces a bad pattern instead of
    // silently discarding the compile error.
    c := regexp.MustCompile(`^[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)$`)

    // Compiling the pattern is setup, not the operation being measured.
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        if !c.Match(p) {
            log.Fatalf("NaN")
        }
    }
}

// BenchmarkCheckNumber times CheckNumber on a fixed decimal input and aborts
// if the input is ever rejected.
func BenchmarkCheckNumber(b *testing.B) {
    input := []byte("15.34234234234")

    for remaining := b.N; remaining > 0; remaining-- {
        if CheckNumber(input) {
            continue
        }
        log.Fatalf("NaN")
    }
}

主程序包
进口(
“日志”
“regexp”
“strconv”
“测试”
“unicode”
)
func IsReal（n[]字节）bool{
如果len（n）>0&&n[0]='-'{
n=n[1:]
}
如果len（n）==0{
返回错误
}
瓦尔点布尔
对于u，c:=范围n{
如果“0”为0{
返回错误
}
九月++
持续
}
返回错误
}
返回真值
}
func BenchmarkFloatStrconv（b*testing.b）{
p:=[]字节（“15.34234”）
对于i:=0；i
我将其视为对自己的一个挑战，将其改写为某种状态机，综合来自这里所有人的集体输入：）
并在原始基准上表现良好：
BenchmarkValidate-8     100000000           13.0 ns/op         0 B/op          0 allocs/op

基准代码：
// BenchmarkFloatStrconv times strconv.ParseFloat used as a float validator.
// The []byte-to-string conversion stays inside the loop so that it is part of
// the measured work.
func BenchmarkFloatStrconv(b *testing.B) {
    input := []byte("15.34234234234")

    for remaining := b.N; remaining > 0; remaining-- {
        if _, err := strconv.ParseFloat(string(input), 64); err != nil {
            log.Fatalf("NaN")
        }
    }
}

// BenchmarkFloatRegex times validation of a decimal number with a regular
// expression.
func BenchmarkFloatRegex(b *testing.B) {
    p := []byte("15.34234234234")
    // The pattern is anchored with ^...$ so the whole input has to be a
    // number; left unanchored, Match succeeds on any input that merely
    // contains a digit. MustCompile surfaces a bad pattern instead of
    // silently discarding the compile error.
    c := regexp.MustCompile(`^[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)$`)

    // Compiling the pattern is setup, not the operation being measured.
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        if !c.Match(p) {
            log.Fatalf("NaN")
        }
    }
}

// BenchmarkCheckNumber times CheckNumber on a fixed decimal input and aborts
// if the input is ever rejected.
func BenchmarkCheckNumber(b *testing.B) {
    input := []byte("15.34234234234")

    for remaining := b.N; remaining > 0; remaining-- {
        if CheckNumber(input) {
            continue
        }
        log.Fatalf("NaN")
    }
}

// BenchmarkValidate times Validate on a fixed decimal input and aborts if the
// input is ever rejected.
func BenchmarkValidate(b *testing.B) {
    input := []byte("15.1234567890")

    for remaining := b.N; remaining > 0; remaining-- {
        if Validate(input) {
            continue
        }
        log.Fatalf("problem")
    }
}

func BenchmarkValidate（b*testing.b）{
p:=[]字节（“15.1234567890”）
对于i:=0；i
对于正则表达式，我会将编译放在基准函数之外（编译为全局变量），因为您只需要编译一次，所以成本与测试无关。至于公平。。。如果您正在测试的一个值代表了您可能传递给函数的所有值，那么是的，这似乎是公平的。此外，CheckNumber
可以只对字节片进行操作，而不是转换为字符串，除非您需要任意多字节文本。您不必担心多字节字符。如果您的输入包含这些，其他方法无论如何都会将其视为非数字（就像正确地查看单个字节一样）。根据您的数据，可能在CheckNumber
中，您需要检查数字分组分隔符（例如逗号）、表示指数的字母 e 或 E，以及正负号（e/E 之后的加/减号）。建议：用 '0'、'9' 和 '.' 代替 48、57 和 46。这与速度无关，但它使代码更具可读性。`if c < '0' || c > '9' { return false }`

产生很大的差异，而不是

如果'0'@md2perpe:Yes，则会产生很大的差异。它将引入一个bug。它不处理小数点。但是，如果“0”
// Validate reports whether b is a plain decimal floating-point literal:
//
//	["-"] mantissa [("e" | "E") ["+" | "-"] digits]
//
// where mantissa is ASCII digits with at most one '.', and contains at least
// one digit ("123", "123.", ".123" and "123.456" are all mantissas; "." is
// not). A leading '+', group separators, whitespace and special values such
// as "NaN" are rejected, and so is empty input.
func Validate(b []byte) bool {
    if len(b) > 0 && b[0] == '-' {
        b = b[1:]
    }

    // n is the length of the run of integer digits at the start.
    n := 0
    for n < len(b) && b[n] >= '0' && b[n] <= '9' {
        n++
    }
    if n == len(b) {
        // Digits only. Zero digits means the input was "" or "-".
        return n > 0
    }

    rest := b[n+1:]
    switch b[n] {
    case '.':
        if n == 0 {
            // No integer digits, so the fraction has to supply the digit
            // that makes this a number; this rejects "." and ".e5".
            return len(rest) > 0 && rest[0] >= '0' && rest[0] <= '9' && fractional(rest)
        }
        if len(rest) > 0 && (rest[0] == 'e' || rest[0] == 'E') {
            // "1.e5": empty fraction directly followed by an exponent.
            return scientific(rest[1:])
        }
        return fractional(rest)
    case 'e', 'E':
        // Exponent straight after the integer digits, e.g. "12e5".
        return n > 0 && scientific(rest)
    }
    return false
}

// fractional validates what follows the decimal point: zero or more ASCII
// digits, optionally followed by an exponent. An exponent marker is only
// accepted after at least one fraction digit; empty input is valid, which is
// what lets "123." pass.
func fractional(b []byte) bool {
    for i, c := range b {
        switch {
        case c >= '0' && c <= '9':
            // Keep scanning fraction digits.
        case c == 'e' || c == 'E':
            return i > 0 && scientific(b[i+1:])
        default:
            return false
        }
    }

    return true
}

// scientific validates what follows the exponent marker: an optional '+' or
// '-' sign followed by at least one ASCII digit.
func scientific(b []byte) bool {
    if len(b) > 0 && (b[0] == '-' || b[0] == '+') {
        b = b[1:]
    }
    if len(b) == 0 {
        // "1e", "1e-" and "1e+" have no exponent digits.
        return false
    }
    for _, c := range b {
        if c < '0' || c > '9' {
            return false
        }
    }

    return true
}

// v is one row of a table-driven test: an input and the verdict expected
// for it.
type v struct {
    // Input is the raw byte slice handed to the function under test.
    Input    []byte
    // Expected is the result the function under test must return for Input.
    Expected bool
}

// TestPermutations runs Validate over a table of well-formed and malformed
// inputs and reports every entry whose verdict differs from the expected one.
func TestPermutations(t *testing.T) {
    cases := []v{
        {Input: []byte("123.456"), Expected: true},
        {Input: []byte("123"), Expected: true},
        {Input: []byte("123."), Expected: true},
        {Input: []byte(".123"), Expected: true},
        {Input: []byte("12.1e12"), Expected: true},
        {Input: []byte("12.1e-12"), Expected: true},
        {Input: []byte("-123.456"), Expected: true},
        {Input: []byte("-123"), Expected: true},
        {Input: []byte("-123."), Expected: true},
        {Input: []byte("-.123"), Expected: true},
        {Input: []byte("-12.1e12"), Expected: true},
        {Input: []byte("-12.1e-12"), Expected: true},
        {Input: []byte(".1e-12"), Expected: true},
        {Input: []byte(".e-12"), Expected: false},
        {Input: []byte(".e"), Expected: false},
        {Input: []byte("e"), Expected: false},
        {Input: []byte("abcdef"), Expected: false},
        {Input: []byte("-"), Expected: false},
        {Input: []byte("."), Expected: false},
    }

    for i := range cases {
        tc := &cases[i]
        if Validate(tc.Input) == tc.Expected {
            continue
        }
        t.Errorf("could not handle case %s", tc.Input)
    }
}

BenchmarkValidate-8     100000000           13.0 ns/op         0 B/op          0 allocs/op

// BenchmarkValidate times Validate on a fixed decimal input and aborts if the
// input is ever rejected.
func BenchmarkValidate(b *testing.B) {
    input := []byte("15.1234567890")

    for remaining := b.N; remaining > 0; remaining-- {
        if Validate(input) {
            continue
        }
        log.Fatalf("problem")
    }
}