Golang concurrent read and write on request headers

Go env:

GOARCH="amd64"
GOBIN="/root/"
GOEXE=""
GOHOSTARCH="amd64"
GOHOSTOS="linux"
GOOS="linux"
GOPATH="/data/workspace/kubernetes"
GORACE=""
GOROOT="/usr/local/go"
GOTOOLDIR="/usr/local/go/pkg/tool/linux_amd64"
GO15VENDOREXPERIMENT="1"
CC="gcc"
GOGCCFLAGS="-fPIC -m64 -pthread -fmessage-length=0"
CXX="g++"
CGO_ENABLED="1"

Go version:

go version go1.6.3 linux/amd64

This problem happened on a heavily loaded kube-apiserver in our performance test environment. The kube-apiserver crashed and exited:

fatal error: concurrent map read and map write

goroutine 77930636 [running]:
runtime.throw(0x2f4c4c0, 0x21)
    /root/.gvm/gos/go1.6.3/src/runtime/panic.go:547 +0x90 fp=0xca67b477f0     sp=0xca67b477d8
runtime.mapaccess1_faststr(0x2a8e520, 0xc9e29000f0, 0x2c11220, 0xa, 0x433e360)
    /root/.gvm/gos/go1.6.3/src/runtime/hashmap_fast.go:202 +0x5b fp=0xca67b47850 sp=0xca67b477f0
k8s.io/kubernetes/pkg/httplog.(*respLogger).Log(0xcbddf2ae70)
       /data/gerrit/src/k8s.io/kubernetes/_output/local/go/src/k8s.io/kubernetes/pkg/httplog/log.go:180 +0x43d fp=0xca67b47af8 sp=0xca67b47850
k8s.io/kubernetes/pkg/apiserver.RecoverPanics.func1(0x7f099f157090, 0xcbddf2ae70, 0xcd7569e380)
     /data/gerrit/src/k8s.io/kubernetes/_output/local/go/src/k8s.io/kubernetes/pkg/apiserver/handlers.go:174 +0x15d fp=0xca67b47b50 sp=0xca67b47af8
    net/http.HandlerFunc.ServeHTTP(0xc821a4eac0, 0x7f099f157058, 0xca0f4eb450, 0xcd7569e380)
    /root/.gvm/gos/go1.6.3/src/net/http/server.go:1618 +0x3a fp=0xca67b47b70 sp=0xca67b47b50
net/http.serverHandler.ServeHTTP(0xc8215a7b80, 0x7f099f157058, 0xca0f4eb450, 0xcd7569e380)
    /root/.gvm/gos/go1.6.3/src/net/http/server.go:2081 +0x19e fp=0xca67b47bd0 sp=0xca67b47b70
net/http.(*conn).serve(0xc8b5d6b980)
    /root/.gvm/gos/go1.6.3/src/net/http/server.go:1472 +0xf2e fp=0xca67b47f98 sp=0xca67b47bd0
runtime.goexit()
    /root/.gvm/gos/go1.6.3/src/runtime/asm_amd64.s:1998 +0x1 fp=0xca67b47fa0 sp=0xca67b47f98
created by net/http.(*Server).Serve
    /root/.gvm/gos/go1.6.3/src/net/http/server.go:2137 +0x44e
The corresponding source code:

pkg/apiserver/handlers.go

145 func RecoverPanics(handler http.Handler) http.Handler {
146 return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
147 defer func() {
148     if x := recover(); x != nil {
149         http.Error(w, "apis panic. Look in log for details.", http.StatusInternalServerError)
150         glog.Errorf("APIServer panic'd on %v %v: %v\n%s\n", req.Method, req.RequestURI, x, debug.Stack())
151     }
152 }()
153 defer httplog.NewLogged(req, &w).StacktraceWhen(
        httplog.StatusIsNot(
            http.StatusOK,
            http.StatusCreated,
            http.StatusAccepted,
            http.StatusBadRequest,
            http.StatusMovedPermanently,
            http.StatusTemporaryRedirect,
            http.StatusConflict,
            http.StatusNotFound,
            http.StatusUnauthorized,
            http.StatusForbidden,
            errors.StatusUnprocessableEntity,
            http.StatusSwitchingProtocols,
            http.StatusRequestTimeout,
            errors.StatusTooManyRequests,
        ),
170 ).Log()

    // Dispatch to the internal handler
    handler.ServeHTTP(w, req)
174 })
}
pkg/httplog/log.go:

159 func (rl *respLogger) Log() {
160 latency := time.Since(rl.startTime)
161 if glog.V(2) {
162     extraInfo := ""
163     if latency >= time.Millisecond*200 && latency < time.Second {
        extraInfo = fmt.Sprintf("%d00.Millisecond", latency/(time.Millisecond*100))
    } else if latency >= time.Second && latency < time.Minute { // Warning
        extraInfo = fmt.Sprintf("%d.Second", latency/(time.Second))
    } else if latency >= time.Minute { // nce will timeout
        extraInfo = fmt.Sprintf("%d.Minutes", latency/(time.Minute))
    }
    method := rl.req.Method
    if len(rl.req.Header["Detailed-Method"]) > 0 {
        method = rl.req.Header["Detailed-Method"][0]
    }

    remoteIP := rl.getXForwardIPAdress(rl.req)


    if !rl.hijacked {
        //glog.InfoDepth(1, fmt.Sprintf("%s %s: (%v) %v%v%v [%s %s]", rl.req.Method, rl.req.RequestURI, latency, rl.status, rl.statusStack, rl.addedInfo, rl.req.Header["User-Agent"], rl.req.RemoteAddr))
180         glog.InfoDepth(1, fmt.Sprintf("%v %s %s: (%sms) %v%v [%s %s]-%s %v", rl.req.Header["X-Requestid"], method, rl.req.RequestURI, GetMilliLatency(latency), rl.status, rl.addedInfo, rl.req.Header["User-Agent"], remoteIP, extraInfo, rl.statusStack))
    } else {
        //glog.InfoDepth(1, fmt.Sprintf("%s %s: (%v) hijacked [%s %s]", rl.req.Method, rl.req.RequestURI, latency, rl.req.Header["User-Agent"], rl.req.RemoteAddr))
        glog.InfoDepth(1, fmt.Sprintf("%v %s %s: (%sms) hijacked [%s %s]-%s", rl.req.Header["X-Requestid"], method, rl.req.RequestURI, GetMilliLatency(latency), rl.req.Header["User-Agent"], remoteIP, extraInfo))
    }
    }
}

In handler.ServeHTTP, I modify request.Header. I cannot figure out the cause:

(1) "ServeHTTP" and the deferred "Log" run serially, so there should be no "concurrent read and write".

(2) Even if there were a "concurrent read and write", the recover function is there to handle the panic, so kube-apiserver should not exit.

This problem has cost me a lot of time. Can anyone help? Thanks.
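For what it is worth, both points can be probed with a tiny standalone program. The following is only a minimal sketch, not the apiserver code: two goroutines sharing a single http.Header are enough to produce exactly this fatal error, and the deferred recover() never fires, because the runtime reports concurrent map misuse via throw() rather than panic().

package main

import (
    "fmt"
    "net/http"
)

func main() {
    // http.Header is just a map[string][]string; nothing about it is goroutine-safe.
    h := http.Header{}
    h.Set("User-Agent", "demo")

    // Writer goroutine: keeps mutating the header map, the way a handler
    // that adds a custom header would.
    go func() {
        for i := 0; ; i++ {
            h["Detailed-Method"] = []string{fmt.Sprintf("GET-%d", i)}
        }
    }()

    // Reader on the main goroutine, with a recover() in place.
    defer func() {
        // This never runs: the runtime reports the concurrent access with
        // throw(), which is a fatal error rather than a panic, so it cannot
        // be recovered and the whole process exits.
        if x := recover(); x != nil {
            fmt.Println("recovered:", x)
        }
    }()
    for {
        _ = h["User-Agent"]
    }
}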

I'm guessing you have already realized that the global timeout handler cannot actually stop the goroutines that are processing a request. So when a connection times out (unless you have changed that), it returns an error. If the goroutine happens to finish at about the same time, it can also try to write to the connection. I thought we had added a lock to keep that from causing problems, but perhaps the headers are not always protected by that lock. If you can reproduce this on a clean source tree, please file an issue on the Kubernetes GitHub repo.
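To make that mechanism concrete, here is a rough, hypothetical sketch with illustrative names; it is not the actual apiserver timeout handler. Once the timeout fires, the wrapper answers the client on its own goroutine while the inner handler goroutine keeps running, so both can touch the same *http.Request and its Header map; the lock mentioned above would guard the write to the connection, not the header map.

package main

import (
    "net/http"
    "time"
)

// withTimeout is a hypothetical sketch of a global timeout wrapper, loosely
// modeled on the mechanism described above.
func withTimeout(inner http.Handler, d time.Duration) http.Handler {
    return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
        done := make(chan struct{})
        go func() {
            inner.ServeHTTP(w, req) // keeps running even after the timeout fires
            close(done)
        }()
        select {
        case <-done:
        case <-time.After(d):
            // Timeout path: this goroutine reads req.Header (the deferred
            // respLogger.Log performs the same kind of read) while the inner
            // handler goroutine may still be writing to the very same map.
            _ = req.Header["User-Agent"]
            http.Error(w, "timeout", http.StatusGatewayTimeout)
        }
    })
}

func main() {
    slow := http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
        time.Sleep(2 * time.Second) // outlives the timeout below
        // The kind of header write resthandler.go performs during processing.
        req.Header["Detailed-Method"] = []string{"GET-pods"}
    })
    _ = http.ListenAndServe(":8080", withTimeout(slow, time.Second))
}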

Would the race detector help? Thanks for your help. I built kube-apiserver with the -race option and ran it, but nothing happened, no "data race" reports. "Due to its design, the race detector can only detect race conditions when they are actually triggered by running code, which means it is important to run race-enabled binaries under realistic workloads." Sorry, only noticed that just now. This panic has happened only once, in our performance test environment, and it is not easy to reproduce. I suspected there was some bug in the glog implementation. Also, a fatal error is not a panic: a panic can be handled by control flow, but a fatal error crashes the executable.

Thanks for your help. But the global timeout handler does not read or change the request headers when a timeout happens, and the place where the "concurrent map read and map write" occurred only performs a "read" operation. In resthandler.go, I add a custom header while handling the request: req.Request.Header["Detailed-Method"] = []string{"GET-" + scope.Resource.Resource}. This is just for operational convenience, because the method printed in the log follows the HTTP standard rather than the k8s convention. But I do not think that is the cause (they run serially). Maybe I should not modify the request inside the handler; that could cause some subtle problems.
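If the custom method string really has to survive until logging, one alternative to mutating req.Header is to carry it as a request-scoped value instead. This is only a sketch under the assumption of a newer Go release (request contexts arrived in Go 1.7, so it is not available on go1.6.3), with hypothetical names:

package main

import (
    "context"
    "fmt"
    "net/http"
)

// ctxKey is an unexported key type for request-scoped values (hypothetical helper).
type ctxKey string

const detailedMethodKey ctxKey = "detailed-method"

// tagDetailedMethod derives a new request that carries the custom "detailed
// method" as a context value instead of writing to the shared Header map.
func tagDetailedMethod(next http.Handler) http.Handler {
    return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
        ctx := context.WithValue(req.Context(), detailedMethodKey, "GET-"+req.URL.Path)
        next.ServeHTTP(w, req.WithContext(ctx))
    })
}

func main() {
    logged := http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
        // The logging side reads the value from the context; no goroutine
        // ever writes to req.Header, so there is nothing to race on.
        if m, ok := req.Context().Value(detailedMethodKey).(string); ok {
            fmt.Fprintln(w, "detailed method:", m)
        }
    })
    _ = http.ListenAndServe(":8080", tagDetailedMethod(logged))
}

On go1.6.3 the same idea could be approximated with a small per-request wrapper struct protected by a sync.Mutex, so that the shared header map itself is never written once other goroutines may be reading it.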