Golang程序内存泄漏
我正在尝试用 Go 编写一个简单的程序,其行为类似于 find | grep。整个程序都通过 goroutine 工作,使用以下模式:
标签:go
goroutine(filech…(原文的模式说明在此处被截断)
根据 @JimB 对我的问题的评论,我发现这并不是内存泄漏问题,而是并发不受限制的问题。我的原始代码每遇到一个文件就为它启动一次 grep,没有任何限制。我可以通过限制任意时刻交给 grep 的已打开文件数量来解决这个问题。做法参照链接中提供的示例:创建一个只能容纳有限数量消息的信号量通道;在打开文件之前向该通道写入一个值,在搜索完该文件后再从该通道读出一个值;最后,等待信号量通道再次被填满。以下是与我那段有问题的原始代码相对应的可工作代码(相关部分请参见 grepConcurrencyLimit 和 semaphoreChan):
package main

import (
	"flag"
	"fmt"
	"io/ioutil"
	"os"
	"regexp"
	"runtime/pprof"
	"strings"
	"sync"
)
var (
	topDir               string
	cProf                bool
	mProf                bool
	cProfFile            *os.File
	mProfFile            *os.File
	fileNames            []string
	fileTypes            []string
	fileLists            map[string][]string
	grepConcurrencyLimit int
	cMatch               = regexp.MustCompile(`(?i)^.*\.(?:c|h|cc|cpp|c\+\+|hpp)$`)
	javaMatch            = regexp.MustCompile(`(?i)^.*\.(?:java|js)$`)
	goMatch              = regexp.MustCompile(`(?i)^.*\.(?:go)$`)
	buildMatch           = regexp.MustCompile(`(?i)^.*\.(?:gradle|mk|mka)$`)
	buildMatch2          = regexp.MustCompile(`^.*/(?:Makefile[^/\\]*)$`)
	regMatch             = regexp.MustCompile(`(?i)(?:test|debug)`)
)
func init() {
	fileLists = make(map[string][]string)
}
func main() {
	flag.StringVar(&topDir, "d", ".", "The top level directory to process (default is current directory)")
	flag.IntVar(&grepConcurrencyLimit, "l", 50, "The limit of number of files to grep at any one time")
	flag.BoolVar(&cProf, "c", false, "Include if you want to save the CPU profile")
	flag.BoolVar(&mProf, "m", false, "Include if you want to save the MEM profile")
	flag.Parse()
	cProfFunc()
	getFilesChan := make(chan string, 1000)
	grepFilesChan := make(chan string, 100)
	// This channel is to ensure that only grepConcurrencyLimit files are ever grepped at any one time
	semaphoreChan := make(chan bool, grepConcurrencyLimit)
	go getFileNamesOverChan(topDir, getFilesChan)
	var fileResult string
	var grepWg sync.WaitGroup
	var categorizeWg sync.WaitGroup
	fileTypes = append(fileTypes, "C", "Java", "Go", "Build", "Uncategorized")
	categorizeWg.Add(1)
	go func(chan string) {
		var grepResult string
		for grepResult = range grepFilesChan {
			if grepResult != "" {
				fmt.Printf("Found file %s with text\n", grepResult)
				var fileType = getFileCategory(grepResult)
				fileLists[fileType] = append(fileLists[fileType], grepResult)
			}
		}
		categorizeWg.Done()
	}(grepFilesChan)
	for fileResult = range getFilesChan {
		if fileResult != "" {
			fileNames = append(fileNames, fileResult)
			grepWg.Add(1)
			// write a boolean to semaphoreChan to take up one of the concurrency limit spots
			semaphoreChan <- true

你是如何衡量代码是否泄漏内存的?很可能是你的衡量方式有误。你知道自己正试图把所有文件同时读入内存吗?这会消耗大量内存,我并不奇怪(而且这不是泄漏)。另外,你似乎是在重新发明 filepath.Walk。

我知道我是把整个文件读入内存,但我以为由于 grepFilesChan 有缓冲区,同一时刻最多只会读入 100 个文件,并且我期望各个 goroutine 结束后内存就会被释放。我之所以重写 filepath.Walk,是因为我想直接进行过滤,而在 filepath.Walk 中没有看到这个功能。我是通过在程序运行时观察 top 来测量的:已用内存不断增长,直到程序耗尽所有内存并崩溃为止都不会下降。欢迎提出其他定位问题的建议,但这不是我预期的程序行为。我猜测,在调用 grepOverChan 的 goroutine 中读入完整文件的字节切片从未被 GC 回收。我只是无法确认这一点,也不知道自己做错了什么才导致这种情况。

这里没有内存泄漏。你是在毫无限制地并发处理文件:试图同时读取每个文件的全部内容,再把这些巨大的数据块交给正则表达式。一旦出现任何积压,一切都会变慢,问题还会进一步恶化。你需要限制并发度,并且不要一次读入每个文件的全部内容。
package main
import (
"flag"
"fmt"
"io/ioutil"
"os"
"regexp"
"runtime/pprof"
"strings"
"sync"
)
// Command-line options (bound to flags in main) and the profile files they
// control.
var (
	topDir               string // -d: root directory to scan
	cProf, mProf         bool   // -c / -m: write a CPU / heap profile
	cProfFile, mProfFile *os.File
)

// Results accumulated while the program runs.
var (
	// fileNames holds every file path received from the directory walk;
	// fileTypes holds the category labels in the order the summary prints them.
	fileNames, fileTypes []string
	// fileLists maps a category label to the matching file paths.
	fileLists map[string][]string
)

// Patterns used to put a file path into a category, plus the content pattern
// every file is searched for.
var (
	cMatch      = regexp.MustCompile(`(?i)^.*\.(?:c|h|cc|cpp|c\+\+|hpp)$`)
	javaMatch   = regexp.MustCompile(`(?i)^.*\.(?:java|js)$`)
	goMatch     = regexp.MustCompile(`(?i)^.*\.(?:go)$`)
	buildMatch  = regexp.MustCompile(`(?i)^.*\.(?:gradle|mk|mka)$`)
	buildMatch2 = regexp.MustCompile(`^.*/(?:Makefile[^/\\]*)$`)
	// regMatch is the text searched for inside each file.
	regMatch = regexp.MustCompile(`(?i)(?:test|debug)`)
)
// init allocates fileLists so that entries can be appended per category
// without hitting a nil-map write.
func init() {
	fileLists = map[string][]string{}
}
// main parses the command-line flags, walks the tree rooted at -d, searches
// every file found for regMatch and prints a per-category summary.
//
// Three stages are connected by channels: getFileNamesOverChan feeds file
// paths into getFilesChan, one goroutine per path runs grepOverChan and
// reports matches on grepFilesChan, and a single collector goroutine files
// each match under its category in fileLists.
func main() {
	flag.StringVar(&topDir, "d", ".", "The top level directory to process (default is current directory)")
	flag.BoolVar(&cProf, "c", false, "Include if you want to save the CPU profile")
	flag.BoolVar(&mProf, "m", false, "Include if you want to save the MEM profile")
	flag.Parse()

	cProfFunc()
	if cProf {
		// Deferred calls run last-in-first-out: register Close first so that
		// StopCPUProfile runs first and can flush the profile while the file
		// is still open.
		defer cProfFile.Close()
		defer pprof.StopCPUProfile()
	}

	getFilesChan := make(chan string, 1000)
	grepFilesChan := make(chan string, 100)
	go getFileNamesOverChan(topDir, getFilesChan)

	var grepWg sync.WaitGroup
	var categorizeWg sync.WaitGroup
	fileTypes = append(fileTypes, "C", "Java", "Go", "Build", "Uncategorized")

	// Collector: the only goroutine that writes fileLists.
	categorizeWg.Add(1)
	go func() {
		defer categorizeWg.Done()
		for grepResult := range grepFilesChan {
			if grepResult == "" {
				continue
			}
			fmt.Printf("Found file %s with text\n", grepResult)
			fileType := getFileCategory(grepResult)
			fileLists[fileType] = append(fileLists[fileType], grepResult)
		}
	}()

	// NOTE(review): one goroutine is started per file with no upper bound, so
	// an arbitrary number of files can be held in memory at the same time.
	// Consider a semaphore or a fixed worker pool if memory use matters.
	for fileResult := range getFilesChan {
		if fileResult == "" {
			continue
		}
		fileNames = append(fileNames, fileResult)
		grepWg.Add(1)
		go func(file string, ch chan string) {
			// Deferred so the WaitGroup is released even if the grep panics.
			defer grepWg.Done()
			fmt.Printf("Grepping file %s\n", file)
			grepOverChan(file, ch)
		}(fileResult, grepFilesChan)
	}

	// All senders must be finished before the results channel is closed.
	grepWg.Wait()
	close(grepFilesChan)
	categorizeWg.Wait()

	printSummary()
	mProfFunc()
}
// cProfFunc starts CPU profiling into cpu_profile.pprof when cProf is set.
//
// On success the open file is stored in cProfFile; the caller is responsible
// for stopping the profile and closing the file. Failures are reported on
// stderr and leave cProfFile untouched, so the program simply runs without a
// CPU profile.
func cProfFunc() {
	if !cProf {
		return
	}
	f, err := os.Create("cpu_profile.pprof")
	if err != nil {
		fmt.Fprintf(os.Stderr, "creating CPU profile: %v\n", err)
		return
	}
	if err := pprof.StartCPUProfile(f); err != nil {
		fmt.Fprintf(os.Stderr, "starting CPU profile: %v\n", err)
		f.Close() // nothing will ever be written to it
		return
	}
	cProfFile = f
}
// mProfFunc writes a heap profile to mem_profile.pprof when mProf is set.
//
// The file is stored in mProfFile and closed before the function returns.
// Create, write and close failures are reported on stderr instead of being
// silently discarded.
func mProfFunc() {
	if !mProf {
		return
	}
	f, err := os.Create("mem_profile.pprof")
	if err != nil {
		fmt.Fprintf(os.Stderr, "creating memory profile: %v\n", err)
		return
	}
	mProfFile = f
	defer func() {
		// A failed Close can mean the profile was not fully written.
		if err := f.Close(); err != nil {
			fmt.Fprintf(os.Stderr, "closing memory profile: %v\n", err)
		}
	}()
	if err := pprof.WriteHeapProfile(f); err != nil {
		fmt.Fprintf(os.Stderr, "writing memory profile: %v\n", err)
	}
}
// printSummary reports how many files were processed and, for each category
// in fileTypes (in that order), how many of them contained the search text.
func printSummary() {
	processed := len(fileNames)
	fmt.Printf("\n\nProcessed %d Files\n\n", processed)
	fmt.Println("")
	fmt.Println("Found text in the following files:")
	for i := range fileTypes {
		category := fileTypes[i]
		matches := fileLists[category]
		fmt.Printf("Found text in %d %s Files\n", len(matches), category)
	}
	/*
		Disabled: detailed listing of every matching file per category.

		for _, fType := range fileTypes {
			if len(fileLists[fType]) > 0 {
				fmt.Println("")
				fmt.Printf("\t%s Files:\n", fType)
			}
			for _, fileName := range fileLists[fType] {
				fmt.Printf("\t\t%s\n", fileName)
			}
		}
	*/
}
// getFileNamesOverChan sends the path of every eligible file below directory
// on ch and closes ch when it is done.
//
// ch is closed on every path, including failures, so a consumer ranging over
// it always terminates. If directory cannot be inspected or is not a
// directory, the reason is reported on stderr and nothing is sent. The check
// uses Lstat, so a symbolic link to a directory is not followed.
func getFileNamesOverChan(directory string, ch chan string) {
	defer close(ch)

	fmt.Printf("Finding files in directory %s\n", directory)
	dirInfo, err := os.Lstat(directory)
	if err != nil {
		fmt.Fprintf(os.Stderr, "cannot scan directory: %v\n", err)
		return
	}
	if !dirInfo.IsDir() {
		fmt.Fprintf(os.Stderr, "%s is not a directory (symbolic links are not followed)\n", directory)
		return
	}
	recursiveGetFilesOverChan(directory, ch)
}
// recursiveGetFilesOverChan walks dir depth-first and sends the path of every
// regular file on ch, skipping anything inside a CVS or .git directory.
//
// Directories that cannot be opened or fully read are reported on stderr and
// the walk continues with whatever entries were obtained. Symbolic links,
// named pipes and other non-regular entries are ignored. ch is never closed
// here; that is left to the caller.
func recursiveGetFilesOverChan(dir string, ch chan string) {
	dirFile, err := os.Open(dir)
	if err != nil {
		fmt.Fprintf(os.Stderr, "skipping directory: %v\n", err)
		return
	}
	// Read the complete listing and release the handle before descending, so
	// only one directory is open at a time however deep the tree is.
	entries, err := dirFile.Readdir(0)
	dirFile.Close()
	if err != nil {
		// Readdir still returns the entries it read before the error.
		fmt.Fprintf(os.Stderr, "reading directory %s: %v\n", dir, err)
	}

	for _, entry := range entries {
		name := entry.Name()
		filePath := fmt.Sprintf("%s%c%s", dir, os.PathSeparator, name)
		switch mode := entry.Mode(); {
		case mode.IsDir():
			// Prune version-control directories by name instead of walking
			// them only to discard every file; this also works where the
			// path separator is not "/".
			if name == "CVS" || name == ".git" {
				continue
			}
			recursiveGetFilesOverChan(filePath, ch)
		case mode.IsRegular():
			// Kept for the case where dir itself already lies inside a CVS
			// or .git directory.
			if !strings.Contains(filePath, "/CVS/") && !strings.Contains(filePath, "/.git/") {
				fmt.Printf("Found File %s\n", filePath)
				ch <- filePath
			}
		}
	}
}
// getFileCategory returns the category label for a file path: "C", "Java",
// "Go" or "Build" for the first pattern that matches, tried in that order,
// and "Uncategorized" when none does.
func getFileCategory(file string) string {
	if cMatch.MatchString(file) {
		return "C"
	}
	if javaMatch.MatchString(file) {
		return "Java"
	}
	if goMatch.MatchString(file) {
		return "Go"
	}
	// Build files are recognised either by extension or by a Makefile name.
	if buildMatch.MatchString(file) || buildMatch2.MatchString(file) {
		return "Build"
	}
	return "Uncategorized"
}
// grepOverChan reads file f and sends its path on ch when the content matches
// regMatch. Nothing is sent for a file that does not match or cannot be read;
// a read failure is reported on stderr.
func grepOverChan(f string, ch chan string) {
	// NOTE(review): the whole file is loaded into memory before matching, so
	// peak memory grows with file size times the number of concurrent calls.
	fileBytes, err := ioutil.ReadFile(f)
	if err != nil {
		fmt.Fprintf(os.Stderr, "skipping unreadable file: %v\n", err)
		return
	}
	if regMatch.Match(fileBytes) {
		ch <- f
	}
}
package main
import (
"flag"
"fmt"
"io/ioutil"
"os"
"regexp"
"runtime/pprof"
"strings"
"sync"
)
// Command-line options (bound to flags in main) and the profile files they
// control.
var (
	topDir               string // -d: root directory to scan
	cProf, mProf         bool   // -c / -m: write a CPU / heap profile
	cProfFile, mProfFile *os.File
	grepConcurrencyLimit int // -l: how many files may be grepped at once
)

// Results accumulated while the program runs.
var (
	// fileNames holds every file path received from the directory walk;
	// fileTypes holds the category labels in the order the summary prints them.
	fileNames, fileTypes []string
	// fileLists maps a category label to the matching file paths.
	fileLists map[string][]string
)

// Patterns used to put a file path into a category, plus the content pattern
// every file is searched for.
var (
	cMatch      = regexp.MustCompile(`(?i)^.*\.(?:c|h|cc|cpp|c\+\+|hpp)$`)
	javaMatch   = regexp.MustCompile(`(?i)^.*\.(?:java|js)$`)
	goMatch     = regexp.MustCompile(`(?i)^.*\.(?:go)$`)
	buildMatch  = regexp.MustCompile(`(?i)^.*\.(?:gradle|mk|mka)$`)
	buildMatch2 = regexp.MustCompile(`^.*/(?:Makefile[^/\\]*)$`)
	// regMatch is the text searched for inside each file.
	regMatch = regexp.MustCompile(`(?i)(?:test|debug)`)
)
// init allocates fileLists so that entries can be appended per category
// without hitting a nil-map write.
func init() {
	fileLists = map[string][]string{}
}
// main parses the command-line flags, walks the tree rooted at -d, searches
// every file found for regMatch and prints a per-category summary.
//
// Three stages are connected by channels: getFileNamesOverChan feeds file
// paths into getFilesChan, at most grepConcurrencyLimit goroutines at a time
// run grepOverChan and report matches on grepFilesChan, and a single
// collector goroutine files each match under its category in fileLists.
func main() {
	flag.StringVar(&topDir, "d", ".", "The top level directory to process (default is current directory)")
	flag.IntVar(&grepConcurrencyLimit, "l", 50, "The limit of number of files to grep at any one time")
	flag.BoolVar(&cProf, "c", false, "Include if you want to save the CPU profile")
	flag.BoolVar(&mProf, "m", false, "Include if you want to save the MEM profile")
	flag.Parse()

	// A limit of 0 would create an unbuffered semaphore whose first send can
	// never complete, and a negative limit makes make() panic.
	if grepConcurrencyLimit < 1 {
		fmt.Fprintf(os.Stderr, "invalid value %d for -l: the limit must be at least 1\n", grepConcurrencyLimit)
		os.Exit(2)
	}

	cProfFunc()
	if cProf {
		// Deferred calls run last-in-first-out: register Close first so that
		// StopCPUProfile runs first and can flush the profile while the file
		// is still open.
		defer cProfFile.Close()
		defer pprof.StopCPUProfile()
	}

	getFilesChan := make(chan string, 1000)
	grepFilesChan := make(chan string, 100)
	// Counting semaphore: a slot is taken before a grep goroutine is started
	// and given back when it finishes, so at most grepConcurrencyLimit files
	// are being grepped at any one time.
	semaphoreChan := make(chan struct{}, grepConcurrencyLimit)
	go getFileNamesOverChan(topDir, getFilesChan)

	var grepWg sync.WaitGroup
	var categorizeWg sync.WaitGroup
	fileTypes = append(fileTypes, "C", "Java", "Go", "Build", "Uncategorized")

	// Collector: the only goroutine that writes fileLists.
	categorizeWg.Add(1)
	go func() {
		defer categorizeWg.Done()
		for grepResult := range grepFilesChan {
			if grepResult == "" {
				continue
			}
			fmt.Printf("Found file %s with text\n", grepResult)
			fileType := getFileCategory(grepResult)
			fileLists[fileType] = append(fileLists[fileType], grepResult)
		}
	}()

	for fileResult := range getFilesChan {
		if fileResult == "" {
			continue
		}
		fileNames = append(fileNames, fileResult)
		grepWg.Add(1)
		// Blocks here while grepConcurrencyLimit greps are already running.
		semaphoreChan <- struct{}{}
		go func(file string, ch chan string) {
			// Both are deferred so the slot and the WaitGroup are released
			// even if the grep panics.
			defer grepWg.Done()
			defer func() { <-semaphoreChan }()
			fmt.Printf("Grepping file %s\n", file)
			grepOverChan(file, ch)
		}(fileResult, grepFilesChan)
	}

	// grepWg alone is enough to wait for the remaining greps; refilling the
	// semaphore to capacity waited for exactly the same goroutines.
	grepWg.Wait()
	close(grepFilesChan)
	categorizeWg.Wait()

	printSummary()
	mProfFunc()
}
// cProfFunc starts CPU profiling into cpu_profile.pprof when cProf is set.
//
// On success the open file is stored in cProfFile; the caller is responsible
// for stopping the profile and closing the file. Failures are reported on
// stderr and leave cProfFile untouched, so the program simply runs without a
// CPU profile.
func cProfFunc() {
	if !cProf {
		return
	}
	f, err := os.Create("cpu_profile.pprof")
	if err != nil {
		fmt.Fprintf(os.Stderr, "creating CPU profile: %v\n", err)
		return
	}
	if err := pprof.StartCPUProfile(f); err != nil {
		fmt.Fprintf(os.Stderr, "starting CPU profile: %v\n", err)
		f.Close() // nothing will ever be written to it
		return
	}
	cProfFile = f
}
// mProfFunc writes a heap profile to mem_profile.pprof when mProf is set.
//
// The file is stored in mProfFile and closed before the function returns.
// Create, write and close failures are reported on stderr instead of being
// silently discarded.
func mProfFunc() {
	if !mProf {
		return
	}
	f, err := os.Create("mem_profile.pprof")
	if err != nil {
		fmt.Fprintf(os.Stderr, "creating memory profile: %v\n", err)
		return
	}
	mProfFile = f
	defer func() {
		// A failed Close can mean the profile was not fully written.
		if err := f.Close(); err != nil {
			fmt.Fprintf(os.Stderr, "closing memory profile: %v\n", err)
		}
	}()
	if err := pprof.WriteHeapProfile(f); err != nil {
		fmt.Fprintf(os.Stderr, "writing memory profile: %v\n", err)
	}
}
// printSummary reports how many files were processed and, for each category
// in fileTypes (in that order), how many of them contained the search text.
func printSummary() {
	processed := len(fileNames)
	fmt.Printf("\n\nProcessed %d Files\n\n", processed)
	fmt.Println("")
	fmt.Println("Found text in the following files:")
	for i := range fileTypes {
		category := fileTypes[i]
		matches := fileLists[category]
		fmt.Printf("Found text in %d %s Files\n", len(matches), category)
	}
	/*
		Disabled: detailed listing of every matching file per category.

		for _, fType := range fileTypes {
			if len(fileLists[fType]) > 0 {
				fmt.Println("")
				fmt.Printf("\t%s Files:\n", fType)
			}
			for _, fileName := range fileLists[fType] {
				fmt.Printf("\t\t%s\n", fileName)
			}
		}
	*/
}
// getFileNamesOverChan sends the path of every eligible file below directory
// on ch and closes ch when it is done.
//
// ch is closed on every path, including failures, so a consumer ranging over
// it always terminates. If directory cannot be inspected or is not a
// directory, the reason is reported on stderr and nothing is sent. The check
// uses Lstat, so a symbolic link to a directory is not followed.
func getFileNamesOverChan(directory string, ch chan string) {
	defer close(ch)

	fmt.Printf("Finding files in directory %s\n", directory)
	dirInfo, err := os.Lstat(directory)
	if err != nil {
		fmt.Fprintf(os.Stderr, "cannot scan directory: %v\n", err)
		return
	}
	if !dirInfo.IsDir() {
		fmt.Fprintf(os.Stderr, "%s is not a directory (symbolic links are not followed)\n", directory)
		return
	}
	recursiveGetFilesOverChan(directory, ch)
}
// recursiveGetFilesOverChan walks dir depth-first and sends the path of every
// regular file on ch, skipping anything inside a CVS or .git directory.
//
// Directories that cannot be opened or fully read are reported on stderr and
// the walk continues with whatever entries were obtained. Symbolic links,
// named pipes and other non-regular entries are ignored. ch is never closed
// here; that is left to the caller.
func recursiveGetFilesOverChan(dir string, ch chan string) {
	dirFile, err := os.Open(dir)
	if err != nil {
		fmt.Fprintf(os.Stderr, "skipping directory: %v\n", err)
		return
	}
	// Read the complete listing and release the handle before descending, so
	// only one directory is open at a time however deep the tree is.
	entries, err := dirFile.Readdir(0)
	dirFile.Close()
	if err != nil {
		// Readdir still returns the entries it read before the error.
		fmt.Fprintf(os.Stderr, "reading directory %s: %v\n", dir, err)
	}

	for _, entry := range entries {
		name := entry.Name()
		filePath := fmt.Sprintf("%s%c%s", dir, os.PathSeparator, name)
		switch mode := entry.Mode(); {
		case mode.IsDir():
			// Prune version-control directories by name instead of walking
			// them only to discard every file; this also works where the
			// path separator is not "/".
			if name == "CVS" || name == ".git" {
				continue
			}
			recursiveGetFilesOverChan(filePath, ch)
		case mode.IsRegular():
			// Kept for the case where dir itself already lies inside a CVS
			// or .git directory.
			if !strings.Contains(filePath, "/CVS/") && !strings.Contains(filePath, "/.git/") {
				fmt.Printf("Found File %s\n", filePath)
				ch <- filePath
			}
		}
	}
}
// getFileCategory returns the category label for a file path: "C", "Java",
// "Go" or "Build" for the first pattern that matches, tried in that order,
// and "Uncategorized" when none does.
func getFileCategory(file string) string {
	if cMatch.MatchString(file) {
		return "C"
	}
	if javaMatch.MatchString(file) {
		return "Java"
	}
	if goMatch.MatchString(file) {
		return "Go"
	}
	// Build files are recognised either by extension or by a Makefile name.
	if buildMatch.MatchString(file) || buildMatch2.MatchString(file) {
		return "Build"
	}
	return "Uncategorized"
}
// grepOverChan reads file f and sends its path on ch when the content matches
// regMatch. Nothing is sent for a file that does not match or cannot be read;
// a read failure is reported on stderr.
func grepOverChan(f string, ch chan string) {
	// NOTE(review): the whole file is loaded into memory before matching, so
	// peak memory grows with file size times the number of concurrent calls.
	fileBytes, err := ioutil.ReadFile(f)
	if err != nil {
		fmt.Fprintf(os.Stderr, "skipping unreadable file: %v\n", err)
		return
	}
	if regMatch.Match(fileBytes) {
		ch <- f
	}
}