Warning: file_get_contents(/data/phpspider/zhask/data//catemap/7/sql-server/25.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Go 为什么逐行读取文件会占用更多内存?_Go_Memory Management - Fatal编程技术网

Go 为什么逐行读取文件会占用更多内存?

Go 为什么逐行读取文件会占用更多内存?,go,memory-management,Go,Memory Management,我尝试读取以下格式的大文件: a string key, 200 values separated by comma 把它写在地图上 我写了这段代码: package main import ( "bufio" "unsafe" "fmt" "log" "os" "runtime" "strings" ) func main() { file, err := os.Open("file_address.txt") i

我尝试读取以下格式的大文件:

a string key, 200 values separated by comma
并把它写入一个 map 中

我写了这段代码:

package main

import (
    "bufio"
    "unsafe"
    "fmt"
    "log"
    "os"
    "runtime"
    "strings"
)

// main reads file_address.txt line by line, stores the first comma-separated
// field of every line as a key of a map, and then prints runtime memory
// statistics. This is the version that exhibits the high memory usage.
func main() {

    file, err := os.Open("file_address.txt")
    if err != nil {
        log.Fatal(err)
    }
    defer file.Close()

    mp := make(map[string]float32)
    // NOTE(review): total_size is never modified below, so the report prints 0
    // for it as written.
    var total_size int64 = 0
    scanner := bufio.NewScanner(file)
    var counter int64 = 0

    for scanner.Scan() {
        counter++
        // strings.Split returns substrings of the line produced by
        // scanner.Text(); they share that line's memory rather than copying it.
        sliced := strings.Split(scanner.Text(), ",")
        // Using sliced[0] directly as the key keeps the whole line reachable
        // through the map, so the garbage collector cannot reclaim it.
        mp[sliced[0]] = 2.2
    }

    // Scan returns false on both EOF and failure; Err distinguishes the two.
    if err := scanner.Err(); err != nil {
        log.Fatal(err)
    }
    fmt.Printf("loaded: %d. Took %d Mb of memory.", counter, total_size/1024.0/1024.0)
    fmt.Println("Loading finished. Now waiting...")

    // Snapshot the runtime's allocator statistics after loading.
    var ms runtime.MemStats
    runtime.ReadMemStats(&ms)

    // MemStats sizes are in bytes; dividing by 1024 twice reports them in MB.
    fmt.Printf("\n")
    fmt.Printf("Alloc: %d MB, TotalAlloc: %d MB, Sys: %d MB\n",
        ms.Alloc/1024/1024, ms.TotalAlloc/1024/1024, ms.Sys/1024/1024)
    fmt.Printf("Mallocs: %d, Frees: %d\n",
        ms.Mallocs, ms.Frees)
    fmt.Printf("HeapAlloc: %d MB, HeapSys: %d MB, HeapIdle: %d MB\n",
        ms.HeapAlloc/1024/1024, ms.HeapSys/1024/1024, ms.HeapIdle/1024/1024)
    fmt.Printf("HeapObjects: %d\n", ms.HeapObjects)
    fmt.Printf("\n")
}
以下是输出:

loaded: 544594. Took 8 Mb of memory.Loading finished. Now waiting...

Alloc: 2667 MB, TotalAlloc: 3973 MB, Sys: 2831 MB
Mallocs: 1108463, Frees: 401665
HeapAlloc: 2667 MB, HeapSys: 2687 MB, HeapIdle: 11 MB
HeapObjects: 706798

Done!
虽然键(key)只需要 8 MB 左右,但程序占用了约 2.7 GB 的内存!似乎 sliced 永远不会从堆中释放。我尝试在循环末尾设置 sliced = nil,但没有效果。我读到过,如果把整个文件加载到内存中再进行分割,就可以避免这个问题,但我必须逐行读取文件,因为我没有足够的内存来加载一些较大的文件。


为什么内存被占用了?处理完每一行后,如何释放它

我想我找到问题了!我对大文件的每一行都做了分割。返回的 []string 是一个切片,其中的元素是原始字符串(文件行)的子字符串。问题在于,每个子字符串都不是一个新字符串,它仅仅是一个切片(slice),保留着对未切分的原始字符串(即整行)的引用。我为每一行都保留了 sliced[0],因此也就保留了对文件每一行的引用。垃圾收集器不会回收这些已读取的行,因为我仍然持有对它们的引用。从技术上讲,我读取了文件的所有行,并把它们全部保留在内存中。

解决方案是将我想要的部分(sliced[0])复制到一个新的字符串中,从而不再持有对整行的引用。我是这样做的:

    sliced := strings.Split(scanner.Text(), ",")
    key_rune_arr := []rune(sliced[0])
    key := string(key_rune_arr) // now key is a copy of sliced[0] without reference to line
    mp[key] = 2.2 //instead of mp[sliced[0]] = 2.2
程序现在变成:

package main

import (
    "bufio"
    "unsafe"
    "fmt"
    "log"
    "os"
    "runtime"
    "strings"
)

// firstField returns a freshly allocated copy of the text before the first
// comma in line, or a copy of the whole line when it contains no comma.
//
// A substring shares the memory of the string it was cut from, so the copy is
// what allows the (much longer) line to be garbage collected while the key
// stays alive in the map. The copy is byte-for-byte: unlike a []rune round
// trip it does not rewrite invalid UTF-8 sequences to U+FFFD, so distinct
// keys can never collapse into one.
func firstField(line string) string {
    if i := strings.IndexByte(line, ','); i >= 0 {
        line = line[:i]
    }
    var b strings.Builder
    b.WriteString(line)
    return b.String()
}

// main reads file_address.txt line by line, stores a detached copy of the
// first comma-separated field of every line as a key of a map, and then
// prints runtime memory statistics.
func main() {
    file, err := os.Open("file_address.txt")
    if err != nil {
        log.Fatal(err)
    }
    defer file.Close()

    mp := make(map[string]float32)
    var (
        totalSize int64 // bytes taken by the key string headers (not the key bytes)
        counter   int64 // number of lines read
    )

    scanner := bufio.NewScanner(file)
    for scanner.Scan() {
        counter++
        key := firstField(scanner.Text())
        mp[key] = 2.2
        // Without this accumulation the report below always printed 0.
        totalSize += int64(unsafe.Sizeof(key))
    }

    // Scan returns false on both EOF and failure; Err distinguishes the two.
    if err := scanner.Err(); err != nil {
        log.Fatal(err)
    }
    fmt.Printf("loaded: %d. Took %d Mb of memory.", counter, totalSize/1024/1024)
    fmt.Println("Loading finished. Now waiting...")

    // Snapshot the runtime's allocator statistics after loading.
    var ms runtime.MemStats
    runtime.ReadMemStats(&ms)

    // MemStats sizes are in bytes; dividing by 1024 twice reports them in MB.
    fmt.Printf("\n")
    fmt.Printf("Alloc: %d MB, TotalAlloc: %d MB, Sys: %d MB\n",
        ms.Alloc/1024/1024, ms.TotalAlloc/1024/1024, ms.Sys/1024/1024)
    fmt.Printf("Mallocs: %d, Frees: %d\n",
        ms.Mallocs, ms.Frees)
    fmt.Printf("HeapAlloc: %d MB, HeapSys: %d MB, HeapIdle: %d MB\n",
        ms.HeapAlloc/1024/1024, ms.HeapSys/1024/1024, ms.HeapIdle/1024/1024)
    fmt.Printf("HeapObjects: %d\n", ms.HeapObjects)
    fmt.Printf("\n")
}
结果如我所愿:

loaded: 544594. Took 8 Mb of memory.Loading finished. Now waiting...

Alloc: 94 MB, TotalAlloc: 3986 MB, Sys: 135 MB
Mallocs: 1653590, Frees: 1108129
HeapAlloc: 94 MB, HeapSys: 127 MB, HeapIdle: 32 MB
HeapObjects: 545461

Done!

为了高效地使用CPU和内存

// Requires the "bytes" package. scanner.Bytes() exposes the current line
// without allocating a string for it, SplitN with n=2 stops after the first
// comma instead of splitting every value, and string(...) copies only the
// key's bytes, so the map key holds no reference to the rest of the line.
key := string(bytes.SplitN(scanner.Bytes(), []byte(","), 2)[0])
mp[key] = 2.2

您可以搜索“三色算法 golang gc”以获取更多详细信息。

您可以使用 pprof 查看内存分配的确切来源。对应用程序进行性能分析(profiling)是一项有用的技能,这可能是学习它的一个很好的练习。

@mh-cbon 我发现这不是因为 GC 不工作。问题是,我把 sliced[0] 用作 map 的键,从而仍然保留着对文件每一行的引用。