Uploading multiple files to Amazon S3 in parallel using Goroutines & Channels

Tags: amazon-web-services, go, amazon-s3, concurrency, channels

I am trying to upload a directory to an Amazon S3 bucket. However, the only way to do that is to iterate over all the files in the directory and upload them one by one.

I am using Go to iterate over the files. For each file I visit, I want to spawn a goroutine that uploads it while the main thread moves on to the next entry in the directory and spawns another goroutine to upload that one.

Any idea how I can upload all the files in the directory in parallel using Goroutines and Channels?

Here is a revised snippet that spawns a goroutine per file and hands it a channel, but I am not sure whether this is the correct implementation:

func uploadDirToS3(dir string, svc *s3.S3) {
    fileList := []string{}
    filepath.Walk(dir, func(path string, f os.FileInfo, err error) error {
        fmt.Println("PATH ==> " + path)
        fileList = append(fileList, path)
        return nil
    })
    for _, pathOfFile := range fileList[1:] {
        channel := make(chan bool)
        go uploadFileToS3(pathOfFile, svc, channel)
    }
}

So you are looking for concurrency, which is rooted in the go statement. For synchronization between the goroutines started in the loop, you can use channels or a sync.WaitGroup; the second option is easier to implement. You also have to refactor your function and move the inner logic of the for loop into a separate function:

package main

import (
    "bytes"
    "fmt"
    "io"
    "log"
    "net/http"
    "os"
    "path/filepath"
    "sync"

    "github.com/aws/aws-sdk-go/aws"
    "github.com/aws/aws-sdk-go/aws/awsutil"
    "github.com/aws/aws-sdk-go/service/s3"
)

func uploadDirToS3(dir string, svc *s3.S3) {
    fileList := []string{}
    filepath.Walk(dir, func(path string, f os.FileInfo, err error) error {
        fileList = append(fileList, path)
        return nil
    })
    var wg sync.WaitGroup
    // fileList[0] is the root directory itself, so skip it.
    // Add to the WaitGroup once per goroutine actually started;
    // wg.Add(len(fileList)) would be off by one here and wg.Wait()
    // would block forever.
    for _, pathOfFile := range fileList[1:] {
        wg.Add(1)
        go putInS3(pathOfFile, svc, &wg)
    }
    wg.Wait()
}

func putInS3(pathOfFile string, svc *s3.S3, wg *sync.WaitGroup) {
    defer wg.Done()
    file, err := os.Open(pathOfFile)
    if err != nil {
        log.Printf("open %s: %v", pathOfFile, err)
        return
    }
    defer file.Close()
    fileInfo, err := file.Stat()
    if err != nil {
        log.Printf("stat %s: %v", pathOfFile, err)
        return
    }
    size := fileInfo.Size()
    // Read the whole file into memory. This is fine for small files;
    // see the streaming variant below for an alternative.
    buffer := make([]byte, size)
    if _, err := io.ReadFull(file, buffer); err != nil {
        log.Printf("read %s: %v", pathOfFile, err)
        return
    }
    fileBytes := bytes.NewReader(buffer)
    fileType := http.DetectContentType(buffer)
    params := &s3.PutObjectInput{
        Bucket:        aws.String("bucket-name"),
        Key:           aws.String(file.Name()),
        Body:          fileBytes,
        ContentLength: aws.Int64(size),
        ContentType:   aws.String(fileType),
    }

    resp, err := svc.PutObject(params)
    if err != nil {
        log.Printf("upload %s: %v", pathOfFile, err)
        return
    }
    fmt.Printf("response %s\n", awsutil.StringValue(resp))
}
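
A reply further down also points out that you do not need to read the file into a buffer at all: in aws-sdk-go v1, PutObjectInput.Body is an io.ReadSeeker, and *os.File already satisfies it. Here is a minimal sketch of that variant, assuming the same imports as above; putInS3Streaming is a hypothetical name and "bucket-name" is a placeholder:

func putInS3Streaming(pathOfFile string, svc *s3.S3, wg *sync.WaitGroup) {
    defer wg.Done()
    file, err := os.Open(pathOfFile)
    if err != nil {
        log.Printf("open %s: %v", pathOfFile, err)
        return
    }
    defer file.Close()
    fileInfo, err := file.Stat()
    if err != nil {
        log.Printf("stat %s: %v", pathOfFile, err)
        return
    }
    // Sniff the content type from the first 512 bytes, then rewind
    // so the SDK uploads the file from the beginning.
    head := make([]byte, 512)
    n, _ := file.Read(head)
    if _, err := file.Seek(0, io.SeekStart); err != nil {
        log.Printf("seek %s: %v", pathOfFile, err)
        return
    }
    _, err = svc.PutObject(&s3.PutObjectInput{
        Bucket:        aws.String("bucket-name"),
        Key:           aws.String(pathOfFile),
        Body:          file, // streamed from disk, not buffered in memory
        ContentLength: aws.Int64(fileInfo.Size()),
        ContentType:   aws.String(http.DetectContentType(head[:n])),
    })
    if err != nil {
        log.Printf("upload %s: %v", pathOfFile, err)
    }
}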

Strictly speaking, the following does not answer the OP directly, but it tries to introduce parallel processing in Go.

Hope this helps.

package main

import (
    "log"
    "sync"
    "time"
)

func main() {

    // processInSync()
    // The processing takes up to 3 seconds,
    // it displays all the output and handles errors.

    // processInParallel1()
    // The processing takes only a few microseconds,
    // it displays some of the output and does not handle errors.
    // It is super fast, but incorrect.

    // processInParallel2()
    // The processing takes up to 1s,
    // It correctly displays all the output,
    // But it does not yet handle return values.

    processInParallel3()
    // The processing takes up to 1s,
    // It correctly displays all the output,
    // and it is able to return the first error encountered.

    // This is merely an introduction to what you are able to do.
    // More examples would be required to explain the subtleties of
    // channels needed to implement unbounded work processing.
    // I leave that as an exercise to the reader; a sketch follows
    // right after this example.
    // For more information and explanations about channels,
    // read the friendly manual and the many examples
    // available on the internet.
    // https://golang.org/doc/effective_go.html#concurrency
    // https://gobyexample.com/channels
    // https://gobyexample.com/closing-channels
}

func aSlowProcess(name string) error {
    log.Println("aSlowProcess ", name)
    <-time.After(time.Second)
    return nil
}

// processInSync is a dummy function calling a slow function one after the other.
func processInSync() error {
    now := time.Now()
    // it calls the slow process three time,
    // one after the other;
    // If an error is returned, returns asap.
    if err := aSlowProcess("#1"); err != nil {
        return err
    }
    if err := aSlowProcess("#2"); err != nil {
        return err
    }
    if err := aSlowProcess("#3"); err != nil {
        return err
    }
    // This is a sync process because it does not involve
    // any extra synchronization mechanism.
    log.Printf("processInSync spent %v\n", time.Now().Sub(now))
    return nil
}

// processInParallel1 implements a parallel processing example.
// It is not yet a fully working example; to keep it simple,
// it only implements the sending part of the processing.
func processInParallel1() error {
    now := time.Now()

    // We want to execute those function calls in parallel.
    // For that we use the go keyword, which runs the function
    // in a separate routine/process/thread.
    // It is called async because the main thread and the
    // new routines need to be synchronized.
    // To synchronize two independent routines we must use
    // atomic (race-free) operations.

    // A channel is such an atomic operation: it is safe to
    // read from and write to from multiple parallel
    // and independent routines.

    // Before implementing such processing, we must ask ourselves
    // what input needs to be distributed among the routines,
    // and what values we want to get back from them.

    // Let's create a channel of strings to distribute the input
    // to multiple independent workers.
    distributor := make(chan string)

    // The input channel MUST be read by the new routines.
    // We create three workers that read from it and run the slow process.
    go func() {
        value := <-distributor
        aSlowProcess(value)
    }()
    go func() {
        value := <-distributor
        aSlowProcess(value)
    }()
    go func() {
        value := <-distributor
        aSlowProcess(value)
    }()

    // we must now write the values into the distributor
    // so that each worker can read and process data.
    distributor <- "#1"
    distributor <- "#2"
    distributor <- "#3"

    log.Printf("processInParallel1 spent %v\n", time.Now().Sub(now))

    return nil
}

// processInParallel2 implements a parallel processing example.
// It is not yet a fully working example; to keep it simple,
// it implements the sending part of the processing,
// plus the synchronization mechanism to wait for all workers
// to finish before returning.
func processInParallel2() error {
    now := time.Now()

    // We saw in the previous example how to send values and process
    // them in parallel; however, that function was not able to wait
    // for those async processes to finish before returning.

    // To implement such a synchronization mechanism,
    // where the main thread waits for all workers to finish
    // before returning, we need to use the sync package.
    // It provides the best pattern to handle that requirement.

    // In addition to the previous example we now instantiate a
    // WaitGroup https://golang.org/pkg/sync/#WaitGroup
    // The purpose of the wait group is to record a number
    // of async jobs to process and wait for them to finish.

    var wg sync.WaitGroup

    distributor := make(chan string)

    // Because we have three workers, we add one to the group
    // for each of them.
    wg.Add(1)
    go func() {
        // Then we make sure to signal to the waitgroup
        // that this worker is done.
        defer wg.Done()
        value := <-distributor
        aSlowProcess(value)
    }()
    //-
    wg.Add(1)
    go func() {
        defer wg.Done() // as an exercise, comment this line
        // and inspect the output of your program.
        value := <-distributor
        aSlowProcess(value)
    }()
    //-
    wg.Add(1)
    go func() {
        defer wg.Done()
        value := <-distributor
        aSlowProcess(value)
    }()

    // we can now write the data for processing....
    distributor <- "#1"
    distributor <- "#2"
    distributor <- "#3"

    //....and wait for their completion
    wg.Wait()

    log.Printf("processInParallel2 spent %v\n", time.Now().Sub(now))

    return nil
}

// processInParallel3 implements a parallel processing example.
// It is a fully working example that distributes jobs,
// waits for completion and collects return values.
func processInParallel3() error {
    now := time.Now()

    var wg sync.WaitGroup
    distributor := make(chan string)

    // To collect return values we must implement a
    // way for output values to safely reach the main thread.
    // We create a channel of errors for that purpose.
    receiver := make(chan error)

    // As previously, we start the workers and attach them to a waitgroup.
    wg.Add(1)
    go func() {
        defer wg.Done()
        value := <-distributor
        err := aSlowProcess(value)
        // To return the value we write it to the output channel.
        receiver <- err
    }()
    //-
    wg.Add(1)
    go func() {
        defer wg.Done()
        value := <-distributor
        receiver <- aSlowProcess(value)
    }()
    //-
    wg.Add(1)
    go func() {
        defer wg.Done()
        value := <-distributor
        receiver <- aSlowProcess(value)
    }()

    // we can now write the data for processing....
    distributor <- "#1"
    distributor <- "#2"
    distributor <- "#3"

    // ... read the output values
    err1 := <-receiver
    err2 := <-receiver
    err3 := <-receiver

    //....and wait for routines completion....
    wg.Wait()

    log.Printf("processInParallel3 spent %v\n", time.Now().Sub(now))

    // finally check for errors
    if err1 != nil {
        return err1
    }
    if err2 != nil {
        return err2
    }
    if err3 != nil {
        return err3
    }

    return nil
}
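
As a possible shape for the exercise mentioned in main, here is a hedged sketch of a bounded worker pool: a fixed number of goroutines range over the distributor channel until it is closed, so the goroutine count stays constant no matter how many jobs are fed in. processInParallel4 is a name introduced here for illustration; it reuses aSlowProcess and the imports from the example above.

// processInParallel4 sketches unbounded work processing with a
// bounded number of workers. It accepts any number of jobs but
// never runs more than `workers` goroutines at once.
func processInParallel4(jobs []string) error {
    now := time.Now()

    var wg sync.WaitGroup
    distributor := make(chan string)
    // Buffered so workers never block when reporting results.
    receiver := make(chan error, len(jobs))

    // Start a fixed pool of workers; each drains the distributor
    // until the channel is closed.
    const workers = 3
    for i := 0; i < workers; i++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            for value := range distributor {
                receiver <- aSlowProcess(value)
            }
        }()
    }

    // Feed every job, then close the channel so the workers'
    // range loops terminate.
    for _, job := range jobs {
        distributor <- job
    }
    close(distributor)

    // Wait for the workers, then close the results channel so
    // it can be ranged over below.
    wg.Wait()
    close(receiver)

    log.Printf("processInParallel4 spent %v\n", time.Now().Sub(now))

    // Return the first error encountered, if any.
    for err := range receiver {
        if err != nil {
            return err
        }
    }
    return nil
}

With the one-second aSlowProcess above, processInParallel4([]string{"#1", "#2", "#3", "#4", "#5", "#6"}) should take roughly two seconds: six jobs, three at a time.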

Yes, you can put the core of the for loop in a goroutine (though make sure to create a local copy of pathOfFile inside the loop, or pass it as a parameter to the goroutine function; a short sketch follows below). You may or may not need a sync.WaitGroup to wait for them all to finish, depending on how your program is structured. Also, you do not need to read the file into a buffer: you can set file as the value of Body in the PutObjectInput.

Comments:

Unfortunately, in your current code you have not implemented any concurrency at all. Be prepared to learn the parallel patterns, try things, run into problems, and we will help you out.

"For each file I iterate over, I want to spawn a goroutine to upload that file" — so implement exactly that. Your current code blocks because it is single-threaded. Goroutines, channels and the other basics are all covered in the Go documentation.

@AndySchweig So I modified the code to use a channel instead of a WaitGroup. However, I am not entirely sure this is the right approach. The files are still uploading and the code runs, but I cannot tell whether it actually works in parallel.

@mh-cbon Just implemented this with Goroutines and Channels, but not sure it is the right way. It would be great to get your advice.

Thank you for the very detailed answer. I will try to implement this with WaitGroups. I have already modified my code above to use channels, as I found them easier to understand and implement than WaitGroups, but I am not entirely sure it is the right approach. Let me know if you have any suggestions on how to improve the code; I will definitely try your solution so I can understand WaitGroups better.

I would not recommend creating a separate goroutine for each file. If there are 1000 files, it will create 1000 goroutines and try to upload them all at once. Instead, you should look at worker pools to control the number of concurrent uploads.

@ankit-deshpande Agreed, but I was just demonstrating how to run it concurrently. As for worker pools, he can read about them elsewhere.

@Solorad Agreed.
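
To make the loop-variable advice concrete: with go putInS3(pathOfFile, svc, &wg) the argument is evaluated when the go statement runs, so the WaitGroup answer's code is already safe. The copy only matters when a closure captures the variable, because before Go 1.22 the range variable is reused across iterations. A minimal sketch, reusing the names from the WaitGroup answer above:

for _, pathOfFile := range fileList[1:] {
    pathOfFile := pathOfFile // per-iteration copy, safe to capture below
    wg.Add(1)
    go func() {
        putInS3(pathOfFile, svc, &wg) // putInS3 calls wg.Done itself
    }()
}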