Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/apache/9.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C++ 分段错误:地址未映射/地址失败:(nil)_C++_Openmpi_Avx - Fatal编程技术网

C++ 分段错误:地址未映射/地址失败:(nil)

C++ 分段错误:地址未映射/地址失败:(nil),c++,openmpi,avx,C++,Openmpi,Avx,我使用Tensorflow的C API在并行模拟中进行推理。由于需要AVX支持,我从源代码编译了Tensorflow。我链接了它,并使用wmake编译了所有内容 现在,如果我开始一个正常的(非并行的)模拟运行,一切正常。但如果我将其并行化,则在启动模拟运行后会立即出现此错误: [node134:18796] *** Process received signal *** [node134:18796] Signal: Segmentation fault (11) [node134:18796]

我使用Tensorflow的C API在并行模拟中进行推理。由于需要AVX支持,我从源代码编译了Tensorflow。我链接了它,并使用wmake编译了所有内容

现在,如果我开始一个正常的(非并行的)模拟运行,一切正常。但如果我将其并行化,则在启动模拟运行后会立即出现此错误:

[node134:18796] *** Process received signal ***
[node134:18796] Signal: Segmentation fault (11)
[node134:18796] Signal code: Address not mapped (1)
[node134:18796] Failing at address: (nil)
[node134:18796] [ 0] /lib/x86_64-linux-gnu/libc.so.6(+0x3ef20)[0x7fec1c96ff20]
[node134:18796] [ 1] /home/elias/OpenFOAM/elias-4.1/platforms/linux64GccDPInt32Opt/lib/libtensorflow_framework.so(hwloc_bitmap_and+0x14)[0x7fec01c21534]
[node134:18796] [ 2] /usr/lib/x86_64-linux-gnu/libopen-pal.so.20(opal_hwloc_base_filter_cpus+0x380)[0x7febe59d6b80]
[node134:18796] [ 3] /usr/lib/x86_64-linux-gnu/openmpi/lib/openmpi/mca_ess_pmi.so(+0x2b4e)[0x7febe4902b4e]
[node134:18796] [ 4] /usr/lib/x86_64-linux-gnu/libopen-rte.so.20(orte_init+0x22e)[0x7febe5c2a1de]
[node134:18796] [ 5] /usr/lib/x86_64-linux-gnu/libmpi.so.20(ompi_mpi_init+0x30e)[0x7febffdbc27e]
[node134:18796] [ 6] /usr/lib/x86_64-linux-gnu/libmpi.so.20(MPI_Init+0x6b)[0x7febffddd2ab]
[node134:18796] [ 7] /opt/OpenFOAM/OpenFOAM-4.1/platforms/linux64GccDPInt32Opt/lib/openmpi-system/libPstream.so(_ZN4Foam8UPstream4initERiRPPc+0x1f)[0x7fec1c72843f]
[node134:18796] [ 8] /opt/OpenFOAM/OpenFOAM-4.1/platforms/linux64GccDPInt32Opt/lib/libOpenFOAM.so(_ZN4Foam7argListC1ERiRPPcbbb+0x719)[0x7fec1db36ed9]
[node134:18796] [ 9] tabulatedCombustionFoam(+0x279b8)[0x55fe6eb489b8]
[node134:18796] [10] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xe7)[0x7fec1c952b97]
[node134:18796] [11] tabulatedCombustionFoam(+0x30a0a)[0x55fe6eb51a0a]
[node134:18796] *** End of error message ***
我尝试过自己解决这个问题,但到目前为止没有任何进展。有人知道这个问题的原因吗?

提前谢谢

编辑:我之前没有考虑代码本身出错的可能性,因为它在以下两种情况下都能正常工作:(1)不并行运行;(2)并行运行,但使用可以直接下载的标准C API版本。

以下是“主要”的相关部分:

if(推断模式==0)
{
自动启动=标准::时钟::高分辨率时钟::现在();
const char*frozenGraphName=“/home/elias/Lr75-57_FPVANN_premix/data/FPV_ANN_制表_Standard_500.pb”;
const std::string iON=string(输入层名称);
const char*inputOperationName=iON.c_str();
const std::string oON=string(输出层名称)+“/biaadd”;
常量字符*outputOperationName=oON.c_str();
int no_of_inputs=in_mean.size();
int no_of_outputs=out_mean.size();
int cellsAndPatches=(input_f_zeta_PVNorm.size())/无输入;
std::向量输入_维度={cellsAndPatches,没有_个输入};
std::vector output_dimensions={cellsAndPatches,no_of_output};
推理*inf=新推理();
bool success=inf->doInference(frozenGraphName、inputOperationName、outputOperationName、no_of_inputs、no_of_output、input_dimensions、output_dimensions、cellsAndPatches、input_f_zeta_PVNorm、output_real、limit_cores);
删除inf;
自动t_end_0=std::chrono::高分辨率时钟::now();
自动总计=std::chrono::duration(t_end_0-t_start_0).count();
std::cout data_deallocator=DeallocateBuffer;
返回buf;
}
static void DeallocateTensor(void* data, std::size_t, void*) // vorher void* arg
{
    std::free(data);
}
class Inference
{
public:
    bool doInference(const char*,const char*,const char*,int,int,std::vector<int64_t>,std::vector<int64_t>,int,std::vector<float>&,std::vector<float>&,int);
};
#endif // INFERENCEC_H
这是.C文件:

#include "inferenceC.H"

bool Inference::doInference(const char* fgn, const char* iname, const char* oname, int nIn, int nOut, std::vector<int64_t> dimIn,std::vector<int64_t> dimOut, int CP, std::vector<float> &inVals, std::vector<float> &outVals, int maxCores)
{   
    TF_Buffer* graph_def = ReadBufferFromFile(fgn);

    if (graph_def == nullptr)
    {
        std::cout << "Can't read buffer from file" << std::endl;
    }

    TF_Graph* graph = TF_NewGraph();
    TF_Status* status = TF_NewStatus();
    TF_ImportGraphDefOptions* graph_opts = TF_NewImportGraphDefOptions();

    TF_GraphImportGraphDef(graph, graph_def, graph_opts, status);

    if(TF_GetCode(status)!=TF_OK)   
    {
        std::cout << "ERROR: Unable to import graph " << TF_Message(status) << std::endl;
    }

    //int num_bytes_in = CP*nIn*sizeof(float);
    //int num_bytes_out = CP*nOut*sizeof(float);

    TF_DeleteImportGraphDefOptions(graph_opts);


    TF_DeleteBuffer(graph_def);


    assert((inVals.size()%nIn)==0);
    std::cout << "Effective batch size: " << (inVals.size()/nIn) << std::endl;

    TF_Output input = {TF_GraphOperationByName(graph, iname), 0};
    TF_Output output = {TF_GraphOperationByName(graph, oname), 0};

    const std::vector<std::int64_t> dims = {CP,nIn};
    std::size_t data_size = sizeof(float);
    for (auto i : dims)
    {
        data_size *= i;
    }

    auto data = static_cast<float*>(std::malloc(data_size));
    std::copy(inVals.begin(), inVals.end(), data);
    TF_Tensor* input_value = TF_NewTensor(TF_FLOAT,dims.data(), static_cast<int>(dims.size()),data, data_size,DeallocateTensor, nullptr);

    const std::vector<int64_t> outdims = {CP,nOut};
    std::size_t outdata_size = sizeof(float);
    for (auto i : outdims)
    {
        outdata_size *= i;
    }

    TF_Tensor* output_value = nullptr;

    std::cout << "Running session..." << std::endl;
    TF_SessionOptions* sess_opts = TF_NewSessionOptions();

    if(maxCores!=0)
    {        
        uint8_t intra_op_parallelism_threads = maxCores; // for operations that can be parallelized internally, such as matrix multiplication 
        uint8_t inter_op_parallelism_threads = maxCores; // for operationss that are independent in your TensorFlow graph because there is no directed path between them in the dataflow graph
        uint8_t config[]={0x10,intra_op_parallelism_threads,0x28,inter_op_parallelism_threads};
        TF_SetConfig(sess_opts,config,sizeof(config),status);
        if (TF_GetCode(status) != TF_OK)
        {
            printf("ERROR: %s\n", TF_Message(status));
        }
    }


    TF_Session* session = TF_NewSession(graph, sess_opts, status);
    assert(TF_GetCode(status)==TF_OK);
    auto t_start = std::chrono::high_resolution_clock::now();

    TF_SessionRun(session, nullptr, &input, &input_value, 1, &output, &output_value, 1, nullptr, 0, nullptr, status);

    auto t_end = std::chrono::high_resolution_clock::now();
    auto total = std::chrono::duration<float, std::milli>(t_end - t_start).count();
    std::cout << "time required for TF_SessionRun: " << total << std::endl;

    float* out_vals = static_cast<float*>(TF_TensorData(output_value));

    std::vector<float> results(nOut*CP,0);
    for(int i=0;i<CP;i++)
    {
        for(int j=0;j<nOut;j++)
        {
            results.at(i*nOut+j) = *out_vals;
            out_vals++;
        }
    }

    std::cout << "Successfully ran session!" << std::endl;

    outVals = results;

    TF_CloseSession(session,status);
    TF_DeleteSession(session,status);
    TF_DeleteSessionOptions(sess_opts);
    TF_DeleteStatus(status);
    TF_DeleteGraph(graph);

    TF_DeleteTensor(output_value);
    TF_DeleteTensor(input_value);

    return 0;
}
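#include "inferenceC.H"
bool Inference::doInference(const char* fgn, const char* iname, const char* oname, int nIn, int nOut, std::vector<int64_t> dimIn,std::vector<int64_t> dimOut, int CP, std::vector<float> &inVals, std::vector<float> &outVals, int maxCores)
{   
TF_Buffer* graph_def = ReadBufferFromFile(fgn);
if (graph_def == nullptr)
{

std::cout …(代码在此处被截断)

如下面的链接所示,这并不是代码错误,而是Tensorflow本身的问题,该问题已在当前的主分支上得到解决: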


请将您的代码精简为一个最小可复现示例(即删除所有复现该行为所不需要的内容)——理想情况下只剩下一个文件,其中包含 main 和所有必要的 #include。
我会试试。您认为这是代码错误,而不是其他问题吗?因为同样的代码在旧版Tensorflow上可以正常工作,没有AVX问题,而且应用程序在运行到我自己的代码之前就崩溃了……
您提供的代码太多,无法一眼看出导致崩溃的原因。问题很可能是由不兼容的编译选项引起的ABI问题。
#ifndef INFERENCEC_H
#define INFERENCEC_H

#include "c_api.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <string.h>
#include <assert.h>
#include <vector>
#include <algorithm>
#include <iterator>
#include <cstdlib>
#include <iostream>
#include <chrono>
#include <ctime>
#include <memory>
#include <unistd.h>
#include <thread>

/// Deallocator callback installed on TF_Buffer objects built from
/// malloc'ed storage: releases that storage again.
///
/// @param buffer Block previously obtained from std::malloc (may be null).
/// The second argument (the buffer length) is required by the callback
/// signature but is not needed to free the block.
static void DeallocateBuffer(void* buffer, size_t /*length*/)
{
    // std::free accepts a null pointer, so no guard is required.
    std::free(buffer);
}

/// Loads the complete contents of a file into a newly created TF_Buffer.
///
/// The file is opened in binary mode, its size is determined by seeking to
/// the end, and the bytes are read into a malloc'ed block that the returned
/// buffer releases through DeallocateBuffer.
///
/// @param file Path of the file to read.
/// @return The filled buffer (the caller releases it with TF_DeleteBuffer),
///         or nullptr when the path is null, the file cannot be opened, is
///         empty, cannot be read completely, or memory cannot be allocated.
static TF_Buffer* ReadBufferFromFile(const char* file)
{
    if (file == nullptr)
    {
        return nullptr;
    }

    std::FILE* f = std::fopen(file, "rb");
    if (f == nullptr)
    {
        return nullptr;
    }

    // Determine the file size; ftell reports -1 on error, which the
    // "< 1" test below rejects together with empty files.
    if (std::fseek(f, 0, SEEK_END) != 0)
    {
        std::fclose(f);
        return nullptr;
    }
    const long fsize = std::ftell(f);
    if (fsize < 1 || std::fseek(f, 0, SEEK_SET) != 0)
    {
        std::fclose(f);
        return nullptr;
    }

    const std::size_t length = static_cast<std::size_t>(fsize);
    void* data = std::malloc(length);
    if (data == nullptr)
    {
        std::fclose(f);
        return nullptr;
    }

    // Read the file as a single item so that a short read is detected.
    const std::size_t itemsRead = std::fread(data, length, 1, f);
    std::fclose(f);
    if (itemsRead != 1)
    {
        std::free(data);
        return nullptr;
    }

    TF_Buffer* buf = TF_NewBuffer();
    if (buf == nullptr)
    {
        std::free(data);
        return nullptr;
    }

    // From here on the buffer owns the block and frees it via the callback.
    buf->data = data;
    buf->length = length;
    buf->data_deallocator = DeallocateBuffer;

    return buf;
}

/// Deallocator callback in the form accepted by TF_NewTensor: frees a
/// malloc'ed tensor data block.
///
/// @param block Storage previously obtained from std::malloc (may be null).
/// The length and the user argument are part of the callback signature but
/// are not used (the third parameter was previously named `arg`).
static void DeallocateTensor(void* block, std::size_t /*length*/, void* /*arg*/)
{
    // std::free accepts a null pointer, so no guard is required.
    std::free(block);
}


/// Stateless helper exposing a single inference entry point built on the
/// TensorFlow C API. The member function is only declared here; its
/// definition lives in the accompanying .C file.
class Inference
{
public:
    /// Runs inference on a frozen graph.
    ///
    /// @param fgn      Path of the frozen graph file that is loaded.
    /// @param iname    Name of the graph operation used as input.
    /// @param oname    Name of the graph operation used as output.
    /// @param nIn      Number of input values per row.
    /// @param nOut     Number of output values per row.
    /// @param dimIn    Input dimensions (not read by the visible definition).
    /// @param dimOut   Output dimensions (not read by the visible definition).
    /// @param CP       Number of rows; the input tensor has shape {CP, nIn}.
    /// @param inVals   Input values copied into the input tensor.
    /// @param outVals  Receives the CP*nOut output values.
    /// @param maxCores When non-zero, used for the intra-op and inter-op
    ///                 parallelism thread settings of the session.
    /// @return Result flag produced by the definition; consult the
    ///         implementation for its exact meaning.
    bool doInference(const char* fgn,
                     const char* iname,
                     const char* oname,
                     int nIn,
                     int nOut,
                     std::vector<int64_t> dimIn,
                     std::vector<int64_t> dimOut,
                     int CP,
                     std::vector<float>& inVals,
                     std::vector<float>& outVals,
                     int maxCores);
};

#endif // INFERENCEC_H
#include "inferenceC.H"

bool Inference::doInference(const char* fgn, const char* iname, const char* oname, int nIn, int nOut, std::vector<int64_t> dimIn,std::vector<int64_t> dimOut, int CP, std::vector<float> &inVals, std::vector<float> &outVals, int maxCores)
{   
    TF_Buffer* graph_def = ReadBufferFromFile(fgn);

    if (graph_def == nullptr)
    {
        std::cout << "Can't read buffer from file" << std::endl;
    }

    TF_Graph* graph = TF_NewGraph();
    TF_Status* status = TF_NewStatus();
    TF_ImportGraphDefOptions* graph_opts = TF_NewImportGraphDefOptions();

    TF_GraphImportGraphDef(graph, graph_def, graph_opts, status);

    if(TF_GetCode(status)!=TF_OK)   
    {
        std::cout << "ERROR: Unable to import graph " << TF_Message(status) << std::endl;
    }

    //int num_bytes_in = CP*nIn*sizeof(float);
    //int num_bytes_out = CP*nOut*sizeof(float);

    TF_DeleteImportGraphDefOptions(graph_opts);


    TF_DeleteBuffer(graph_def);


    assert((inVals.size()%nIn)==0);
    std::cout << "Effective batch size: " << (inVals.size()/nIn) << std::endl;

    TF_Output input = {TF_GraphOperationByName(graph, iname), 0};
    TF_Output output = {TF_GraphOperationByName(graph, oname), 0};

    const std::vector<std::int64_t> dims = {CP,nIn};
    std::size_t data_size = sizeof(float);
    for (auto i : dims)
    {
        data_size *= i;
    }

    auto data = static_cast<float*>(std::malloc(data_size));
    std::copy(inVals.begin(), inVals.end(), data);
    TF_Tensor* input_value = TF_NewTensor(TF_FLOAT,dims.data(), static_cast<int>(dims.size()),data, data_size,DeallocateTensor, nullptr);

    const std::vector<int64_t> outdims = {CP,nOut};
    std::size_t outdata_size = sizeof(float);
    for (auto i : outdims)
    {
        outdata_size *= i;
    }

    TF_Tensor* output_value = nullptr;

    std::cout << "Running session..." << std::endl;
    TF_SessionOptions* sess_opts = TF_NewSessionOptions();

    if(maxCores!=0)
    {        
        uint8_t intra_op_parallelism_threads = maxCores; // for operations that can be parallelized internally, such as matrix multiplication 
        uint8_t inter_op_parallelism_threads = maxCores; // for operationss that are independent in your TensorFlow graph because there is no directed path between them in the dataflow graph
        uint8_t config[]={0x10,intra_op_parallelism_threads,0x28,inter_op_parallelism_threads};
        TF_SetConfig(sess_opts,config,sizeof(config),status);
        if (TF_GetCode(status) != TF_OK)
        {
            printf("ERROR: %s\n", TF_Message(status));
        }
    }


    TF_Session* session = TF_NewSession(graph, sess_opts, status);
    assert(TF_GetCode(status)==TF_OK);
    auto t_start = std::chrono::high_resolution_clock::now();

    TF_SessionRun(session, nullptr, &input, &input_value, 1, &output, &output_value, 1, nullptr, 0, nullptr, status);

    auto t_end = std::chrono::high_resolution_clock::now();
    auto total = std::chrono::duration<float, std::milli>(t_end - t_start).count();
    std::cout << "time required for TF_SessionRun: " << total << std::endl;

    float* out_vals = static_cast<float*>(TF_TensorData(output_value));

    std::vector<float> results(nOut*CP,0);
    for(int i=0;i<CP;i++)
    {
        for(int j=0;j<nOut;j++)
        {
            results.at(i*nOut+j) = *out_vals;
            out_vals++;
        }
    }

    std::cout << "Successfully ran session!" << std::endl;

    outVals = results;

    TF_CloseSession(session,status);
    TF_DeleteSession(session,status);
    TF_DeleteSessionOptions(sess_opts);
    TF_DeleteStatus(status);
    TF_DeleteGraph(graph);

    TF_DeleteTensor(output_value);
    TF_DeleteTensor(input_value);

    return 0;
}