Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/cplusplus/135.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C++ CUDA Visual Studio错误“;命令(非常长的命令)退出,代码为255“;_C++_Visual Studio_Cuda - Fatal编程技术网

C++ CUDA Visual Studio错误:“命令(非常长的命令)已退出,代码为255”

C++ CUDA Visual Studio错误“;命令(非常长的命令)退出,代码为255“;,c++,visual-studio,cuda,C++,Visual Studio,Cuda,我试图学习cuda并将我当前的项目转换为使用它,但我遇到了以下错误: 错误MSB3721命令“C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\bin\nvcc.exe”-gencode=arch=compute_52,code=“sm_52,compute_52”-use local env-ccbin“C:\Program Files(x86)\Microsoft Visual Studio\2019\Community\VC\

我试图学习cuda并将我当前的项目转换为使用它,但我遇到了以下错误:

错误 MSB3721:命令“"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\bin\nvcc.exe" -gencode=arch=compute_52,code=\"sm_52,compute_52\" --use-local-env -ccbin "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.26.28801\bin\HostX86\x64" -x cu -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include" -G --keep-dir x64\Debug -maxrregcount=0 --machine 64 --compile -cudart static -g -D_DEBUG -D_CONSOLE -D_UNICODE -DUNICODE -Xcompiler "/EHsc /W3 /nologo /Od /Fdx64\Debug\vc142.pdb /FS /Zi /RTC1 /MDd " -o x64\Debug\cudaMain.cu.obj "C:\Users\[我的用户名]\source\repos\LogicGateMachineLearning_V2_Solution\LogicGateMachineLearning_V2\cudaMain.cu"”已退出,代码为 255。

我使用一个 .cuh 文件来声明类,但编译器给出警告“属性不适用于实体”。我需要用 __host__ __device__ 标记这些类吗?另外,我还收到警告“C26812:枚举类型‘cudaError’未限定作用域。优先使用‘enum class’而不是‘enum’(Enum.3)”。

.cuh文件

#pragma once
#include <iostream>
#include <fstream>
#include <stdlib.h>
#include <string>
#include <vector>
#include <algorithm>
#include <ctime>
#pragma warning(disable : 4996)
#include <cuda_runtime.h>
#include "device_launch_parameters.h"

using namespace std;

// Compile-time array sizes used by the Gate and Circuit classes below.
// Number of Gate slots in Circuit::gates.
static unsigned const int maxCircuitSizeG = 200;
// Length of Circuit::inputs.
static const int inputSizeG = 16;
// Length of Circuit::outputs.
static const int outputSizeG = 9;
// Length of Gate::children and Gate::parents.
static const short childParentAmountG = 10;

// One gate of a circuit.
//
// Execution-space specifiers (__host__ / __device__) apply to functions, not to
// class declarations; putting them on the class key only produces the
// "attribute does not apply to any entity" warning. They are therefore kept on
// the constructor alone, which makes Gate constructible from host and device code.
class Gate {
public:
    char type;                         // gate kind code; the encoding is defined by the implementation file
    int children[childParentAmountG];  // presumably indices of child gates within the circuit -- TODO confirm
    int parents[childParentAmountG];   // presumably indices of parent gates within the circuit -- TODO confirm
    bool output;                       // output value stored for this gate
    __host__ __device__ Gate();
};
// A fixed-capacity circuit: a gate array plus its input/output bits and scoring state.
//
// Execution-space specifiers (__host__ / __device__) apply to functions, not to
// class declarations; putting them on the class key only produces the
// "attribute does not apply to any entity" warning. They are therefore kept on
// the constructor alone, which makes Circuit constructible from host and device code.
class Circuit {
public:
    Gate gates[maxCircuitSizeG];   // gate storage; capacity is maxCircuitSizeG
    bool inputs[inputSizeG];       // input bits
    bool outputs[outputSizeG];     // output bits
    double score;                  // score of this circuit (read by main() after the GPU run)
    unsigned int averageCounter;   // up to 4,294,967,295
    int size;                      // presumably the number of gates in use -- TODO confirm
    __host__ __device__ Circuit();
};

// Function prototypes for the logic-circuit simulator.
//
// Prototypes marked __host__ __device__ are compiled for both the CPU and the GPU;
// prototypes without a specifier are host-only (they use std::string / std::vector
// or file paths, which are not usable in device code).
//
// NOTE: these are declarations only. When a __device__ function declared here is
// defined in one .cu file and called from a kernel in another .cu file, the project
// must be built with relocatable device code (nvcc -rdc=true) so the device linker
// can resolve it; otherwise ptxas fails with "Unresolved extern function".

// --- Random numbers (presumably; judging by the names -- TODO confirm) ---
__host__ __device__ unsigned int randumb(void);
unsigned int randumb2(void);

// --- Array, bit and string helpers ---
__host__ __device__ bool IsBoolInParents(Gate gate, Gate circuit[], bool boolToFind);
__host__ __device__ bool XORgateOutput(Gate gate, Gate circuit[]);
__host__ __device__ bool IsIntInArr(int arr[], int arrSize, int num);
bool IsShortInArr(short arr[], short arrSize, short num);
bool IsIntInVector(vector<int> vec, int num);
__host__ __device__ bool PushInt(int arr[], int arrSize, int num);
bool PushShort(short arr[], short arrSize, short num);
__host__ __device__ int CountCircuitSize(Gate circuit[]);
int CountCircuitSize2(Gate circuit[]);
__host__ __device__ void RemoveIntAndShiftArr(int arr[], int arrSize, int indexToRemove);
void RemoveShortAndShiftArr(short arr[], short arrSize, short indexToRemove);
int IntPow(int num, int exponent);
int BinaryToDecimal(bool bits[], int byteSize, bool firstIsMostSignificant);
string IntToString(int num);
void DecimalToBinary(int n, bool byte[], int byteSize);
void ShiftBinary(bool byte[], int byteSize, bool shiftLeft, int shiftAmount);
string BinaryToString(bool byte[], int byteSize);
__host__ __device__ void CopyGate(Gate& to, Gate from);

// --- Gate / circuit construction, evaluation and mutation ---
__host__ __device__ void RandomGateType(Gate circuit[], int circuitIndex);
__host__ __device__ void DestroyGate(Gate circuit[], int indexToRemove, int circuitSize);
__host__ __device__ void CleanCircuit(Gate circuit[], int circuitSize);
__host__ __device__ void AddChild(Gate circuit[], int circuitSize, int index);
__host__ __device__ void AddParent(Gate circuit[], int circuitSize, int index);
__host__ __device__ void CreateGate(Circuit& circuit, int indexToAdd, int circuitSize);
__host__ __device__ void CreateGate2(Circuit& circuit, int indexToAdd, int circuitSize);
__host__ __device__ void SafeFixCircuit(Gate circuit[], int circuitSize);
__host__ __device__ bool GateOutput(Gate gate, Gate circuit[]);
__host__ __device__ void Process(Circuit& circuit);
__host__ __device__ void ProcessFromCharArr(Circuit& circuit, char arr[]);
__host__ __device__ void RandomCircuit(Circuit& circuit, int circuitSize, int startingChildParentAmount);
__host__ __device__ void RemoveChild(Gate circuit[], int circuitSize, int index);
__host__ __device__ void Mutate(Circuit& circuit, int growChance, int shrinkChance, int grow, int shrink, int rate, int intensity);

// Host-only.
void CreateAdderCircuit(Gate circuit[]);

// --- Serialization (host-only: std::string and file paths) ---
string CircuitToString(Circuit circuit);
void SaveCircuit(string path, Circuit circuit, int circuitSize);
void FileToCircuit(string path, Gate circuit[]);

// --- Population initialisation ---
__host__ __device__ void InitRndPop(Circuit population[], int popSize, int startCircuitSize, int startChildParentAmount);
void InitPopFromFile(Circuit population[], int popSize, string path);

// Host-only; returns a vector of strings.
vector<string> MakeRndSample(int sampleSize, string path);

// --- Scoring ---
__host__ __device__ void Score3(Circuit& circuit, char arr[]);

// --- Copy helpers ---
__host__ __device__ void CopyCircuit(Circuit from, Circuit& to);
__host__ __device__ void CopyCircuitToPopulation(Circuit circuit, Circuit population[], unsigned short populationSize);
void CopyCircuit2(Circuit from, Circuit& to);
void CopyCircuitsToPop(vector<int> circuitsIndexes, Circuit population[], unsigned short populationSize);

__host__ __device__ void ScoreAverageFromArray(char arr[], int arrSize, Circuit& circuit);

// Called per circuit from the addKernel kernel in main.cu.
__host__ __device__ void RandomBruteForceImproveFromArray(Circuit& circuit, char arr[], unsigned int arrSize, unsigned int maxSearch);

// Host-only; takes a destination buffer, its size and a file path.
void fileToCharArr(char arr[], int size, string path);
#pragma一次
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#杂注警告(禁用:4996)
#包括
#包括“设备启动参数.h”
使用名称空间std;
静态无符号常量int maxCircuitSizeG=200;
静态常数int inputSizeG=16;
静态常数int outputSizeG=9;
静态常数short childparentamount=10;
__主机设备类门{
公众:
煤焦类型;
int children[childParentAmountG];
int parents[childParentAmountG];
布尔输出;
__主机设备门();
};
__主机设备类电路{
公众:
闸门门[maxCircuitSizeG];
布尔输入[inputSizeG];
布尔输出[outputSizeG];
双倍得分;
unsigned int averageCounter;//最多4294967295
整数大小;
__主机设备电路();
};
__主机\uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu;
无符号整数2(无效);
__主机设备bool IsBoolInParents(门、门电路[]、bool boolToFind);
__主机设备输出(门电路、门电路[]);
__主机\uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu;
bool-IsShortInArr(短arr[],短arrSize,短num);
布尔向量(向量向量,整数);
__主机\uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu;
bool PushShort(短arr[],短arrSize,短num);
__主机设备计数电路大小(门电路[]);
int CountCircuitSize2(门电路[]);
__主机设备无效删除和移位器(int-arr[],int-arrSize,int-indexToRemove);
空移短移(短arr[],短arr大小,短indexToRemove);
int IntPow(int num,int指数);
int BinaryToDecimal(bool位[],int字节大小,bool first最重要);
字符串IntToString(int num);
无效十进制数(整数n,布尔字节[],整数字节大小);
void ShiftBinary(bool byte[],int byteSize,bool shiftLeft,int shiftAmount);
字符串二进制字符串(bool byte[],int byteSize);
__主机设备无效复制门(门和到,门从);
__主机设备无效随机门类型(门电路[],int电路索引);
__主机设备无效破坏门(门电路[],int索引移动,int电路大小);
__主机设备无效清洁电路(门电路[],int电路尺寸);
__主机\uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu;
__主机设备无效添加父级(门电路[],int电路大小,int索引);
__主机\uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu;
__主机\uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu;
__主机设备无效安全固定电路(门电路[],int电路尺寸);
__主机设备门输出(门、门电路[]);
__主机设备无效过程(电路和电路);
__主机\uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu;
__主机设备无效随机电路(电路和电路、int电路大小、int启动子数量);
__主机设备无效删除child(门电路[],int电路大小,int索引);
__主机设备无效变异(电路和电路,整数增长机会,整数收缩机会,整数增长,整数收缩,整数速率,整数强度);
void CreateAdderCircuit(门电路[]);
串电路串(电路);
无效保存电路(字符串路径、电路、int电路大小);
无效文件到电路(字符串路径,门电路[]);
__主机设备无效初始化RNDPOP(电路填充[],int popSize,int startCircuitSize,int startChildParentAmount);
void InitPopFromFile(电路填充[],int-popSize,字符串路径);
向量MakeRndSample(int sampleSize,字符串路径);
__主机设备无效记分3(电路和电路,字符arr[]);
__主机设备无效复制电路(电路从、电路和到);
__主机设备无效复制电路填充(电路,电路填充[],无符号短路填充大小);
无效复制电路2(电路从、电路和到);
void CopyCircuitsToPop(向量电路索引,电路填充[],无符号短填充大小);
__主机\uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu;
__主机\uuuuuuuuuuuuuuuuuuuuuuuu设备\uuuuuuuuuuuuuuuuuuuuuuuuuuuuu设备\uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu;
void filetocharar(char arr[],int size,字符串路径);
包含这些函数定义的 .cu 文件太大,无法贴在这里;不过其中的函数都没有多余的 __host__ __device__ 限定符,对吗?

main.cu文件

#include "LogicSimCuda.cuh"
#include <stdio.h>
cudaError_t improveCircuitPopWithCuda(Circuit* circuit, char arr[], int arrSize, unsigned int size);

// Kernel: each thread improves one circuit of the population.
//
// Launch layout: 1-D grid of 1-D blocks; the total number of launched threads
// must equal the number of elements in `circuit`, because the element count is
// not passed in and therefore cannot be bounds-checked here.
//
// circuit   : device pointer to the population.
// arr       : device pointer to the training data (must NOT be a host pointer).
// arrSize   : number of chars in `arr`.
// maxSearch : forwarded to RandomBruteForceImproveFromArray.
__global__ void addKernel(Circuit *circuit, char arr[], int arrSize, const int maxSearch)
{
    // Global thread index. With the single-block launch used by the host code this
    // equals threadIdx.x; using the global form keeps multi-block launches from
    // making every block work on the same elements.
    const unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
    RandomBruteForceImproveFromArray(circuit[i], arr, arrSize, maxSearch);
}

// Loads a population and the training data from disk, runs the GPU improvement
// pass over the population, and prints the score of the first circuit.
// Returns 0 on success and 1 if the GPU pass failed.
int main()
{
    const int populationSize = 1024;
    Circuit *population = new Circuit[populationSize];
    InitPopFromFile(population, populationSize, "C:/Users/voidm/Documents/LogicSimProjectGIT/LogicSim/Circuits/day2/Sun_Dec_20_12_54_59_2020.txt");

    // Buffer size for the training file: (inputs + outputs + 1) chars for each of
    // 65536 records -- presumably one record per 16-bit input pattern plus a
    // separator char; TODO confirm against the data file format.
    unsigned const int fileSize = (inputSizeG + outputSizeG + 1) * 65536;
    char* trainingArr = new char[fileSize];
    fileToCharArr(trainingArr, fileSize,"C:/Users/voidm/Documents/LogicSimProjectGIT/LogicSim/src/Eight-Bit-Adder-Data.txt" );

    int exitCode = 0;
    cudaError_t cudaStatus = improveCircuitPopWithCuda(population, trainingArr, fileSize, populationSize);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "improveCircuitPopWithCuda failed!");
        exitCode = 1;
    } else {
        std::cout << population[0].score;
    }

    // Release the host buffers on both the success and the failure path.
    delete[] trainingArr;
    delete[] population;

    return exitCode;
}

// Uploads a circuit population and its training data to GPU 0, runs addKernel
// with one thread per circuit, and copies the population back to the host.
//
// circuitPop : host array of `size` circuits; overwritten with the device
//              results when the call succeeds.
// arr        : host buffer holding `arrSize` chars of training data. It is copied
//              to the device here; the kernel never sees the host pointer.
// arrSize    : number of chars in `arr` (must be > 0).
// size       : number of circuits, and also the number of threads in the single
//              block that is launched (so it must not exceed the device's
//              per-block thread limit, otherwise the launch reports an error).
//
// Returns cudaSuccess, cudaErrorInvalidValue for null/empty arguments, or the
// first CUDA error encountered. Device buffers are released on every path.
cudaError_t improveCircuitPopWithCuda(Circuit* circuitPop, char arr[], int arrSize, unsigned int size)
{
    // Every local is declared before the first `goto Error` so that no jump skips
    // an initialisation, and the device pointers start out null so the cleanup at
    // `Error:` is well-defined even if an allocation was never attempted.
    Circuit *dev_circuit = nullptr;
    char *dev_arr = nullptr;
    cudaError_t cudaStatus = cudaSuccess;
    const size_t circuitBytes = static_cast<size_t>(size) * sizeof(Circuit);
    const size_t arrBytes = (arrSize > 0) ? static_cast<size_t>(arrSize) * sizeof(char) : 0;
    const int maxSearch = 1000000;

    if (circuitPop == nullptr || arr == nullptr || arrSize <= 0 || size == 0) {
        fprintf(stderr, "improveCircuitPopWithCuda: invalid arguments\n");
        return cudaErrorInvalidValue;
    }

    // Choose which GPU to run on, change this on a multi-GPU system.
    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
        goto Error;
    }

    // Allocate GPU buffers for the population and for the training data.
    cudaStatus = cudaMalloc((void**)&dev_circuit, circuitBytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    cudaStatus = cudaMalloc((void**)&dev_arr, arrBytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc for training data failed!\n");
        goto Error;
    }

    // Copy from host memory to GPU buffers.
    cudaStatus = cudaMemcpy(dev_circuit, circuitPop, circuitBytes, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

    cudaStatus = cudaMemcpy(dev_arr, arr, arrBytes, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy of training data failed!\n");
        goto Error;
    }

    // Launch a kernel on the GPU with one thread for each element.
    // The device copy of the training data is passed, not the host pointer.
    addKernel <<<1, size >>> (dev_circuit, dev_arr, arrSize, maxSearch);

    // Check for any errors launching the kernel
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    // cudaDeviceSynchronize waits for the kernel to finish, and returns
    // any errors encountered during the launch.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
        goto Error;
    }

    // Copy output vector from GPU buffer to host memory.
    cudaStatus = cudaMemcpy(circuitPop, dev_circuit, circuitBytes, cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

Error:
    // cudaFree(nullptr) is a no-op, so this is safe on every path.
    cudaFree(dev_arr);
    cudaFree(dev_circuit);

    return cudaStatus;
}
    ```

here is the output

>1>------ Build started: Project: LogicGateMachineLearning_V2, Configuration: Debug x64 ------
1>Compiling CUDA source file cudaMain.cu...
1>Compiling CUDA source file LogicSimCuda.cu...
1>
1>C:\Users\voidm\source\repos\LogicGateMachineLearning_V2_Solution\LogicGateMachineLearning_V2>"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\bin\nvcc.exe" -gencode=arch=compute_52,code=\"sm_52,compute_52\" --use-local-env -ccbin "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.26.28801\bin\HostX86\x64" -x cu   -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include"  -G   --keep-dir x64\Debug  -maxrregcount=0  --machine 64 --compile -cudart static  -g  -D_DEBUG -D_CONSOLE -D_UNICODE -DUNICODE -Xcompiler "/EHsc /W3 /nologo /Od /Fdx64\Debug\vc142.pdb /FS /Zi /RTC1 /MDd " -o x64\Debug\LogicSimCuda.cu.obj "C:\Users\voidm\source\repos\LogicGateMachineLearning_V2_Solution\LogicGateMachineLearning_V2\LogicSimCuda.cu"
1>
1>C:\Users\voidm\source\repos\LogicGateMachineLearning_V2_Solution\LogicGateMachineLearning_V2>"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\bin\nvcc.exe" -gencode=arch=compute_52,code=\"sm_52,compute_52\" --use-local-env -ccbin "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.26.28801\bin\HostX86\x64" -x cu   -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include"  -G   --keep-dir x64\Debug  -maxrregcount=0  --machine 64 --compile -cudart static  -g  -D_DEBUG -D_CONSOLE -D_UNICODE -DUNICODE -Xcompiler "/EHsc /W3 /nologo /Od /Fdx64\Debug\vc142.pdb /FS /Zi /RTC1 /MDd " -o x64\Debug\cudaMain.cu.obj "C:\Users\voidm\source\repos\LogicGateMachineLearning_V2_Solution\LogicGateMachineLearning_V2\cudaMain.cu"
1>ptxas fatal   : Unresolved extern function '_Z32RandomBruteForceImproveFromArrayR7CircuitPcjj'
1>cudaMain.cu
1>C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\MSBuild\Microsoft\VC\v160\BuildCustomizations\CUDA 11.2.targets(785,9): error MSB3721: The command ""C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\bin\nvcc.exe" -gencode=arch=compute_52,code=\"sm_52,compute_52\" --use-local-env -ccbin "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.26.28801\bin\HostX86\x64" -x cu   -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include"  -G   --keep-dir x64\Debug  -maxrregcount=0  --machine 64 --compile -cudart static  -g  -D_DEBUG -D_CONSOLE -D_UNICODE -DUNICODE -Xcompiler "/EHsc /W3 /nologo /Od /Fdx64\Debug\vc142.pdb /FS /Zi /RTC1 /MDd " -o x64\Debug\cudaMain.cu.obj "C:\Users\voidm\source\repos\LogicGateMachineLearning_V2_Solution\LogicGateMachineLearning_V2\cudaMain.cu"" exited with code 255.
1>Done building project "LogicGateMachineLearning_V2.vcxproj" -- FAILED.
1>LogicSimCuda.cu
========== Build: 0 succeeded, 1 failed, 0 up-to-date, 0 skipped ==========
#包括“LogicSimCuda.cuh”
#包括
使用CUDA(电路*电路,字符arr[],整数arrSize,无符号整数size)改善电路性能;
__全局无效addKernel(Circuit*Circuit,char arr[],int arrSize,const int maxSearch)
{
int i=threadIdx.x;
R
// NOTE(review): this is the form quoted from the question's header. The
// __host__ __device__ specifiers sit on the class key here; the question reports
// an "attribute does not apply to entity" warning for its class declarations.
__host__ __device__ class Gate {
public:
    char type;
    int children[childParentAmountG];
    int parents[childParentAmountG];
    bool output;
    // Constructor declared for both host and device code.
    __host__ __device__ Gate();
};
// Same class with no execution-space specifiers on the class key; only the
// constructor is marked __host__ __device__.
class Gate {
    public:
        char type;
        // Both arrays have childParentAmountG elements.
        int children[childParentAmountG];
        int parents[childParentAmountG];
        bool output;
        // Constructor declared for both host and device code.
        __host__ __device__ Gate();
    };