Windows 为什么在VS 2013中使用CUDA 9.0时CUDA内核没有启动_Windows_Visual Studio_Parallel Processing_Cuda

Windows 为什么在VS 2013中使用CUDA 9.0时CUDA内核没有启动

windows visual-studio parallel-processing cuda

Windows 为什么CUDA内核没有在VS 2013中推出CUDA 9.0,windows,visual-studio,parallel-processing,cuda,Windows,Visual Studio,Parallel Processing,Cuda,我已经在Windows（GeForce GT 720M）中编写了一个基于CUDA的并行程序。我已经安装了CUDA 9.0工具包和Visual Studio 2013。一切正常，但当我编译代码并运行它时，输出是错误的该计划是： #include <stdio.h> #include "cuda_runtime.h" #include "device_launch_parameters.h" __global__ void square(float * d_out, float *

我已经在Windows（GeForce GT 720M）中编写了一个基于CUDA的并行程序。我已经安装了CUDA 9.0工具包和Visual Studio 2013。一切正常，但当我编译代码并运行它时，输出是错误的

该程序是：

#include <stdio.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

/*
 * Element-wise square: d_out[i] = d_in[i] * d_in[i].
 *
 * d_out  device pointer receiving the results
 * d_in   device pointer to the input values
 *
 * Launch layout: the element index is taken from threadIdx.x alone, so the
 * kernel is meant to be launched with a single block and one thread per
 * element. There is no bounds check; blockDim.x must not exceed the number
 * of elements in either array.
 */
__global__ void square(float * d_out, float * d_in)
{
    int idx = threadIdx.x;
    float f = d_in[idx];
    // The original stored the constant 50 here and left f unused.
    d_out[idx] = f * f;
}

/*
 * Reports a failed CUDA runtime call on stderr.
 *
 * status  value returned by the runtime call being checked
 * what    short description of the step, used in the message
 *
 * Returns true when status is cudaSuccess, false otherwise.
 */
static bool cudaOk(cudaError_t status, const char * what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

/*
 * Fills a 64-element host array with 0..63, copies it to the device, runs
 * the `square` kernel over it with one block of 64 threads, copies the
 * result back and prints it four values per row.
 *
 * Every CUDA runtime call and the kernel launch itself are checked. On the
 * first failure the remaining GPU steps are skipped, the error is printed,
 * and the (uninitialised) result array is NOT printed.
 *
 * Returns 0 on success, 1 if any CUDA step failed.
 */
int main(int argc, char ** argv)
{
    const int ARRAY_SIZE = 64;
    const int ARRAY_BYTES = ARRAY_SIZE * sizeof(float);

    // generate the input array on the host
    float h_in[ARRAY_SIZE];
    for (int i = 0; i < ARRAY_SIZE; i++)
    {
        h_in[i] = float(i);
    }
    float h_out[ARRAY_SIZE];

    // declare GPU memory pointers; NULL so that cudaFree below is safe even
    // when an allocation never happened
    float * d_in = NULL;
    float * d_out = NULL;

    // allocate GPU memory and transfer the input array to the GPU
    // (&& short-circuits, so nothing runs after the first failure)
    bool ok = cudaOk(cudaMalloc((void **) &d_in, ARRAY_BYTES), "cudaMalloc(d_in)")
           && cudaOk(cudaMalloc((void **) &d_out, ARRAY_BYTES), "cudaMalloc(d_out)")
           && cudaOk(cudaMemcpy(d_in, h_in, ARRAY_BYTES, cudaMemcpyHostToDevice),
                     "cudaMemcpy(host to device)");

    if (ok)
    {
        // launch the kernel: one block, one thread per element
        square<<<1, ARRAY_SIZE>>>(d_out, d_in);

        // A launch does not return an error code itself: launch failures are
        // picked up with cudaGetLastError(), and faults during execution
        // surface at the next synchronizing call.
        ok = cudaOk(cudaGetLastError(), "kernel launch")
          && cudaOk(cudaDeviceSynchronize(), "kernel execution")
          // copy the result array back to the host
          && cudaOk(cudaMemcpy(h_out, d_out, ARRAY_BYTES, cudaMemcpyDeviceToHost),
                    "cudaMemcpy(device to host)");
    }

    // print out the resulting array, four values per line
    if (ok)
    {
        for (int i = 0; i < ARRAY_SIZE; i++)
        {
            printf("%f", h_out[i]);
            printf(((i % 4) != 3) ? "\t" : "\n");
        }
    }

    // free GPU memory allocation
    cudaFree(d_in);
    cudaFree(d_out);

    // keep the console window open until a key is pressed
    getchar();
    return ok ? 0 : 1;
}

#包括
#包括“cuda_runtime.h”
#包括“设备启动参数.h”
__全局无效正方形（浮点*d\u输出，浮点*d\u输入）
{
int idx=threadIdx.x；
浮动f=d_in[idx]；
d_out[idx]=50；
}
int main（int argc，字符**argv）
{
常量int数组_SIZE=64；
const int ARRAY_BYTES=数组大小*sizeof（float）；
//在主机上生成输入数组
在[数组大小]中浮动h_；
for（int i=0；i（d_out，d_in）；
//将结果数组复制回GPU
cudaMemcpy（h_out、d_out、数组字节、cudaMemcpyDeviceToHost）；
//打印出结果数组
for（int i=0；i


当我运行它时，输出是：

另外，我用nvcc square.cu
编译了它，但输出是相同的。我在VS中有内核启动语法错误，但我认为它与输出无关（但图像与另一个程序相关）：
问题出在CUDA工具包的版本上。GeForce GT 720M的计算能力为2.1，支持该计算能力的最后一个CUDA版本是CUDA 8.0。
以下是CUDA工具包版本及其支持的计算能力对照表
首先在代码中添加适当的CUDA错误检查，然后重新编译、运行，并报告提示的任何错误。@RobertCrovella 我应该采用哪个答案中的方法？@RobertCrovella CUDA错误：没有可在设备上执行的内核映像。这说明你没有针对GPU实际使用的架构进行编译。GeForce GT 720M是Fermi设备，而CUDA 9已放弃对Fermi设备的支持；CUDA 8是最后一个仍支持Fermi GPU的CUDA工具包。真不敢相信到了2020年，安装程序仍然不会检查硬件并警告你它不受支持，这太离谱了。我拼命折腾了好几个小时：主机与设备之间可以互相复制数据，却无法调用全局函数。