Memory cudaMemGetInfo在GTX 690的两个设备上返回相同数量的可用内存

Memory cudaMemGetInfo在GTX 690的两个设备上返回相同数量的可用内存,memory,cuda,multi-gpu,Memory,Cuda,Multi Gpu,我在跟踪内存使用情况时遇到了Geforce GTX 690的问题。一个简单的测试程序: BOOST_AUTO_TEST_CASE(cudaMemoryTest) { size_t mem_tot_0 = 0; size_t mem_free_0 = 0; size_t mem_tot_1 = 0; size_t mem_free_1 = 0; unsigned int mem_size = 100*1000000; float* h_P = new float[mem_size]; for

我在跟踪显存使用情况时,在GeForce GTX 690上遇到了问题。下面是一个简单的测试程序:

// Reproduction test for per-device free-memory reporting on a dual-GPU board.
//
// Steps:
//   1. Reset device 0 and device 1 and print the free memory of each.
//   2. Allocate and fill a large buffer on device 0 only.
//   3. Query the free memory of both devices again and require that the two
//      values differ (only device 0 received the allocation).
//
// Every CUDA runtime call is checked: an unchecked failure would leave stale
// values in the mem_* variables and make the final comparison meaningless.
// Non-fatal checks (BOOST_CHECK_EQUAL) are used so that the cleanup at the end
// of the test always runs.
BOOST_AUTO_TEST_CASE(cudaMemoryTest) {
    size_t mem_tot_0 = 0;
    size_t mem_free_0 = 0;
    size_t mem_tot_1 = 0;
    size_t mem_free_1 = 0;

    // Number of floats in the test buffer and its size in bytes.
    const size_t mem_size = 100 * 1000000;
    const size_t mem_bytes = mem_size * sizeof(float);

    // Host staging buffer; the trailing () value-initialises every element to 0.f.
    float* h_P = new float[mem_size]();

    // Baseline for device 0.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaDeviceReset(), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_0, &mem_tot_0), cudaSuccess);
    std::cout<<"Free memory before copy dev 0: "<<mem_free_0<<std::endl;

    // Baseline for device 1.
    BOOST_CHECK_EQUAL(cudaSetDevice(1), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaDeviceReset(), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_1, &mem_tot_1), cudaSuccess);
    std::cout<<"Free memory before copy dev 1: "<<mem_free_1<<std::endl;

    // Allocate and fill the buffer on device 0 only. P starts as a null pointer
    // so that a failed cudaMalloc never leaves an indeterminate pointer behind.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    float* P = 0;
    BOOST_CHECK_EQUAL(cudaMalloc((void**)&P, mem_bytes), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemcpy((void*)P, h_P, mem_bytes, cudaMemcpyHostToDevice), cudaSuccess);

    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_0, &mem_tot_0), cudaSuccess);
    std::cout<<"Free memory after copy  dev 0: "<<mem_free_0<<std::endl;

    BOOST_CHECK_EQUAL(cudaSetDevice(1), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_1, &mem_tot_1), cudaSuccess);
    std::cout<<"Free memory after copy dev 1: "<<mem_free_1<<std::endl;

    // The allocation was made on device 0 only, so the two devices are
    // expected to report different amounts of free memory.
    BOOST_CHECK(mem_free_0 != mem_free_1);

    // Report any error still pending in the runtime, and fail the test on it
    // instead of only printing a message.
    const cudaError_t err = cudaGetLastError();
    if(err != cudaSuccess)
        std::cout<<"an error occurred: "<<cudaGetErrorString(err)<<std::endl;
    BOOST_CHECK_EQUAL(err, cudaSuccess);

    // Cleanup: the device buffer belongs to device 0, so select it first.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    if(P != 0)
        destroyMem(P);  // project helper defined outside this snippet
    delete [] h_P;
}
问题在于:在设备0上分配内存之后,cudaMemGetInfo报告的设备1可用内存量与设备0完全相同,而实际情况不应如此(内存只在设备0上分配),因此问题应该出在cudaMemGetInfo和/或cudaSetDevice上。有没有人遇到过同样的问题?或者这个测试本身是否存在其他根本性错误,有人能指出来吗?

代码运行环境:Windows 7、Visual Studio 2010、CUDA SDK 5.0;编译时的代码生成选项为compute_30,sm_30。

2013年4月22日编辑

我继续试验这个问题。从cudaGetDevice调用的结果可以看出,cudaSetDevice工作正常。在内存分配测试之后,我又添加了对设备0的重置,结果cudaMemGetInfo为两个设备返回的可用内存大小仍然相同。我在自己的代码中检查了所有cudaError_t返回值,所有函数调用都返回cudaSuccess。有没有人在GTX 690上遇到过类似的问题?

最新测试代码:

// Extended reproduction test for per-device free-memory reporting.
//
// Steps:
//   1. Reset device 0 and device 1 and print the free memory of each, together
//      with the device index reported by cudaGetDevice.
//   2. Allocate and fill a large buffer on device 0 only.
//   3. Query both devices again and require that the two values differ.
//   4. Reset device 0 only and print the free memory of both devices once more.
//
// Every CUDA runtime call is checked: an unchecked failure would leave stale
// values in the mem_* / device_num variables and make the printed numbers and
// the comparison meaningless. Non-fatal checks are used so the host buffer is
// always released at the end.
BOOST_AUTO_TEST_CASE(cudaMemoryTest) {
    size_t mem_tot_0 = 0;
    size_t mem_free_0 = 0;
    size_t mem_tot_1 = 0;
    size_t mem_free_1 = 0;

    // Index of the currently selected device, as reported by cudaGetDevice.
    int device_num = 0;

    // Number of floats in the test buffer and its size in bytes.
    const size_t mem_size = 100 * 1000000;
    const size_t mem_bytes = mem_size * sizeof(float);

    // Host staging buffer; the trailing () value-initialises every element to 0.f.
    float* h_P = new float[mem_size]();

    // Baseline for device 0.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaDeviceReset(), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_0, &mem_tot_0), cudaSuccess);
    std::cout<<"Free memory before copy dev 0: "<<mem_free_0<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    // Baseline for device 1.
    BOOST_CHECK_EQUAL(cudaSetDevice(1), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaDeviceReset(), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_1, &mem_tot_1), cudaSuccess);
    std::cout<<"Free memory before copy dev 1: "<<mem_free_1<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    // Allocate and fill the buffer on device 0 only. P starts as a null pointer
    // so that a failed cudaMalloc never leaves an indeterminate pointer behind.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    float* P = 0;
    BOOST_CHECK_EQUAL(cudaMalloc((void**)&P, mem_bytes), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemcpy((void*)P, h_P, mem_bytes, cudaMemcpyHostToDevice), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_0, &mem_tot_0), cudaSuccess);
    std::cout<<"Free memory after copy  dev 0: "<<mem_free_0<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    BOOST_CHECK_EQUAL(cudaSetDevice(1), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_1, &mem_tot_1), cudaSuccess);
    std::cout<<"Free memory after copy dev 1: "<<mem_free_1<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    // The allocation was made on device 0 only, so the two devices are
    // expected to report different amounts of free memory.
    BOOST_CHECK(mem_free_0 != mem_free_1);

    // Report any error still pending in the runtime, and fail the test on it
    // instead of only printing a message.
    const cudaError_t err = cudaGetLastError();
    if(err != cudaSuccess)
        std::cout<<"an error occurred: "<<cudaGetErrorString(err)<<std::endl;
    BOOST_CHECK_EQUAL(err, cudaSuccess);

    // Reset only device 0 and check both
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaDeviceReset(), cudaSuccess);
    // The reset destroys the resources of device 0 in this process, which
    // includes the buffer behind P; clear the pointer so it cannot be reused.
    P = 0;
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_0, &mem_tot_0), cudaSuccess);
    std::cout<<"Free memory after second reset of device 0, dev 0: "<<mem_free_0<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    BOOST_CHECK_EQUAL(cudaSetDevice(1), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_1, &mem_tot_1), cudaSuccess);
    std::cout<<"Free memory after second device reset of device 0, dev 1: "<<mem_free_1<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    delete [] h_P;
}

通过更改WDDM驱动程序设置解决了此问题,如下所示:

在NVIDIA控制面板的“3D设置”->“配置多GPU、Surround、PhysX”页面中,切换到“禁用多GPU模式”。


[此答案作为社区wiki条目从评论中添加,以将问题从CUDA标签的未答复队列中删除]

这通过更改WDDM驱动程序设置解决,如下所示:

在NVIDIA控制面板的“3D设置”->“配置多GPU、Surround、PhysX”页面中,切换到“禁用多GPU模式”。


[此答案作为社区wiki条目从评论中添加,用于将问题从CUDA标记的未回答队列中删除]

评论:这些CUDA运行时API调用都会返回API级错误代码,也许你应该对它们进行错误检查。
评论:我在Linux下的一台K10机器上(这是我手头最接近你在Windows下的GTX 690的硬件)运行了你的代码的一个版本,没有遇到任何问题:设备1在复制之后的可用内存大小与复制之前相同。
评论:感谢你的测试运行!所有函数调用都返回cudaSuccess。
评论:已解决:在NVIDIA控制面板的“3D设置”->“配置多GPU、Surround、PhysX”页面中,切换到“禁用多GPU模式”。
评论:这些CUDA运行时API调用都会返回API级错误代码,也许你应该对它们进行错误检查。
评论:我在Linux下的一台K10机器上(这是我手头最接近你在Windows下的GTX 690的硬件)运行了你的代码的一个版本,没有遇到任何问题:设备1在复制之后的可用内存大小与复制之前相同。
评论:感谢你的测试运行!所有函数调用都返回cudaSuccess。
评论:解决方法:在NVIDIA控制面板的“3D设置”->“配置多GPU、Surround、PhysX”页面中,切换到“禁用多GPU模式”。
// Extended reproduction test for per-device free-memory reporting.
//
// Steps:
//   1. Reset device 0 and device 1 and print the free memory of each, together
//      with the device index reported by cudaGetDevice.
//   2. Allocate and fill a large buffer on device 0 only.
//   3. Query both devices again and require that the two values differ.
//   4. Reset device 0 only and print the free memory of both devices once more.
//
// Every CUDA runtime call is checked: an unchecked failure would leave stale
// values in the mem_* / device_num variables and make the printed numbers and
// the comparison meaningless. Non-fatal checks are used so the host buffer is
// always released at the end.
BOOST_AUTO_TEST_CASE(cudaMemoryTest) {
    size_t mem_tot_0 = 0;
    size_t mem_free_0 = 0;
    size_t mem_tot_1 = 0;
    size_t mem_free_1 = 0;

    // Index of the currently selected device, as reported by cudaGetDevice.
    int device_num = 0;

    // Number of floats in the test buffer and its size in bytes.
    const size_t mem_size = 100 * 1000000;
    const size_t mem_bytes = mem_size * sizeof(float);

    // Host staging buffer; the trailing () value-initialises every element to 0.f.
    float* h_P = new float[mem_size]();

    // Baseline for device 0.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaDeviceReset(), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_0, &mem_tot_0), cudaSuccess);
    std::cout<<"Free memory before copy dev 0: "<<mem_free_0<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    // Baseline for device 1.
    BOOST_CHECK_EQUAL(cudaSetDevice(1), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaDeviceReset(), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_1, &mem_tot_1), cudaSuccess);
    std::cout<<"Free memory before copy dev 1: "<<mem_free_1<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    // Allocate and fill the buffer on device 0 only. P starts as a null pointer
    // so that a failed cudaMalloc never leaves an indeterminate pointer behind.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    float* P = 0;
    BOOST_CHECK_EQUAL(cudaMalloc((void**)&P, mem_bytes), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemcpy((void*)P, h_P, mem_bytes, cudaMemcpyHostToDevice), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_0, &mem_tot_0), cudaSuccess);
    std::cout<<"Free memory after copy  dev 0: "<<mem_free_0<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    BOOST_CHECK_EQUAL(cudaSetDevice(1), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_1, &mem_tot_1), cudaSuccess);
    std::cout<<"Free memory after copy dev 1: "<<mem_free_1<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    // The allocation was made on device 0 only, so the two devices are
    // expected to report different amounts of free memory.
    BOOST_CHECK(mem_free_0 != mem_free_1);

    // Report any error still pending in the runtime, and fail the test on it
    // instead of only printing a message.
    const cudaError_t err = cudaGetLastError();
    if(err != cudaSuccess)
        std::cout<<"an error occurred: "<<cudaGetErrorString(err)<<std::endl;
    BOOST_CHECK_EQUAL(err, cudaSuccess);

    // Reset only device 0 and check both
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaDeviceReset(), cudaSuccess);
    // The reset destroys the resources of device 0 in this process, which
    // includes the buffer behind P; clear the pointer so it cannot be reused.
    P = 0;
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_0, &mem_tot_0), cudaSuccess);
    std::cout<<"Free memory after second reset of device 0, dev 0: "<<mem_free_0<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    BOOST_CHECK_EQUAL(cudaSetDevice(1), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_1, &mem_tot_1), cudaSuccess);
    std::cout<<"Free memory after second device reset of device 0, dev 1: "<<mem_free_1<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    delete [] h_P;
}
1>  Free memory before copy dev 0: 1794379776 Device: 0
1>  Free memory before copy dev 1: 1751728128 Device: 1
1>  Free memory after copy  dev 0: 1351696384 Device: 0
1>  Free memory after copy dev 1: 1351696384 Device: 1
1>  CudaUtilsTest.cpp(353): error in "cudaMemoryTest": check mem_free_0 != mem_free_1 failed
1>  Free memory after second reset of device 0, dev 0: 1751728128 Device: 0
1>  Free memory after second device reset of device 0, dev 1: 1751728128 Device: 1