CUDA:重新分配内存时出现无效的设备指针错误_Cuda

CUDA:重新分配内存时出现无效的设备指针错误

cuda

CUDA:重新分配内存时出现无效的设备指针错误,cuda,Cuda,在下面的代码中，我只是从main连续两次调用函数foo。该函数只进行设备内存分配，然后递增该指针，随后返回main。第一次调用foo时，内存分配正确。但正如输出所示，当我再次调用foo时，cuda内存分配失败，并出现“无效设备指针”（invalid device pointer）错误。我尝试在两次foo调用之间使用cudaThreadSynchronize（），但没有效果。为什么内存分配会失败？实际上这个错误是由 matrixd+=3 引起的，因为如果我不这样做，错误就会消失。但是，即使我调用了cudaFree（），为什么仍会出错？请帮助我

在下面的代码中，我只是从main连续两次调用函数foo。该函数只进行设备内存分配，然后递增该指针，随后返回main函数。

第一次调用foo时，内存分配正确。但正如输出所示，当我再次调用foo时，cuda内存分配失败，并出现“无效设备指针”（invalid device pointer）错误。

我尝试在两次foo调用之间使用cudaThreadSynchronize（），但没有效果。为什么内存分配会失败？

实际上，这个错误是由下面这条语句引起的：

matrixd+=3
因为如果我不这样做，错误就会消失。
但是，即使我调用了cudaFree（），为什么仍会出错？
请帮助我理解这一点。
我的输出在这里

Calling foo for the first time Allocation of matrixd passed: I came back to main safely :-) I am going back to foo again :-) Allocation of matrixd failed, the reason is: invalid device pointer

#include <stdio.h>
#include <cstdlib>   // malloc(), free()
#include <iostream>  // cout, stream
#include <math.h>
#include <ctime>     // time(), clock()
#include <bitset>

// Declaration only; the definition is not part of this snippet.
bool foo();

/**
 * Driver: calls foo() twice in a row, printing a progress message around
 * each call, then waits for a key press before exiting.
 *
 * The bool returned by foo() is ignored on both calls, so main() itself
 * never reacts to a failure reported by foo().
 */
int main()
{
    // First call.
    std::cout << "Calling foo for the first time" << std::endl;
    foo();

    // Second call, made immediately after the first one returns.
    std::cout << "I came back to main safely :-) " << std::endl;
    std::cout << "I am going back to foo again :-) " << std::endl;
    foo();

    // Keep the console open until a character is read from stdin.
    getchar();
    return 0;
}

#include <cuda.h>
#include <cuda_runtime_api.h>
#include <device_launch_parameters.h>
#include <iostream>

/**
 * Allocates a 9-float buffer on the device, advances the pointer by
 * 3 floats three times, and then hands the advanced pointer to cudaFree().
 *
 * Returns true when cudaGetLastError() reports cudaSuccess right after the
 * cudaMalloc() call, false otherwise.
 *
 * NOTE: this snippet reproduces the problem being asked about. The pointer
 * passed to the final cudaFree() is no longer the value cudaMalloc()
 * returned, so that cudaFree() is given an address that was never
 * allocated. Its return status is not checked here, so the failure only
 * shows up when cudaGetLastError() is queried on the next call to foo().
 * To avoid it, keep the pointer returned by cudaMalloc() and pass that
 * unchanged value to cudaFree().
 */
bool foo()
{
    // Size of the matrix in bytes.
    int bytes = 9 * sizeof(float);

    // Device pointer for the matrix.
    float *matrixd = NULL;

    // Allocate device memory; the return value of cudaMalloc() is discarded.
    cudaMalloc((void**) &matrixd, bytes);

    // cudaGetLastError() returns the most recent runtime error, which is not
    // necessarily one produced by the cudaMalloc() call above.
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess)
    {
        std::cout << "Allocation of matrixd failed, the reason is: "
                  << cudaGetErrorString(status) << std::endl;
        cudaFree(matrixd);  // release whatever matrixd currently holds
        return false;
    }
    std::cout << "Allocation of matrixd passed: " << std::endl;

    // Advance the pointer: 3 iterations x 3 floats = 9 floats past the base.
    for (int i = 0; i < 3; i++)
    {
        matrixd += 3;
    }

    // Free device memory. matrixd is no longer the allocated base address.
    cudaFree(matrixd);
    return true;
}
我的main（）在这里

Calling foo for the first time Allocation of matrixd passed: I came back to main safely :-) I am going back to foo again :-) Allocation of matrixd failed, the reason is: invalid device pointer

#include <stdio.h>
#include <cstdlib>   // malloc(), free()
#include <iostream>  // cout, stream
#include <math.h>
#include <ctime>     // time(), clock()
#include <bitset>

// Declaration only; the definition is not part of this snippet.
bool foo();

/**
 * Driver: calls foo() twice in a row, printing a progress message around
 * each call, then waits for a key press before exiting.
 *
 * The bool returned by foo() is ignored on both calls, so main() itself
 * never reacts to a failure reported by foo().
 */
int main()
{
    // First call.
    std::cout << "Calling foo for the first time" << std::endl;
    foo();

    // Second call, made immediately after the first one returns.
    std::cout << "I came back to main safely :-) " << std::endl;
    std::cout << "I am going back to foo again :-) " << std::endl;
    foo();

    // Keep the console open until a character is read from stdin.
    getchar();
    return 0;
}

#include <cuda.h>
#include <cuda_runtime_api.h>
#include <device_launch_parameters.h>
#include <iostream>

/**
 * Allocates a 9-float buffer on the device, advances the pointer by
 * 3 floats three times, and then hands the advanced pointer to cudaFree().
 *
 * Returns true when cudaGetLastError() reports cudaSuccess right after the
 * cudaMalloc() call, false otherwise.
 *
 * NOTE: this snippet reproduces the problem being asked about. The pointer
 * passed to the final cudaFree() is no longer the value cudaMalloc()
 * returned, so that cudaFree() is given an address that was never
 * allocated. Its return status is not checked here, so the failure only
 * shows up when cudaGetLastError() is queried on the next call to foo().
 * To avoid it, keep the pointer returned by cudaMalloc() and pass that
 * unchanged value to cudaFree().
 */
bool foo()
{
    // Size of the matrix in bytes.
    int bytes = 9 * sizeof(float);

    // Device pointer for the matrix.
    float *matrixd = NULL;

    // Allocate device memory; the return value of cudaMalloc() is discarded.
    cudaMalloc((void**) &matrixd, bytes);

    // cudaGetLastError() returns the most recent runtime error, which is not
    // necessarily one produced by the cudaMalloc() call above.
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess)
    {
        std::cout << "Allocation of matrixd failed, the reason is: "
                  << cudaGetErrorString(status) << std::endl;
        cudaFree(matrixd);  // release whatever matrixd currently holds
        return false;
    }
    std::cout << "Allocation of matrixd passed: " << std::endl;

    // Advance the pointer: 3 iterations x 3 floats = 9 floats past the base.
    for (int i = 0; i < 3; i++)
    {
        matrixd += 3;
    }

    // Free device memory. matrixd is no longer the allocated base address.
    cudaFree(matrixd);
    return true;
}
第44行是cudaFree（）。为什么它仍然失败？

// Wraps a CUDA runtime call and reports a failure together with the call site.
// The do { } while (0) form makes the macro expand to exactly one statement,
// so `if (c) gpuErrchk(x); else ...` parses as intended.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

/**
 * Checks the status of a CUDA runtime call.
 *
 * code  - status returned by the call being checked.
 * file  - source file of the call site (supplied by gpuErrchk).
 * line  - source line of the call site (supplied by gpuErrchk).
 * abort - when true (the default), terminate the process with `code` as the
 *         exit status after printing the message.
 *
 * Does nothing when code == cudaSuccess; otherwise prints the error string,
 * file and line to stderr.
 */
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}

/**
 * GPU function for the direct method (the original comment reads
 * "Gross Jorden", presumably Gauss-Jordan).
 *
 * Allocates a 9-float device buffer, advances the pointer by one float and
 * passes the advanced pointer to cudaFree(), with both calls checked through
 * gpuErrchk. Returns true if both calls succeed.
 *
 * NOTE: kept as posted to reproduce the failure being asked about. The value
 * passed to cudaFree() is not the value cudaMalloc() returned, so cudaFree()
 * is asked to release an address that was never allocated, and gpuErrchk
 * reports the error at that line. To fix it, pass the unmodified pointer
 * returned by cudaMalloc() to cudaFree().
 */
bool foo()
{
    // Number of bytes in the matrix.
    int bytes = 9 * sizeof(float);

    // Pointer to the device array.
    float *matrixd = NULL;

    // Allocate memory on the device to store the matrix.
    gpuErrchk( cudaMalloc((void**) &matrixd, bytes));
    //cudaMemset(outputMatrixd, 0, bytes);

    std::cout << "Allocation of matrixd passed: "<<std::endl;

    // Increment the address by one float.
    matrixd += 1;

    std::cout << "Increamented the pointer and going to free cuda memory: "<<std::endl;

    // Free device memory (fails: matrixd is no longer the allocated base).
    gpuErrchk( cudaFree(matrixd));

    return true;
}

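#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}
// GPU function for direct method Gross Jorden method.
bool foo( )
{
    // Error return value
    cudaError_t status;
    // Number of bytes in the matrix.
    int bytes = 9 *sizeof(float);
    // Pointers to the device arrays
    float *matrixd=NULL;
    // Allocate memory on the device to store each matrix
    gpuErrchk( cudaMalloc((void**) &matrixd, bytes));
    //cudaMemset(outputMatrixd, 0, bytes);
    std::cout << "Allocation of matrixd passed: "<<std::endl;
    ////// Incerament address
    matrixd += 1;
    std::cout << "Increamented the pointer and going to free cuda memory: "<<std::endl;
    // Free device memory
    gpuErrchk( cudaFree(matrixd));
    return true;
}

真正的问题在于以下代码：

for (int i=0; i<3; i++){ matrixd += 3; }
// Free device memory
cudaFree(matrixd);

评论：

如果您检查 cudaFree 调用的返回状态，会发生什么情况？

@talonmes 您是对的，我刚刚检查过：我在 cudaFree 之后调用了 cudaGetLastError（），它确实显示 cudaFree 失败了。但这又是为什么？

是的。因此，您的问题基本上是由不完整的错误检查引起的。您可以看到如何正确执行此操作。内存分配并没有失败。

我将在链接中查看您的答案，但您确定释放内存时没有错误吗（cudaGetLastError 报告了一个错误）？

您的错误检查方法非常简洁。请查看我的更新。我认为我的错误在于试图在主机函数中递增设备指针。我猜这是不允许的，所以 cudaFree 对此不满意。事实上，主机函数中的 matrixd++ 会指向主机上的某些垃圾数据，而不是设备内存。

@user3891236：我已经告诉过您问题是什么了。您不能释放一个没有被分配过的地址。“递增”指针本身完全没有问题（尽管在这种情况下毫无意义），但要求 API 释放递增后的指针是非法的，因为 API 从未以该指针值分配过内存。

非常感谢您消除了我的疑虑。今天我从您那里学到了很多东西，包括在 CUDA 中检查错误有多么重要！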