CUDA Makefile包含错误
我正在尝试使用CUDA和C编写一个基本的矩阵乘法程序。代码本身现在并没有做任何事情,但至少应该编译。在对该问题进行了一些研究之后,我确定该问题是没有包含CUDA头文件,这表明我的Makefile存在问题。我对CUDA(以及C)非常缺乏经验,因此非常感谢您的帮助 命令上的输出:make matrixMult1CUDA Makefile包含错误,cuda,makefile,include,Cuda,Makefile,Include,我正在尝试使用CUDA和C编写一个基本的矩阵乘法程序。代码本身现在并没有做任何事情,但至少应该编译。在对该问题进行了一些研究之后,我确定该问题是没有包含CUDA头文件,这表明我的Makefile存在问题。我对CUDA(以及C)非常缺乏经验,因此非常感谢您的帮助 命令上的输出:make matrixMult1 c99 -I. -I/usr/local/cuda/include -c matrixMult1.c -o matrixMult1.o matrixMult1.c: In functi
c99 -I. -I/usr/local/cuda/include -c matrixMult1.c -o matrixMult1.o
matrixMult1.c: In function 'main':
matrixMult1.c:77: warning: implicit declaration of function 'cudaMalloc'
matrixMult1.c:82: warning: implicit declaration of function 'cudaMemcpy'
matrixMult1.c:83: error: 'cudaMemcpyHostToDevice' undeclared (first use in this
function)
matrixMult1.c:83: error: (Each undeclared identifier is reported only once
matrixMult1.c:83: error: for each function it appears in.)
matrixMult1.c:106: warning: implicit declaration of function 'cudaFree'
make: *** [matrixMult1.o] Error 1
生成文件:
GCC = c99
CUDA_INSTALL_PATH := /usr/local/cuda
INCLUDES := -I. -I$(CUDA_INSTALL_PATH)/include
CUDA_LIBS := -L$(CUDA_INSTALL_PATH)/lib -lcudart
matrixMult1.o: matrixMult1.c
$(GCC) $(INCLUDES) -c matrixMult1.c -o $@
matrixMult1: matrixMult1.o
$(GCC) -o $@ matrixMult1.o $(CUDA_LIBS)
C程序:
//********************************************************************
// matrixMult1.c
//
// A basic matrix multiplication program.
//********************************************************************
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "cuda.h"
#define WA 3
#define HA 3
#define WB 3
#define HB WA
#define WC WB
#define HC HA
void initMatrix(float * matrix, int numIndices);
//*************
// Main Program
//*************
int main(int argc, char** argv) {
/* Set random seed */
srand(2013);
/* Compute memory sizes for matrices A, B, and C */
unsigned int sizeA = WA * HA;
unsigned int sizeB = WB * HB;
unsigned int sizeC = WC * HC;
unsigned int memoryA = sizeof(float) * sizeA;
unsigned int memoryB = sizeof(float) * sizeB;
unsigned int memoryC = sizeof(float) * sizeC;
/* Allocate memory for matrices A, B, and C */
float * matrixA = (float *) malloc(memoryA);
float * matrixB = (float *) malloc(memoryB);
float * matrixC = (float *) malloc(memoryC);
/* Initialize matrices A and B */
initMatrix(matrixA, sizeA);
initMatrix(matrixB, sizeB);
/* Print matrix A */
printf("\nMatrix A:\n");
for (int i = 0; i < sizeA; i++) {
printf("%f ", matrixA[i]);
if (((i + 1) % WA) == 0) {
printf("\n");
} else {
printf(" | ");
}
}
/* Print matrix B */
printf("\nMatrix B:\n");
for (int i = 0; i < sizeB; i++) {
printf("%f ", matrixB[i]);
if (((i + 1) % WA) == 0) {
printf("\n");
} else {
printf(" | ");
}
}
/* Allocate device memory */
float* deviceMemA;
float* deviceMemB;
float* deviceMemC;
cudaMalloc((void**) &deviceMemA, memoryA);
cudaMalloc((void**) &deviceMemB, memoryB);
cudaMalloc((void**) &deviceMemC, memoryC);
/* Copy host memory to device */
cudaMemcpy(deviceMemA, matrixA, memoryA,
cudaMemcpyHostToDevice);
cudaMemcpy(deviceMemB, matrixB, memoryB,
cudaMemcpyHostToDevice);
cudaMemcpy(deviceMemC, matrixC, memoryC,
cudaMemcpyHostToDevice);
/* Print matrix C */
printf("\nMatrix C:\n");
for (int i = 0; i < sizeC; i++) {
printf("%f ", matrixC[i]);
if (((i + 1) % WC) == 0) {
printf("\n");
} else {
printf(" | ");
}
}
printf("\n");
/* Free up memory */
free(matrixA);
free(matrixB);
free(matrixC);
cudaFree(deviceMemA);
cudaFree(deviceMemB);
cudaFree(deviceMemC);
}
//--------------------------------------------------------------------
// initMatrix - Assigns a random float value to each indice of the
// matrix.
//
// PRE: matrix is a pointer to a block of bytes in memory; numIndices
// is the number of indicies in the matrix being instantiated.
// POST: Each index of the matrix has been instantiated with a random
// float value.
//--------------------------------------------------------------------
void initMatrix(float * matrix, int numIndices) {
/*
Loop through the block of bytes, assigning a random float
for each index of the matrix
*/
for (int i = 0; i < numIndices; ++i) {
/* Assign a random float between 0 and 1 at this byte */
matrix[i] = rand() / (float)RAND_MAX;
}
}
//********************************************************************
//矩阵结果1.c
//
//一个基本的矩阵乘法程序。
//********************************************************************
#包括
#包括
#包括
#包括“cuda.h”
#定义WA 3
#定义HA 3
#定义WB3
#定义HB WA
#定义WC WB
#定义HC HA
void initMatrix(浮点*矩阵,整数numIndices);
//*************
//主程序
//*************
int main(int argc,字符**argv){
/*设定随机种子*/
斯兰德(2013);
/*计算矩阵A、B和C的内存大小*/
无符号整数sizeA=WA*HA;
无符号整数sizeB=WB*HB;
无符号整数sizeC=WC*HC;
无符号整数内存=sizeof(float)*sizeA;
无符号整数内存b=sizeof(float)*sizeB;
无符号整数内存c=sizeof(float)*sizeC;
/*为矩阵A、B和C分配内存*/
float*matrixA=(float*)malloc(memoryA);
float*matrixB=(float*)malloc(memoryB);
float*matrixC=(float*)malloc(memoryC);
/*初始化矩阵A和B*/
initMatrix(matrixA,sizeA);
initMatrix(matrixB,sizeB);
/*打印矩阵A*/
printf(“\n矩阵A:\n”);
对于(int i=0;i
这里有两个问题:
请注意,这些更改还抢占了链接期间因未链接CUDA运行时库而导致的丢失符号错误。请注意,根据您使用的是32位还是64位主机操作系统,您可能需要将库路径更改为
$(CUDA_INSTALL_path)/lib64
,链接才能正常工作。CUDA程序需要由nvcc
编译。虽然您的程序还没有包含任何CUDA内核,但我相信这正是您想要实现的
将文件从
matrixMult1.c
重命名为matrixMult1.cu
,删除#include“cuda.h”
行(使用nvcc
编译的程序不需要任何特定于cuda的include),并使用nvcc
而不是gcc
进行编译(例如,在Makefile的开头设置gcc=nvcc
).Makefile没有问题,它是您的代码。您的代码中没有包含CUDA的任何标题。编译器抱怨未定义的常量真的应该令人惊讶吗?在代码中添加#include
。我尝试了cuda.h和cuda_runtime.h,都导致了“没有这样的文件或目录”错误。我还检查了文件是否在/usr/local/cuda/include/中。我在stackoverflow上编辑了这里的代码,以反映我的更改和对错误输出的更改。我想知道这是如何发生的matrixMult1.c:81:警告:函数“cudaMemcopy”的隐式声明发生的。您是否手动写入make的输出?顺便说一下,我看不出您在编译中使用了includes<代码>c99-c矩阵结果1.c
哈哈,你明白我的意思了。我确实手动编写了输出。我不得不从另一台没有互联网连接的机器上转录。我用这个MakefileRevisi的结果更新了这篇文章
GCC = c99
CUDA_INSTALL_PATH := /usr/local/cuda
INCLUDES := -I. -I$(CUDA_INSTALL_PATH)/include
CUDA_LIBS := -L$(CUDA_INSTALL_PATH)/lib -lcudart
matrixMult1.o: matrixMult1.c
$(GCC) $(INCLUDES) -c matrixMult1.c -o $@
matrixMult1: matrixMult1.o
$(GCC) -o $@ matrixMult1.o $(CUDA_LIBS)