CUDA 6.5：无法打印外部（extern）纹理的值_Cuda_Textures_Extern

CUDA 6.5：无法打印外部（extern）纹理的值

cuda

CUDA6.5 Can'；t外部纹理的值,cuda,textures,extern,Cuda,Textures,Extern,我在年编写了一个程序来跟踪JackOlantem的答案，但是我的结果没有打印extern纹理声明的值 P/s：如何添加-rdc=true以启用外部链接？节目的结果！谢谢你的帮助！！。 kernel.cu编译单元 #include <stdio.h> texture<int, 1, cudaReadModeElementType> texture_test; /********************/ /* CUDA ERROR CHECK */ /*******

我按照 JackOLantern 的答案编写了一个程序，但是运行结果没有打印出用 extern 声明的纹理的值。另外：如何添加 -rdc=true 以启用外部链接？这是程序的运行结果！谢谢你的帮助！ kernel.cu 编译单元

#include <stdio.h>

// 1D texture reference over int elements, read in element-type mode
// (cudaReadModeElementType). main() binds it to device memory with cudaBindTexture.
texture<int, 1, cudaReadModeElementType> texture_test;

/********************/
/* CUDA ERROR CHECK */
/********************/
// Wraps a CUDA runtime call and reports a failure together with the call site.
// The do { } while (0) wrapper makes the macro expand to exactly one statement,
// so `gpuErrchk(x);` stays correct inside an unbraced if/else body.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

/**
 * Reports a failed CUDA runtime call on stderr.
 *
 * code  - status returned by the CUDA runtime call being checked
 * file  - source file of the call site (gpuErrchk passes __FILE__)
 * line  - source line of the call site (gpuErrchk passes __LINE__)
 * abort - when true (the default) the process exits, using `code` as exit status
 *
 * Does nothing when `code` is cudaSuccess.
 */
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
   if (code != cudaSuccess)
   {
      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
      if (abort) exit(code);
   }
}

/*************************/
/* LOCAL KERNEL FUNCTION */
/*************************/
// Each thread fetches the texture_test element at its own threadIdx.x and
// prints it next to its thread index. Only threadIdx.x is used for indexing,
// so the block index plays no part in which element is read.
__global__ void kernel1() {

    const unsigned int lane = threadIdx.x;

    // Fetch first, then print, so the texture read is visible as its own step.
    const int texel = tex1Dfetch(texture_test, lane);

    printf("ThreadID = %i; Texture value = %i\n", lane, texel);

}

// Forward declaration only; no body is given in this compilation unit.
// NOTE(review): the definition is expected in a separate compilation unit — confirm it is linked in.
__global__ void kernel2();

/********/
/* MAIN */
/********/
/**
 * Fills a host array with 0..N-1, copies it to the device, binds it to
 * texture_test and launches kernel1 and kernel2 so that each prints the
 * texture contents. All host and device resources are released before exit.
 *
 * Returns 0 on success and 1 if the host allocation fails; any CUDA failure
 * terminates the process through gpuErrchk.
 */
int main() {

    const int N = 16;

    // --- Host data allocation and initialization
    int *h_data = (int*)malloc(N * sizeof(int));
    if (h_data == NULL) {
        fprintf(stderr, "Host allocation of %d ints failed\n", N);
        return 1;
    }
    for (int i=0; i<N; i++) h_data[i] = i;

    // --- Device data allocation and host->device memory transfer
    int *d_data = NULL;
    gpuErrchk(cudaMalloc((void**)&d_data, N * sizeof(int)));
    gpuErrchk(cudaMemcpy(d_data, h_data, N * sizeof(int), cudaMemcpyHostToDevice));

    gpuErrchk(cudaBindTexture(NULL, texture_test, d_data, N * sizeof(int)));

    // One thread per element: the launch width follows N so that every fetch
    // stays inside the bound range if N is ever changed.
    kernel1<<<1, N>>>();
    gpuErrchk(cudaPeekAtLastError());      // launch-configuration errors
    gpuErrchk(cudaDeviceSynchronize());    // errors raised while the kernel ran

    kernel2<<<1, N>>>();
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());

    gpuErrchk(cudaUnbindTexture(texture_test));

    // --- Release device and host memory
    gpuErrchk(cudaFree(d_data));
    free(h_data);

    return 0;
}

#include <stdio.h>

// Declaration only: refers to a texture_test reference that is defined elsewhere.
// NOTE(review): the discussion accompanying this code says cross-unit device linkage
// needs relocatable device code (-rdc=true) — confirm the build flags.
extern texture<int, 1, cudaReadModeElementType> texture_test;

/**********************************************/
/* DIFFERENT COMPILATION UNIT KERNEL FUNCTION */
/**********************************************/
// Each thread fetches the texture_test element at its own threadIdx.x and prints it.
// texture_test is only declared (extern) in this unit, not defined here.
__global__ void kernel2() {

    // Fetch first, then print, so the texture read is visible as its own step.
    const int texel = tex1Dfetch(texture_test, threadIdx.x);

    printf("Texture value = %i\n", texel);

}

#包括
质构检验；
/********************/
/*CUDA错误检查*/
/********************/
#定义gpuerchk（ans）{gpuAssert（（ans），_文件_，_行__）}
内联void gpuAssert（cudaError\u t代码，const char*文件，int行，bool abort=true）
{
如果（代码！=cudaSuccess）
{
fprintf（标准，“GPUassert:%s%s%d\n”，cudaGetErrorString（代码）、文件、行）；
如果（中止）退出（代码）；
}
}
/*************************/
/*局部核函数*/
/*************************/
__全局无效内核1（）{
printf（“ThreadID=%i；纹理值=%i\n”，threadIdx.x，tex1Dfetch（Texture\u test，threadIdx.x））；
}
__全局无效内核2（）；
/********/
/*主要*/
/********/
int main（）{
常数int N=16；
//---主机数据分配和初始化
int*h_data=（int*）malloc（N*sizeof（int））；
用于（int i=0；idevice内存传输）
int*d_数据；gpuerchk（cudamaloc（（void**）和d_数据，N*sizeof（int））；
gpuErrchk（cudaMemcpy（d_数据，h_数据，N*sizeof（int），cudaMemcpyHostToDevice））；
gpuErrchk（cudaBindTexture（NULL，texture_test，d_数据，N*sizeof（int））；
内核1（）；
gpuerchk（cudaPeekAtLastError（））；
gpuErrchk（cudaDeviceSynchronize（））；
内核2（）；
gpuerchk（cudaPeekAtLastError（））；
gpuErrchk（cudaDeviceSynchronize（））；
gpuErrchk（cudaUnbindTexture（texture_test））；
}

kernel2.cu 编译单元

#include <stdio.h>

// 1D texture reference over int elements, read in element-type mode
// (cudaReadModeElementType). main() binds it to device memory with cudaBindTexture.
texture<int, 1, cudaReadModeElementType> texture_test;

/********************/
/* CUDA ERROR CHECK */
/********************/
// Wraps a CUDA runtime call and reports a failure together with the call site.
// The do { } while (0) wrapper makes the macro expand to exactly one statement,
// so `gpuErrchk(x);` stays correct inside an unbraced if/else body.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

/**
 * Reports a failed CUDA runtime call on stderr.
 *
 * code  - status returned by the CUDA runtime call being checked
 * file  - source file of the call site (gpuErrchk passes __FILE__)
 * line  - source line of the call site (gpuErrchk passes __LINE__)
 * abort - when true (the default) the process exits, using `code` as exit status
 *
 * Does nothing when `code` is cudaSuccess.
 */
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
   if (code != cudaSuccess)
   {
      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
      if (abort) exit(code);
   }
}

/*************************/
/* LOCAL KERNEL FUNCTION */
/*************************/
// Each thread fetches the texture_test element at its own threadIdx.x and
// prints it next to its thread index. Only threadIdx.x is used for indexing,
// so the block index plays no part in which element is read.
__global__ void kernel1() {

    const unsigned int lane = threadIdx.x;

    // Fetch first, then print, so the texture read is visible as its own step.
    const int texel = tex1Dfetch(texture_test, lane);

    printf("ThreadID = %i; Texture value = %i\n", lane, texel);

}

// Forward declaration only; no body is given in this compilation unit.
// NOTE(review): the definition is expected in a separate compilation unit — confirm it is linked in.
__global__ void kernel2();

/********/
/* MAIN */
/********/
/**
 * Fills a host array with 0..N-1, copies it to the device, binds it to
 * texture_test and launches kernel1 and kernel2 so that each prints the
 * texture contents. All host and device resources are released before exit.
 *
 * Returns 0 on success and 1 if the host allocation fails; any CUDA failure
 * terminates the process through gpuErrchk.
 */
int main() {

    const int N = 16;

    // --- Host data allocation and initialization
    int *h_data = (int*)malloc(N * sizeof(int));
    if (h_data == NULL) {
        fprintf(stderr, "Host allocation of %d ints failed\n", N);
        return 1;
    }
    for (int i=0; i<N; i++) h_data[i] = i;

    // --- Device data allocation and host->device memory transfer
    int *d_data = NULL;
    gpuErrchk(cudaMalloc((void**)&d_data, N * sizeof(int)));
    gpuErrchk(cudaMemcpy(d_data, h_data, N * sizeof(int), cudaMemcpyHostToDevice));

    gpuErrchk(cudaBindTexture(NULL, texture_test, d_data, N * sizeof(int)));

    // One thread per element: the launch width follows N so that every fetch
    // stays inside the bound range if N is ever changed.
    kernel1<<<1, N>>>();
    gpuErrchk(cudaPeekAtLastError());      // launch-configuration errors
    gpuErrchk(cudaDeviceSynchronize());    // errors raised while the kernel ran

    kernel2<<<1, N>>>();
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());

    gpuErrchk(cudaUnbindTexture(texture_test));

    // --- Release device and host memory
    gpuErrchk(cudaFree(d_data));
    free(h_data);

    return 0;
}

#include <stdio.h>

// Declaration only: refers to a texture_test reference that is defined elsewhere.
// NOTE(review): the discussion accompanying this code says cross-unit device linkage
// needs relocatable device code (-rdc=true) — confirm the build flags.
extern texture<int, 1, cudaReadModeElementType> texture_test;

/**********************************************/
/* DIFFERENT COMPILATION UNIT KERNEL FUNCTION */
/**********************************************/
// Each thread fetches the texture_test element at its own threadIdx.x and prints it.
// texture_test is only declared (extern) in this unit, not defined here.
__global__ void kernel2() {

    // Fetch first, then print, so the texture read is visible as its own step.
    const int texel = tex1Dfetch(texture_test, threadIdx.x);

    printf("Texture value = %i\n", texel);

}

#包括
外部纹理试验；
/**********************************************/
/*不同编译单元的核函数*/
/**********************************************/
__全局无效内核2（）{
printf（“纹理值=%i\n”，tex1Dfetch（纹理测试，threadIdx.x））；
}

p/s：如何添加-rdc=true以启用外部链接

在 Nsight VSE 中，尝试将“属性 | CUDA C/C++ | Common | 生成可重定位设备代码（Generate Relocatable Device Code）”设置为“是”。

Nsight VSE 的文档中有一个页面对该设置进行了描述。

我成功地运行了 @JackOLantern 提供的代码。建议您不要通过外部链接在问题中包含重要内容——当这些链接失效时，问题就变得不那么有用了。另外，最好在问题中尽可能详细地说明您的平台（编译命令、GPU、CUDA 版本、操作系统 Linux/Windows），这样您的问题会更清楚。（例如，如果您使用的是 cc 1.x 的 GPU，就不能使用可重定位设备代码。）好的，谢谢你，对此我也很抱歉。我的问题中包含了代码，但是代码的排版很乱（例如正文格式），而我无法编辑；我是新成员，还不太清楚如何使用，我会尽力改进。