CUDA 错误:“blockIdx”未在此范围中声明

CUDA 错误:“blockIdx”未在此范围中声明,cuda,gpu,Cuda,Gpu,我正在尝试用 CUDA 编写一个 GPU 程序。以下是我的函数: __global__ static void histogram_gpu(int * hist_out, unsigned char * img_in, int img_size, int nbr_bin){ int i; const int bid = blockIdx.x; const int tid = threadIdx.x; // for ( i = 0; i < img_size; i ++){

我正在尝试用 CUDA 编写一个 GPU 程序。以下是我的函数:

/*
 * Accumulates a histogram of the byte values stored in img_in.
 *
 *   hist_out  counters indexed by pixel value; each visited pixel bumps
 *             hist_out[img_in[i]] by one
 *   img_in    input pixels, one unsigned char each
 *   img_size  number of entries of img_in to visit
 *   nbr_bin   not read by this kernel
 *
 * The serial equivalent is:
 *     for (i = 0; i < img_size; i++) hist_out[img_in[i]]++;
 *
 * NOTE(review): the increment below is a plain read-modify-write. If two
 * threads reach the same bin at the same time a count can be lost; an atomic
 * add would avoid that but is not available on compute capability 1.0.
 * Confirm against the launch configuration and target architectures.
 */
__global__ static void
histogram_gpu(int * hist_out, unsigned char * img_in, int img_size, int nbr_bin){
    // Distance between two consecutive pixels handled by the same thread.
    const int step = BLOCK_NUM*THREAD_NUM;

    // First pixel for this thread, derived from its block and thread index.
    int pos = blockIdx.x*THREAD_NUM + threadIdx.x;

    while (pos < img_size) {
        hist_out[img_in[pos]]++;
        pos += step;
    }
}
我在 Mac 电脑上使用 CUDA 5.0,下面是我的 Makefile:

# Host OS name in upper and lower case.
# The tr character classes are quoted so the shell cannot glob-expand them
# against single-character file names in the working directory, and ':=' is
# used so each command runs once at parse time instead of on every reference.
OSUPPER := $(shell uname -s 2>/dev/null | tr '[:lower:]' '[:upper:]')
OSLOWER := $(shell uname -s 2>/dev/null | tr '[:upper:]' '[:lower:]')

# Flags to detect 32-bit or 64-bit OS platform (derived from `uname -m`)
OS_SIZE := $(shell uname -m | sed -e "s/i.86/32/" -e "s/x86_64/64/")
OS_ARCH := $(shell uname -m | sed -e "s/i386/i686/")

# Passing i386=1 or x86_64=1 to make overrides the detected values
ifeq ($(i386),1)
    OS_SIZE := 32
    OS_ARCH := i686
endif

ifeq ($(x86_64),1)
    OS_SIZE := 64
    OS_ARCH := x86_64
endif

# Non-empty on Mac OSX (darwin), empty on any other system
DARWIN := $(strip $(findstring DARWIN, $(OSUPPER)))

# CUDA Toolkit locations. Every value uses '?=' so it can be overridden from
# the environment or the make command line.
CUDA_PATH ?= /Developer/NVIDIA/CUDA-5.0
CUDA_BIN_PATH ?= $(CUDA_PATH)/bin
CUDA_INC_PATH ?= $(CUDA_PATH)/include

# Library directory: lib on Darwin and on 32-bit hosts, lib64 otherwise.
ifneq ($(DARWIN),)
CUDA_LIB_PATH ?= $(CUDA_PATH)/lib
else ifeq ($(OS_SIZE),32)
CUDA_LIB_PATH ?= $(CUDA_PATH)/lib
else
CUDA_LIB_PATH ?= $(CUDA_PATH)/lib64
endif

# Compiler drivers
GCC ?= g++
NVCC ?= $(CUDA_BIN_PATH)/nvcc

# Additional flags supplied by the user
EXTRA_LDFLAGS ?=
EXTRA_NVCCFLAGS ?=

# CUDA code generation flags: one -gencode pair per target architecture.
GENCODE_SM10    := -gencode arch=compute_10,code=sm_10
GENCODE_SM20    := -gencode arch=compute_20,code=sm_20
GENCODE_SM30    := -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35

# Combined list handed to nvcc (assigned once; the repeated identical
# assignment has been dropped).
GENCODE_FLAGS   := $(GENCODE_SM10) $(GENCODE_SM20) $(GENCODE_SM30)

# OS-specific build flags
# The per-OS selection is disabled; the settings of its 64-bit, non-Darwin
# branch are applied unconditionally. The disabled alternatives were:
#   Darwin : LDFLAGS := -Xlinker -rpath $(CUDA_LIB_PATH) -L$(CUDA_LIB_PATH) -lcudart -lcublas -lcuda -lcufft -ltlshook
#            CCFLAGS := -arch $(OS_ARCH)
#   32-bit : LDFLAGS := -L$(CUDA_LIB_PATH) -lcudart
#            CCFLAGS := -m32
CCFLAGS := -m64
LDFLAGS := -L$(CUDA_LIB_PATH)  -lcudart -lcublas -lcuda -lcufft -ltlshook

# OS-architecture specific flags: -m32 only when OS_SIZE is exactly 32
ifneq ($(OS_SIZE),32)
NVCCFLAGS := -m64
else
NVCCFLAGS := -m32
endif

# Build type: release unless make is invoked with dbg=1
ifneq ($(dbg),1)
TARGET := release
else
CCFLAGS += -g
NVCCFLAGS += -g -G
TARGET := debug
endif


# Common includes and paths for CUDA
INCLUDES      := -I$(CUDA_INC_PATH) -I. -I.. -I../../common/inc
# Alternative include set, currently disabled:
# INCLUDES      := -I$(CUDA_INC_PATH) -I. -I.. -I$(CUDA_INC_PATH)/samples/common/inc/


# Add source files here
EXECUTABLE  := 5kk70-assignment-gpu
# CUDA source files. nvcc chooses how to compile an input from its suffix:
# only .cu files are compiled as CUDA, so any code that uses __global__,
# blockIdx or threadIdx has to live in one of these files.
CUFILES     := histogram-equalization.cu contrast-enhancement.cu
# Host-only C/C++ source files (nvcc hands these to the host compiler)
CCFILES     := main.cpp

################################################################################
# Rules and targets
# All Phony Targets
.PHONY : everything clean

# Default starting position
everything : $(EXECUTABLE)

# Compile and link all sources in a single nvcc invocation. NVCCFLAGS and
# EXTRA_NVCCFLAGS are passed so that the -m32/-m64 choice and dbg=1 take
# effect. Recipe lines must begin with a TAB character.
$(EXECUTABLE) : $(CCFILES) $(CUFILES)
	$(NVCC) $(NVCCFLAGS) $(EXTRA_NVCCFLAGS) $(GENCODE_FLAGS) $(INCLUDES) -o $@ $^ $(LDFLAGS) $(EXTRA_LDFLAGS)

# Remove the build product; this Makefile keeps no intermediate object files
clean :
	$(RM) $(EXECUTABLE)

我的代码有什么问题?

当您把 CUDA 代码写在扩展名为 .cpp 的文件中并进行编译时,就会出现此问题。将文件重命名为 .cu 后,编译器就不会再报这个错误了。

在 Bazel 构建规则中,可以尝试将 .cu.cc 文件放入 hdrs 而不是 srcs。

您的代码中是否包含了 cuda_runtime.h?您的 main() 函数是什么样的?
@sgar91:这里不需要包含 cuda_runtime.h,因为 main.cpp 是由 nvcc 编译的。内核定义是否位于 main.cpp 中?
我在使用 Qt 时遇到过类似的问题,我的错误在于没有使用 .h 文件,而是试图直接导入 .cu 文件。
# Host OS name in upper and lower case.
# The tr character classes are quoted so the shell cannot glob-expand them
# against single-character file names in the working directory, and ':=' is
# used so each command runs once at parse time instead of on every reference.
OSUPPER := $(shell uname -s 2>/dev/null | tr '[:lower:]' '[:upper:]')
OSLOWER := $(shell uname -s 2>/dev/null | tr '[:upper:]' '[:lower:]')

# Flags to detect 32-bit or 64-bit OS platform (derived from `uname -m`)
OS_SIZE := $(shell uname -m | sed -e "s/i.86/32/" -e "s/x86_64/64/")
OS_ARCH := $(shell uname -m | sed -e "s/i386/i686/")

# Passing i386=1 or x86_64=1 to make overrides the detected values
ifeq ($(i386),1)
    OS_SIZE := 32
    OS_ARCH := i686
endif

ifeq ($(x86_64),1)
    OS_SIZE := 64
    OS_ARCH := x86_64
endif

# Non-empty on Mac OSX (darwin), empty on any other system
DARWIN := $(strip $(findstring DARWIN, $(OSUPPER)))

# CUDA Toolkit locations. Every value uses '?=' so it can be overridden from
# the environment or the make command line.
CUDA_PATH ?= /Developer/NVIDIA/CUDA-5.0
CUDA_BIN_PATH ?= $(CUDA_PATH)/bin
CUDA_INC_PATH ?= $(CUDA_PATH)/include

# Library directory: lib on Darwin and on 32-bit hosts, lib64 otherwise.
ifneq ($(DARWIN),)
CUDA_LIB_PATH ?= $(CUDA_PATH)/lib
else ifeq ($(OS_SIZE),32)
CUDA_LIB_PATH ?= $(CUDA_PATH)/lib
else
CUDA_LIB_PATH ?= $(CUDA_PATH)/lib64
endif

# Compiler drivers
GCC ?= g++
NVCC ?= $(CUDA_BIN_PATH)/nvcc

# Additional flags supplied by the user
EXTRA_LDFLAGS ?=
EXTRA_NVCCFLAGS ?=

# CUDA code generation flags: one -gencode pair per target architecture.
GENCODE_SM10    := -gencode arch=compute_10,code=sm_10
GENCODE_SM20    := -gencode arch=compute_20,code=sm_20
GENCODE_SM30    := -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35

# Combined list handed to nvcc (assigned once; the repeated identical
# assignment has been dropped).
GENCODE_FLAGS   := $(GENCODE_SM10) $(GENCODE_SM20) $(GENCODE_SM30)

# OS-specific build flags
# The per-OS selection is disabled; the settings of its 64-bit, non-Darwin
# branch are applied unconditionally. The disabled alternatives were:
#   Darwin : LDFLAGS := -Xlinker -rpath $(CUDA_LIB_PATH) -L$(CUDA_LIB_PATH) -lcudart -lcublas -lcuda -lcufft -ltlshook
#            CCFLAGS := -arch $(OS_ARCH)
#   32-bit : LDFLAGS := -L$(CUDA_LIB_PATH) -lcudart
#            CCFLAGS := -m32
CCFLAGS := -m64
LDFLAGS := -L$(CUDA_LIB_PATH)  -lcudart -lcublas -lcuda -lcufft -ltlshook

# OS-architecture specific flags: -m32 only when OS_SIZE is exactly 32
ifneq ($(OS_SIZE),32)
NVCCFLAGS := -m64
else
NVCCFLAGS := -m32
endif

# Build type: release unless make is invoked with dbg=1
ifneq ($(dbg),1)
TARGET := release
else
CCFLAGS += -g
NVCCFLAGS += -g -G
TARGET := debug
endif


# Common includes and paths for CUDA
INCLUDES      := -I$(CUDA_INC_PATH) -I. -I.. -I../../common/inc
# Alternative include set, currently disabled:
# INCLUDES      := -I$(CUDA_INC_PATH) -I. -I.. -I$(CUDA_INC_PATH)/samples/common/inc/


# Add source files here
EXECUTABLE  := 5kk70-assignment-gpu
# CUDA source files. nvcc chooses how to compile an input from its suffix:
# only .cu files are compiled as CUDA, so any code that uses __global__,
# blockIdx or threadIdx has to live in one of these files.
CUFILES     := histogram-equalization.cu contrast-enhancement.cu
# Host-only C/C++ source files (nvcc hands these to the host compiler)
CCFILES     := main.cpp

################################################################################
# Rules and targets
# All Phony Targets
.PHONY : everything clean

# Default starting position
everything : $(EXECUTABLE)

# Compile and link all sources in a single nvcc invocation. NVCCFLAGS and
# EXTRA_NVCCFLAGS are passed so that the -m32/-m64 choice and dbg=1 take
# effect. Recipe lines must begin with a TAB character.
$(EXECUTABLE) : $(CCFILES) $(CUFILES)
	$(NVCC) $(NVCCFLAGS) $(EXTRA_NVCCFLAGS) $(GENCODE_FLAGS) $(INCLUDES) -o $@ $^ $(LDFLAGS) $(EXTRA_LDFLAGS)

# Remove the build product; this Makefile keeps no intermediate object files
clean :
	$(RM) $(EXECUTABLE)