C++ CUDA + OpenGL 互操作性：glDrawArrays() 访问冲突_C++_C_Opengl_Cuda_Interop

C++ CUDA + OpenGL 互操作性：glDrawArrays() 访问冲突

c++ c opengl cuda

C++ Cuda和x2B；OpenGL互操作性，glDrawArrays（）访问冲突,c++,c,opengl,cuda,interop,C++,C,Opengl,Cuda,Interop,首先，我想为我在下面提供的代码数量道歉，我不确定，但我可能违反了发布规则我正在尝试修改nvidia的cuda示例fluidsGL（这里可以看到示例），这样我就可以加载自定义图像（如PNG），然后在其上应用流体解算器，以类似流体的方式有效地扭曲它，而不是修改随机像素在尝试了许多不同的实现之后，我发现使用两个VBO可能是可行的，一个用于float2粒子，将发送到流体解算器内核，另一个用于颜色，我用图像中的RGBA信息填充颜色但是我在glDrawArrays中遇到了访问冲突，我仍然不明白为什么在

首先，我想为我在下面提供的代码数量道歉，我不确定，但我可能违反了发布规则

我正在尝试修改nvidia的cuda示例fluidsGL（这里可以看到示例），这样我就可以加载自定义图像（如PNG），然后在其上应用流体解算器，以类似流体的方式有效地扭曲它，而不是修改随机像素

在尝试了许多不同的实现之后，我发现使用两个VBO可能是可行的，一个用于float2粒子，将发送到流体解算器内核，另一个用于颜色，我用图像中的RGBA信息填充颜色

但是我在glDrawArrays中遇到了访问冲突，我仍然不明白为什么。在以前的尝试中，我尝试使用一个VBO和一个名为vertex_data的结构，该结构包含一个float2和一个uchar4，通过glBufferData发送它，并让内核只计算其中的float2部分，但同样发生了访问冲突异常。

如果有人愿意帮我，我将不胜感激

// Shorthand for one 8-bit colour channel / raw file byte.
typedef unsigned char ubyte;

// Aliases for the CUDA function-space qualifiers.
#define DEVICE __device__
#define GLOBAL __global__

// Larger of two values. Both arguments and the whole expansion are
// parenthesised so that operator-bearing arguments (e.g. MAX(a | b, c))
// expand correctly. As with any function-like macro, each argument may be
// evaluated twice, so do not pass expressions with side effects.
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
// Simulation domain is DIM x DIM cells.
#define DIM 512
// Total number of cells / particles. Parenthesised so that expressions such
// as `n / DS` or `n % DS` treat it as a single value.
#define DS (DIM*DIM)

// Projection matrix: assigned from glm::ortho in main() and uploaded as the
// shader uniform "P" in display().
glm::mat4 m_mat;

// CUFFT plan handles: main() creates planr2c with CUFFT_R2C and planc2r with
// CUFFT_C2R; cleanup() destroys both.
cufftHandle planr2c;
cufftHandle planc2r;
// Temporary velocity buffers: main() allocates each with cudaMalloc
// (PDS float2 elements); run_simulation() hands them to the solver steps.
static float2 *vxfield = NULL;
static float2 *vyfield = NULL;

// Host velocity buffer: malloc'ed and zero-filled in main(), then copied to dvfield.
float2 *hvfield = NULL;
// Device velocity field: allocated with cudaMallocPitch in main(); the row
// pitch returned by that call is stored in tPitch.
float2 *dvfield = NULL;
// Window dimensions used by motion() to normalise mouse coordinates.
static int wWidth = MAX(512, DIM);
static int wHeight = MAX(512, DIM);

/*-----CUSTOM STRUCT-----------------------------------------------------*/

// Plain record with an OpenGL object name and two integer dimensions.
// NOTE(review): presumably describes a loaded texture (name + size in
// pixels); the only instance visible here (m_tex) is never read — confirm
// against the image-loading code.
struct GLTexture
{
    GLuint id;   // OpenGL object name
    int width;   // presumably pixels — TODO confirm
    int height;  // presumably pixels — TODO confirm
};


// Per-particle colour records. createVBO() fills data[i].col from the decoded
// image and uploads the whole array into colorsVBO. The vertex_data type is
// declared outside the visible source.
vertex_data data[DS];

// Fluid-solver entry points with C linkage. advectParticles is defined later
// in this listing; the other four are defined outside the visible source.
/*--------------------------------------------------------------------------------------------------------------------------------*/
extern "C" void addForces(float2 *v, int dx, int dy, int spx, int spy, float fx, float fy, int r);
extern "C" void advectVelocity(float2 *v, float *vx, float *vy, int dx, int pdx, int dy, float dt);
extern "C" void diffuseProject(float2 *vx, float2 *vy, int dx, int dy, float dt, float visc);
extern "C" void updateVelocity(float2 *v, float *vx, float *vy, int dx, int pdx, int dy);
extern "C" void advectParticles(GLuint vbo, float2 *v, int dx, int dy, float dt);
/*--------------------------------------------------------------------------------------------------------------------------------*/

// Shader program wrapper used by initShaders() and display().
GLSLProgram prog;
// NOTE(review): not referenced in the visible code (readFileToBuffer is
// called statically through IOManager::).
IOManager m_manager;
// NOTE(review): not referenced in the visible code.
GLTexture m_tex;
// Raw bytes of the PNG file as read from disk (input to decodePNG).
std::vector<ubyte> in_img;
// Output of decodePNG; createVBO() reads it four bytes per pixel.
std::vector<ubyte> out_img;
// NOTE(review): not referenced in the visible code.
vertex_data vData[6];


// Buffer holding the float2 particle positions; registered with CUDA in createVBO().
GLuint positionsVBO;
// Buffer holding the per-particle colour records uploaded from data[].
GLuint colorsVBO;
// CUDA handle for positionsVBO, obtained from cudaGraphicsGLRegisterBuffer.
cudaGraphicsResource* positionsVBO_CUDA;
// Host particle positions: allocated and initialised in main(), uploaded by createVBO().
float2 *particles = NULL;

// NOTE(review): not referenced in the visible code.
float2 *part_cuda = NULL;

// Last mouse position recorded by click()/motion().
int lastx = 0, lasty = 0;
// Toggled on every mouse-button event by click(); motion() applies forces only while non-zero.
int clicked = 0;
// Row pitch in bytes of dvfield, written by cudaMallocPitch in main().
size_t tPitch = 0;

// Draws the next value from rand() and scales it by RAND_MAX, giving a float
// in the closed interval [0, 1].
float myrand(void)
{
    const float sample = (float)rand();
    return sample / (float)RAND_MAX;
}

// Places one particle in each cell of a dx-by-dy grid, stored row-major in p.
// Each coordinate is the cell centre plus a jitter of myrand() - 0.5, divided
// by the grid size, so values lie in [0, 1].
//   p  - output array with room for dx * dy elements
//   dx - number of columns
//   dy - number of rows
void initParticles(float2 *p, int dx, int dy)
{
    for (int row = 0; row < dy; ++row)
    {
        float2 *rowStart = p + row * dx;

        for (int col = 0; col < dx; ++col)
        {
            // x is drawn before y so the sequence of myrand() calls is unchanged.
            const float jitterX = myrand() - 0.5f;
            rowStart[col].x = (col + 0.5f + jitterX) / dx;

            const float jitterY = myrand() - 0.5f;
            rowStart[col].y = (row + 0.5f + jitterY) / dy;
        }
    }
}




// GLUT keyboard callback: key code 27 (Escape) destroys the current window
// and terminates the process; every other key is ignored.
// The x/y cursor coordinates are unused.
void keyboard(unsigned char key, int x, int y)
{
    const unsigned char escapeKey = 27;

    if (key == escapeKey)
    {
        glutDestroyWindow(glutGetWindow());
        exit(0);
    }
}

// GLUT mouse-button callback: flips the `clicked` flag on every button event
// and records the cursor position as the start of the next drag segment.
// `button` and `updown` are not inspected.
void click(int button, int updown, int x, int y)
{
    clicked = clicked ? 0 : 1;
    lasty = y;
    lastx = x;
}

// GLUT mouse-motion callback. Maps the last recorded cursor position to a
// grid cell; while `clicked` is set and that cell is at least FR cells away
// from every border, applies a force proportional to the cursor displacement
// and records the new cursor position. Always requests a redraw.
void motion(int x, int y)
{
    // Last cursor position in grid coordinates.
    const float normX = lastx / (float)wWidth;
    const float normY = lasty / (float)wHeight;
    const int cellX = (int)(normX * DIM);
    const int cellY = (int)(normY * DIM);

    const bool insideX = cellX > FR - 1 && cellX < DIM - FR;
    const bool insideY = cellY > FR - 1 && cellY < DIM - FR;

    if (clicked && insideX && insideY)
    {
        // Cursor displacement since the last event, as a fraction of the window.
        const float dragX = (x - lastx) / (float)wWidth;
        const float dragY = (y - lasty) / (float)wHeight;

        addForces(dvfield, DIM, DIM, cellX - FR, cellY - FR,
                  FORCE * DT * dragX, FORCE * DT * dragY, FR);

        lastx = x;
        lasty = y;
    }

    glutPostRedisplay();
}

void cleanup(void)
{
    cudaGraphicsUnregisterResource(positionsVBO_CUDA);

    unbindTexture();
    deleteTexture();

    // Free all host and device resources
    free(hvfield);
    free(particles);
    cudaFree(dvfield);
    cudaFree(vxfield);
    cudaFree(vyfield);
    cufftDestroy(planr2c);
    cufftDestroy(planc2r);

    glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
    glDeleteBuffersARB(1, &positionsVBO);
}

// Runs one simulation step by calling the four solver entry points in a fixed
// order on the global velocity buffers, finishing with the particle advection
// that updates the position buffer.
void run_simulation(void){

    // The solver steps below are declared extern "C"; only advectParticles is
    // defined in the visible source.

    // Velocity steps: operate on dvfield using vxfield/vyfield as scratch buffers.
    advectVelocity(dvfield, (float *)vxfield, (float *)vyfield, DIM, RPADW, DIM, DT);
    diffuseProject(vxfield, vyfield, CPADW, DIM, DT, VIS);
    updateVelocity(dvfield, (float *)vxfield, (float *)vyfield, DIM, RPADW, DIM);
    // Moves the particles stored in the position buffer through the velocity field.
    advectParticles(positionsVBO, dvfield, DIM, DIM, DT);



}


// Builds the global shader program `prog`: compiles the vertex and fragment
// shader files, registers the two attribute names, then links.
// NOTE(review): presumably addAttribute assigns consecutive attribute indices
// (0 for vertexPos, 1 for vertexColor), matching the indices display() passes
// to glVertexAttribPointer — confirm against GLSLProgram.
void initShaders(){
    prog.compileShaders("vShader.vertex", "fShader.frag");
    prog.addAttribute("vertexPos");
    prog.addAttribute("vertexColor");

    prog.linkShaders();
}



void pre_display()
{

    glViewport(0, 0, 512, 512);
    glutPostRedisplay();

}

void display()
{
    pre_display();

    // render points from vertex buffer
    glClear(GL_COLOR_BUFFER_BIT);

    initShaders();

    run_simulation();

    prog.use();

    //GLint textureUniform = prog.getUniformLocation("mySampler");
    //glUniform1i(textureUniform, 0);
    //glActiveTexture(GL_TEXTURE0);

    GLint pUniform = prog.getUniformLocation("P");
    glUniformMatrix4fv(pUniform, 1, GL_FALSE, &m_mat[0][0]);

    glBindBufferARB(GL_ARRAY_BUFFER_ARB, positionsVBO);

    glPointSize(1);

    glEnable(GL_POINT_SMOOTH);
    glEnable(GL_BLEND);
    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);

    //glEnableVertexAttribArray(0); tried to manually enable the arrays
    //glEnableVertexAttribArray(1);


    glDisable(GL_DEPTH_TEST);
    glDisable(GL_CULL_FACE);

    glVertexAttribPointer(0, 2, GL_FLOAT, GL_TRUE, sizeof(float2), 0);

    glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); //dont need this but access violaton persists without it anyway

    glBindBufferARB(GL_ARRAY_BUFFER_ARB, colorsVBO);

    glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(vertex_data) ,(void*)(offsetof(vertex_data, col)));


    glDrawArrays(GL_POINTS, 0, DS);

    glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);

    prog.unuse();


    glDisable(GL_TEXTURE_2D); //from nvidia's probably linked to the cudaarray_t


    // Swap buffers
    glutSwapBuffers();

}

// Creates the GLUT window and OpenGL context, installs the input and display
// callbacks and initialises GLEW.
// NOTE(review): the result of glewInit() is not checked; the buffer-object
// entry points used later are only valid if it succeeded.
void initGL()
{
    // glutInit may rewrite argv, so hand it writable storage rather than a
    // string literal (binding a literal to char* is ill-formed since C++11),
    // and terminate the vector with NULL as a real argv would be.
    int argc = 1;
    char progName[] = "bar";
    char *argv[] = { progName, NULL };
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_DEPTH | GLUT_DOUBLE | GLUT_RGBA);
    glutInitWindowSize(DIM, DIM);
    glutCreateWindow("mate21");

    glClearColor(0.0f, 0.0f, 0.0f, 1.0f);

    glutKeyboardFunc(keyboard);
    glutMouseFunc(click);
    glutMotionFunc(motion);

    glutDisplayFunc(display);

    // Must come after the context exists (i.e. after glutCreateWindow).
    glewInit();
}

void setGLDevice(){
    cudaDeviceProp prop;
    int dev;

    memset(&prop, 0, sizeof(cudaDeviceProp));
    prop.major = 1;
    prop.minor = 0;
    cudaChooseDevice(&dev, &prop);
    cudaGLSetGLDevice(dev);
}



// Loads "jojo_test.png", converts it to one RGBA colour per particle and
// creates the two buffer objects used for drawing:
//   positionsVBO - float2 positions copied from `particles`, registered with
//                  CUDA so the advection kernel can update it in place
//   colorsVBO    - the vertex_data colour records from `data`
// Particles for which the image supplies no pixel (file missing, decode
// failure, or image smaller than DIM x DIM) are coloured opaque white instead
// of reading past the end of the decoded buffer.
void createVBO(){

    //reading rgba information from image to out_img
    unsigned long width = 0, height = 0;
    IOManager::readFileToBuffer("jojo_test.png", in_img);
    if (!in_img.empty())
    {
        // &in_img[0] is only valid on a non-empty vector.
        decodePNG(out_img, width, height, &(in_img[0]), in_img.size());
    }

    const size_t vertexCount = (size_t)(DS);
    const size_t decodedPixels = out_img.size() / 4;   // 4 bytes (RGBA) per pixel

    for (size_t i = 0; i < vertexCount; ++i){

        if (i < decodedPixels)
        {
            data[i].col.x = out_img[i * 4 + 0];
            data[i].col.y = out_img[i * 4 + 1];
            data[i].col.z = out_img[i * 4 + 2];
            data[i].col.w = out_img[i * 4 + 3];
        }
        else
        {
            data[i].col.x = 255;
            data[i].col.y = 255;
            data[i].col.z = 255;
            data[i].col.w = 255;
        }

    }

    // Position buffer: initial contents come from the host particle array.
    glGenBuffers(1, &positionsVBO);
    glBindBufferARB(GL_ARRAY_BUFFER_ARB, positionsVBO);
    glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(float2) * vertexCount, particles, GL_DYNAMIC_DRAW_ARB);
    glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);

    // Register while the buffer is unbound so CUDA can map it later.
    cudaGraphicsGLRegisterBuffer(&positionsVBO_CUDA, positionsVBO, cudaGraphicsMapFlagsNone);

    // Colour buffer: one vertex_data record per particle.
    glGenBuffers(1, &colorsVBO);
    glBindBufferARB(GL_ARRAY_BUFFER_ARB, colorsVBO);
    glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(vertex_data) * vertexCount, data, GL_DYNAMIC_DRAW_ARB);
    glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);

}

int main()
{
    setGLDevice();

    initGL();

    //orthogonal view matrix with glm
    m_mat = glm::ortho(0, 1, 1, 0, 0, 1);





    hvfield = (float2 *)malloc(sizeof(float2) * DS);
    memset(hvfield, 0, sizeof(float2) * DS);

    // Allocate and initialize device data
    cudaMallocPitch((void **)&dvfield, &tPitch, sizeof(float2)*DIM, DIM);

    cudaMemcpy(dvfield, hvfield, sizeof(float2) * DS,
        cudaMemcpyHostToDevice);
    // Temporary complex velocity field data
    cudaMalloc((void **)&vxfield, sizeof(float2) * PDS);
    cudaMalloc((void **)&vyfield, sizeof(float2) * PDS);

    setupTexture(DIM, DIM);
    bindTexture();

    // Create particle array
    particles = (float2 *)malloc(sizeof(float2) * DS);
    memset(particles, 0, sizeof(float2) * DS);

    initParticles(particles, DIM, DIM);

    // Create CUFFT transform plan configuration
    cufftPlan2d(&planr2c, DIM, DIM, CUFFT_R2C);
    cufftPlan2d(&planc2r, DIM, DIM, CUFFT_C2R);

    cufftSetCompatibilityMode(planr2c, CUFFT_COMPATIBILITY_FFTW_PADDING);
    cufftSetCompatibilityMode(planc2r, CUFFT_COMPATIBILITY_FFTW_PADDING);


    createVBO();

    //cleanup
    glutCloseFunc(cleanup);
    //Launch rendering loop
    glutMainLoop();
}

typedef无符号字符；
#定义设备\uuu设备__
#定义全局__
#定义最大值（a，b）（（a>b）？a:b）
#定义尺寸512
#定义DS DIM*DIM
glm:：mat4 m_mat；
//袖口平面把手
卡夫坦德尔平面图2C；
卡夫坦德尔平面；
静态浮点2*vxfield=NULL；
静态浮点2*vyfield=NULL；
float2*hvfield=NULL；
float2*dvfield=NULL；
静态整数wWidth=MAX（512，DIM）；
静态亮度=最大值（512，暗）；
/*-----自定义结构-----------------------------------------------------*/
结构纹理
{
颖片id；
整数宽度；
内部高度；
};
顶点_数据[DS]；
//c连杆机构
/*--------------------------------------------------------------------------------------------------------------------------------*/
外部“C”无效附加力（浮点2*v、整数dx、整数dy、整数spx、整数间谍、浮点fx、浮点fy、整数r）；
外部“C”空洞平流速度（float2*v，float*vx，float*vy，intdx，intpdx，intdy，float dt）；
外部“C”空隙扩散项目（浮点数2*vx、浮点数2*vy、浮点数dx、浮点数dy、浮点数dt、浮点数visc）；
外部“C”void updateVelocity（float2*v、float*vx、float*vy、intdx、intpdx、intdy）；
外部“C”空洞平流粒子（GLuint vbo、float2*v、int dx、int dy、float dt）；
/*--------------------------------------------------------------------------------------------------------------------------------*/
GLSLPROG；
IOManager m_经理；
glm_tex；
std：：矢量输入法；
std：：矢量输出\u img；
顶点数据vData[6]；
胶合位置；
胶粘色；
cudaGraphicsResource*位置SVBO_CUDA；
float2*粒子=NULL；
float2*part_cuda=NULL；
int lastx=0，lasty=0；
int=0；
大小\u t tPitch=0；
浮动百万兰特（无效）
{
返回rand（）/（float）rand_MAX；
}
void initParticles（float2*p，int dx，int dy）
{
int i，j；
对于（i=0；iFR-1&&nyFR-1）
{
int ddx=x-lastx；
int-ddy=y-最后一次；
fx=ddx/（浮动）wWidth；
fy=ddy/（浮动）wHeight；
int spy=ny-FR；
int spx=nx-FR；
附加力（dvfield、DIM、DIM、spx、spy、FORCE*DT*fx、FORCE*DT*fy、FR）；
lastx=x；
lasty=y；
}
再发现（）；
}
空洞清理（空洞）
{
cudaGraphicsUnregisterResource（位置为Bo_CUDA）；
解开纹理（）；
删除纹理（）；
//释放所有主机和设备资源
自由场；
游离（颗粒）；
cudaFree（dvfield）；
cudaFree（vxfield）；
库达弗里（维菲尔德）；
袖套变形（平面图2C）；
袖套拆卸（平面C2R）；
glBindBufferARB（GL_数组_BUFFER_ARB，0）；
glDeleteBuffersARB（1和位置SVBO）；
}
void运行模拟（void）{
//执行内核
平流速度（dvfield，（float*）vxfield，（float*）vyfield，DIM，RPADW，DIM，DT）；
扩散项目（vxfield、vyfield、CPADW、DIM、DT、VIS）；
更新速度（dvfield，（float*）vxfield，（float*）vyfield，DIM，RPADW，DIM）；
平流粒子（位置SVBO、dvfield、DIM、DIM、DT）；
}
void initShaders（）{
程序编译头（“vShader.vertex”、“fShader.frag”）；
程序添加属性（“顶点”）；
程序添加属性（“顶点颜色”）；
prog.linkShaders（）；
}
void pre_display（）
{
glViewport（0,0,512,512）；
再发现（）；
}
无效显示（）
{
pre_display（）；
//从顶点缓冲区渲染点
glClear（GLU颜色缓冲位）；
initShaders（）；
运行_模拟（）；
prog.use（）；
//GLint textureUniform=prog.getUniformLocation（“mySampler”）；
//glUniform1i（结构均匀，0）；
//玻璃纹理（GL_纹理0）；
GLint pUniform=prog.getUniformLocation（“P”）；
glUniformMatrix4fv（石榴形，1，GL_-FALSE和m_-mat[0][0]）；
glBindBufferARB（GL_数组_BUFFER_ARB，positionsVBO）；
glPointSize（1）；
glEnable（GL_POINT_SMOOTH）；
glEnable（GL_混合物）；
glBlendFunc（GL_SRC_ALPHA，GL_ONE_减去GL_SRC_ALPHA）；
//GlenableVertexAttributeArray（0）；尝试手动启用阵列
//GlenableVertexAttributeArray（1）；
glDisable（GLU深度测试）；
glDisable（GLU消隐面）；
glvertexattributepointer（0,2，GL_FLOAT，GL_TRUE，sizeof（float2），0）；
glBindBufferARB（GL\u数组\u BUFFE
// Host wrapper for the particle-advection kernel.
// Maps the CUDA-registered position buffer (positionsVBO_CUDA), launches
// advectParticles_k over a dx-by-dy domain, then unmaps the buffer so OpenGL
// can draw from it again.
//   vbo    - unused; the buffer is reached through positionsVBO_CUDA
//   v      - device velocity field, rows tPitch bytes apart
//   dx, dy - domain size in cells
//   dt     - time step
// Launch layout: TIDSX x TIDSY threads per block, one block per
// TILEX x TILEY tile, each thread handling TILEY/TIDSY rows.
// If mapping fails, or the mapped buffer is smaller than dx*dy float2
// elements, the kernel is not launched.
extern "C"
void advectParticles(GLuint vbo, float2 *v, int dx, int dy, float dt)
{
    if (dx <= 0 || dy <= 0)
    {
        return;
    }

    // Ceil-division so partial tiles at the right/bottom edges are covered.
    dim3 grid((dx + TILEX - 1) / TILEX, (dy + TILEY - 1) / TILEY);
    dim3 tids(TIDSX, TIDSY);

    if (cudaGraphicsMapResources(1, &positionsVBO_CUDA, 0) != cudaSuccess)
    {
        return;
    }

    float2 *p = NULL;
    size_t num_bytes = 0;
    cudaError_t status =
        cudaGraphicsResourceGetMappedPointer((void **)&p, &num_bytes, positionsVBO_CUDA);

    // Only launch when the mapped region really holds dx*dy particles.
    const size_t needed = (size_t)dx * (size_t)dy * sizeof(float2);
    if (status == cudaSuccess && p != NULL && num_bytes >= needed)
    {
        advectParticles_k<<<grid, tids>>>(p, v, dx, dy, dt, TILEY/TIDSY, tPitch);
    }

    // Always unmap after a successful map, whether or not the kernel ran.
    cudaGraphicsUnmapResources(1, &positionsVBO_CUDA, 0);
}

// Moves every particle by dt times the velocity of the cell it lies in and
// wraps the result back into the unit square.
//   part   - dx*dy particle positions, row-major, coordinates in [0, 1]
//   v      - velocity field; row y starts `pitch` bytes after row y-1
//   dx, dy - domain size in cells
//   dt     - time step
//   lb     - number of consecutive rows handled by each thread
//   pitch  - row pitch of v in bytes
// Launch layout: 2D grid of 2D blocks; thread x selects the column, thread y
// selects a group of lb rows. No shared memory is used.
__global__ void
advectParticles_k(float2 *part, float2 *v, int dx, int dy,
                  float dt, int lb, size_t pitch)
{

    int gtidx = blockIdx.x * blockDim.x + threadIdx.x;
    int gtidy = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb;
    int p;

    // gtidx is the domain location in x for this thread
    float2 pterm, vterm;

    if (gtidx < dx)
    {
        for (p = 0; p < lb; p++)
        {
            // fi is the domain location in y for this thread
            int fi = gtidy + p;

            if (fi < dy)
            {
                int fj = fi * dx + gtidx;
                pterm = part[fj];

                // Cell containing the particle. A coordinate of exactly 1.0
                // would give an index of dx (or dy), one past the last cell,
                // so clamp both indices into the valid range before reading.
                int xvi = ((int)(pterm.x * dx));
                int yvi = ((int)(pterm.y * dy));
                xvi = xvi < 0 ? 0 : (xvi >= dx ? dx - 1 : xvi);
                yvi = yvi < 0 ? 0 : (yvi >= dy ? dy - 1 : yvi);
                vterm = *((float2 *)((char *)v + yvi * pitch) + xvi);

                // Advance, then wrap: dropping the integer part, adding one
                // and dropping the integer part again also brings negative
                // values back into the unit interval.
                pterm.x += dt * vterm.x;
                pterm.x = pterm.x - (int)pterm.x;
                pterm.x += 1.f;
                pterm.x = pterm.x - (int)pterm.x;
                pterm.y += dt * vterm.y;
                pterm.y = pterm.y - (int)pterm.y;
                pterm.y += 1.f;
                pterm.y = pterm.y - (int)pterm.y;

                part[fj] = pterm;
            }
        } // If this thread is inside the domain in Y
    } // If this thread is inside the domain in X
}

sizeof(float2) = 4
DS = 10

 glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(float2)*DS, particles , GL_DYNAMIC_DRAW_ARB);

glVertexAttribPointer(0, 2, GL_FLOAT, GL_TRUE, sizeof(float2), 0);

glDrawArrays(GL_POINTS, 0, DS);