在OpenGL中创建和寻址大型缓冲区(以GB为单位)

在OpenGL中创建和寻址大型缓冲区(以GB为单位),opengl,gpu,gpgpu,precision,Opengl,Gpu,Gpgpu,Precision,我惊讶地发现,在寻址更高的索引时,我的着色器会开始从缓冲区读取零。我猜这与驱动程序中寻址内部的精度有关。我从来没有得到任何内存错误,着色器似乎只是默默地停止访问它们。如果我错了,请纠正我,但我相信CUDA支持64位指针和大量内存 我已经建立了一个MWE(下图),在这里我创建了一个比2GB少1 vec4的缓冲区。如果我达到或超过2GB,着色器甚至不会向第一个元素写入任何内容。使用image_load_store写入着色器中的缓冲区最多只能工作512MiB。我更幸运的是使用了无绑定图形,它可以正确地

我惊讶地发现,在寻址更高的索引时,我的着色器会开始从缓冲区读取零。我猜这与驱动程序中寻址内部的精度有关。我从来没有得到任何内存错误,着色器似乎只是默默地停止访问它们。如果我错了,请纠正我,但我相信CUDA支持64位指针和大量内存

我已经建立了一个最小可复现示例(MWE,见下文),其中创建了一个比2GB少一个vec4的缓冲区。如果缓冲区达到或超过2GB,着色器甚至不会向第一个元素写入任何内容。使用image_load_store在着色器中写入缓冲区时,最多只能工作到512MiB。使用无绑定图形(bindless graphics)效果更好,它可以正确地写入整个缓冲区,但我仍然被限制在最大2GB,即使我可以创建更大的缓冲区;而且无绑定图形似乎使用64位寻址,所以我看不出存在这种限制的任何理由。

如何使用OpenGL创建和使用大于2GB的缓冲区?

我使用的是GTX Titan(6GB)

更新:

发现一个错误:内部格式GL_R32F应为GL_RGBA32F,修正后image_load_store可以达到约2GB的位置。在缓冲区大小小于2GB时,程序可以正确执行且不输出任何错误;一旦大小达到或超过2GB,image_load_store和无绑定(bindless)两种方式仍然都会失败。


GL_MAX_TEXTURE_BUFFER_SIZE
对我来说是134217728,这使得RGBA32F的最大大小正好是2GB。然而,我关于大于2GB的问题仍然存在。当然,我可以分配多个缓冲区,但这是一堆我不愿意处理的内部事务和开销。

您是否尝试过使用glGetIntegerv检索GL_MAX_TEXTURE_BUFFER_SIZE?
谢谢,@talonmes,GL_MAX_TEXTURE_BUFFER_SIZE似乎与我看到的image_load_store无法访问超过512MB的问题相吻合(尽管实际最大值似乎少了一个元素)。但我仍在寻找使用2GB以上容量的方法。
//#include <windows.h>
#include <assert.h>
#include <stdio.h>
#include <memory.h>
#include <GL/glew.h>
#include <GL/glut.h>

// GLSL 4.40 vertex shader source for the image_load_store path.
// Each invocation stores vec4(val) into the rgba32f imageBuffer `data` at
// texel index gl_VertexID, so one drawn vertex writes one vec4 texel.
// gl_Position is set to a constant because only the buffer write matters.
const char* imageSource =
"#version 440\n"
"uniform layout(rgba32f) imageBuffer data;\n"
"uniform float val;\n"
"void main() {\n"
"   imageStore(data, gl_VertexID, vec4(val));\n"
"   gl_Position = vec4(0.0);\n"
"}\n";

// GLSL 4.40 vertex shader source for the bindless path.
// Enables GL_NV_gpu_shader5 and GL_NV_shader_buffer_load so that `data` can
// be declared as a vec4 pointer uniform; each invocation writes vec4(val)
// to data[gl_VertexID]. gl_Position is set to a constant because only the
// buffer write matters.
const char* bindlessSource =
"#version 440\n"
"#extension GL_NV_gpu_shader5 : enable\n"
"#extension GL_NV_shader_buffer_load : enable\n"
"uniform vec4* data;\n"
"uniform float val;\n"
"void main() {\n"
"   data[gl_VertexID] = vec4(val);\n"
"   gl_Position = vec4(0.0);\n"
"}\n";

/*
 * Compiles a single shader stage from source and links it into its own
 * program object.
 *
 * type      - shader stage passed to glCreateShader (e.g. GL_VERTEX_SHADER).
 * shaderSrc - NUL-terminated GLSL source text.
 *
 * Returns the linked program name. On a compile or link failure the driver's
 * info log is printed to stdout and the process exits with a non-zero status,
 * so callers never receive an unusable program.
 */
GLuint compile(GLenum type, const char* shaderSrc)
{
    GLuint shader = glCreateShader(type);
    glShaderSource(shader, 1, (const GLchar**)&shaderSrc, NULL);
    glCompileShader(shader);

    GLint success = GL_FALSE;
    glGetShaderiv(shader, GL_COMPILE_STATUS, &success);
    if (!success)
    {
        GLint loglen = 0;
        glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &loglen);
        if (loglen > 0)
        {
            GLchar* log = new GLchar[loglen];
            glGetShaderInfoLog(shader, loglen, NULL, log);
            printf("%s\n", log);
            delete[] log;
        }
        glDeleteShader(shader);
        exit(1); // a failed compile must not be reported as success
    }

    GLuint program = glCreateProgram();
    glAttachShader(program, shader);
    glLinkProgram(program);
    // The shader is only flagged for deletion here; it stays alive while it
    // is attached to the program.
    glDeleteShader(shader);

    GLint linked = GL_FALSE;
    glGetProgramiv(program, GL_LINK_STATUS, &linked);
    if (!linked)
    {
        GLint loglen = 0;
        glGetProgramiv(program, GL_INFO_LOG_LENGTH, &loglen);
        if (loglen > 0)
        {
            GLchar* log = new GLchar[loglen];
            glGetProgramInfoLog(program, loglen, NULL, log);
            printf("%s\n", log);
            delete[] log;
        }
        glDeleteProgram(program);
        exit(1);
    }
    return program;
}

int main(int argc, char** argv)
{
    float* check;
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB | GLUT_DEPTH);
    glutCreateWindow("test");
    glewInit();

    GLsizeiptr bufferSize = 1024 * 1024 * 1024; //1GB
    bufferSize *= 2;
    bufferSize -= 16;
    GLsizeiptr numFloats = bufferSize/sizeof(float);
    GLsizeiptr numVec4s = bufferSize/(sizeof(float)*4);
    float testVal = 123.123f;

    glEnable(GL_RASTERIZER_DISCARD);

    float* dat = new float[numFloats];
    memset(dat, 0, bufferSize);

    //create a buffer with data
    GLuint buffer;
    glGenBuffers(1, &buffer);
    glBindBuffer(GL_TEXTURE_BUFFER, buffer);
    glBufferData(GL_TEXTURE_BUFFER, bufferSize, NULL, GL_STATIC_DRAW);

    //get a bindless address
    GLuint64 address;
    glMakeBufferResidentNV(GL_TEXTURE_BUFFER, GL_READ_WRITE);
    glGetBufferParameterui64vNV(GL_TEXTURE_BUFFER, GL_BUFFER_GPU_ADDRESS_NV, &address);

    //make a texture alias for it
    GLuint bufferTexture;
    glGenTextures(1, &bufferTexture);
    glBindTexture(GL_TEXTURE_BUFFER, bufferTexture);
    glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, buffer); //should be GL_RGBA32F (see update)
    glBindImageTextureEXT(0, bufferTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32F); //should be GL_RGBA32F (see update)

    //compile the shaders
    GLuint imageShader = compile(GL_VERTEX_SHADER, imageSource);
    GLuint bindlessShader = compile(GL_VERTEX_SHADER, bindlessSource);

    //initialize buffer
    glBufferData(GL_TEXTURE_BUFFER, bufferSize, dat, GL_STATIC_DRAW);
    glMakeBufferResidentNV(GL_TEXTURE_BUFFER, GL_READ_WRITE);
    glGetBufferParameterui64vNV(GL_TEXTURE_BUFFER, GL_BUFFER_GPU_ADDRESS_NV, &address);
    assert(glIsBufferResidentNV(GL_TEXTURE_BUFFER)); //sanity check

    //run image_load_store
    glUseProgram(imageShader);
    glUniform1i(glGetUniformLocation(imageShader, "data"), 0);
    glUniform1f(glGetUniformLocation(imageShader, "val"), testVal);
    glDrawArrays(GL_POINTS, 0, numVec4s);
    glMemoryBarrier(GL_ALL_BARRIER_BITS);
    check = (float*)glMapBuffer(GL_TEXTURE_BUFFER, GL_READ_ONLY);
    for (GLsizeiptr i = 0; i < numFloats; ++i)
    {
        if (check[i] != testVal)
        {
            printf("failed image_load_store: dat[%td] = %f (%fMiB)\n", i, check[i], (double)i*sizeof(float)/1024.0/1024.0);
            break;
        }
    }
    glUnmapBuffer(GL_TEXTURE_BUFFER);

    //initialize buffer
    glBufferData(GL_TEXTURE_BUFFER, bufferSize, dat, GL_STATIC_DRAW);
    glMakeBufferResidentNV(GL_TEXTURE_BUFFER, GL_READ_WRITE);
    glGetBufferParameterui64vNV(GL_TEXTURE_BUFFER, GL_BUFFER_GPU_ADDRESS_NV, &address);
    assert(glIsBufferResidentNV(GL_TEXTURE_BUFFER)); //sanity check

    //run bindless
    glUseProgram(bindlessShader);
    glProgramUniformui64NV(bindlessShader, glGetUniformLocation(bindlessShader, "data"), address);
    glUniform1f(glGetUniformLocation(bindlessShader, "val"), testVal);
    glDrawArrays(GL_POINTS, 0, numVec4s);
    glMemoryBarrier(GL_ALL_BARRIER_BITS);
    check = (float*)glMapBuffer(GL_TEXTURE_BUFFER, GL_READ_ONLY);
    for (GLsizeiptr i = 0; i < numFloats; ++i)
    {
        if (check[i] != testVal)
        {
            printf("failed bindless: dat[%td] = %f (%fMiB)\n", i, check[i], (double)i*sizeof(float)/1024.0/1024.0);
            break;
        }
    }
    glUnmapBuffer(GL_TEXTURE_BUFFER);

    return 0;
}
> make && ./a.out 
g++ -lGL -lGLEW -lglut main.c
failed image_load_store: dat[134217727] = 0.000000 (511.999996MiB)