在OpenGL中创建和寻址大型缓冲区(以GB为单位)
我惊讶地发现,当访问较高的索引时,我的着色器开始从缓冲区中读到零。我猜这与驱动程序内部寻址的精度有关。我从未收到任何内存错误,着色器似乎只是悄无声息地停止了对这些位置的访问。如果我说错了请纠正我,但我相信CUDA支持64位指针和大容量内存。 我构建了一个最小可复现示例(MWE,见下文),其中创建了一个比2GB少一个vec4的缓冲区。如果缓冲区达到或超过2GB,着色器甚至不会向第一个元素写入任何内容。在着色器中使用image_load_store写入缓冲区时,最多只有前512MiB有效。使用无绑定图形(bindless graphics)效果更好,它可以正确地写入整个缓冲区,但我仍然受限于最大2GB,即使我能够创建更大的缓冲区;而且无绑定图形似乎使用64位寻址,所以我看不出这种限制存在的理由。 如何使用OpenGL创建和使用大于2GB的缓冲区? 我使用的是GTX Titan(6GB)。 更新: 发现了一个错误。在OpenGL中创建和寻址大型缓冲区(以GB为单位),opengl,gpu,gpgpu,precision,Opengl,Gpu,Gpgpu,Precision,我惊讶地发现,当访问较高的索引时,我的着色器开始从缓冲区中读到零。我猜这与驱动程序内部寻址的精度有关。我从未收到任何内存错误,着色器似乎只是悄无声息地停止了对这些位置的访问。如果我说错了请纠正我,但我相信CUDA支持64位指针和大容量内存。 我构建了一个最小可复现示例(MWE,见下文),其中创建了一个比2GB少一个vec4的缓冲区。如果缓冲区达到或超过2GB,着色器甚至不会向第一个元素写入任何内容。在着色器中使用image_load_store写入缓冲区时,最多只有前512MiB有效。使用无绑定图形(bindless graphics)效果更好,它可以正确地
GL_R32F
内部格式应为GL_RGBA32F
,这样image_load_store就可以一直工作到约2GB。在缓冲区大小达到2GB之前,程序都能正确执行且不输出任何错误;而一旦达到或超过2GB,image_load_store和bindless两种方式仍然都会失败
GL_MAX_TEXTURE_BUFFER_SIZE
对我来说是134217728,这使得RGBA32F的最大大小正好是2GB。不过,我关于如何使用超过2GB缓冲区的问题仍然存在。当然,我可以分配多个缓冲区,但那会带来一堆我不想处理的簿记工作和额外开销。您是否尝试过使用glGetIntegerv
检索GL_MAX_TEXTURE_BUFFER_SIZE
?谢谢,@talonmes,GL_MAX_TEXTURE_BUFFER_SIZE
似乎与我看到的图像加载存储未访问超过512MB的问题相匹配(尽管实际最大值似乎少了一个元素)。但仍在寻找获得2GB以上容量的方法。
//#include <windows.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <GL/glew.h>
#include <GL/glut.h>
// GLSL 4.40 shader source for the image_load_store test path.
// It declares an rgba32f imageBuffer uniform named "data" and a float
// uniform "val"; each invocation stores vec4(val) at texel index
// gl_VertexID, so one drawn vertex writes one vec4. gl_Position is set to
// zero because only the store side effect matters.
const char* imageSource =
"#version 440\n"
"uniform layout(rgba32f) imageBuffer data;\n"
"uniform float val;\n"
"void main() {\n"
" imageStore(data, gl_VertexID, vec4(val));\n"
" gl_Position = vec4(0.0);\n"
"}\n";
// GLSL 4.40 shader source for the bindless test path.
// It enables GL_NV_gpu_shader5 and GL_NV_shader_buffer_load so that "data"
// can be declared as a vec4 pointer uniform; each invocation writes
// vec4(val) to data[gl_VertexID]. gl_Position is set to zero because only
// the memory write matters.
const char* bindlessSource =
"#version 440\n"
"#extension GL_NV_gpu_shader5 : enable\n"
"#extension GL_NV_shader_buffer_load : enable\n"
"uniform vec4* data;\n"
"uniform float val;\n"
"void main() {\n"
" data[gl_VertexID] = vec4(val);\n"
" gl_Position = vec4(0.0);\n"
"}\n";
/*
 * Compile a single shader stage and link it into its own program object.
 *
 * type      - shader stage passed to glCreateShader (e.g. GL_VERTEX_SHADER).
 * shaderSrc - NUL-terminated GLSL source text.
 *
 * Returns the linked program name. On a compile or link failure the info
 * log is printed to stdout and the process exits with a failure status.
 */
GLuint compile(GLenum type, const char* shaderSrc)
{
    GLuint shader = glCreateShader(type);
    glShaderSource(shader, 1, (const GLchar**)&shaderSrc, NULL);
    glCompileShader(shader);

    GLint compiled = GL_FALSE;
    glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled);
    if (!compiled)
    {
        GLint loglen = 0;
        glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &loglen);
        // One extra zeroed byte keeps the buffer a valid C string even when
        // the driver reports an empty log.
        GLchar* log = (GLchar*)calloc((size_t)loglen + 1, sizeof(GLchar));
        if (log != NULL)
        {
            glGetShaderInfoLog(shader, loglen, NULL, log);
            printf("%s\n", log);
            free(log);
        }
        glDeleteShader(shader);
        exit(EXIT_FAILURE);
    }

    GLuint program = glCreateProgram();
    glAttachShader(program, shader);
    glLinkProgram(program);
    // The shader is only flagged for deletion here; it stays alive for as
    // long as it is attached to the program.
    glDeleteShader(shader);

    GLint linked = GL_FALSE;
    glGetProgramiv(program, GL_LINK_STATUS, &linked);
    if (!linked)
    {
        GLint loglen = 0;
        glGetProgramiv(program, GL_INFO_LOG_LENGTH, &loglen);
        GLchar* log = (GLchar*)calloc((size_t)loglen + 1, sizeof(GLchar));
        if (log != NULL)
        {
            glGetProgramInfoLog(program, loglen, NULL, log);
            printf("%s\n", log);
            free(log);
        }
        glDeleteProgram(program);
        exit(EXIT_FAILURE);
    }
    return program;
}
int main(int argc, char** argv)
{
float* check;
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB | GLUT_DEPTH);
glutCreateWindow("test");
glewInit();
GLsizeiptr bufferSize = 1024 * 1024 * 1024; //1GB
bufferSize *= 2;
bufferSize -= 16;
GLsizeiptr numFloats = bufferSize/sizeof(float);
GLsizeiptr numVec4s = bufferSize/(sizeof(float)*4);
float testVal = 123.123f;
glEnable(GL_RASTERIZER_DISCARD);
float* dat = new float[numFloats];
memset(dat, 0, bufferSize);
//create a buffer with data
GLuint buffer;
glGenBuffers(1, &buffer);
glBindBuffer(GL_TEXTURE_BUFFER, buffer);
glBufferData(GL_TEXTURE_BUFFER, bufferSize, NULL, GL_STATIC_DRAW);
//get a bindless address
GLuint64 address;
glMakeBufferResidentNV(GL_TEXTURE_BUFFER, GL_READ_WRITE);
glGetBufferParameterui64vNV(GL_TEXTURE_BUFFER, GL_BUFFER_GPU_ADDRESS_NV, &address);
//make a texture alias for it
GLuint bufferTexture;
glGenTextures(1, &bufferTexture);
glBindTexture(GL_TEXTURE_BUFFER, bufferTexture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, buffer); //should be GL_RGBA32F (see update)
glBindImageTextureEXT(0, bufferTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32F); //should be GL_RGBA32F (see update)
//compile the shaders
GLuint imageShader = compile(GL_VERTEX_SHADER, imageSource);
GLuint bindlessShader = compile(GL_VERTEX_SHADER, bindlessSource);
//initialize buffer
glBufferData(GL_TEXTURE_BUFFER, bufferSize, dat, GL_STATIC_DRAW);
glMakeBufferResidentNV(GL_TEXTURE_BUFFER, GL_READ_WRITE);
glGetBufferParameterui64vNV(GL_TEXTURE_BUFFER, GL_BUFFER_GPU_ADDRESS_NV, &address);
assert(glIsBufferResidentNV(GL_TEXTURE_BUFFER)); //sanity check
//run image_load_store
glUseProgram(imageShader);
glUniform1i(glGetUniformLocation(imageShader, "data"), 0);
glUniform1f(glGetUniformLocation(imageShader, "val"), testVal);
glDrawArrays(GL_POINTS, 0, numVec4s);
glMemoryBarrier(GL_ALL_BARRIER_BITS);
check = (float*)glMapBuffer(GL_TEXTURE_BUFFER, GL_READ_ONLY);
for (GLsizeiptr i = 0; i < numFloats; ++i)
{
if (check[i] != testVal)
{
printf("failed image_load_store: dat[%td] = %f (%fMiB)\n", i, check[i], (double)i*sizeof(float)/1024.0/1024.0);
break;
}
}
glUnmapBuffer(GL_TEXTURE_BUFFER);
//initialize buffer
glBufferData(GL_TEXTURE_BUFFER, bufferSize, dat, GL_STATIC_DRAW);
glMakeBufferResidentNV(GL_TEXTURE_BUFFER, GL_READ_WRITE);
glGetBufferParameterui64vNV(GL_TEXTURE_BUFFER, GL_BUFFER_GPU_ADDRESS_NV, &address);
assert(glIsBufferResidentNV(GL_TEXTURE_BUFFER)); //sanity check
//run bindless
glUseProgram(bindlessShader);
glProgramUniformui64NV(bindlessShader, glGetUniformLocation(bindlessShader, "data"), address);
glUniform1f(glGetUniformLocation(bindlessShader, "val"), testVal);
glDrawArrays(GL_POINTS, 0, numVec4s);
glMemoryBarrier(GL_ALL_BARRIER_BITS);
check = (float*)glMapBuffer(GL_TEXTURE_BUFFER, GL_READ_ONLY);
for (GLsizeiptr i = 0; i < numFloats; ++i)
{
if (check[i] != testVal)
{
printf("failed bindless: dat[%td] = %f (%fMiB)\n", i, check[i], (double)i*sizeof(float)/1024.0/1024.0);
break;
}
}
glUnmapBuffer(GL_TEXTURE_BUFFER);
return 0;
}
> make && ./a.out
g++ -lGL -lGLEW -lglut main.c
failed image_load_store: dat[134217727] = 0.000000 (511.999996MiB)