Performance OpenGL组合纹理
我正在用OpenGL编程一个2D游戏,我必须输出一个由20x15个字段组成的关卡 因此,我目前正在为每个字段输出一个非常慢的纹理(300个纹理/帧) 但是由于级别从未改变的原因,我想知道是否有可能在游戏循环开始之前将纹理组合成一个大的单一纹理 然后,我必须只输出一个纹理,其中包含4个纹理坐标(0/0)(0/1)(1/1)(1/0)和4个glVertex2f(),用于指定窗口中的位置 这是300个字段中每个字段的当前代码:Performance OpenGL组合纹理,performance,opengl,glfw,Performance,Opengl,Glfw,我正在用OpenGL编程一个2D游戏,我必须输出一个由20x15个字段组成的关卡 因此,我目前正在为每个字段输出一个非常慢的纹理(300个纹理/帧) 但是由于级别从未改变的原因,我想知道是否有可能在游戏循环开始之前将纹理组合成一个大的单一纹理 然后,我必须只输出一个纹理,其中包含4个纹理坐标(0/0)(0/1)(1/1)(1/0)和4个glVertex2f(),用于指定窗口中的位置 这是300个字段中每个字段的当前代码: glColor3f(1,1,1); glBindTexture(GL_TE
glColor3f(1,1,1);
glBindTexture(GL_TEXTURE_2D,textur);
glBegin(GL_QUADS);
glTexCoord2f(textArea.a.x,textArea.b.y);glVertex2f(display.a.x,display.a.y);
glTexCoord2f(textArea.a.x,textArea.a.y);glVertex2f(display.a.x,display.b.y);
glTexCoord2f(textArea.b.x,textArea.a.y);glVertex2f(display.b.x,display.b.y);
glTexCoord2f(textArea.b.x,textArea.b.y);glVertex2f(display.b.x,display.a.y);
glEnd();
注意,我在一个.tga文件中有所有可能字段类型的图像。因此,我选择了正确的glTexCoord2f()
将包含所有平铺的图像文件加载到
GLuint textur;
因此,我为每个字段绑定相同的纹理
我的目标是减少CPU时间。显示列表不起作用,因为图形卡中要加载的数据太多,最终显示列表的速度甚至更慢
我也不能使用VBOs,因为我不使用像GLUT这样的扩展
所以我的想法是生成一个简单有效的纹理
我希望您能给我反馈如何组合纹理,以及这种方法是否最容易提高性能
编辑:这些是我在程序中使用的OpenGl函数:
启动程序时,我初始化窗口:
glfwInit();
if( !glfwOpenWindow(windowSize.x,windowSize.y, 0, 0, 0, 0, 0, 0, GLFW_WINDOW ) )
{ glfwTerminate();
return;
}
这就是游戏循环对OpenG的作用:
int main()
{
//INIT HERE (see code above)
glBlendFunc(GL_SRC_ALPHA,GL_ONE_MINUS_SRC_ALPHA);
glEnable(GL_BLEND);
glAlphaFunc(GL_GREATER,0.1f);
glEnable(GL_ALPHA_TEST);
long loopStart;//measure loopcycle-time
do{
height = height > 0 ? height : 1;
glViewport( 0, 0, width, height ); //set Origin
glClearColor( 0.0f, 0.0f, 0.0f, 0.0f ); //background-color
glClear(GL_COLOR_BUFFER_BIT);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glOrtho(0,windowSize.x,0,windowSize.y,0,128); //2D-Mode
glMatrixMode(GL_MODELVIEW);
loopStart=clock();
//(...) OUTPUT HERE (code see above)
glfwSwapBuffers(); //erzeugte Grafikdaten ausgeben
printf("%4dms -> ",clock()-loopStart);
}while(...);
glDisable(GL_ALPHA_TEST);
glDisable(GL_TEXTURE_2D);
glfwTerminate();
}
我现在发现了一个时间杀手。我使用的纹理太大,分辨率非常低。
包含关卡精灵的主纹理的分辨率为2200x2200像素。因此,GPU将大小增加到4096x4096,并使用大量数据进行计算。
图像包含10x10个不同级别的分幅,每个分幅以50x50像素的分辨率输出到屏幕上。
因此,我以较低的分辨率保存了Tiles文件(1020 x 1020像素->每个tile=102x102px),现在我有了一个的循环时间,我看到您正在使用GLFW。您可以添加GLEW和GLM,然后应该使用OpenGL3.x或更高版本 下面是一个完整的示例,您如何在一台廉价笔记本电脑上轻松绘制2000个纹理四边形(使用Alphablending)或更多,FPS为200或更多。它只有一个小纹理,但它也可以用于4096x4096纹理图集。如果“大纹理”中的子纹理大小与您绘制的四边形大小完全匹配,您将获得巨大的性能打击!你应该在大纹理中使用50x50像素!下面的Deme代码也会在每帧更新所有2000个四边形,并将它们发送到GPU。如果您不必在每一帧更新它们并将滚动坐标放置到着色器中,您将再次获得性能。 如果你不需要混合…使用阿尔法测试…你将再次获得更高的速度
#define GLEW_STATIC
#include "glew.h"
#include "glfw.h"
#include "glm.hpp"
#include "glm/gtc/matrix_transform.hpp"
#include "glm/gtx/transform.hpp"
#include <sstream>
#include <fstream>
#include <vector>
#define BUFFER_OFFSET(i) ((char *)NULL + (i))
std::ofstream logger("Log\\Ausgabe.txt", (std::ios::out | std::ios::app));
class Vertex
{
public:
float x;
float y;
float z;
float tx;
float ty;
};
class Quad
{
public:
float x;
float y;
float width;
float height;
};
int getHighResTimeInMilliSeconds(bool bFirstRun);
GLuint buildShader();
void addQuadToLocalVerticeArray(Vertex * ptrVertexArrayLocal, Quad *quad, int *iQuadCounter);
int main()
{
logger << "Start" << std::endl;
if(!glfwInit())
exit(EXIT_FAILURE);
glfwOpenWindowHint(GLFW_OPENGL_VERSION_MAJOR,3);
glfwOpenWindowHint(GLFW_OPENGL_VERSION_MINOR,3);
glfwOpenWindowHint(GLFW_OPENGL_FORWARD_COMPAT, 1);
glfwOpenWindowHint(GLFW_OPENGL_PROFILE,GLFW_OPENGL_CORE_PROFILE);
if( !glfwOpenWindow(1366, 768,8,8,8,8,32,32,GLFW_FULLSCREEN) )
{
glfwTerminate();
exit( EXIT_FAILURE );
}
if (glewInit() != GLEW_OK)
exit( EXIT_FAILURE );
//Init
GLuint VertexArrayID;
GLuint vertexbuffer;
GLuint MatrixID;
GLuint TextureID;
GLuint Texture;
GLuint programID = buildShader();
//Texture in Video-Speicher erstellen
GLFWimage img;
int iResult = glfwReadImage("Graphics\\gfx.tga", &img, GLFW_NO_RESCALE_BIT);
glEnable(GL_TEXTURE_2D);
glGenTextures(1, &Texture);
glBindTexture(GL_TEXTURE_2D, Texture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA,32,32, 0, GL_RGBA, GL_UNSIGNED_BYTE, img.Data);
glfwFreeImage(&img);
Vertex * ptrVertexArrayLocal = new Vertex[12000];
glGenVertexArrays(1, &VertexArrayID);
glBindVertexArray(VertexArrayID);
glGenBuffers(1, &vertexbuffer);
glBindBuffer(GL_ARRAY_BUFFER, VertexArrayID);
glBufferData(GL_ARRAY_BUFFER, sizeof(Vertex) * 12000, NULL, GL_DYNAMIC_DRAW);
glm::mat4 Projection = glm::ortho(0.0f, (float)1366,0.0f, (float)768, 0.0f, 100.0f);
glm::mat4 Model = glm::mat4(1.0f);
glm::mat4 MVP = Projection * Model;
glViewport( 0, 0, 1366, 768 );
MatrixID = glGetUniformLocation(programID, "MVP");
glEnable(GL_CULL_FACE);
glEnable (GL_BLEND);
glBlendFunc (GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
TextureID = glGetUniformLocation(programID, "myTextureSampler");
glUseProgram(programID);
glUniformMatrix4fv(MatrixID, 1, GL_FALSE, &MVP[0][0]);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, Texture);
glUniform1i(TextureID, 0);
int iQuadVerticeCounter=0;
int iNumOfQuads = 2000;
Quad * ptrQuads = new Quad[iNumOfQuads];
//LOCAL VERTICES CHANGES EACH LOOP
for (int i=0; i<iNumOfQuads; i++)
{
ptrQuads[i].width = 32;
ptrQuads[i].height = 32;
ptrQuads[i].x = (float)(rand() % (1334));
ptrQuads[i].y = (float)(rand() % (736));
}
int iCurrentTime=0;
int iFPS=0;
int iFrames=0;
int iFrameCounterTimeStart=0;
int running = GL_TRUE;
bool bFirstRun=true;
while( running )
{
iCurrentTime = getHighResTimeInMilliSeconds(bFirstRun);
bFirstRun=false;
//UPDATE ALL QUADS EACH FRAME!
for (int i=0; i<iNumOfQuads; i++)
{
ptrQuads[i].width = 32;
ptrQuads[i].height = 32;
ptrQuads[i].x = ptrQuads[i].x;
ptrQuads[i].y = ptrQuads[i].y;
addQuadToLocalVerticeArray(ptrVertexArrayLocal, &ptrQuads[i], &iQuadVerticeCounter);
}
//DO THE RENDERING
glClear( GL_COLOR_BUFFER_BIT );
glBindBuffer(GL_ARRAY_BUFFER, VertexArrayID);
glBufferSubData(GL_ARRAY_BUFFER, 0,sizeof(Vertex) * iQuadVerticeCounter, ptrVertexArrayLocal);
glEnableVertexAttribArray(0);
glBindBuffer(GL_ARRAY_BUFFER, vertexbuffer);
glVertexAttribPointer(0,3,GL_FLOAT,GL_FALSE,sizeof(Vertex),BUFFER_OFFSET(0));
glEnableVertexAttribArray(1);
glBindBuffer(GL_ARRAY_BUFFER, vertexbuffer);
glVertexAttribPointer(1,2,GL_FLOAT,GL_FALSE,sizeof(Vertex),BUFFER_OFFSET(3*sizeof(GL_FLOAT)));
glDrawArrays(GL_TRIANGLES, 0, iQuadVerticeCounter);
glDisableVertexAttribArray(0);
glDisableVertexAttribArray(1);
iQuadVerticeCounter=0;
glfwSwapBuffers();
//END OF DOING THE RENDERING
running = !glfwGetKey( GLFW_KEY_ESC ) &&glfwGetWindowParam( GLFW_OPENED );
iFrames++;
if (iCurrentTime >= iFrameCounterTimeStart + 1000.0f)
{
iFPS = (int)((iCurrentTime - iFrameCounterTimeStart) / 1000.0f * iFrames);
iFrameCounterTimeStart = iCurrentTime;
iFrames = 0;
logger << "FPS: " << iFPS << std::endl;
}
}
glfwTerminate();
exit( EXIT_SUCCESS );
}
int getHighResTimeInMilliSeconds(bool bFirstRun)
{
if (bFirstRun)
glfwSetTime(0);
return (int)((float)glfwGetTime()*1000.0f);
}
GLuint buildShader()
{
//Hint: Shader in the TXT-File looks like this
/*std::stringstream ssVertexShader;
ssVertexShader << "#version 330 core"<< std::endl
<< "layout(location = 0) in vec3 vertexPosition_modelspace;"<< std::endl
<< "layout(location = 1) in vec2 vertexUV;"<< std::endl
<< "out vec2 UV;"<< std::endl
<< "uniform mat4 MVP;"<< std::endl
<< "void main(){"<< std::endl
<< "vec4 v = vec4(vertexPosition_modelspace,1);"<< std::endl
<< "gl_Position = MVP * v;"<< std::endl
<< "UV = vertexUV;"<< std::endl
<< "}"<< std::endl;*/
std::string strVertexShaderCode;
std::ifstream VertexShaderStream("Shader\\VertexShader.txt", std::ios::in);
if(VertexShaderStream.is_open())
{
std::string Line = "";
while(getline(VertexShaderStream, Line))
strVertexShaderCode += "\n" + Line;
VertexShaderStream.close();
}
//Hint: Shader in the TXT-File looks like this
/*std::stringstream ssFragmentShader;
ssFragmentShader << "#version 330 core\n"
"in vec2 UV;\n"
"out vec4 color;\n"
"uniform sampler2D myTextureSampler;\n"
"void main(){\n"
"color = texture( myTextureSampler, UV ).rgba;\n"
"}\n";*/
std::string strFragmentShaderCode;
std::ifstream FragmentShaderStream("Shader\\FragmentShader.txt", std::ios::in);
if(FragmentShaderStream.is_open())
{
std::string Line = "";
while(getline(FragmentShaderStream, Line))
strFragmentShaderCode += "\n" + Line;
FragmentShaderStream.close();
}
GLuint gluiVertexShaderId = glCreateShader(GL_VERTEX_SHADER);
char const * VertexSourcePointer = strVertexShaderCode.c_str();
glShaderSource(gluiVertexShaderId, 1, &VertexSourcePointer , NULL);
glCompileShader(gluiVertexShaderId);
GLint Result = GL_FALSE;
int InfoLogLength;
glGetShaderiv(gluiVertexShaderId, GL_COMPILE_STATUS, &Result);
glGetShaderiv(gluiVertexShaderId, GL_INFO_LOG_LENGTH, &InfoLogLength);
std::vector<char> VertexShaderErrorMessage(InfoLogLength);
glGetShaderInfoLog(gluiVertexShaderId, InfoLogLength, NULL, &VertexShaderErrorMessage[0]);
std::string strInfoLog = std::string(&VertexShaderErrorMessage[0]);
GLuint gluiFragmentShaderId = glCreateShader(GL_FRAGMENT_SHADER);
char const * FragmentSourcePointer = strFragmentShaderCode.c_str();
glShaderSource(gluiFragmentShaderId, 1, &FragmentSourcePointer , NULL);
glCompileShader(gluiFragmentShaderId);
Result = GL_FALSE;
glGetShaderiv(gluiFragmentShaderId, GL_COMPILE_STATUS, &Result);
glGetShaderiv(gluiFragmentShaderId, GL_INFO_LOG_LENGTH, &InfoLogLength);
std::vector<char> FragmentShaderErrorMessage(InfoLogLength);
glGetShaderInfoLog(gluiFragmentShaderId, InfoLogLength, NULL, &FragmentShaderErrorMessage[0]);
strInfoLog = std::string(&FragmentShaderErrorMessage[0]);
GLuint gluiProgramId = glCreateProgram();
glAttachShader(gluiProgramId, gluiVertexShaderId);
glAttachShader(gluiProgramId, gluiFragmentShaderId);
glLinkProgram(gluiProgramId);
Result = GL_FALSE;
glGetProgramiv(gluiProgramId, GL_LINK_STATUS, &Result);
glGetProgramiv(gluiProgramId, GL_INFO_LOG_LENGTH, &InfoLogLength);
std::vector<char> ProgramErrorMessage( std::max(InfoLogLength, int(1)) );
glGetProgramInfoLog(gluiProgramId, InfoLogLength, NULL, &ProgramErrorMessage[0]);
strInfoLog = std::string(&ProgramErrorMessage[0]);
glDeleteShader(gluiVertexShaderId);
glDeleteShader(gluiFragmentShaderId);
return gluiProgramId;
}
void addQuadToLocalVerticeArray(Vertex * ptrVertexArrayLocal, Quad *quad, int *ptrQuadVerticeCounter)
{
//Links oben
ptrVertexArrayLocal[*ptrQuadVerticeCounter].x = quad->x;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].y = quad->y;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].z = 0.0f;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].tx = 0.0f;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].ty = 1.0f;
++(*ptrQuadVerticeCounter);
//Links unten
ptrVertexArrayLocal[*ptrQuadVerticeCounter].x = quad->x;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].y = quad->y - quad->height;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].z = 0.0f;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].tx = 0.0f;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].ty = 0.0f;
++(*ptrQuadVerticeCounter);
//Rechts unten
ptrVertexArrayLocal[*ptrQuadVerticeCounter].x = quad->x + quad->width;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].y = quad->y - quad->height;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].z = 0.0f;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].tx = 1.0f;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].ty = 0.0f;
++(*ptrQuadVerticeCounter);
//Rechts unten
ptrVertexArrayLocal[*ptrQuadVerticeCounter].x = quad->x + quad->width;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].y = quad->y - quad->height;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].z = 0.0f;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].tx = 1.0f;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].ty = 0.0f;
++(*ptrQuadVerticeCounter);
//Rechts oben
ptrVertexArrayLocal[*ptrQuadVerticeCounter].x = quad->x + quad->width;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].y = quad->y;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].z = 0.0f;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].tx = 1.0f;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].ty = 1.0f;
++(*ptrQuadVerticeCounter);
//Links oben
ptrVertexArrayLocal[*ptrQuadVerticeCounter].x = quad->x;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].y = quad->y;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].z = 0.0f;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].tx = 0.0f;
ptrVertexArrayLocal[*ptrQuadVerticeCounter].ty = 1.0f;
++(*ptrQuadVerticeCounter);
}
#定义GLEW#U静态
#包括“glew.h”
#包括“glfw.h”
#包括“glm.hpp”
#包括“glm/gtc/matrix_transform.hpp”
#包括“glm/gtx/transform.hpp”
#包括
#包括
#包括
#定义缓冲区偏移量(i)((字符*)NULL+(i))
std::of流记录器(“Log\\Ausgabe.txt”,(std::ios::out | std::ios::app));
类顶点
{
公众:
浮动x;
浮动y;
浮动z;
浮动发送;
浮雕;
};
四年级
{
公众:
浮动x;
浮动y;
浮动宽度;
浮动高度;
};
int gethighrestimeinmillides(bool bFirstRun);
GLuint buildShader();
void addQuadToLocalVerticeArray(顶点*ptrvertextarraylocal,四元*Quad,int*iQuadCounter);
int main()
{
记录器您的CPU/GPU型号、使用情况和FPS是什么?从前面的问题判断,您的问题可能在其他地方。我不太习惯OpenGl。我目前显示约300个纹理(我猜是数字),一个循环周期需要20-60ms。我的CPU使用情况(从一个核心)始终是100%。我不知道我的GPU有多少功能,CPU有多少,因为我还是一个初学者。你需要一个简单的例子来说明我如何在屏幕上打印东西吗?你的CPU和GPU型号是什么?渲染300个纹理四边形对10岁的GPUsIntel Core 2 Duo CPU@2.5GHz,NVIDIA GeForce 9300M GS(256MB)来说也是轻而易举的事。不是最好的,我知道,但必须有一个简单的方法来加速游戏。我不会渲染带有高清纹理和大量粒子效果、照明和物理的3D地图。我只想显示一个20*15场的关卡。这意味着你的代码中有一些非常愚蠢的东西,阻碍了性能。抱歉,这不是一个解决问题的地方如此广泛的问题。这是为了更具体的问题。