2014-02-20 273 views
2

我出於好奇,想測量一下OpenGL紋理上傳的性能,結果注意到一件我認爲很奇怪的事情。我使用glTexStorage2D創建了一個4K紋理,格式爲GL_RGBA8。然後,我在每一幀都用glTexSubImage2D把一個靜態圖像緩衝區重新上傳到該紋理。根據幀速率推算,上傳速度大約是5.19GB/s。接下來,我將紋理格式改爲GL_SRGB8_ALPHA8並重做實驗,這一次只得到2.81GB/s,下降十分明顯。這看起來很奇怪,因爲據我所知,上傳sRGB數據和上傳RGB數據應該沒有什麼不同:上傳時不應該發生任何轉換(sRGB轉換應該在着色器採樣時才進行)。

(標題:glTexSubImage2D性能怪異)

一些附加信息。在第一次測試(GL_RGBA8)中,我調用glTexSubImage2D時使用的是GL_RGBA和GL_UNSIGNED_INT_8_8_8_8_REV,因爲這是驅動程序(通過glGetInternalformativ查詢)告訴我的理想組合。在第二次測試(GL_SRGB8_ALPHA8)中,我同樣按照驅動程序的建議使用了GL_UNSIGNED_INT_8_8_8_8。一些測試證實,這兩種組合分別是各自情況下最快的格式。測試環境:Windows 7 x64,Nvidia GeForce GTX 760,驅動程序版本332.21。

#include <GL/glew.h> 
#include <GLFW/glfw3.h> 
#include <vector> 
#include <cstdlib> 
#include <cstdio> 

// Window size in pixels. Used both to create the GLFW window and as the
// `screenSize` uniform that the fragment shader divides gl_FragCoord by.
#define SCREEN_SIZE_X 1024
#define SCREEN_SIZE_Y 1024

// Turns inline shader text into a C string literal prefixed with the GLSL
// version directive. The macro is variadic so that shader text containing
// top-level commas (e.g. `int a, b;`) is still accepted; for text without
// such commas the resulting string is identical to a one-parameter `#src`.
// Note that stringification collapses every run of whitespace (including
// newlines) to a single space, so the shader body ends up on one line.
#define GLSL(...) "#version 440 core\n" #__VA_ARGS__

// Vertex shader source (GLSL 4.40 core, assembled by the GLSL() macro).
// It reads no vertex attributes: the clip-space position is looked up by
// gl_VertexID in a constant four-entry table holding the corners
// (-1,1), (-1,-1), (1,1), (1,-1). main() draws vertices 0..3 as a
// GL_TRIANGLE_STRIP, which makes these four corners a full-viewport quad.
const char* vertex_shader = GLSL(
    const vec2 data[4] = vec2[] 
    (
     vec2(-1.0, 1.0), 
     vec2(-1.0, -1.0), 
     vec2(1.0, 1.0), 
     vec2(1.0, -1.0) 
    ); 

    void main() 
    { 
     gl_Position = vec4(data[gl_VertexID], 0.0, 1.0); 
    } 
); 

// Fragment shader source (GLSL 4.40 core, assembled by the GLSL() macro).
// Uniform location 0 is the sampler `texture0`; uniform location 1 is
// `screenSize`. Each fragment samples the texture at its window coordinate
// divided by screenSize and writes the result to frag_color, so the texture
// is stretched across the window when screenSize equals the window size.
const char* fragment_shader = GLSL(
    layout(location = 0) uniform sampler2D texture0; 
    layout(location = 1) uniform vec2 screenSize; 
    out vec4 frag_color; 

    void main() 
    { 
     frag_color = texture(texture0, gl_FragCoord.xy/screenSize); 
    } 
); 

int main(int argc, char *argv[]) 
{ 
    if(!glfwInit()) 
     exit(EXIT_FAILURE); 

    glfwWindowHint(GLFW_RESIZABLE, GL_FALSE); 
    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4); 
    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 4); 
    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); 
    glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); 

    GLFWwindow* window = glfwCreateWindow(SCREEN_SIZE_X, SCREEN_SIZE_Y, "OpenGL Texture Upload", nullptr, nullptr); 

    if(!window) 
    { 
     glfwTerminate(); 
     exit(EXIT_FAILURE); 
    } 

    glfwMakeContextCurrent(window); 
    glfwSwapInterval(0); 

    glewExperimental = GL_TRUE; 

    if(glewInit() != GLEW_OK) 
    { 
     glfwTerminate(); 
     exit(EXIT_FAILURE); 
    } 

    GLuint vao = 0; 
    glGenVertexArrays(1, &vao); 
    glBindVertexArray(vao); 

    GLuint vs = glCreateShader(GL_VERTEX_SHADER); 
    glShaderSource(vs, 1, &vertex_shader, nullptr); 
    glCompileShader(vs); 

    GLuint fs = glCreateShader(GL_FRAGMENT_SHADER); 
    glShaderSource(fs, 1, &fragment_shader, nullptr); 
    glCompileShader(fs); 

    GLuint shader_program = glCreateProgram(); 
    glAttachShader(shader_program, fs); 
    glAttachShader(shader_program, vs); 
    glLinkProgram(shader_program); 
    glUseProgram(shader_program); 

    glProgramUniform2f(shader_program, 1, SCREEN_SIZE_X, SCREEN_SIZE_Y); 

    GLuint texture = 0; 
    glGenTextures(1, &texture); 
#ifdef USE_SRGB 
    glTextureStorage2DEXT(texture, GL_TEXTURE_2D, 1, GL_SRGB8_ALPHA8, 4096, 4096); 
#else 
    glTextureStorage2DEXT(texture, GL_TEXTURE_2D, 1, GL_RGBA8, 4096, 4096); 
#endif 
    glTextureParameteriEXT(texture, GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 
    glTextureParameteriEXT(texture, GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 
    glTextureParameteriEXT(texture, GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); 
    glTextureParameteriEXT(texture, GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 
    glBindMultiTextureEXT(GL_TEXTURE0, GL_TEXTURE_2D, texture); 
    glProgramUniform1i(shader_program, 0, 0); 

    std::vector<unsigned int> image_buffer(4096*4096, 0xFF0000FFul); 

    double lastTime = glfwGetTime(); 
    double nbFrames = 0; 

    while(!glfwWindowShouldClose(window)) 
    { 
     double currentTime = glfwGetTime(); 
     nbFrames++; 
     if (currentTime - lastTime >= 1.0) 
     { 
      char cbuffer[50]; 
      snprintf(cbuffer, sizeof(cbuffer), "OpenGL Texture Upload [%.1f fps, %.3f ms]", nbFrames, 1000.0/nbFrames); 
      glfwSetWindowTitle(window, cbuffer); 
      nbFrames = 0; 
      lastTime++; 
     } 
#ifdef USE_SRGB 
     glTextureSubImage2DEXT(texture, GL_TEXTURE_2D, 0, 0, 0, 4096, 4096, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, image_buffer.data()); 
#else 
     glTextureSubImage2DEXT(texture, GL_TEXTURE_2D, 0, 0, 0, 4096, 4096, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, image_buffer.data()); 
#endif 
     glClear(GL_COLOR_BUFFER_BIT); 
     glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); 
     glfwSwapBuffers(window); 
     glfwPollEvents(); 
    } 

    glfwDestroyWindow(window); 
    glfwTerminate(); 
    exit(EXIT_SUCCESS); 
} 

回答

0

顯然存在所謂的「原生像素格式」。請參閱Nvidia的這個鏈接,特別是第32.1.3節。

+0

這並不能解釋這個現象。首先,在兩種情況下,`glGetInternalformativ`推薦的都是`GL_RGBA`,而不是`GL_BGRA`。其次,實驗表明,無論哪種情況,`GL_BGRA`都不會更快。