2016-04-19 23 views
1

我一直在試圖運行計算着色器 - 前綴和演示在提供:glMapBufferRange僅映射4個值中的1個。爲什麼?

https://github.com/openglsuperbible/sb7code/blob/master/src/prefixsum/prefixsum.cpp

我使用的確切代碼:

#define NUM_ELEMENTS 2048 

float random_float() 
{ 
    static unsigned int seed = 0x13371337; 

    float res; 
    unsigned int tmp; 

    seed *= 16807; 

    tmp = seed^(seed >> 4)^(seed << 15); 

    *((unsigned int *)&res) = (tmp >> 9) | 0x3F800000; 

    return (res - 1.0f); 
} 

static int PrefixSum(int programHandle) 
{ 
    GLuint data_buffer[2]; 

    float input_data[NUM_ELEMENTS]; 
    float output_data[NUM_ELEMENTS]; 

    glGenBuffers(2, data_buffer); 

    glBindBuffer(GL_SHADER_STORAGE_BUFFER, data_buffer[0]); 
    glBufferData(GL_SHADER_STORAGE_BUFFER, NUM_ELEMENTS * sizeof(float), NULL, GL_DYNAMIC_DRAW); 

    glBindBuffer(GL_SHADER_STORAGE_BUFFER, data_buffer[1]); 
    glBufferData(GL_SHADER_STORAGE_BUFFER, NUM_ELEMENTS * sizeof(float), NULL, GL_DYNAMIC_COPY); 

    int i; 

    for (i = 0; i < NUM_ELEMENTS; i++) 
    { 
     input_data[i] = random_float(); 
    } 

    glShaderStorageBlockBinding(programHandle, 0, 0); 
    glShaderStorageBlockBinding(programHandle, 1, 1); 

    float * ptr; 

    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, data_buffer[0], 0, sizeof(float) * NUM_ELEMENTS); 
    glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(float) * NUM_ELEMENTS, input_data); 

    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, data_buffer[1], 0, sizeof(float) * NUM_ELEMENTS); 

    glUseProgram(programHandle); 
    glDispatchCompute(1, 1, 1); 

    glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); 
    glFinish(); 

    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, data_buffer[1], 0, sizeof(float) * NUM_ELEMENTS); 
    ptr = (float *)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(float) * NUM_ELEMENTS, GL_MAP_READ_BIT); 

    char buffer[1024]; 
    sprintf(buffer, "SUM: %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f " 
    "%2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f", 
    ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], ptr[5], ptr[6], ptr[7], 
    ptr[8], ptr[9], ptr[10], ptr[11], ptr[12], ptr[13], ptr[14], ptr[15]); 

    glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); 
} 

這是着色器:

#version 430 core 

layout (local_size_x = 1024) in; 

layout (binding = 0) coherent readonly buffer block1 
{ 
    float input_data[gl_WorkGroupSize.x]; 
}; 

layout (binding = 1) coherent writeonly buffer block2 
{ 
    float output_data[gl_WorkGroupSize.x]; 
}; 

shared float shared_data[gl_WorkGroupSize.x * 2]; 

void main(void) 
{ 
    uint id = gl_LocalInvocationID.x; 
    uint rd_id; 
    uint wr_id; 
    uint mask; 

    const uint steps = uint(log2(gl_WorkGroupSize.x)) + 1; 
    uint step = 0; 

    shared_data[id * 2] = input_data[id * 2]; 
    shared_data[id * 2 + 1] = input_data[id * 2 + 1]; 

    barrier(); 

    for (step = 0; step < steps; step++) 
    { 
     mask = (1 << step) - 1; 
     rd_id = ((id >> step) << (step + 1)) + mask; 
     wr_id = rd_id + 1 + (id & mask); 

     shared_data[wr_id] += shared_data[rd_id]; 

     barrier(); 
    } 

    output_data[id * 2] = shared_data[id * 2]; 
    output_data[id * 2 + 1] = shared_data[id * 2 + 1]; 
} 

問題是,輸出寫入4個位置中的1個:

SUM: 0.70 0.00 0.00 0.00 1.69 0.00 0.00 0.00 1.81 0.00 0.00 0.00 2.59 0.00 0.00 0.00 

這是輸入:

[0] 0.700959682 float 
    [1] 0.837353945 float 
    [2] 0.403481007 float 
    [3] 0.856583834 float 
    [4] 0.993326187 float 
    [5] 0.727316380 float 
    [6] 0.768217087 float 
    [7] 0.0675410032 float 
    [8] 0.112720609 float 
    [9] 0.703838706 float 
    [10] 0.365846157 float 
    [11] 0.504367113 float 
    [12] 0.778576016 float 
    [13] 0.217134356 float 
    [14] 0.944752693 float 
    [15] 0.575236082 float 
    [16] 0.795839429 float 
    [17] 0.707037449 float 
    [18] 0.181974053 float 
    [19] 0.745973587 float 
    [20] 0.281350732 float 
+0

FYI:我不相信你正在使用你的計算着色器SSBOs在需要'coherent'這樣的方式。這主要是因爲工作項目之間通過緩衝區進行交叉對話,並且所有的串擾都是通過共享內存進行的。這不是你的問題,但它可能會提高性能。 –

回答

1

解決:指定用於緩衝包裝標準解決了這個問題:

layout (std430, binding = 1) coherent writeonly buffer block2 
{ 
    float output_data[gl_WorkGroupSize.x]; 
}; 
1
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); 

記憶障礙指定你打算如何寫後訪問對象,你怎麼不寫它。你將通過映射它來閱讀對象來閱讀,所以你應該這樣說。具體而言,您應該使用GL_BUFFER_UPDATE_BARRIER_BIT

另外:

glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, data_buffer[1], 0, sizeof(float) * NUM_ELEMENTS); 

這應該只是glBindBuffer(GL_SHADER_STORAGE_BUFFER)。您正在綁定它來映射它,而不是用於存儲操作。

+0

我按照你的建議修改了代碼,但並沒有解決問題。 – markwalberg

相關問題