直接從設備（視頻）顯示當前窗口中的位圖內存

假設我已經使用cudaMalloc()和cudaMemcpy()把一幅位圖（圖像）放進了設備內存（顯存）中。直接從設備（視頻）顯示當前窗口中的位圖內存

直接在屏幕上顯示此位圖（當前窗口）的最簡單方法是什麼？

我猜應該有一個替代GDI的BitBlt() ......但任何方法（Direct3D，OpenGL甚至GDI）都可以。

CUDA SDK（OpenGL版本）中的示例使用紋理（Texture），並通過一個緩衝區（像素緩衝區對象，PBO）來保存數據；該緩衝區被註冊爲CUDA資源，並且每一幀（在glutDisplayFunc()的回調中）都需要映射/取消映射。這一切似乎有點複雜，而且沒有必要。

2011-12-29 AlexP

「而這一切似乎只是一點點複雜和不必要的。」很遺憾，事實就是這樣。CUDA無法繪製任何內容，它活在自己的小世界裏。要繪製一些東西，您需要把數據從CUDA的世界中取出來，再放入真正能夠繪製的世界中。更好的做法是讓CUDA直接把數據生成到OpenGL或D3D對象裏。 – 2011-12-29 19:59:05

是的，我知道CUDA僅適用於計算。如果你喜歡，你可以忘記CUDA，因爲主要的是我們有一個線性定位的設備內存地址，它保存位圖。我所需要的只是將這個位圖放置到Framebuffer。我猜測Direct2D可能是其中一種解決方案，但不幸的是它僅在Windows 7/Vista中受到支持。 – AlexP 2011-12-29 20:33:17

我很確定Direct2D的blit函數不接受指向「線性定位設備內存」的指針，它需要的是代表這些內存的對象。這就是爲什麼需要一個轉換層：大多數繪圖API並不會直接讓你拿到指向GPU內存的指針並從中blit。即使它們允許這樣做，也不能保證CUDA的指針就是其他API所使用的指針。 – 2011-12-29 20:38:27

好的，會試着回答我自己的問題。看起來像通過OpenGL顯示位圖有一個更簡單的方法。我們可以使用支持紋理格式GL_RGBA8（自CUDA Toolkit 4.0以來）的cudaGraphicsGLRegisterImage()，而不是使用紋理+像素緩衝區對象。

爲簡單起見，示例沒有cutilSafeCall()或錯誤檢查。我對OpenGL知之甚少，很高興聽到建議。

#include <stdio.h> 
#include <string.h> 

#include <GL/glew.h> 
#include <GL/freeglut.h> 

#include <cuda_runtime_api.h> 
#include <cuda_gl_interop.h> 

/* CUDA-side handle for the OpenGL texture: assigned by
 * cudaGraphicsGLRegisterImage() in resizeTexture() and mapped/unmapped
 * in editTexture() to exchange data between OpenGL and CUDA. */ 
cudaGraphicsResource *cuda_texture; 

/* Registers (+ resizes) CUDA-Texture (aka Renderbuffer). */ 
void resizeTexture(int w, int h) { 
    static GLuint gl_texture = 0; 

    /* Delete old CUDA-Texture. */ 
    if (gl_texture) { 
     cudaGraphicsUnregisterResource(cuda_texture); 
     glDeleteTextures(1, &gl_texture); 
    } else glEnable(GL_TEXTURE_2D); 

    glGenTextures(1, &gl_texture); 
    glBindTexture(GL_TEXTURE_2D, gl_texture); 

    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); 

    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); 

    cudaGraphicsGLRegisterImage(&cuda_texture, gl_texture, GL_TEXTURE_2D, cudaGraphicsMapFlagsWriteDiscard); 
} 

/*
 * Fills the given CUDA array with a test image built in host memory.
 *
 *   memDevice - destination CUDA array; the copy below treats it as
 *               w x h elements of 4 bytes each.
 *   w, h      - image size in pixels.
 *
 * Every byte is first set to 128, then the red and blue channels get a
 * horizontal ramp (255 * x / w). Does nothing when memDevice is NULL or a
 * dimension is not positive. A failed copy is reported on stderr.
 */
void updateMemDevice(cudaArray *memDevice, int w, int h) {
    struct Color {
        unsigned char r, g, b, a;
    };

    if (memDevice == NULL || w <= 0 || h <= 0) {
        return;
    }

    /* Do the size arithmetic in size_t so large images cannot overflow int. */
    const size_t pixelCount = (size_t)w * (size_t)h;
    const size_t rowBytes = (size_t)w * sizeof(Color);

    Color *memHost = new Color[pixelCount];

    memset(memHost, 128, pixelCount * sizeof(Color));
    for (int y = 0; y < h; ++y) {
        Color *row = memHost + (size_t)y * (size_t)w;
        for (int x = 0; x < w; ++x) {
            row[x].r = row[x].b = (unsigned char)(255 * x / w);
        }
    }

    /* cudaMemcpyToArray is deprecated; the 2D variant copies h rows of
     * rowBytes each from the tightly packed host buffer (pitch == rowBytes). */
    cudaError_t err = cudaMemcpy2DToArray(memDevice, 0, 0, memHost, rowBytes, rowBytes,
                                          (size_t)h, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {
        fprintf(stderr, "updateMemDevice: cudaMemcpy2DToArray failed: %s\n",
                cudaGetErrorString(err));
    }

    delete [] memHost;
}

void editTexture(int w, int h) { 
    cudaGraphicsMapResources(1, &cuda_texture); 
    cudaArray* memDevice; 
    cudaGraphicsSubResourceGetMappedArray(&memDevice, cuda_texture, 0, 0); 
    updateMemDevice(memDevice, w, h); 
    cudaGraphicsUnmapResources(1, &cuda_texture); 
} 


void windowResizeFunc(int w, int h) { 
    glViewport(-w, -h, w*2, h*2); 
} 

void displayFunc() { 
    glBegin(GL_QUADS); 
    glTexCoord2i(0, 0); glVertex2i(0, 0); 
    glTexCoord2i(1, 0); glVertex2i(1, 0); 
    glTexCoord2i(1, 1); glVertex2i(1, 1); 
    glTexCoord2i(0, 1); glVertex2i(0, 1); 
    glEnd(); 

    glFlush(); 
} 


int main(int argc, char *argv[]) { 
    /* Initialize OpenGL context. */ 
    glutInit(&argc, argv); 
    glutInitDisplayMode(GLUT_RGB); 
    glutInitWindowSize(400, 300); 
    glutCreateWindow("Bitmap in Device Memory"); 
    glutReshapeFunc(windowResizeFunc); 
    glutDisplayFunc(displayFunc); 

    glewInit(); 

    cudaGLSetGLDevice(0); 

    int width = 5, height = 5; 
    resizeTexture(width, height); 
    editTexture(width, height); 

    glutMainLoop(); 
    return 0; 
}

來源

2011-12-30 20:24:57 AlexP

這比僅僅將圖像複製到系統內存並顯示它更快嗎？ – 2013-02-08 18:26:52

一般來說，只要我們不調用updateMemDevice()（或儘可能少地調用它）——是的，這樣更快。否則，updateMemDevice()會把圖像從系統內存（memHost）複製到設備內存（memDevice）。無論我們是否主動要求，系統內存中的圖像爲了顯示都必須被複制到設備內存中，因爲顯示本來就是顯卡的工作。不過，最快的方法是使用CUDA/OpenCL直接在設備內存中生成圖像，從而完全避開系統內存。 – AlexP 2013-02-08 20:17:55

直接從設備（視頻）顯示當前窗口中的位圖內存

回答

相關問題