2012-03-23 52 views
1

我在 CUDA 中實現圖像旋轉(CUDA Image Rotation)時遇到了問題。我有一個非常簡單的旋轉函數,實現如下:

/**
 * Reads one pixel from a row-major, single-channel float image.
 *
 * The source pointer is const-qualified so read-only images can be passed;
 * existing callers that pass a plain float* are unaffected.
 *
 * No bounds checking is performed: the caller must ensure (x, y) lies inside
 * the image.
 *
 * @param ImgSrc   base pointer of the image
 * @param ImgWidth number of pixels per row
 * @param x        column of the pixel
 * @param y        row of the pixel
 * @return the value stored at row y, column x
 */
__device__ float readPixVal(const float* ImgSrc,int ImgWidth,int x,int y)
{
    // Build the offset in 64-bit arithmetic so y*ImgWidth cannot overflow
    // a 32-bit int on very large images.
    const long long offset = (long long)y * ImgWidth + x;
    return ImgSrc[offset];
}
/**
 * Writes one pixel into a row-major, single-channel float image.
 *
 * No bounds checking is performed: the caller must ensure (x, y) lies inside
 * the image.
 *
 * @param ImgSrc   base pointer of the image being written (despite the name,
 *                 this is the image that gets modified)
 * @param ImgWidth number of pixels per row
 * @param x        column of the pixel
 * @param y        row of the pixel
 * @param floatVal value to store at row y, column x
 */
__device__ void putPixVal(float* ImgSrc,int ImgWidth,int x,int y, float floatVal)
{
    // Build the offset in 64-bit arithmetic so y*ImgWidth cannot overflow
    // a 32-bit int on very large images.
    const long long offset = (long long)y * ImgWidth + x;
    ImgSrc[offset] = floatVal;
}

/**
 * Forward-mapping (scatter) rotation of a sizeX x sizeY float image about
 * the origin (pixel 0,0).
 *
 * Each thread takes one SOURCE pixel (i, j), rotates its coordinates and
 * writes the pixel value to the resulting destination position.
 *
 * Known limitation of this approach: because the rotated coordinates are
 * rounded down to integers, several source pixels can land on the same
 * destination pixel while other destination pixels are never written, so
 * the output contains holes. Destination pixels that receive no value are
 * left untouched.
 *
 * Expects a 2D launch in which the x/y thread indices cover at least
 * sizeX x sizeY; surplus threads do nothing.
 *
 * @param Source      input image, sizeX * sizeY floats, row-major
 * @param Destination output image, same layout
 * @param sizeX       image width in pixels
 * @param sizeY       image height in pixels
 * @param deg         rotation angle; passed directly to cosf/sinf, which
 *                    take radians despite the parameter name
 */
__global__ void Rotate(float* Source, float* Destination, int sizeX, int sizeY, float deg)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    int j = blockIdx.y * blockDim.y + threadIdx.y;

    if(i < sizeX && j < sizeY)
    {
        const float c = cosf(deg);
        const float s = sinf(deg);

        // Rotated position of source pixel (i, j).
        const float dstX = floorf((float)i * c - (float)j * s);
        const float dstY = floorf((float)i * s + (float)j * c);

        // Rotation about the origin moves much of the image outside the
        // frame; range-check before writing to avoid out-of-bounds stores.
        if (dstX >= 0.0f && dstX < (float)sizeX && dstY >= 0.0f && dstY < (float)sizeY)
        {
            putPixVal(Destination, sizeX, (int)dstX, (int)dstY, readPixVal(Source, sizeX, i, j));
        }
    }
}

問題是,我不知道該如何做插值。按照上面的做法,由於整數舍入,許多像素會被跳過。有人知道如何解決這個問題嗎?或者有沒有免費/開源的圖像旋轉實現?我找不到任何 CUDA 的實現。

+1

請記住,顯卡的圖形管線本身就具備能快速完成旋轉和其它線性變換的硬件。要利用它,可以使用 CUDA 與 OpenGL 或 DirectX 的互操作功能。 – 2012-03-23 03:43:34

回答

0

這樣做似乎可以解決問題:

/**
 * Backward-mapping (gather) rotation of a sizeX x sizeY float image about
 * its centre.
 *
 * Each thread owns one DESTINATION pixel (i, j), rotates its offset from the
 * centre (xc, yc) to find the corresponding source position, and copies the
 * source pixel whose integer coordinates are the source position rounded
 * down. Destination pixels whose source position falls outside the image
 * are left untouched.
 *
 * Expects a 2D launch in which the x/y thread indices cover at least
 * sizeX x sizeY; surplus threads do nothing.
 *
 * @param Source      input image, sizeX * sizeY floats, row-major
 * @param Destination output image, same layout
 * @param sizeX       image width in pixels
 * @param sizeY       image height in pixels
 * @param deg         rotation angle; passed directly to cosf/sinf, which
 *                    take radians despite the parameter name
 */
__global__ void Rotate(float* Source, float* Destination, int sizeX, int sizeY, float deg)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    int j = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid usually overhangs the image; threads outside it must not
    // write to Destination.
    if (i >= sizeX || j >= sizeY)
    {
        return;
    }

    // Centre of rotation.
    int xc = sizeX - sizeX/2;
    int yc = sizeY - sizeY/2;

    const float c = cosf(deg);
    const float s = sinf(deg);
    const float dx = (float)(i - xc);
    const float dy = (float)(j - yc);

    const float srcX = dx * c - dy * s + (float)xc;
    const float srcY = dx * s + dy * c + (float)yc;

    // Range-check in float BEFORE converting to int: a plain int conversion
    // truncates toward zero, which would turn positions in (-1, 0) into
    // pixel 0 and wrongly accept them.
    if (srcX >= 0.0f && srcX < (float)sizeX && srcY >= 0.0f && srcY < (float)sizeY)
    {
        putPixVal(Destination, sizeX, i, j, readPixVal(Source, sizeX, (int)srcX, (int)srcY));
    }
}
5

通常在這類圖像處理中,您可以遍歷所有目標像素位置,計算源圖像中相應的像素(或插值像素組)。

這可確保結果圖像被均勻一致地填滿,而這通常正是您所關心的。

+0

你讓我找到了更好的實現方式,謝謝 – 2012-03-23 03:05:21

2
/**
 * Rotates and scales the real part of a complex image about its centre,
 * sampling the source with bilinear interpolation.
 *
 * Each thread produces one target pixel (x, y). The pixel's offset from the
 * centre (imageWidth/2, imageHeight/2) is rotated by `angle` and divided by
 * `scale` to obtain a fractional source position. The four surrounding
 * source pixels are blended by their fractional distances. Target pixels
 * whose source position lies outside the image are set to (0, 0). The
 * imaginary part of every written target pixel is set to 0; only src[].x is
 * read.
 *
 * Expects a 2D launch in which the x/y thread indices cover at least
 * imageWidth x imageHeight; surplus threads write nothing.
 *
 * NOTE(review): the __global__ qualifier was absent in the original snippet
 * even though the body reads blockIdx/threadIdx; it is added here on the
 * assumption that it was lost when the code was pasted — confirm.
 *
 * @param trg         target image, imageWidth * imageHeight elements, row-major
 * @param src         source image, same layout
 * @param imageWidth  image width in pixels
 * @param imageHeight image height in pixels
 * @param angle       rotation angle, passed to cosf/sinf (radians)
 * @param scale       scale factor; the source offset is multiplied by 1/scale
 */
__global__ void rotateImage_Kernel(cufftComplex* trg, const cufftComplex* src, const unsigned int imageWidth,const unsigned int imageHeight, const float angle, const float scale)
{
    // compute thread dimension
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads that overhang the image must not touch trg.
    if (x < imageWidth && y < imageHeight) {
        // compute target address
        const unsigned int idx = x + y * imageWidth;

        // Offset of the target pixel from the image centre, in signed
        // arithmetic so pixels left of / above the centre go negative.
        const int centerX = (int)(imageWidth / 2);
        const int centerY = (int)(imageHeight / 2);
        const float xA = (float)((int)x - centerX);
        const float yA = (float)((int)y - centerY);

        const float cosA = cosf(angle);
        const float sinA = sinf(angle);
        const float invScale = 1.0f / scale;

        // Fractional source position. It is deliberately NOT rounded here:
        // the fractional part drives the interpolation weights below.
        const float src_x = invScale * (xA * cosA - yA * sinA) + (float)centerX;
        const float src_y = invScale * (xA * sinA + yA * cosA) + (float)centerY;

        if (src_x >= 0.0f && src_x < (float)imageWidth && src_y >= 0.0f && src_y < (float)imageHeight) {
            // BI - LINEAR INTERPOLATION
            // src_x / src_y are non-negative here, so truncation == floor.
            const unsigned int x0 = (unsigned int)src_x;
            const unsigned int y0 = (unsigned int)src_y;
            // Clamp each axis separately so the right/bottom neighbour of an
            // edge pixel stays on the same row/column instead of wrapping.
            const unsigned int x1 = min(x0 + 1u, imageWidth - 1u);
            const unsigned int y1 = min(y0 + 1u, imageHeight - 1u);

            const float sx = src_x - (float)x0;
            const float sy = src_y - (float)y0;

            const unsigned int row0 = y0 * imageWidth;
            const unsigned int row1 = y1 * imageWidth;

            const float top    = (1.0f - sx) * src[row0 + x0].x + sx * src[row0 + x1].x;
            const float bottom = (1.0f - sx) * src[row1 + x0].x + sx * src[row1 + x1].x;

            trg[idx].x = (1.0f - sy) * top + sy * bottom;
            trg[idx].y = 0.0f;
        } else {
            trg[idx].x = 0.0f;
            trg[idx].y = 0.0f;
        }
    }

    // Macro defined outside this snippet. It is kept outside the bounds
    // guard so every thread still reaches it, as in the original.
    DEVICE_METHODE_LAST_COMMAND;

}


/**
 * Translates a complex image by a whole number of pixels.
 *
 * Each thread produces one target pixel (x, y) by copying the source pixel
 * at (x + (int)tX, y + (int)tY). The fractional parts of tX and tY are
 * discarded by the int conversion. Target pixels whose source position lies
 * outside the image are set to (0, 0).
 *
 * Expects a 2D launch in which the x/y thread indices cover at least
 * imageWidth x imageHeight; surplus threads write nothing.
 *
 * NOTE(review): the __global__ qualifier was absent in the original snippet
 * even though the body reads blockIdx/threadIdx; it is added here on the
 * assumption that it was lost when the code was pasted — confirm.
 *
 * @param trg         target image, imageWidth * imageHeight elements, row-major
 * @param src         source image, same layout
 * @param imageWidth  image width in pixels
 * @param imageHeight image height in pixels
 * @param tX          horizontal offset added to the target column to find the source column
 * @param tY          vertical offset added to the target row to find the source row
 */
__global__ void translateImage_Kernel(cufftComplex* trg, const cufftComplex* src, const unsigned int imageWidth, const unsigned int imageHeight, const float tX, const float tY)
{
    // compute thread dimension
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads that overhang the image must not touch trg.
    if (x < imageWidth && y < imageHeight) {
        // compute target address
        const unsigned int idx = x + y * imageWidth;

        // Source position, in signed arithmetic so negative offsets work.
        const int xB = ((int)x + (int)tX);
        const int yB = ((int)y + (int)tY);

        if (xB >= 0 && (unsigned int)xB < imageWidth && yB >= 0 && (unsigned int)yB < imageHeight) {
            trg[idx] = src[(unsigned int)xB + (unsigned int)yB * imageWidth];
        } else {
            trg[idx].x = 0.0f;
            trg[idx].y = 0.0f;
        }
    }

    // Macro defined outside this snippet. It is kept outside the bounds
    // guard so every thread still reaches it, as in the original.
    DEVICE_METHODE_LAST_COMMAND;

}