在一個非常簡單的Cuda和C++程序上需要幫助

我是CUDA編程的新手。我試圖創建一個簡單的CUDA和C++圖像處理程序，用來改變圖像的亮度、飽和度和對比度等。我先從一個非常簡單的函數開始，只改變圖像的亮度：將圖像的所有RGB分量乘以一個alpha值。

這是我的CPP項目：

#include <iostream>

#include <cutil_inline.h>
#include <cutil_gl_inline.h>

#include <cuda_runtime_api.h>
#include <cuda_gl_interop.h>

using namespace std; 

/* Raw image buffer together with its dimensions. */
struct ImageData {
    /* Points to large array of R,G,B-order data */
    unsigned char *data;

    /* Number of rows. */
    int height;

    /* Number of columns. */
    int width;
};

// Host-side image; main() fills it through loadPPMImageData().
ImageData imageData; 
// NOTE(review): not referenced anywhere else in the visible code.
float *imageResult; // to store the image result from cuda after running the kernel 
// Device input buffer, allocated with cudaMalloc in initCuda().
unsigned char *d_Input; 
// Device output buffer, allocated with cudaMalloc in initCuda(); filter() writes into it.
unsigned char *d_Output; 
// Host pointer used as the destination of the device-to-host cudaMemcpy in main().
unsigned char *h_Output; 

// These are CUDA functions to handle allocation and launching the kernels.
// Both are defined in kernel.cu and declared extern "C" so the names link
// without C++ name mangling.
//
// initInput: takes the host image bytes (`data`), the address of the device
//            pointer (`device`) and the byte count (`size`).
extern "C" void initInput(unsigned char *data, unsigned char **device, unsigned int size); 
// filter: runs the filter selected by `filterMode` from d_src into d_dest.
//         In the visible kernel.cu only filterMode == 1 with use_array == true
//         does any work (brightness scaling by `alpha`); `contrast` and
//         `saturation` are accepted but not used there.
extern "C" void filter(unsigned char *d_src, unsigned char *d_dest, int width, int height, int filterMode, 
       float alpha, float contrast, float saturation, bool use_array); 

// Allocates the device input and output buffers for the image held in
// `imageData`, then hands the host pixels to initInput().
void initCuda()
{
    // Three bytes (R, G, B) per pixel.
    const unsigned int numBytes = imageData.width * imageData.height * 3 * sizeof(unsigned char);

    // Device storage for the source image and for the filtered result.
    cutilSafeCall(cudaMalloc(reinterpret_cast<void**>(&d_Input), numBytes));
    cutilSafeCall(cudaMalloc(reinterpret_cast<void**>(&d_Output), numBytes));

    initInput(imageData.data, &d_Input, numBytes);
}

int main() { 


    loadPPMImageData((char *)"boxes.ppm", &imageData); //this function is defined in another file 
    cudaGLSetGLDevice(0); 
    initCuda(); 
    filter( d_Input, d_Output, imageData.width, imageData.height, 1, 0.8, 1.0, 1.0, 1); 

    cutilSafeCall(cudaMemcpy(h_Output, d_Output, size, cudaMemcpyDeviceToHost)); // copy output data from device to host 
    //print the output 
    for (int i = 0; i < imageData.size; i++) { 
     cout << d_Output 
    } 
    // do some memory cleanups 

    //done 
    return 0 
}

這是我的kernel.cu文件：

#include <iostream> 
#include <cstdlib> 
#include <string> 
#include <cmath> 

#include <shrUtils.h> 
#include <cutil_inline.h> 
#include <cutil_math.h> 


// Brightness kernel: multiplies every R, G and B byte of an interleaved RGB
// image by `alpha`.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may be
// larger than the image; threads outside width x height return immediately.
// No shared memory is used.
//
// input   source pixels, 3 bytes per pixel, indexed as (y * width + x) * 3
// output  destination buffer with the same size and layout as `input`
// width   number of pixels per row
// height  number of rows
// alpha   brightness factor; each result is clamped to [0, 255]
__global__ void
applyAlpha(unsigned char* input, unsigned char* output, int width, int height, float alpha)
{
    // Pixel coordinates handled by this thread.
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is rounded up to whole blocks, so edge threads can fall
    // outside the image and must not touch memory.
    if (x >= (unsigned int)width || y >= (unsigned int)height)
        return;

    const size_t base = ((size_t)y * width + x) * 3;
    for (int c = 0; c < 3; ++c) { // c = 0: r, 1: g, 2: b
        // Clamp before narrowing so alpha > 1 saturates at 255 instead of
        // wrapping around when stored in an unsigned char.
        const float scaled = fminf(fmaxf(input[base + c] * alpha, 0.0f), 255.0f);
        output[base + c] = (unsigned char)scaled;
    }
}


// Returns a / b, plus one when the division leaves a remainder
// (ceiling division for non-negative a and positive b).
extern "C"
int iDivUp(int a, int b){
    const int quotient = a / b;
    if (a % b == 0) {
        return quotient;
    }
    return quotient + 1;
}


// Copies the host image into an already-allocated device buffer.
//
// data         host pointer to `size` bytes of image data
// deviceArray  address of the device pointer; this function does not
//              allocate, so *deviceArray must already refer to a device
//              buffer of at least `size` bytes
// size         number of bytes to copy
extern "C"
void initInput(unsigned char *data, unsigned char **deviceArray, unsigned int size) // /* image data, device pointer, etc */)
{
    /* TODO: Array version DONE
    * Initialize device memory for array version
    * and cuda arrays for texture version here
    */
    // The destination is the device buffer (*deviceArray). Passing
    // `deviceArray` itself would target the host variable that holds the
    // pointer, which is not device memory.
    cutilSafeCall(cudaMemcpy(*deviceArray, data, size, cudaMemcpyHostToDevice)); // copy image data from host to device (array version)
    //TODO: Texture version

}


// Host entry point that launches the filter kernel for the requested mode.
//
// d_src, d_dest  buffers passed straight through to the kernel
// width, height  image dimensions used to size the launch grid
// filter_mode    1 selects brightness (applyAlpha); other values do nothing
// alpha          brightness factor forwarded to applyAlpha
// contrast, saturation  accepted but not used by this function
// use_array      true runs the array version; false (texture version) does nothing
extern "C"
void filter(unsigned char *d_src, unsigned char *d_dest, int width, int height, int filter_mode, float alpha, float contrast, float saturation, bool use_array)
{
    /* TODO
    * run different kernels for array and texture version
    */

    // 16x16 threads per block; iDivUp rounds the block count up so the grid
    // covers the whole image.
    const dim3 threadsPerBlock(16, 16, 1);
    const dim3 blocksPerGrid(iDivUp(width, threadsPerBlock.x), iDivUp(height, threadsPerBlock.y), 1);

    if (!use_array) {
        // Texture version: not yet implemented
        return;
    }

    if (filter_mode != 1) {
        // Only filter mode 1, brightness (alpha), is handled here.
        return;
    }

    applyAlpha<<< blocksPerGrid, threadsPerBlock >>>(d_src, d_dest, width, height, alpha);

    // check if kernel execution generated an error
    cutilCheckMsg("Kernel execution failed");

    cutilSafeCall(cutilDeviceSynchronize());
}

編輯：我已經根據安德魯的回答修改了上述文件。不過現在編譯後得到了以下錯誤：

ld: warning: ignoring file kernel.o, file was built for i386 which is not the architecture being linked (x86_64) 
Undefined symbols for architecture x86_64: 
    "_initInput", referenced from: 
     initCuda() in CS380_prog4.o 
    "_filter", referenced from: 
     display()  in CS380_prog4.o 
    (maybe you meant: ___GLEW_SGIS_texture_filter4, ___GLEW_EXT_texture_filter_anisotropic , ___GLEW_NV_multisample_filter_hint) 
ld: symbol(s) not found for architecture x86_64 
collect2: ld returned 1 exit status 
make: *** [testprog] Error 1

我對這兩個函數（initInput 和 filter）都使用了 extern "C" 聲明。函數聲明（在test.cpp中）和定義（在kernel.cu中）的參數也相同，但鏈接器仍然抱怨找不到這些函數。我該如何解決這個鏈接問題？

來源

2011-04-22 all_by_grace

你的操作系統是多少位的？你安裝了哪個版本的CUDA SDK？看來nvcc正在編譯成32位，但你的C++編譯器是64位的。我猜測是你的平臺安裝了錯誤的SDK。此外，在.cu文件的函數上加上__host__和__device__關鍵字是一個好主意。 – asm 2011-04-23 13:26:51

我現在在使用nvcc編譯器的機器上，看起來你可以告訴它使用-m選項編譯爲64位或32位。如果你運行nvcc --help並查看--machine選項，它會告訴你默認是什麼。我會檢查這個，看看默認值是否與你的操作系統的位數不同。 – asm 2011-04-23 16:52:00

您正在將.cu文件直接包含在.cpp文件中，這實際上是把.cu文件的內容複製到.cpp文件中。nvcc會把.cpp文件交給標準的C++編譯器（unix平臺上是g++）來編譯，而該編譯器不懂任何CUDA語法。

您必須把每個文件分別編譯成目標文件，然後用C++編譯器將它們鏈接起來；.cu文件中的函數要通過頭文件導出，做法與標準C相同。

來源

2011-04-22 21:51:12 asm

嗨安德魯，我已經按照你的建議刪除了include行，現在出現了鏈接錯誤。即使我在cpp文件中使用了 extern "C"，C++程序也找不到在kernel.cu中定義的一些函數。你有什麼建議嗎？謝謝 – 2011-04-22 22:07:37

cuda-grayscale 項目中有一個很好的例子。

它曾經用CUDA 3.1編譯通過。項目中有一個Makefile，其開頭部分如下：

# Builds Debug/grayscale from one host C++ source and one CUDA source:
# each file is compiled to its own object, then $(CXX) links them.
CXX=g++

CUDA_INSTALL_PATH=/usr/local/cuda
CFLAGS= -I. -I$(CUDA_INSTALL_PATH)/include `pkg-config --cflags opencv`
LDFLAGS= -L$(CUDA_INSTALL_PATH)/lib -lcudart `pkg-config --libs opencv`
# Extra nvcc options, empty by default. Override on the command line, e.g.
# `make CUDAFLAGS=-m64`, so nvcc targets the same architecture as $(CXX).
CUDAFLAGS ?=

# Neither target produces a file with its own name.
.PHONY: all clean

all:
	mkdir -p Debug
	$(CXX) $(CFLAGS) -c main.cpp -o Debug/main.o
	nvcc $(CUDAFLAGS) -c kernel_gpu.cu -o Debug/kernel_gpu.o
	# Libraries go after the objects that reference them.
	$(CXX) Debug/main.o Debug/kernel_gpu.o $(LDFLAGS) -o Debug/grayscale

clean:
	rm -f Debug/*.o Debug/grayscale

來源

2011-04-22 23:19:08 karlphillip

在一個非常簡單的Cuda和C++程序上需要幫助

回答

相關問題