2014-07-10 59 views
5

在CUDA中分離偶數和奇數

我有一個包含數字{1,2,3,4,5,6,7,8,9,10}的數組,我想把偶數和奇數分開,得到:

even = {2,4,6,8,10} 

和:

odd = {1,3,5,7,9} 

我知道CUDA中有原子操作,也知道使用原子操作時輸出不會受到競爭條件的影響。但我不想使用原子操作。如何在不使用原子操作的情況下實現這個目標?

CODE:

#include <stdio.h> 
#include <cuda.h> 

// Kernel that splits total[0..N-1] into even and odd values without atomics.
//
// Launch layout: 1D grid, one thread per input element; threads whose global
// index is >= N exit immediately, so the grid may be larger than N.
//
// Parameters:
//   total - input values (read only); each value is truncated to int before
//           the parity test, matching the float -> int conversion below
//   even  - output; receives the even values, packed from index 0
//   odd   - output; receives the odd values, packed from index 0
//   N     - number of elements in total
//
// Each thread computes its own destination slot as the number of earlier
// elements with the same parity. Every thread therefore writes to a distinct
// slot, so there is no race and the input order is preserved. Slots of
// even/odd past the number of matching values are never written.
//
// Cost is O(idx) reads per thread; for large N a prefix-sum based partition
// is the better choice.
__global__ void square_array(float *total,float *even,float *odd, int N) 
{ 
    int idx = blockIdx.x * blockDim.x + threadIdx.x; 
    if (idx >= N) 
        return;                     // guard the grid tail 

    const float value = total[idx]; 
    const bool valueIsEven = (((int)value) % 2) == 0; 

    // Count the earlier elements that land in the same output array. 
    int slot = 0; 
    for (int i = 0; i < idx; i++) 
    { 
        const bool otherIsEven = (((int)total[i]) % 2) == 0; 
        if (otherIsEven == valueIsEven) 
            slot++; 
    } 

    if (valueIsEven) 
        even[slot] = value; 
    else 
        odd[slot] = value; 
} 

// Reports a failed CUDA runtime call on stderr.
// Returns true when status is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what) 
{ 
    if (status == cudaSuccess) 
        return true; 
    fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(status)); 
    return false; 
} 

// main routine that executes on the host
//
// Fills an array with 1..N, runs square_array on it and prints the input
// array followed by all N slots of the even and odd output arrays.
// Returns 0 on success and 1 if any allocation or CUDA call fails.
int main(void) 
{ 
    const int N = 10; // Number of elements in arrays 
    const size_t size = N * sizeof(float); 

    // Host arrays 
    float *total_h = (float *)malloc(size); 
    float *even_h = (float *)malloc(size); 
    float *odd_h = (float *)malloc(size); 

    // Device arrays; kept NULL until allocated so cleanup is always safe 
    float *total_d = NULL; 
    float *even_d = NULL; 
    float *odd_d = NULL; 

    bool ok = (total_h != NULL) && (even_h != NULL) && (odd_h != NULL); 
    if (!ok) 
        fprintf(stderr, "Host allocation failed\n"); 

    // Allocate device arrays; outputs are zero-filled so that slots the 
    // kernel does not write read back as 0 
    ok = ok 
        && cudaOk(cudaMalloc((void **) &total_d, size), "cudaMalloc total_d") 
        && cudaOk(cudaMalloc((void **) &even_d, size), "cudaMalloc even_d") 
        && cudaOk(cudaMemset(even_d, 0, size), "cudaMemset even_d") 
        && cudaOk(cudaMalloc((void **) &odd_d, size), "cudaMalloc odd_d") 
        && cudaOk(cudaMemset(odd_d, 0, size), "cudaMemset odd_d"); 

    if (ok) 
    { 
        // Initialize host array and copy it to CUDA device 
        for (int i=0; i<N; i++) total_h[i] = (float)i+1; 
        ok = cudaOk(cudaMemcpy(total_d, total_h, size, cudaMemcpyHostToDevice), 
                    "cudaMemcpy total_h -> total_d"); 
    } 

    if (ok) 
    { 
        // Do calculation on device: a single block with exactly one thread 
        // per element (N must not exceed the device's threads-per-block limit) 
        square_array <<< 1, N >>> (total_d,even_d,odd_d, N); 
        // A launch does not return a status; query it explicitly 
        ok = cudaOk(cudaGetLastError(), "square_array launch"); 
    } 

    // Retrieve results from device; these blocking copies also report 
    // errors raised while the kernel was running 
    ok = ok 
        && cudaOk(cudaMemcpy(even_h, even_d, size, cudaMemcpyDeviceToHost), 
                  "cudaMemcpy even_d -> even_h") 
        && cudaOk(cudaMemcpy(odd_h, odd_d, size, cudaMemcpyDeviceToHost), 
                  "cudaMemcpy odd_d -> odd_h"); 

    if (ok) 
    { 
        // Print results 
        printf("total Numbers\n"); 
        for (int i=0; i<N; i++) printf("%f\n",total_h[i]); 

        printf("EVEN Numbers\n"); 
        for (int i=0; i<N; i++) printf("%f\n",even_h[i]); 

        printf("ODD Numbers\n"); 
        for (int i=0; i<N; i++) printf("%f\n",odd_h[i]); 
    } 

    // Cleanup (free and cudaFree both accept NULL) 
    free(total_h); 
    free(even_h); 
    free(odd_h); 

    cudaFree(total_d); 
    cudaFree(even_d); 
    cudaFree(odd_d); 

    return ok ? 0 : 1; 
} 

OUTPUT: enter image description here

+4

使用'thrust::partition'或'thrust::partition_copy' –

回答

3

正如Jared Hoberock所建議的,使用Thrust提供的高效分區算法,會比在CUDA中從頭開發自己的分區程序容易得多。下面是一個完整的可運行示例。

#include <cstdio>

#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/pair.h>
#include <thrust/partition.h>
#include <thrust/execution_policy.h>

// Unary predicate: returns true when x is even (x % 2 == 0), false otherwise.
// Usable from both host and device code.
struct is_even {
    // const-qualified so the predicate can be called through a const object or reference
    __host__ __device__ bool operator()(const int &x) const { return (x % 2) == 0; }
};

// Partitions the integers 0..N-1 into even and odd values on the device with
// thrust::partition_copy, then prints both groups.
int main() {

    const int N = 10;

    thrust::host_vector<int> h_data(N);
    for (int i=0; i<N; i++) h_data[i] = i;

    thrust::device_vector<int> d_data(h_data);
    // Either group may receive up to N values, so size both outputs for the
    // worst case instead of assuming an exact N/2 split
    thrust::device_vector<int> d_evens(N);
    thrust::device_vector<int> d_odds(N);

    // partition_copy returns the end of each output range, which gives the
    // number of values actually written to each group
    typedef thrust::device_vector<int>::iterator DeviceIter;
    thrust::pair<DeviceIter, DeviceIter> ends =
        thrust::partition_copy(d_data.begin(), d_data.end(), d_evens.begin(), d_odds.begin(), is_even());

    // Copy each group to the host once rather than reading device elements one by one
    thrust::host_vector<int> h_evens(d_evens.begin(), ends.first);
    thrust::host_vector<int> h_odds(d_odds.begin(), ends.second);

    const int numEvens = (int)h_evens.size();
    const int numOdds = (int)h_odds.size();

    printf("Even numbers\n");
    for (int i=0; i<numEvens; i++) {
        int val = h_evens[i];
        printf("evens[%i] = %i\n",i,val);
    }

    printf("Odd numbers\n");
    for (int i=0; i<numOdds; i++) {
        int val = h_odds[i];
        printf("odds[%i] = %i\n",i,val);
    }

    return 0;
}