-1
我寫了一個關於字符串比較的簡單代碼。代碼如下所示。 這很簡單。只要比較字符串a和字符串b,如果相應的元素是 相同,則將5賦值給新的矩陣s;如果相應的元素不同,那麼 賦值-3給新的矩陣s。沒有編譯錯誤。但結果不是我期望的 。請給我一些有用的建議。謝謝!CUDA:字符串比較
#include <stdio.h>
#include <iostream>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include "book.h"
#define M 6
#define BLOCK_SIZE 30 // maximum 1024 threads per block
#define GRID_SIZE 30 // 900 blocks per grid
#define P (900 * 900)
void Init();
char *gpu_a;
char *gpu_b;
float *gpu_s;
float *cpu_s;
char cpu_a[6] = {'A', 'T', 'G', 'C', 'G', 'T'};
char cpu_b[6] = {'G', 'T', 'G', 'A', 'T', 'G'};
void cpu_Allocate1dArray()
{
//cpu_a = (char*) malloc(M * sizeof(char));
//cpu_b = (char*) malloc(M * sizeof(char));
cpu_s = (float*) malloc(M * sizeof(float));
}
void gpu_Allocate1dArray()
{
cudaMalloc((void**)&gpu_a, M * sizeof(char));
cudaMalloc((void**)&gpu_b, M * sizeof(char));
cudaMalloc((void**)&gpu_s, M * sizeof(float));
}
__global__ void mykernel(char *gpu_a, char *gpu_b, float *gpu_s)
{
int i , j , tid;
i = threadIdx.x + blockIdx.x * blockDim.x;
j = threadIdx.y + blockIdx.y * blockDim.y;
tid = i + j * blockDim.x * gridDim.x;
if (tid < P)
{
if(gpu_a[i] == gpu_b[j])
{
gpu_s[tid] = 5;
}
else
gpu_s[tid] = -3;
}
}
int main()
{
int q;
cpu_Allocate1dArray();
gpu_Allocate1dArray();
Init();
dim3 gridDim;
dim3 blockDim;
blockDim.x = blockDim.y = BLOCK_SIZE;
gridDim.x = gridDim.y = GRID_SIZE;
cudaMemcpy(gpu_a, cpu_a, sizeof(char) * M, cudaMemcpyHostToDevice);
cudaMemcpy(gpu_b, cpu_b, sizeof(char) * M, cudaMemcpyHostToDevice);
mykernel<<<gridDim, blockDim>>>(gpu_a, gpu_b, gpu_s);
cudaMemcpy(cpu_s, gpu_s, sizeof(float)* M, cudaMemcpyDeviceToHost);
for (q = 0; q < M; q++)
printf("%f ", cpu_s[q]);
printf("\n");
//Free device memory
free(cpu_s);
cudaFree(gpu_s);
cudaFree(gpu_a);
cudaFree(gpu_b);
return 0;
}
void Init()
{
int i;
for (i = 0; i < M; i++)
cpu_s[i] = 0;
}
結果是: [史密斯@服務器] $ ./test88.exe 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
在每個cuda調用(cudaMemcpy,cudaMalloc,內核調用等)上執行[cuda錯誤檢查](http://stackoverflow.com/tags/cuda/info)總是一個好主意。失敗。 –