2017-02-09 26 views
0

我正在嘗試編寫一個程序來將fortuna界面上的cusolverSp接口。儘管我對C編碼cuda並不陌生,但我不確定如何在Fortran中使用它。在Fortran上的CuSolver稀疏界面

以下是我的代碼:

! Fortran Console Application 
! 
    module cuda_cusolverSP 

    interface 

! cudaMalloc 
integer (c_int) function cudaMalloc (buffer, size) bind (C, name="cudaMalloc") 
    use iso_c_binding 
    implicit none 
    type (c_ptr) :: buffer 
    integer (c_size_t), value :: size 
end function cudaMalloc 

! cudaMemcpy 
integer (c_int) function cudaMemcpy (dst, src, count, kind) bind (C, name="cudaMemcpy") 
    ! note: cudaMemcpyHostToDevice = 1 
    ! note: cudaMemcpyDeviceToHost = 2 
    use iso_c_binding 
    type (C_PTR), value :: dst, src 
    integer (c_size_t), value :: count, kind 
end function cudaMemcpy 

! cudaFree 
integer (c_int) function cudaFree(buffer) bind(C, name="cudaFree") 
    use iso_c_binding 
    implicit none 
    type (C_PTR), value :: buffer 
end function cudaFree 

integer (c_int) function cudaMemGetInfo(fre, tot) bind(C, name="cudaMemGetInfo") 
    use iso_c_binding 
    implicit none 
    type(c_ptr),value :: fre 
    type(c_ptr),value :: tot 
end function cudaMemGetInfo 


integer(c_int) function cusolverSpCreate(cusolver_Hndl) bind(C,name="cusolverSpCreate") 

use iso_c_binding 
implicit none 

type(c_ptr)::cusolver_Hndl 

end function 

integer(c_int) function cusolverSpDestroy(cusolver_Hndl) bind(C,name="cusolverSpDestroy") 

use iso_c_binding 
implicit none 

type(c_ptr),value::cusolver_Hndl 

end function 

integer(c_int) function cusolverSpSgetrf_bufferSize(cusolver_Hndl,m,n,d_A,lda,Lwork) bind(C,name="cusolverSpSgetrf_bufferSize") 

    use iso_c_binding 
    implicit none 

    type(c_ptr),value::cusolver_Hndl 
    integer(c_int),value::m 
    integer(c_int),value::n 
    type(c_ptr),value::d_A 
    integer(c_int),value::lda 
    type(c_ptr),value::Lwork 
end function 

integer(c_int) function cusolverSpSgetrf(cusolver_Hndl,m,n,d_A,lda,d_WS,d_Ipiv,d_devInfo) bind(C, name="cusolverSpSgetrf") 

    use iso_c_binding 
    implicit none 
    type(c_ptr),value::cusolver_Hndl 
    integer(c_int),value::m 
    integer(c_int),value::n 
    type(c_ptr),value::d_A 
    integer(c_int),value::lda 
    type(c_ptr),value::d_WS 
    type(c_ptr),value::d_Ipiv 
    type(c_ptr),value::d_devInfo 

end function 

integer (c_int) function cusolverSpSgetrs(cusolver_Hndl,trans,n,nrhs,d_A,lda,d_Ipiv,d_B,ldb,d_devInfo) bind(C, name="cusolverSpSgetrs") 

    use iso_c_binding 
    implicit none 
    type(c_ptr),value::cusolver_Hndl 
    integer(c_int), value::trans 
    integer(c_int), value::n 
    integer(c_int), value::nrhs 
    type(c_ptr),value::d_A 
    integer(c_int), value::lda  
    type(c_ptr),value::d_Ipiv 
    type(c_ptr),value::d_B 
    integer(c_int),value::ldb 
    type(c_ptr),value::d_devInfo 

     end function 

    end interface 

    end module 

program prog 

use iso_c_binding 
use cuda_cusolverSP 

! ------ Matrix Definition & host CPU storage variables 

integer(c_int) rowsA ! number of rows of A 
integer(c_int) colsA ! number of columns of A 
integer(c_int) nnzA ! number of nonzeros of A 
integer(c_int) baseA ! base index in CSR format 

! CSR(A) from I/O <--- pointers to host CPU memory 
type(c_ptr) :: h_csrRowPtrA 
type(c_ptr) :: h_csrColIndA(:) 
type(c_ptr) :: h_csrValA(:) 

type(c_ptr) :: h_x ! x = A \ b 
type(c_ptr) :: h_b ! b = ones(m,1) 
type(c_ptr) :: h_r ! r = b - A*x 

type(c_ptr) :: h_Q ! <int> n 
        ! reorder to reduce zero fill-in 
        ! Q = symrcm(A) or Q = symamd(A) 
! B = Q*A*Q^T 
type(c_ptr) :: h_csrRowPtrB ! <int> n+1 
type(c_ptr) :: h_csrColIndB ! <int> nnzA 
type(c_ptr) :: h_csrValB ! <double> nnzA 
type(c_ptr) :: h_mapBfromA ! <int> nnzA 


integer size_perm 
type(c_ptr) :: buffer_cpu ! working space for permutation: B = Q*A*Q^T 


! -------------------- pointers to device memory  
type(c_ptr) :: d_csrRowPtrA 
type(c_ptr) :: d_csrColIndA 
type(c_ptr) :: d_csrValA 
type(c_ptr) :: d_x ! x = A \ b 
type(c_ptr) :: d_b ! a copy of h_b 
type(c_ptr) :: d_r ! r = b - A*x 

doubleprecision tol 
integer reorder 
integer singularity 

type(c_ptr)::cpfre,cptot 
integer*8,target::free,total 
integer res 
integer*8 cudaMemcpyDeviceToHost, cudaMemcpyHostToDevice 
integer*4 CUBLAS_OP_N, CUBLAS_OP_T 
parameter (cudaMemcpyHostToDevice=1) 
parameter (cudaMemcpyDeviceToHost=2) 
parameter (CUBLAS_OP_N=0) 
parameter (CUBLAS_OP_T=1) 


! ================================================================== 
rowsA = 0 
colsA = 0 
nnzA = 0 
baseA = 0  



A_size = SIZEOF(rowsA) 
B_size = SIZEOF(B) 
X_size = SIZEOF(X) 

size_perm = 0 
tol = 1.e-12 
reorder = 0 ! no reordering 
singularity = 0 ! -1 if A is invertible under tol. 

! Step 1: Create cudense handle --------------- 
cusolver_stat = cusolverSpCreate(cusolver_Hndl) 
if (cusolver_stat .ne. 0) then 
    write (*,*) 
    write (*, '(A, I2)') " cusolverSpCreate error: ", cusolver_stat 
    write (*,*) 
    stop 
end if 

! Step 2: copy A and B to Device 

A_mem_stat = cudaMalloc(d_A,A_size) 
if (A_mem_stat .ne. 0) then 
    write (*,*) 
    write (*, '(A, I2)') " cudaMalloc 1 error: ", A_mem_stat 
    write (*,*) 
    stop 
end if 

B_mem_stat = cudaMalloc(d_B,B_size) 
if (B_mem_stat .ne. 0) then 
    write (*,*) 
    write (*, '(A, I2)') " cudaMalloc 2 error: ", B_mem_stat 
    write (*,*) 
    stop 
end if  


! ---------- copy A and B to Device 

A_mem_stat = cudaMemcpy(d_A,CPU_A_ptr,A_size,cudaMemcpyHostToDevice) 
if (A_mem_stat .ne. 0) then 
    write (*,*) 
    write (*, '(A, I2)') " cudaMemcpy 1 error: ", A_mem_stat 
    write (*,*) 
!  stop 
    end if 

B_mem_stat = cudaMemcpy(d_B,CPU_B_ptr,B_size,cudaMemcpyHostToDevice) 
if (B_mem_stat .ne. 0) then 
    write (*,*) 
    write (*, '(A, I2)') " cudaMemcpy 2 error: ", B_mem_stat 
    write (*,*) 
!  stop 
    end if 

! Step 3: query working space of Sgetrf (and allocate memory on device) 

Lwork = 5 
cusolver_stat = cusolverSpSgetrf_bufferSize(cusolver_Hndl,m,n,d_A,lda,CPU_Lwork_ptr) 
if (cusolver_stat .ne. 0) then 
    write (*,*) 
    write (*, '(A, I2)') " SpSgetrf_bufferSize error: ", cusolver_stat 
    write (*,*) 
!  stop 
    end if 

write (*,*) 
write (*, '(A, I12)') " Lwork: ", Lwork 
write (*,*) 


Workspace = 4*Lwork 
WS_mem_stat = cudaMalloc(d_WS,Workspace) 
if (WS_mem_stat .ne. 0) then 
    write (*,*) 
    write (*, '(A, I2)') " cudaMalloc 6 error: ", WS_mem_stat 
    write (*,*) 
!  stop 
    end if 

! Step 4: compute LU factorization of [A] 

cusolver_stat = cusolverSpSgetrf(cusolver_Hndl,m,n,d_A,lda,d_WS,d_Ipiv,d_devInfo) 
if (cusolver_stat .ne. 0) then 
    write (*,*) 
    write (*, '(A, I2)') " cusolverSpSgetrf error: ", WS_mem_stat 
    write (*,*) 
!  stop 
    end if 

! Step 5: compute solution vector [X] for Right hand side [B] 

cusolver_stat = cusolverSpSgetrs(cusolver_Hndl,CUBLAS_OP_N,n,nrhs,d_A,lda,d_Ipiv,d_B,ldb,d_devInfo) 
if (cusolver_stat .ne. 0) then 
    write (*,*) 
    write (*, '(A, I2)') " cusolverSpSgetrs error: ", WS_mem_stat 
    write (*,*) 
!  stop 
    end if 

! Step 6: copy solution vector stored in [B] on device into [X] vector on host 

X_mem_stat = cudaMemcpy(CPU_X_ptr,d_B,B_size,cudaMemcpyDeviceToHost) 
if (X_mem_stat .ne. 0) then 
    write (*,*) 
    write (*, '(A, I2)') " cudaMemcpy 4 error: ", WS_mem_stat 
    write (*,*) 
!  stop 
    end if 

! do i = 1, n 
!  print *, x(i,1) 
! enddo 

! step 7: free memory on device and release CPU-side resources 

A_mem_Stat = cudafree(d_A) 
B_mem_Stat = cudafree(d_B) 
Ipiv_mem_stat = cudafree(d_Ipiv) 
WS_mem_stat = cudafree(d_WS) 
Lwork_mem_stat = cudafree(d_Lwork) 

cusolver_stat = cusolverSpDestroy(cusolver_Hndl) 

! Step 8: deallocate memory on host before exit 

! deallocate(A) 
! deallocate(ATest) 
! deallocate(B) 
! deallocate(X) 
! deallocate(Ipiv) 



end program prog 

當前的錯誤我的構建過程中

錯誤S0188:參數編號#至cusolverspcreate的/ etc:類型不匹配

這我不知道如何解決它。這個程序是一個工作cusolverDn的修改,我肯定意味着我犯了一堆錯誤,因爲沒有很多接口示例可以參考。

+1

與錯誤無關,但如果您已經在使用'iso_c_binding'考慮使用別的東西而不是'integer * 4'和'integer * 8'。根據C端的內容,它可以是'integer(c_int)'和'integer(c_size_t)'或'integer(c_intptr_t)',或者也可以是'integer(c_int32_t)'和'integer(c_int64_t)'。在這種情況下,'tol = 1.e-12'可以正常工作,但一般要小心,因爲文字「1.e-12」是單精度的。 –

+0

您需要一致地使用c個可互操作的數據類型來避免不匹配錯誤。 – tim18

回答

1

您的主程序中沒有implicit none,並且cusolver_Hndl未聲明,因此它被假定爲real

使用implicit none並聲明所有變量。 cusolver_Hndl應該是type(ptr),並且不要忘記設置它的值(如果它不是輸出參數,那麼接口不會顯示任何意圖)。