1
我想重新驗證SIGGRAPH'16論文中有關GPU自動調度的實驗。如何使用自動調度程序(auto-scheduler)爲GPU生成調度(schedule)?
我使用auto-scheduler分支編譯了下面的代碼,但生成的調度(schedule)是針對CPU的,而不是針對GPU的。
如何使用自動調度程序(auto-scheduler)爲GPU生成調度(schedule)?
#include "Halide.h"
#include "HalideBuffer.h"
#include <climits>
#include <iostream>
using namespace Halide;
/// Halide generator that convolves an 8-bit 2-D image with a 3x3 window of a
/// float kernel and asks the auto-scheduler to schedule the resulting pipeline.
class conv2dHalide : public Halide::Generator<conv2dHalide>{
public:
    // 8-bit, 2-D image to be filtered.
    ImageParam in{UInt(8), 2, "input_image"};
    // 2-D float filter weights; build() reads a 3x3 window starting at (0, 0).
    ImageParam kernel{Float(32), 2, "kernel"};
    Var x, y;

    /// Builds the convolution pipeline, supplies bounds estimates, runs the
    /// auto-scheduler for a CUDA-enabled host target, prints the resulting
    /// loop nest and returns the output Func ("output_image").
    Func build(){
        // Concrete size estimate shared by the output and the input image.
        // The auto-scheduler needs actual numbers to reason about; describing
        // the input extent in terms of itself (in.width()) gives it nothing.
        const int estimated_image_size = 512;

        Func out("output_image");

        // Algorithm
        Func clamped_in;
        Expr k_wid = 3;
        Expr k_half = k_wid/2;
        Expr i_wid = in.width();
        Expr i_hei = in.height();

        // Clamp coordinates so every read of the input stays inside the image.
        Expr c_x = clamp(x, 0, i_wid-1);
        Expr c_y = clamp(y, 0, i_hei-1);
        clamped_in(x,y) = in(c_x,c_y);

        // Reduction window centred on (x, y): offsets -k_half .. k_half.
        RDom r(-k_half, k_wid, -k_half, k_wid);
        // Only accumulate for pixels at least k_half away from every border;
        // for the remaining pixels no term of the sum passes the predicate.
        r.where(k_half<=x && x<i_wid-k_half && k_half<=y && y<i_hei-k_half);

        Expr acc = sum(clamped_in(x+r.x, y+r.y)*kernel(r.x+k_half, r.y+k_half));
        // Saturate on both sides before narrowing: kernels with negative
        // weights can yield a negative sum, which must not reach the
        // uint8_t cast.
        out(x,y)=cast<uint8_t>(clamp(acc, 0.0f, 255.0f));

        // Bounds estimates consumed by the auto-scheduler.
        out.estimate(x, 0, estimated_image_size).estimate(y, 0, estimated_image_size);
        in.dim(0).set_bounds_estimate(0, estimated_image_size);
        in.dim(1).set_bounds_estimate(0, estimated_image_size);
        kernel.dim(0).set_bounds_estimate(0, k_wid);
        kernel.dim(1).set_bounds_estimate(0, k_wid);

        // Host target with CUDA (compute capability 5.0) features enabled.
        Target target = Halide::get_host_target();
        target.set_feature(Target::CUDA);
        target.set_feature(Target::CUDACapability50);
        std::cout << "gpu enabled" << std::endl;

        // NOTE(review): setting CUDA features on the target does not by itself
        // guarantee a GPU schedule; whether auto_schedule() emits GPU
        // schedules depends on the auto-scheduler version in use - confirm.
        Pipeline p(out);
        p.auto_schedule(target);
        out.print_loop_nest();
        return out;
    }
};
// Register the conv2dHalide generator class under the name "conv2dHalide".
HALIDE_REGISTER_GENERATOR(conv2dHalide, "conv2dHalide")