26 #define __CL_ENABLE_EXCEPTIONS
28 #include "density_clustering_opencl.hpp"
32 namespace Clustering {
// Runs the `init_pops` and `pops` OpenCL kernels on the GPU devices of the
// first platform and returns one population count per row, keyed by the
// first radius.
//
// NOTE(review): this listing is incomplete. The line carrying the function
// name, the opening `try {`, several closing braces, and the definitions of
// `kernel_src` and `event` are not visible here. The signature at the end of
// this listing names the function `calculate_populations`; confirm every
// note below against the real source file before changing code.
//
// Parameters (per the visible lines and the trailing signature):
//   coords  host array uploaded to the device as n_rows*n_cols floats
//   n_rows  number of rows; also the length of the returned population vector
//   n_cols  number of floats per row
//   radii   only radii[0] is used in the visible code
//           NOTE(review): radii[0] is read without an emptiness check.
//
// Returns a single-entry map {radii[0] -> pops}, where pops[i] is the sum of
// element i over the population buffers of all command queues.
36 std::map<float, std::vector<std::size_t>>
38 const std::size_t n_rows,
39 const std::size_t n_cols,
40 std::vector<float> radii) {
// The kernels receive the dimensions as unsigned int (kernel args 2 and 3),
// so they are narrowed here.
// NOTE(review): values that do not fit into unsigned int are silently
// truncated by these casts.
41 unsigned int uint_n_rows = (
unsigned int) n_rows;
42 unsigned int uint_n_cols = (
unsigned int) n_cols;
// Create a GPU context on the first reported platform and one command queue
// per device in that context.
// NOTE(review): platforms[0] is read without checking that any platform was
// found.
44 std::vector<cl::Platform> platforms;
45 cl::Platform::get(&platforms);
46 cl_context_properties cps[3] = {
48 (cl_context_properties) (platforms[0])(),
51 cl::Context context(CL_DEVICE_TYPE_GPU, cps);
52 std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
53 std::vector<cl::CommandQueue> queues;
54 for (cl::Device dev: devices) {
55 queues.push_back(cl::CommandQueue(context, dev));
57 std::size_t n_queues = queues.size();
// Memory check: the full coordinate set plus one unsigned int per row must
// fit into the global memory reported by the first device. Only devices[0]
// is consulted.
59 std::size_t n_bytes_per_row;
60 std::size_t n_bytes_global_mem;
61 n_bytes_per_row = n_cols *
sizeof(float);
62 n_bytes_global_mem = devices[0].getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>();
64 if (n_bytes_per_row * n_rows + (
sizeof(
unsigned int)*n_rows) > n_bytes_global_mem) {
// NOTE(review): whatever follows this message (abort or fall through) is not
// visible in this listing.
66 std::cerr <<
"error: not enough memory on device for full data set.\n"
67 <<
" will support memory-size independet computation in a later release.\n"
68 <<
" please check for updates." << std::endl;
// One read-only coordinate buffer for the whole context and one write-only
// population buffer per queue. Every queue performs a blocking write
// (CL_TRUE) of the complete coordinate array into the same buf_coords.
// NOTE(review): `iq` is a signed int compared against the std::size_t
// n_queues.
72 cl::Buffer buf_coords = cl::Buffer(context, CL_MEM_READ_ONLY, n_rows*n_cols*
sizeof(
float));
73 std::vector<cl::Buffer> buf_pops(n_queues);
74 for (
int iq=0; iq < n_queues; ++iq) {
75 queues[iq].enqueueWriteBuffer(buf_coords, CL_TRUE, 0, n_rows*n_cols*
sizeof(
float), coords);
76 buf_pops[iq] = cl::Buffer(context, CL_MEM_WRITE_ONLY, n_rows*
sizeof(
unsigned int));
// Build the program from `kernel_src` for all devices. The definition of
// `kernel_src` is not visible in this listing.
83 cl::Program::Sources src(1, {kernel_src.c_str(), kernel_src.length()+1});
84 cl::Program prog = cl::Program(context, src);
// NOTE(review): __CL_ENABLE_EXCEPTIONS is defined at the top of the file, so
// the C++ bindings presumably throw cl::Error on a failed build instead of
// returning an error code. If so, the build-log branch below is unreachable;
// confirm against the cl.hpp documentation.
85 int err = prog.build(devices);
86 if (err != CL_SUCCESS) {
87 std::cerr <<
"error during kernel compilation" << std::endl;
88 std::cerr << prog.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]) << std::endl;
// Launch `init_pops` with n_rows work items on every queue, each bound to
// that queue's population buffer (presumably resets the counters; the kernel
// source is not visible). The declaration of `event` is not visible either.
92 cl::Kernel krnl_init_pops(prog,
"init_pops");
93 for (
int iq=0; iq < n_queues; ++iq) {
94 krnl_init_pops.setArg(0, buf_pops[iq]);
95 cl::NDRange full_global_range(n_rows);
97 queues[iq].enqueueNDRangeKernel(krnl_init_pops, cl::NullRange, full_global_range, cl::NullRange, NULL, &event);
// Set up the `pops` kernel. Arguments that stay fixed for all launches:
//   2 = n_rows, 3 = n_cols, 4 = coordinate buffer, 5 = squared radius,
//   7 = size-only argument with a NULL pointer of WORKGROUP_SIZE*n_cols
//       floats (the OpenCL way to request local memory for a kernel).
101 cl::Kernel krnl_pops(prog,
"pops");
// Only the first radius is used; the kernel is handed its square.
103 float rad2 = radii[0]*radii[0];
105 const unsigned int GLOBAL_SIZE = 1024;
106 const unsigned int WORKGROUP_SIZE = 128;
107 krnl_pops.setArg(2,
sizeof(
unsigned int), &uint_n_rows);
108 krnl_pops.setArg(3,
sizeof(
unsigned int), &uint_n_cols);
109 krnl_pops.setArg(4, buf_coords);
110 krnl_pops.setArg(5,
sizeof(
float), &rad2);
111 krnl_pops.setArg(7,
sizeof(
float)*WORKGROUP_SIZE*n_cols, NULL);
// range_length starts at n_rows, and the loop runs until it is a multiple of
// WORKGROUP_SIZE.
// NOTE(review): the loop body is not visible in this listing; presumably it
// increments range_length.
115 unsigned int range_length = n_rows;
116 while (range_length % WORKGROUP_SIZE != 0) {
119 cl::NDRange global(GLOBAL_SIZE);
120 cl::NDRange local(WORKGROUP_SIZE);
// Outer loop: reference-block offset in steps of WORKGROUP_SIZE (kernel
// arg 1). Inner loop: row-block offset in steps of n_queues*GLOBAL_SIZE.
// Within one inner step, queue iq gets the offset i_block + iq*GLOBAL_SIZE
// (arg 0) and its own population buffer (arg 6). The events collected for
// the queues are then waited on.
122 for (
unsigned int i_block_ref=0; i_block_ref < range_length; i_block_ref += WORKGROUP_SIZE) {
123 krnl_pops.setArg(1,
sizeof(
unsigned int), &i_block_ref);
124 for (
unsigned int i_block=0; i_block < range_length; i_block += n_queues*GLOBAL_SIZE) {
125 std::vector<cl::Event> events(n_queues);
126 for (
unsigned int iq=0; iq < n_queues; ++iq) {
127 unsigned int q_block = i_block + iq*GLOBAL_SIZE;
128 krnl_pops.setArg(0,
sizeof(
unsigned int), &q_block);
129 krnl_pops.setArg(6, buf_pops[iq]);
// NOTE(review): `kernel` is not declared anywhere in the visible code;
// `krnl_pops`, whose arguments were just set, is presumably what should be
// enqueued here.
130 queues[iq].enqueueNDRangeKernel(kernel, cl::NullRange, global, local, NULL, &events[iq]);
132 cl::Event::waitForEvents(events);
// Read every queue's population buffer back with a blocking read and sum the
// partial counts element-wise into `pops`.
// NOTE(review): `iq` is again a signed int compared against std::size_t.
136 std::vector<std::size_t> pops(n_rows, 0);
137 for (
int iq=0; iq < n_queues; ++iq) {
138 std::vector<unsigned int> partial_pops(n_rows);
139 queues[iq].enqueueReadBuffer(buf_pops[iq], CL_TRUE, 0, n_rows*
sizeof(
unsigned int), partial_pops.data());
140 for (std::size_t i=0; i < n_rows; ++i) {
141 pops[i] += partial_pops[i];
// Single-entry result keyed by the first radius.
144 return {{radii[0], pops}};
145 }
// Reports any OpenCL error by message and numeric code.
// NOTE(review): cl::Error is caught by value; catching by const reference
// would avoid the copy. The rest of the handler is not visible in this
// listing.
catch(cl::Error error) {
146 std::cerr <<
"error in OpenCL call: " << error.what() <<
"(" << error.err() <<
")" << std::endl;
std::map< float, std::vector< std::size_t > > calculate_populations(const float *coords, const std::size_t n_rows, const std::size_t n_cols, std::vector< float > radii)