#include "ClGenerators.h" #ifdef WITH_OPENCL #include #include using namespace cl; using mnd::ClGenerator; using mnd::ClGeneratorFloat; using mnd::ClGeneratorDouble; using mnd::ClGenerator128; Platform getPlatform() { /* Returns the first platform found. */ std::vector all_platforms; Platform::get(&all_platforms); if (all_platforms.size()==0) { std::cout << "No platforms found. Check OpenCL installation!\n"; exit(1); } for (auto& p : all_platforms) { std::string name = p.getInfo(); std::string profile = p.getInfo(); printf("Platform: %s, %s\n", name.c_str(), profile.c_str()); } return all_platforms[0]; } Device getDevice(Platform& platform, int i, bool display = false) { /* Returns the deviced specified by the index i on platform. * If display is true, then all of the platforms are listed. */ std::vector all_devices; platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices); if (all_devices.size() == 0) { std::cout << "No devices found. Check OpenCL installation!\n"; exit(1); } if (display) { for (::size_t j = 0; j < all_devices.size(); j++) { printf("Device %d: %s\n", int(j), all_devices[j].getInfo().c_str()); printf("preferred float width: %d\n", all_devices[j].getInfo()); printf("vendor: %s\n", all_devices[j].getInfo().c_str()); } } return all_devices[i]; } ClGenerator::ClGenerator(cl::Device device) : device{ device } { /*Platform p = getPlatform(); device = getDevice(p, 0, true); context = Context{ device }; Program::Sources sources; std::string kcode = this->getKernelCode(); sources.push_back({ kcode.c_str(), kcode.length() }); program = Program{ context, sources }; if (program.build({ device }) != CL_SUCCESS) { std::cout << "Error building: " << program.getBuildInfo(device) << std::endl; exit(1); } queue = CommandQueue(context, device);*/ } ClGenerator::~ClGenerator(void) { queue.flush(); queue.finish(); } void ClGenerator::generate(const mnd::MandelInfo& info, float* data) { ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float); Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize); float pixelScaleX = info.view.width / info.bWidth; float pixelScaleY = info.view.height / info.bHeight; Kernel iterate = Kernel(program, "iterate"); iterate.setArg(0, buffer_A); iterate.setArg(1, int(info.bWidth)); iterate.setArg(2, float(info.view.x)); iterate.setArg(3, float(info.view.y)); iterate.setArg(4, float(pixelScaleX)); iterate.setArg(5, float(pixelScaleY)); iterate.setArg(6, int(info.maxIter)); // TODO check for overflow if (false && device.getInfo() == 4) { queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight / 4)); } else { queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight)); } queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data); } ClGeneratorFloat::ClGeneratorFloat(cl::Device device) : ClGenerator{ device } { /*Platform p = getPlatform(); device = getDevice(p, 0, true);*/ context = Context{ device }; Program::Sources sources; std::string kcode = this->getKernelCode(); sources.push_back({ kcode.c_str(), kcode.length() }); program = Program{ context, sources }; if (program.build({ device }) != CL_SUCCESS) { throw std::string(program.getBuildInfo(device)); } queue = CommandQueue(context, device); } std::string ClGeneratorFloat::getKernelCode(void) const { if (false && device.getInfo() == 4) { return "__kernel void iterate(__global float* A, const int width, float xl, float yt, float pixelScaleX, float pixelScaleY, int max) {\n" " int index = get_global_id(0) * 4;\n" " int x = index % (width);\n" " int y = index / (width);\n" " float4 av = (float4)(x * pixelScaleX + xl, (x + 1) * pixelScaleX + xl, (x + 2) * pixelScaleX + xl, (x + 3) * pixelScaleX + xl);\n" // "(x + 4) * pixelScale + xl, (x + 5) * pixelScale + xl, (x + 6) * pixelScale + xl, (x + 7) * pixelScale + xl);\n" " float4 bv = (float4)(y * pixelScaleY + yt);\n" " float4 ca = av;\n" " float4 cb = bv;\n" "" " int4 counter = (int4) 1;" " float4 threshold = (float4) 16;" " int n = 0;\n" " while (n < max) {\n" " float4 aa = av * av;\n" " float4 bb = bv * bv;\n" " float4 ab = av * bv;\n" " av = aa - bb + ca;\n" " bv = 2 * ab + cb;\n" " counter += -(threshold > (aa + bb));\n" " if(all(threshold < (aa + bb))) break;\n" " //if (aa + bb > 16) break;\n" " n++;\n" " }\n\n" " A[index] = (float) counter[0];\n" " A[index + 1] = (float) counter[1];\n" " A[index + 2] = (float) counter[2];\n" " A[index + 3] = (float) counter[3];\n" /* " A[index + 4] = (float) counter[4];\n" " A[index + 5] = (float) counter[5];\n" " A[index + 6] = (float) counter[6];\n" " A[index + 7] = (float) counter[7];\n"*/ // " A[get_global_id(0)] = 1;\n" "}\n"; } else { return // "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" "__kernel void iterate(__global float* A, const int width, float xl, float yt, float pixelScaleX, float pixelScaleY, int max) {" " int index = get_global_id(0);\n" " int x = index % width;" " int y = index / width;" " float a = x * pixelScaleX + xl;" " float b = y * pixelScaleY + yt;" " float ca = a;" " float cb = b;" "" " int n = 0;" " while (n < max - 1) {" " float aa = a * a;" " float bb = b * b;" " float ab = a * b;" " if (aa + bb > 16) break;" " a = aa - bb + ca;" " b = 2 * ab + cb;" " n++;" " }\n" // N + 1 - log (log |Z(N)|) / log 2 " if (n >= max - 1)\n" " A[index] = max;\n" " else" " A[index] = ((float)n) + 1 - log(log(a * a + b * b) / 2) / log(2.0f);\n" // " A[index] = ((float)n) + 1 - (a * a + b * b - 16) / (256 - 16);\n" // " A[get_global_id(0)] = 5;" "}"; } } ClGeneratorDouble::ClGeneratorDouble(cl::Device device) : ClGenerator{ device } { context = Context{ device }; Program::Sources sources; std::string kcode = this->getKernelCode(); sources.push_back({ kcode.c_str(), kcode.length() }); program = Program{ context, sources }; if (program.build({ device }) != CL_SUCCESS) { throw std::string(program.getBuildInfo(device)); } queue = CommandQueue(context, device); } void ClGeneratorDouble::generate(const mnd::MandelInfo& info, float* data) { ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float); Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize); float pixelScaleX = info.view.width / info.bWidth; float pixelScaleY = info.view.height / info.bHeight; Kernel iterate = Kernel(program, "iterate"); iterate.setArg(0, buffer_A); iterate.setArg(1, int(info.bWidth)); iterate.setArg(2, double(info.view.x)); iterate.setArg(3, double(info.view.y)); iterate.setArg(4, double(pixelScaleX)); iterate.setArg(5, double(pixelScaleY)); iterate.setArg(6, int(info.maxIter)); queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight)); queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data); } std::string ClGeneratorDouble::getKernelCode(void) const { return "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" "__kernel void iterate(__global float* A, const int width, double xl, double yt, double pixelScaleX, double pixelScaleY, int max) {\n" " int index = get_global_id(0);\n" " int x = index % width;" " int y = index / width;" " double a = x * pixelScaleX + xl;" " double b = y * pixelScaleY + yt;" " double ca = a;" " double cb = b;" "" " int n = 0;" " while (n < max - 1) {" " double aa = a * a;" " double bb = b * b;" " double ab = a * b;" " if (aa + bb > 16) break;" " a = aa - bb + ca;" " b = 2 * ab + cb;" " n++;" " }\n" // N + 1 - log (log |Z(N)|) / log 2 " if (n >= max - 1)\n" " A[index] = max;\n" " else" " A[index] = ((float)n) + 1 - log(log(a * a + b * b) / 2) / log(2.0f);\n" // " A[index] = ((float)n) + 1 - (a * a + b * b - 16) / (256 - 16);\n" // " A[get_global_id(0)] = 5;" "}"; } ClGenerator128::ClGenerator128(cl::Device device) : ClGenerator{ device } { context = Context{ device }; Program::Sources sources; std::string kcode = this->getKernelCode(); sources.push_back({ kcode.c_str(), kcode.length() }); program = Program{ context, sources }; if (program.build({ device }) != CL_SUCCESS) { throw std::string(program.getBuildInfo(device)); } queue = CommandQueue(context, device); } void ClGenerator128::generate(const mnd::MandelInfo& info, float* data) { ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float); Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize); float pixelScaleX = info.view.width / info.bWidth; float pixelScaleY = info.view.height / info.bHeight; Kernel iterate = Kernel(program, "iterate"); iterate.setArg(0, buffer_A); iterate.setArg(1, int(info.bWidth)); iterate.setArg(2, double(info.view.x)); iterate.setArg(3, double(info.view.y)); iterate.setArg(4, double(pixelScaleX)); iterate.setArg(5, double(pixelScaleY)); iterate.setArg(6, int(info.maxIter)); queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight)); queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data); } #include #include #include std::string ClGenerator128::getKernelCode(void) const { //fprintf(stderr, "starting file read\n"); std::ifstream t("mandel128.cl"); std::string str((std::istreambuf_iterator(t)), std::istreambuf_iterator()); //fprintf(stderr, "%s\n", str); return str; } #endif // WITH_OPENCL