ClGenerators.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. #include "ClGenerators.h"
  2. #include "doubledouble.h"
  3. #ifdef WITH_OPENCL
  4. #include <iostream>
  5. #include <iterator>
  6. using namespace cl;
  7. using mnd::ClGenerator;
  8. using mnd::ClGeneratorFloat;
  9. using mnd::ClGeneratorDouble;
  10. using mnd::ClGeneratorDoubleDouble;
  11. using mnd::ClGenerator128;
  12. Platform getPlatform() {
  13. /* Returns the first platform found. */
  14. std::vector<Platform> all_platforms;
  15. Platform::get(&all_platforms);
  16. if (all_platforms.size()==0) {
  17. std::cout << "No platforms found. Check OpenCL installation!\n";
  18. exit(1);
  19. }
  20. for (auto& p : all_platforms) {
  21. std::string name = p.getInfo<CL_PLATFORM_NAME>();
  22. std::string profile = p.getInfo<CL_PLATFORM_PROFILE>();
  23. printf("Platform: %s, %s\n", name.c_str(), profile.c_str());
  24. }
  25. return all_platforms[0];
  26. }
  27. Device getDevice(Platform& platform, int i, bool display = false) {
  28. /* Returns the deviced specified by the index i on platform.
  29. * If display is true, then all of the platforms are listed.
  30. */
  31. std::vector<Device> all_devices;
  32. platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
  33. if (all_devices.size() == 0) {
  34. std::cout << "No devices found. Check OpenCL installation!\n";
  35. exit(1);
  36. }
  37. if (display) {
  38. for (::size_t j = 0; j < all_devices.size(); j++) {
  39. printf("Device %d: %s\n", int(j), all_devices[j].getInfo<CL_DEVICE_NAME>().c_str());
  40. printf("preferred float width: %d\n", all_devices[j].getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>());
  41. printf("vendor: %s\n", all_devices[j].getInfo<CL_DEVICE_VENDOR>().c_str());
  42. }
  43. }
  44. return all_devices[i];
  45. }
  46. ClGenerator::ClGenerator(cl::Device device) :
  47. device{ device }
  48. {
  49. /*Platform p = getPlatform();
  50. device = getDevice(p, 0, true);
  51. context = Context{ device };
  52. Program::Sources sources;
  53. std::string kcode = this->getKernelCode();
  54. sources.push_back({ kcode.c_str(), kcode.length() });
  55. program = Program{ context, sources };
  56. if (program.build({ device }) != CL_SUCCESS) {
  57. std::cout << "Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << std::endl;
  58. exit(1);
  59. }
  60. queue = CommandQueue(context, device);*/
  61. }
  62. ClGenerator::~ClGenerator(void)
  63. {
  64. queue.flush();
  65. queue.finish();
  66. }
  67. void ClGenerator::generate(const mnd::MandelInfo& info, float* data)
  68. {
  69. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  70. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  71. float pixelScaleX = float(info.view.width / info.bWidth);
  72. float pixelScaleY = float(info.view.height / info.bHeight);
  73. Kernel iterate = Kernel(program, "iterate");
  74. iterate.setArg(0, buffer_A);
  75. iterate.setArg(1, int(info.bWidth));
  76. iterate.setArg(2, float(info.view.x));
  77. iterate.setArg(3, float(info.view.y));
  78. iterate.setArg(4, float(pixelScaleX));
  79. iterate.setArg(5, float(pixelScaleY));
  80. iterate.setArg(6, int(info.maxIter));
  81. // TODO check for overflow
  82. if (false && device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>() == 4) {
  83. queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight / 4));
  84. } else {
  85. queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
  86. }
  87. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  88. }
  89. ClGeneratorFloat::ClGeneratorFloat(cl::Device device, bool smooth) :
  90. ClGenerator{ device }
  91. {
  92. /*Platform p = getPlatform();
  93. device = getDevice(p, 0, true);*/
  94. context = Context{ device };
  95. Program::Sources sources;
  96. std::string kcode = this->getKernelCode(smooth);
  97. sources.push_back({ kcode.c_str(), kcode.length() });
  98. program = Program{ context, sources };
  99. if (program.build({ device }) != CL_SUCCESS) {
  100. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
  101. }
  102. queue = CommandQueue(context, device);
  103. }
  104. std::string ClGeneratorFloat::getKernelCode(bool smooth) const
  105. {
  106. if (smooth) {
  107. return
  108. // "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
  109. "__kernel void iterate(__global float* A, const int width, float xl, float yt, float pixelScaleX, float pixelScaleY, int max) {"
  110. " int index = get_global_id(0);\n"
  111. " int x = index % width;"
  112. " int y = index / width;"
  113. " float a = x * pixelScaleX + xl;"
  114. " float b = y * pixelScaleY + yt;"
  115. " float ca = a;"
  116. " float cb = b;"
  117. ""
  118. " int n = 0;"
  119. " while (n < max - 1) {"
  120. " float aa = a * a;"
  121. " float bb = b * b;"
  122. " float ab = a * b;"
  123. " if (aa + bb > 16) break;"
  124. " a = aa - bb + ca;"
  125. " b = 2 * ab + cb;"
  126. " n++;"
  127. " }\n"
  128. // N + 1 - log (log |Z(N)|) / log 2
  129. " if (n >= max - 1)\n"
  130. " A[index] = max;\n"
  131. " else"
  132. " A[index] = ((float)n) + 1 - log(log(a * a + b * b) / 2) / log(2.0f);\n"
  133. // " A[index] = ((float)n) + 1 - (a * a + b * b - 16) / (256 - 16);\n"
  134. // " A[get_global_id(0)] = 5;"
  135. "}";
  136. }
  137. else {
  138. return
  139. // "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
  140. "__kernel void iterate(__global float* A, const int width, float xl, float yt, float pixelScaleX, float pixelScaleY, int max) {"
  141. " int index = get_global_id(0);\n"
  142. " int x = index % width;"
  143. " int y = index / width;"
  144. " float a = x * pixelScaleX + xl;"
  145. " float b = y * pixelScaleY + yt;"
  146. " float ca = a;"
  147. " float cb = b;"
  148. ""
  149. " int n = 0;"
  150. " while (n < max - 1) {"
  151. " float aa = a * a;"
  152. " float bb = b * b;"
  153. " float ab = a * b;"
  154. " if (aa + bb > 16) break;"
  155. " a = aa - bb + ca;"
  156. " b = 2 * ab + cb;"
  157. " n++;"
  158. " }\n"
  159. // N + 1 - log (log |Z(N)|) / log 2
  160. " if (n >= max - 1)\n"
  161. " A[index] = max;\n"
  162. " else"
  163. " A[index] = ((float)n);\n"
  164. // " A[index] = ((float)n) + 1 - (a * a + b * b - 16) / (256 - 16);\n"
  165. // " A[get_global_id(0)] = 5;"
  166. "}";
  167. }
  168. }
  169. ClGeneratorDouble::ClGeneratorDouble(cl::Device device, bool smooth) :
  170. ClGenerator{ device }
  171. {
  172. context = Context{ device };
  173. Program::Sources sources;
  174. std::string kcode = this->getKernelCode(smooth);
  175. sources.push_back({ kcode.c_str(), kcode.length() });
  176. program = Program{ context, sources };
  177. if (program.build({ device }) != CL_SUCCESS) {
  178. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
  179. }
  180. queue = CommandQueue(context, device);
  181. }
  182. void ClGeneratorDouble::generate(const mnd::MandelInfo& info, float* data)
  183. {
  184. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  185. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  186. double pixelScaleX = double(info.view.width / info.bWidth);
  187. double pixelScaleY = double(info.view.height / info.bHeight);
  188. Kernel iterate = Kernel(program, "iterate");
  189. iterate.setArg(0, buffer_A);
  190. iterate.setArg(1, int(info.bWidth));
  191. iterate.setArg(2, double(info.view.x));
  192. iterate.setArg(3, double(info.view.y));
  193. iterate.setArg(4, double(pixelScaleX));
  194. iterate.setArg(5, double(pixelScaleY));
  195. iterate.setArg(6, int(info.maxIter));
  196. cl_int result = queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
  197. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  198. }
  199. std::string ClGeneratorDouble::getKernelCode(bool smooth) const
  200. {
  201. if (smooth) {
  202. return
  203. "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
  204. "__kernel void iterate(__global float* A, const int width, double xl, double yt, double pixelScaleX, double pixelScaleY, int max) {\n"
  205. " int index = get_global_id(0);\n"
  206. " int x = index % width;"
  207. " int y = index / width;"
  208. " double a = x * pixelScaleX + xl;"
  209. " double b = y * pixelScaleY + yt;"
  210. " double ca = a;"
  211. " double cb = b;"
  212. ""
  213. " int n = 0;"
  214. " while (n < max - 1) {"
  215. " double aa = a * a;"
  216. " double bb = b * b;"
  217. " double ab = a * b;"
  218. " if (aa + bb > 16) break;"
  219. " a = aa - bb + ca;"
  220. " b = 2 * ab + cb;"
  221. " n++;"
  222. " }\n"
  223. // N + 1 - log (log |Z(N)|) / log 2
  224. " if (n >= max - 1)\n"
  225. " A[index] = max;\n"
  226. " else"
  227. " A[index] = ((float)n) + 1 - log(log((float)(a * a + b * b)) / 2) / log(2.0f);\n"
  228. // " A[index] = ((float)n) + 1 - (a * a + b * b - 16) / (256 - 16);\n"
  229. // " A[get_global_id(0)] = 5;"
  230. "}";
  231. }
  232. else {
  233. return
  234. "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
  235. "__kernel void iterate(__global float* A, const int width, double xl, double yt, double pixelScaleX, double pixelScaleY, int max) {\n"
  236. " int index = get_global_id(0);\n"
  237. " int x = index % width;"
  238. " int y = index / width;"
  239. " double a = x * pixelScaleX + xl;"
  240. " double b = y * pixelScaleY + yt;"
  241. " double ca = a;"
  242. " double cb = b;"
  243. ""
  244. " int n = 0;"
  245. " while (n < max - 1) {"
  246. " double aa = a * a;"
  247. " double bb = b * b;"
  248. " double ab = a * b;"
  249. " if (aa + bb > 16) break;"
  250. " a = aa - bb + ca;"
  251. " b = 2 * ab + cb;"
  252. " n++;"
  253. " }\n"
  254. // N + 1 - log (log |Z(N)|) / log 2
  255. " if (n >= max - 1)\n"
  256. " A[index] = max;\n"
  257. " else"
  258. " A[index] = ((float)n);\n"
  259. // " A[index] = ((float)n) + 1 - (a * a + b * b - 16) / (256 - 16);\n"
  260. // " A[get_global_id(0)] = 5;"
  261. "}";
  262. }
  263. }
  264. ClGeneratorDoubleDouble::ClGeneratorDoubleDouble(cl::Device device, bool smooth) :
  265. ClGenerator{ device },
  266. smooth{ smooth }
  267. {
  268. context = Context{ device };
  269. Program::Sources sources;
  270. std::string kcode = this->getKernelCode(false);
  271. sources.push_back({ kcode.c_str(), kcode.length() });
  272. program = Program{ context, sources };
  273. if (program.build({ device }) != CL_SUCCESS) {
  274. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
  275. }
  276. queue = CommandQueue(context, device);
  277. }
  278. void ClGeneratorDoubleDouble::generate(const mnd::MandelInfo& info, float* data)
  279. {
  280. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  281. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  282. mnd::DoubleDouble x = mnd::convert<mnd::DoubleDouble>(info.view.x);
  283. mnd::DoubleDouble y = mnd::convert<mnd::DoubleDouble>(info.view.y);
  284. mnd::DoubleDouble psx = mnd::convert<mnd::DoubleDouble>(info.view.width / info.bWidth);
  285. mnd::DoubleDouble psy = mnd::convert<mnd::DoubleDouble>(info.view.height / info.bHeight);
  286. Kernel iterate = Kernel(program, "iterate");
  287. iterate.setArg(0, buffer_A);
  288. iterate.setArg(1, int(info.bWidth));
  289. iterate.setArg(2, x.x[0]);
  290. iterate.setArg(3, x.x[1]);
  291. iterate.setArg(4, y.x[0]);
  292. iterate.setArg(5, y.x[1]);
  293. iterate.setArg(6, psx.x[0]);
  294. iterate.setArg(7, psx.x[1]);
  295. iterate.setArg(8, psy.x[0]);
  296. iterate.setArg(9, psy.x[1]);
  297. iterate.setArg(10, int(info.maxIter));
  298. iterate.setArg(11, int(smooth ? 1 : 0));
  299. cl_int result = queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
  300. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  301. }
  302. std::string ClGeneratorDoubleDouble::getKernelCode(bool smooth) const
  303. {
  304. return (char*) doubledouble_cl;
  305. }
  306. ClGenerator128::ClGenerator128(cl::Device device, bool smooth) :
  307. ClGenerator{ device }
  308. {
  309. context = Context{ device };
  310. Program::Sources sources;
  311. std::string kcode = this->getKernelCode(smooth);
  312. sources.push_back({ kcode.c_str(), kcode.length() });
  313. program = Program{ context, sources };
  314. if (program.build({ device }) != CL_SUCCESS) {
  315. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
  316. }
  317. queue = CommandQueue(context, device);
  318. }
  319. void ClGenerator128::generate(const mnd::MandelInfo& info, float* data)
  320. {
  321. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  322. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  323. float pixelScaleX = float(info.view.width / info.bWidth);
  324. float pixelScaleY = float(info.view.height / info.bHeight);
  325. Kernel iterate = Kernel(program, "iterate");
  326. iterate.setArg(0, buffer_A);
  327. iterate.setArg(1, int(info.bWidth));
  328. iterate.setArg(2, double(info.view.x));
  329. iterate.setArg(3, double(info.view.y));
  330. iterate.setArg(4, double(pixelScaleX));
  331. iterate.setArg(5, double(pixelScaleY));
  332. iterate.setArg(6, int(info.maxIter));
  333. queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
  334. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  335. }
  336. #include <string>
  337. #include <fstream>
  338. #include <streambuf>
  339. std::string ClGenerator128::getKernelCode(bool smooth) const
  340. {
  341. //fprintf(stderr, "starting file read\n");
  342. std::ifstream t("mandel128.cl");
  343. std::string str((std::istreambuf_iterator<char>(t)),
  344. std::istreambuf_iterator<char>());
  345. //fprintf(stderr, "%s\n", str);
  346. return str;
  347. }
  348. #endif // WITH_OPENCL