ClGenerators.cpp 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770
  1. #include "ClGenerators.h"
  2. #include "Types.h"
  3. #include "Mandel.h"
  4. #include "OpenClInternal.h"
  5. #include "OpenClCode.h"
  6. #include <CL/cl2.hpp>
  7. #if WITH_OPENCL
  8. #include <iostream>
  9. #include <iterator>
  10. #include <utility>
  11. using namespace cl;
  12. using mnd::ClGenerator;
  13. using mnd::ClGeneratorFloat;
  14. using mnd::ClGeneratorDoubleFloat;
  15. using mnd::ClGeneratorTripleFloat;
  16. using mnd::ClGeneratorDouble;
  17. using mnd::ClGeneratorDoubleDouble;
  18. using mnd::ClGeneratorTripleDouble;
  19. using mnd::ClGeneratorQuadDouble;
  20. using mnd::ClGeneratorHexDouble;
  21. using mnd::ClGeneratorOctaDouble;
  22. using mnd::ClGenerator128;
  23. using mnd::ClGenerator64;
  24. Platform getPlatform() {
  25. /* Returns the first platform found. */
  26. std::vector<Platform> all_platforms;
  27. Platform::get(&all_platforms);
  28. if (all_platforms.size() == 0) {
  29. std::cout << "No platforms found. Check OpenCL installation!\n";
  30. exit(1);
  31. }
  32. for (auto& p : all_platforms) {
  33. std::string name = p.getInfo<CL_PLATFORM_NAME>();
  34. std::string profile = p.getInfo<CL_PLATFORM_PROFILE>();
  35. printf("Platform: %s, %s\n", name.c_str(), profile.c_str());
  36. }
  37. return all_platforms[0];
  38. }
  39. Device getDevice(Platform& platform, int i, bool display = false) {
  40. /* Returns the deviced specified by the index i on platform.
  41. * If display is true, then all of the platforms are listed.
  42. */
  43. std::vector<Device> all_devices;
  44. platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
  45. if (all_devices.size() == 0) {
  46. std::cout << "No devices found. Check OpenCL installation!\n";
  47. exit(1);
  48. }
  49. if (display) {
  50. for (::size_t j = 0; j < all_devices.size(); j++) {
  51. printf("Device %d: %s\n", int(j), all_devices[j].getInfo<CL_DEVICE_NAME>().c_str());
  52. printf("preferred float width: %d\n", all_devices[j].getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>());
  53. printf("vendor: %s\n", all_devices[j].getInfo<CL_DEVICE_VENDOR>().c_str());
  54. }
  55. }
  56. return all_devices[i];
  57. }
  58. ClGenerator::ClGenerator(mnd::MandelDevice& device, const std::string& source, mnd::Precision type) :
  59. MandelGenerator{ type },
  60. device{ device },
  61. context{ device.getClDevice().context }
  62. {
  63. const cl::Device& dev = device.getClDevice().device;
  64. Program::Sources sources;
  65. sources.push_back({ source.c_str(), source.length() });
  66. program = Program{ context, sources };
  67. if (program.build({ dev }) != CL_SUCCESS) {
  68. printf("code -> %s\n", source.c_str());
  69. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(dev));
  70. }
  71. #if CL_HPP_TARGET_OPENCL_VERSION >= 200
  72. /*cl_queue_properties qcp[] {
  73. CL_QUEUE_PROPERTIES, 0,
  74. // CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR,
  75. 0
  76. };
  77. int err = 0;
  78. cl_command_queue dq = clCreateCommandQueueWithProperties(
  79. this->context.get(),
  80. dev.get(),
  81. qcp,
  82. &err
  83. );
  84. if (err == CL_SUCCESS) {
  85. printf("queue success\n");
  86. queue = CommandQueue(dq);
  87. }
  88. else {
  89. printf("queue non-success\n");
  90. queue = CommandQueue(context, dev);
  91. }
  92. */
  93. queue = CommandQueue(context, dev);
  94. #else
  95. queue = CommandQueue(context, dev);
  96. #endif
  97. /*Platform p = getPlatform();
  98. device = getDevice(p, 0, true);
  99. context = Context{ device };
  100. Program::Sources sources;
  101. std::string kcode = this->getKernelCode();
  102. sources.push_back({ kcode.c_str(), kcode.length() });
  103. program = Program{ context, sources };
  104. if (program.build({ device }) != CL_SUCCESS) {
  105. std::cout << "Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << std::endl;
  106. exit(1);
  107. }
  108. queue = CommandQueue(context, device);*/
  109. }
  110. ClGenerator::~ClGenerator(void)
  111. {
  112. queue.flush();
  113. queue.finish();
  114. }
  115. mnd::MandelDevice* ClGenerator::getDevice(void)
  116. {
  117. return &device;
  118. }
  119. ClGeneratorFloat::ClGeneratorFloat(mnd::MandelDevice& device, const std::string& code) :
  120. ClGenerator{ device, code, mnd::Precision::FLOAT }
  121. {
  122. const cl::Device& dev = device.getClDevice().device;
  123. useVec = dev.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>() >= 4;
  124. // often still slower than non-vec variation
  125. useVec = false;
  126. kernel = Kernel(program, useVec ? "iterate_vec4" : "iterate");
  127. }
  128. void ClGeneratorFloat::generate(const mnd::MandelInfo& info, float* data)
  129. {
  130. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  131. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  132. float pixelScaleX = float(info.view.width / info.bWidth);
  133. float pixelScaleY = float(info.view.height / info.bHeight);
  134. kernel.setArg(0, buffer_A);
  135. kernel.setArg(1, int(info.bWidth));
  136. kernel.setArg(2, float(info.view.x));
  137. kernel.setArg(3, float(info.view.y));
  138. kernel.setArg(4, float(pixelScaleX));
  139. kernel.setArg(5, float(pixelScaleY));
  140. kernel.setArg(6, int(info.maxIter));
  141. kernel.setArg(7, int(info.smooth ? 1 : 0));
  142. kernel.setArg(8, int(info.julia ? 1 : 0));
  143. kernel.setArg(9, float(info.juliaX));
  144. kernel.setArg(10, float(info.juliaY));
  145. if (useVec) {
  146. queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight / 4));
  147. } else {
  148. queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  149. }
  150. cl::Event event;
  151. queue.enqueueReadBuffer(buffer_A, CL_FALSE, 0, bufferSize, data, nullptr, &event);
  152. queue.flush();
  153. event.wait();
  154. }
  155. std::string ClGeneratorFloat::getKernelCode(bool smooth) const
  156. {
  157. return mnd::getFloat_cl();
  158. }
  159. ClGeneratorDoubleFloat::ClGeneratorDoubleFloat(mnd::MandelDevice& device) :
  160. ClGenerator{ device, this->getKernelCode(false), mnd::Precision::DOUBLE_FLOAT }
  161. {
  162. kernel = Kernel(program, "iterate");
  163. }
  164. void ClGeneratorDoubleFloat::generate(const mnd::MandelInfo& info, float* data)
  165. {
  166. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  167. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  168. mnd::LightDoubleFloat pixelScX = double(info.view.width / info.bWidth);
  169. mnd::LightDoubleFloat pixelScY = double(info.view.height / info.bHeight);
  170. mnd::LightDoubleFloat x = double(info.view.x);
  171. mnd::LightDoubleFloat y = double(info.view.y);
  172. mnd::LightDoubleFloat jx = double(info.juliaX);
  173. mnd::LightDoubleFloat jy = double(info.juliaY);
  174. kernel.setArg(0, buffer_A);
  175. kernel.setArg(1, int(info.bWidth));
  176. kernel.setArg(2, x[0]);
  177. kernel.setArg(3, x[1]);
  178. kernel.setArg(4, y[0]);
  179. kernel.setArg(5, y[1]);
  180. kernel.setArg(6, pixelScX[0]);
  181. kernel.setArg(7, pixelScX[1]);
  182. kernel.setArg(8, pixelScY[0]);
  183. kernel.setArg(9, pixelScY[1]);
  184. kernel.setArg(10, int(info.maxIter));
  185. kernel.setArg(11, int(info.smooth ? 1 : 0));
  186. kernel.setArg(12, int(info.julia ? 1 : 0));
  187. kernel.setArg(13, jx[0]);
  188. kernel.setArg(14, jx[1]);
  189. kernel.setArg(15, jy[0]);
  190. kernel.setArg(16, jy[1]);
  191. cl_int result = queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  192. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  193. }
  194. std::string ClGeneratorDoubleFloat::getKernelCode(bool smooth) const
  195. {
  196. return getDoubleFloat_cl();
  197. }
  198. ClGeneratorTripleFloat::ClGeneratorTripleFloat(mnd::MandelDevice& device) :
  199. ClGenerator{ device, this->getKernelCode(false), mnd::Precision::TRIPLE_FLOAT }
  200. {
  201. kernel = Kernel(program, "iterate");
  202. }
  203. void ClGeneratorTripleFloat::generate(const mnd::MandelInfo& info, float* data)
  204. {
  205. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  206. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  207. mnd::TripleFloat pixelScX = mnd::convert<mnd::TripleFloat>(info.view.width / info.bWidth);
  208. mnd::TripleFloat pixelScY = mnd::convert<mnd::TripleFloat>(info.view.height / info.bHeight);
  209. mnd::TripleFloat x = mnd::convert<mnd::TripleFloat>(info.view.x);
  210. mnd::TripleFloat y = mnd::convert<mnd::TripleFloat>(info.view.y);
  211. mnd::TripleFloat jx = mnd::convert<mnd::TripleFloat>(info.juliaX);
  212. mnd::TripleFloat jy = mnd::convert<mnd::TripleFloat>(info.juliaY);
  213. kernel.setArg(0, buffer_A);
  214. kernel.setArg(1, int(info.bWidth));
  215. kernel.setArg(2, x[0]);
  216. kernel.setArg(3, x[1]);
  217. kernel.setArg(4, x[2]);
  218. kernel.setArg(5, y[0]);
  219. kernel.setArg(6, y[1]);
  220. kernel.setArg(7, y[2]);
  221. kernel.setArg(8, pixelScX[0]);
  222. kernel.setArg(9, pixelScX[1]);
  223. kernel.setArg(10, pixelScX[2]);
  224. kernel.setArg(11, pixelScY[0]);
  225. kernel.setArg(12, pixelScY[1]);
  226. kernel.setArg(13, pixelScY[2]);
  227. kernel.setArg(14, int(info.maxIter));
  228. kernel.setArg(15, int(info.smooth ? 1 : 0));
  229. kernel.setArg(16, int(info.julia ? 1 : 0));
  230. kernel.setArg(17, jx[0]);
  231. kernel.setArg(18, jx[1]);
  232. kernel.setArg(19, jx[2]);
  233. kernel.setArg(20, jy[0]);
  234. kernel.setArg(21, jy[1]);
  235. kernel.setArg(22, jy[2]);
  236. cl_int result = queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  237. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  238. }
  239. std::string ClGeneratorTripleFloat::getKernelCode(bool smooth) const
  240. {
  241. return getTripleFloat_cl();
  242. }
  243. ClGeneratorDouble::ClGeneratorDouble(mnd::MandelDevice& device, const std::string& source) :
  244. ClGenerator{ device, source, mnd::Precision::DOUBLE }
  245. {
  246. kernel = Kernel(program, "iterate");
  247. }
  248. void ClGeneratorDouble::generate(const mnd::MandelInfo& info, float* data)
  249. {
  250. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  251. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  252. double pixelScaleX = double(info.view.width / info.bWidth);
  253. double pixelScaleY = double(info.view.height / info.bHeight);
  254. kernel.setArg(0, buffer_A);
  255. kernel.setArg(1, int(info.bWidth));
  256. kernel.setArg(2, double(info.view.x));
  257. kernel.setArg(3, double(info.view.y));
  258. kernel.setArg(4, double(pixelScaleX));
  259. kernel.setArg(5, double(pixelScaleY));
  260. kernel.setArg(6, int(info.maxIter));
  261. kernel.setArg(7, int(info.smooth ? 1 : 0));
  262. kernel.setArg(8, int(info.julia ? 1 : 0));
  263. kernel.setArg(9, double(info.juliaX));
  264. kernel.setArg(10, double(info.juliaY));
  265. cl_int result = queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  266. cl::Event event;
  267. queue.enqueueReadBuffer(buffer_A, CL_FALSE, 0, bufferSize, data, nullptr, &event);
  268. queue.flush();
  269. event.wait();
  270. }
  271. std::string ClGeneratorDouble::getKernelCode(bool smooth) const
  272. {
  273. return
  274. "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
  275. "__kernel void iterate(__global float* A, const int width, double xl, double yt, double pixelScaleX, double pixelScaleY, int max, int smooth) {\n"
  276. " int index = get_global_id(0);\n"
  277. " int x = index % width;"
  278. " int y = index / width;"
  279. " double a = x * pixelScaleX + xl;"
  280. " double b = y * pixelScaleY + yt;"
  281. " double ca = a;"
  282. " double cb = b;"
  283. ""
  284. " int n = 0;"
  285. " while (n < max - 1) {"
  286. " double aa = a * a;"
  287. " double bb = b * b;"
  288. " double ab = a * b;"
  289. " if (aa + bb > 16) break;"
  290. " a = aa - bb + ca;"
  291. " b = ab + ab + cb;"
  292. " n++;"
  293. " }\n"
  294. // N + 1 - log (log |Z(N)|) / log 2
  295. " if (n >= max - 1)\n"
  296. " A[index] = max;\n"
  297. " else {"
  298. " if (smooth != 0)\n"
  299. " A[index] = ((float)n) + 1 - log(log((float)(a * a + b * b)) / 2) / log(2.0f);\n"
  300. " else\n"
  301. " A[index] = ((float)n);\n"
  302. " }"
  303. "}";
  304. }
  305. ClGeneratorDoubleDouble::ClGeneratorDoubleDouble(mnd::MandelDevice& device) :
  306. ClGenerator{ device, getDoubleDouble_cl(), mnd::Precision::DOUBLE_DOUBLE }
  307. {
  308. kernel = Kernel(program, "iterate");
  309. }
  310. void ClGeneratorDoubleDouble::generate(const mnd::MandelInfo& info, float* data)
  311. {
  312. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  313. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  314. mnd::DoubleDouble x = mnd::convert<mnd::DoubleDouble>(info.view.x);
  315. mnd::DoubleDouble y = mnd::convert<mnd::DoubleDouble>(info.view.y);
  316. mnd::DoubleDouble psx = mnd::convert<mnd::DoubleDouble>(info.view.width / info.bWidth);
  317. mnd::DoubleDouble psy = mnd::convert<mnd::DoubleDouble>(info.view.height / info.bHeight);
  318. mnd::DoubleDouble juliaX = mnd::convert<mnd::DoubleDouble>(info.juliaX);
  319. mnd::DoubleDouble juliaY = mnd::convert<mnd::DoubleDouble>(info.juliaY);
  320. kernel.setArg(0, buffer_A);
  321. kernel.setArg(1, int(info.bWidth));
  322. kernel.setArg(2, x.x[0]);
  323. kernel.setArg(3, x.x[1]);
  324. kernel.setArg(4, y.x[0]);
  325. kernel.setArg(5, y.x[1]);
  326. kernel.setArg(6, psx.x[0]);
  327. kernel.setArg(7, psx.x[1]);
  328. kernel.setArg(8, psy.x[0]);
  329. kernel.setArg(9, psy.x[1]);
  330. kernel.setArg(10, int(info.maxIter));
  331. kernel.setArg(11, int(info.smooth ? 1 : 0));
  332. kernel.setArg(12, info.julia ? 1 : 0);
  333. kernel.setArg(13, juliaX.x[0]);
  334. kernel.setArg(14, juliaX.x[1]);
  335. kernel.setArg(15, juliaY.x[0]);
  336. kernel.setArg(16, juliaY.x[1]);
  337. cl_int result = queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  338. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  339. }
  340. std::string ClGeneratorDoubleDouble::getKernelCode(bool smooth) const
  341. {
  342. return getDoubleDouble_cl();
  343. }
  344. ClGeneratorTripleDouble::ClGeneratorTripleDouble(mnd::MandelDevice& device) :
  345. ClGenerator{ device, getTripleDouble_cl(), mnd::Precision::TRIPLE_DOUBLE }
  346. {
  347. kernel = Kernel(program, "iterate");
  348. }
  349. void ClGeneratorTripleDouble::generate(const mnd::MandelInfo& info, float* data)
  350. {
  351. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  352. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  353. mnd::TripleDouble x = mnd::convert<mnd::TripleDouble>(info.view.x);
  354. mnd::TripleDouble y = mnd::convert<mnd::TripleDouble>(info.view.y);
  355. mnd::TripleDouble psx = mnd::convert<mnd::TripleDouble>(info.view.width / info.bWidth);
  356. mnd::TripleDouble psy = mnd::convert<mnd::TripleDouble>(info.view.height / info.bHeight);
  357. mnd::TripleDouble juliaX = mnd::convert<mnd::TripleDouble>(info.juliaX);
  358. mnd::TripleDouble juliaY = mnd::convert<mnd::TripleDouble>(info.juliaY);
  359. kernel.setArg(0, buffer_A);
  360. kernel.setArg(1, int(info.bWidth));
  361. kernel.setArg(2, x.x[0]);
  362. kernel.setArg(3, x.x[1]);
  363. kernel.setArg(4, x.x[2]);
  364. kernel.setArg(5, y.x[0]);
  365. kernel.setArg(6, y.x[1]);
  366. kernel.setArg(7, y.x[2]);
  367. kernel.setArg(8, psx.x[0]);
  368. kernel.setArg(9, psx.x[1]);
  369. kernel.setArg(10, psx.x[2]);
  370. kernel.setArg(11, psy.x[0]);
  371. kernel.setArg(12, psy.x[1]);
  372. kernel.setArg(13, psy.x[2]);
  373. kernel.setArg(14, int(info.maxIter));
  374. kernel.setArg(15, int(info.smooth ? 1 : 0));
  375. kernel.setArg(16, info.julia ? 1 : 0);
  376. kernel.setArg(17, juliaX.x[0]);
  377. kernel.setArg(18, juliaX.x[1]);
  378. kernel.setArg(19, juliaX.x[2]);
  379. kernel.setArg(20, juliaY.x[0]);
  380. kernel.setArg(21, juliaY.x[1]);
  381. kernel.setArg(22, juliaY.x[2]);
  382. cl_int result = queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  383. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  384. }
  385. std::string ClGeneratorTripleDouble::getKernelCode(bool smooth) const
  386. {
  387. return getTripleDouble_cl();
  388. }
  389. ClGeneratorQuadDouble::ClGeneratorQuadDouble(mnd::MandelDevice& device) :
  390. ClGenerator{ device, getQuadDouble_cl(), mnd::Precision::QUAD_DOUBLE }
  391. {
  392. kernel = Kernel(program, "iterate");
  393. }
  394. void ClGeneratorQuadDouble::generate(const mnd::MandelInfo& info, float* data)
  395. {
  396. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  397. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  398. mnd::QuadDouble x = mnd::convert<mnd::QuadDouble>(info.view.x);
  399. mnd::QuadDouble y = mnd::convert<mnd::QuadDouble>(info.view.y);
  400. mnd::QuadDouble psx = mnd::convert<mnd::QuadDouble>(info.view.width / info.bWidth);
  401. mnd::QuadDouble psy = mnd::convert<mnd::QuadDouble>(info.view.height / info.bHeight);
  402. mnd::QuadDouble jx = mnd::convert<mnd::QuadDouble>(info.juliaX);
  403. mnd::QuadDouble jy = mnd::convert<mnd::QuadDouble>(info.juliaY);
  404. kernel.setArg(0, buffer_A);
  405. kernel.setArg(1, int(info.bWidth));
  406. kernel.setArg(2, x.x[0]);
  407. kernel.setArg(3, x.x[1]);
  408. kernel.setArg(4, x.x[2]);
  409. kernel.setArg(5, x.x[3]);
  410. kernel.setArg(6, y.x[0]);
  411. kernel.setArg(7, y.x[1]);
  412. kernel.setArg(8, y.x[2]);
  413. kernel.setArg(9, y.x[3]);
  414. kernel.setArg(10, psx.x[0]);
  415. kernel.setArg(11, psx.x[1]);
  416. kernel.setArg(12, psx.x[2]);
  417. kernel.setArg(13, psx.x[3]);
  418. kernel.setArg(14, psy.x[0]);
  419. kernel.setArg(15, psy.x[1]);
  420. kernel.setArg(16, psy.x[2]);
  421. kernel.setArg(17, psy.x[3]);
  422. kernel.setArg(18, int(info.maxIter));
  423. kernel.setArg(19, int(info.smooth ? 1 : 0));
  424. kernel.setArg(20, int(info.julia ? 1 : 0));
  425. kernel.setArg(21, jx.x[0]);
  426. kernel.setArg(22, jx.x[1]);
  427. kernel.setArg(23, jx.x[2]);
  428. kernel.setArg(24, jx.x[3]);
  429. kernel.setArg(25, jy.x[0]);
  430. kernel.setArg(26, jy.x[1]);
  431. kernel.setArg(27, jy.x[2]);
  432. kernel.setArg(28, jy.x[3]);
  433. cl_int result = queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  434. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  435. }
  436. std::string ClGeneratorQuadDouble::getKernelCode(bool smooth) const
  437. {
  438. return getQuadDouble_cl();
  439. }
  440. ClGeneratorHexDouble::ClGeneratorHexDouble(mnd::MandelDevice& device) :
  441. ClGenerator{ device, getHexDouble_cl(), mnd::Precision::HEX_DOUBLE }
  442. {
  443. kernel = Kernel(program, "iterate");
  444. }
  445. void ClGeneratorHexDouble::generate(const mnd::MandelInfo& info, float* data)
  446. {
  447. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  448. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  449. mnd::HexDouble x = mnd::convert<mnd::HexDouble>(info.view.x);
  450. mnd::HexDouble y = mnd::convert<mnd::HexDouble>(info.view.y);
  451. mnd::HexDouble psx = mnd::convert<mnd::HexDouble>(info.view.width / info.bWidth);
  452. mnd::HexDouble psy = mnd::convert<mnd::HexDouble>(info.view.height / info.bHeight);
  453. mnd::HexDouble jx = mnd::convert<mnd::HexDouble>(info.juliaX);
  454. mnd::HexDouble jy = mnd::convert<mnd::HexDouble>(info.juliaY);
  455. const size_t argBufSize = 6 * sizeof(double);
  456. Buffer xbuf(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, argBufSize, x.x);
  457. Buffer ybuf(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, argBufSize, y.x);
  458. Buffer psxbuf(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, argBufSize, psx.x);
  459. Buffer psybuf(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, argBufSize, psy.x);
  460. Buffer jxbuf(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, argBufSize, jx.x);
  461. Buffer jybuf(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, argBufSize);
  462. kernel.setArg(0, buffer_A);
  463. kernel.setArg(1, int(info.bWidth));
  464. kernel.setArg(2, xbuf);
  465. kernel.setArg(3, ybuf);
  466. kernel.setArg(4, psxbuf);
  467. kernel.setArg(5, psybuf);
  468. kernel.setArg(6, int(info.maxIter));
  469. kernel.setArg(7, int(info.smooth ? 1 : 0));
  470. kernel.setArg(8, int(info.julia ? 1 : 0));
  471. kernel.setArg(9, jxbuf);
  472. kernel.setArg(10, jybuf);
  473. cl_int result = queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  474. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  475. }
  476. std::string ClGeneratorHexDouble::getKernelCode(bool smooth) const
  477. {
  478. return getHexDouble_cl();
  479. }
  480. ClGeneratorOctaDouble::ClGeneratorOctaDouble(mnd::MandelDevice& device) :
  481. ClGenerator{ device, getOctaDouble_cl(), mnd::Precision::OCTA_DOUBLE }
  482. {
  483. kernel = Kernel(program, "iterate");
  484. }
  485. void ClGeneratorOctaDouble::generate(const mnd::MandelInfo& info, float* data)
  486. {
  487. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  488. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  489. mnd::OctaDouble x = mnd::convert<mnd::OctaDouble>(info.view.x);
  490. mnd::OctaDouble y = mnd::convert<mnd::OctaDouble>(info.view.y);
  491. mnd::OctaDouble psx = mnd::convert<mnd::OctaDouble>(info.view.width / info.bWidth);
  492. mnd::OctaDouble psy = mnd::convert<mnd::OctaDouble>(info.view.height / info.bHeight);
  493. mnd::OctaDouble jx = mnd::convert<mnd::OctaDouble>(info.juliaX);
  494. mnd::OctaDouble jy = mnd::convert<mnd::OctaDouble>(info.juliaY);
  495. const size_t argBufSize = 8 * sizeof(double);
  496. Buffer xbuf(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, argBufSize, x.x);
  497. Buffer ybuf(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, argBufSize, y.x);
  498. Buffer psxbuf(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, argBufSize, psx.x);
  499. Buffer psybuf(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, argBufSize, psy.x);
  500. Buffer jxbuf(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, argBufSize, jx.x);
  501. Buffer jybuf(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, argBufSize);
  502. kernel.setArg(0, buffer_A);
  503. kernel.setArg(1, int(info.bWidth));
  504. kernel.setArg(2, xbuf);
  505. kernel.setArg(3, ybuf);
  506. kernel.setArg(4, psxbuf);
  507. kernel.setArg(5, psybuf);
  508. kernel.setArg(6, int(info.maxIter));
  509. kernel.setArg(7, int(info.smooth ? 1 : 0));
  510. kernel.setArg(8, int(info.julia ? 1 : 0));
  511. kernel.setArg(9, jxbuf);
  512. kernel.setArg(10, jybuf);
  513. cl_int result = queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  514. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  515. }
  516. std::string ClGeneratorOctaDouble::getKernelCode(bool smooth) const
  517. {
  518. return getOctaDouble_cl();
  519. }
  520. ClGenerator128::ClGenerator128(mnd::MandelDevice& device) :
  521. ClGenerator{ device, getFixed512_cl(), mnd::Precision::FIXED128 }
  522. {
  523. kernel = Kernel(program, "iterate");
  524. }
  525. void ClGenerator128::generate(const mnd::MandelInfo& info, float* data)
  526. {
  527. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  528. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  529. float pixelScaleX = float(info.view.width / info.bWidth);
  530. float pixelScaleY = float(info.view.height / info.bHeight);
  531. using ull = unsigned long long;
  532. ull x1 = ull(double(info.view.x) * 0x10000ULL);
  533. ull x2 = 0;
  534. ull y1 = ull(double(info.view.y) * 0x10000ULL);
  535. ull y2 = 0;
  536. ull w1 = ull(double(pixelScaleX) * 0x10000ULL);
  537. ull w2 = 0;
  538. ull h1 = ull(double(pixelScaleY) * 0x10000ULL);
  539. ull h2 = 0;
  540. ull jx1 = ull(double(info.juliaX) * 0x10000ULL);
  541. ull jx2 = 0;
  542. ull jy1 = ull(double(info.juliaY) * 0x10000ULL);
  543. ull jy2 = 0;
  544. kernel.setArg(0, buffer_A);
  545. kernel.setArg(1, int(info.bWidth));
  546. kernel.setArg(2, ull(x1));
  547. kernel.setArg(3, ull(x2));
  548. kernel.setArg(4, ull(y1));
  549. kernel.setArg(5, ull(y2));
  550. kernel.setArg(6, ull(w1));
  551. kernel.setArg(7, ull(w2));
  552. kernel.setArg(8, ull(h1));
  553. kernel.setArg(9, ull(h2));
  554. kernel.setArg(10, int(info.maxIter));
  555. kernel.setArg(11, int(info.smooth ? 1 : 0));
  556. kernel.setArg(12, int(info.julia ? 1 : 0));
  557. kernel.setArg(13, ull(jx1));
  558. kernel.setArg(14, ull(jx2));
  559. kernel.setArg(15, ull(jy1));
  560. kernel.setArg(16, ull(jy2));
  561. queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  562. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  563. }
  564. std::string ClGenerator128::getKernelCode(bool smooth) const
  565. {
  566. /*//fprintf(stderr, "starting file read\n");
  567. std::ifstream t("mandel128.cl");
  568. std::string str((std::istreambuf_iterator<char>(t)),
  569. std::istreambuf_iterator<char>());
  570. //fprintf(stderr, "%s\n", str);
  571. return str;*/
  572. return getFixed128_cl();
  573. }
  574. ClGenerator64::ClGenerator64(mnd::MandelDevice& device) :
  575. ClGenerator{ device, getFixed64_cl(), mnd::Precision::FIXED64 }
  576. {
  577. kernel = Kernel(program, "iterate");
  578. }
  579. #include "CpuGenerators.h"
  580. void ClGenerator64::generate(const mnd::MandelInfo& info, float* data)
  581. {
  582. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  583. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  584. float pixelScaleX = float(info.view.width / info.bWidth);
  585. float pixelScaleY = float(info.view.height / info.bHeight);
  586. using ull = uint64_t;
  587. ull x = ull(::round(double(info.view.x) * (1LL << 48)));
  588. ull y = ull(::round(double(info.view.y) * (1LL << 48)));
  589. ull w = ull(::round(double(pixelScaleX) * (1LL << 48)));
  590. ull h = ull(::round(double(pixelScaleY) * (1LL << 48)));
  591. ull jx = ull(::round(double(info.juliaX) * (1LL << 48)));
  592. ull jy = ull(::round(double(info.juliaY) * (1LL << 48)));
  593. //x = 0;
  594. //y = 0;
  595. kernel.setArg(0, buffer_A);
  596. kernel.setArg(1, int(info.bWidth));
  597. kernel.setArg(2, ull(x));
  598. kernel.setArg(3, ull(y));
  599. kernel.setArg(4, ull(w));
  600. kernel.setArg(5, ull(h));
  601. kernel.setArg(6, int(info.maxIter));
  602. kernel.setArg(7, int(info.smooth ? 1 : 0));
  603. kernel.setArg(8, int(info.julia ? 1 : 0));
  604. kernel.setArg(9, ull(jx));
  605. kernel.setArg(10, ull(jy));
  606. queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  607. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  608. //CpuGenerator<Fixed64> fx;
  609. //fx.generate(info, data);
  610. }
  611. std::string ClGenerator64::getKernelCode(bool smooth) const
  612. {
  613. /*//fprintf(stderr, "starting file read\n");
  614. std::ifstream t("mandel128.cl");
  615. std::string str((std::istreambuf_iterator<char>(t)),
  616. std::istreambuf_iterator<char>());
  617. //fprintf(stderr, "%s\n", str);
  618. return str;*/
  619. return getFixed64_cl();
  620. }
  621. #endif // WITH_OPENCL