ClGenerators.cpp 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711
  1. #include "ClGenerators.h"
  2. #include "Mandel.h"
  3. #include "OpenClInternal.h"
  4. #include "OpenClCode.h"
  5. #include <CL/cl2.hpp>
  6. #if WITH_OPENCL
  7. #include <iostream>
  8. #include <iterator>
  9. #include <utility>
  10. using namespace cl;
  11. using mnd::ClGenerator;
  12. using mnd::ClGeneratorFloat;
  13. using mnd::ClGeneratorDoubleFloat;
  14. using mnd::ClGeneratorDouble;
  15. using mnd::ClGeneratorDoubleDouble;
  16. using mnd::ClGeneratorTripleDouble;
  17. using mnd::ClGeneratorQuadDouble;
  18. using mnd::ClGeneratorHexDouble;
  19. using mnd::ClGenerator128;
  20. using mnd::ClGenerator64;
  21. Platform getPlatform() {
  22. /* Returns the first platform found. */
  23. std::vector<Platform> all_platforms;
  24. Platform::get(&all_platforms);
  25. if (all_platforms.size() == 0) {
  26. std::cout << "No platforms found. Check OpenCL installation!\n";
  27. exit(1);
  28. }
  29. for (auto& p : all_platforms) {
  30. std::string name = p.getInfo<CL_PLATFORM_NAME>();
  31. std::string profile = p.getInfo<CL_PLATFORM_PROFILE>();
  32. printf("Platform: %s, %s\n", name.c_str(), profile.c_str());
  33. }
  34. return all_platforms[0];
  35. }
  36. Device getDevice(Platform& platform, int i, bool display = false) {
  37. /* Returns the deviced specified by the index i on platform.
  38. * If display is true, then all of the platforms are listed.
  39. */
  40. std::vector<Device> all_devices;
  41. platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
  42. if (all_devices.size() == 0) {
  43. std::cout << "No devices found. Check OpenCL installation!\n";
  44. exit(1);
  45. }
  46. if (display) {
  47. for (::size_t j = 0; j < all_devices.size(); j++) {
  48. printf("Device %d: %s\n", int(j), all_devices[j].getInfo<CL_DEVICE_NAME>().c_str());
  49. printf("preferred float width: %d\n", all_devices[j].getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>());
  50. printf("vendor: %s\n", all_devices[j].getInfo<CL_DEVICE_VENDOR>().c_str());
  51. }
  52. }
  53. return all_devices[i];
  54. }
  55. ClGenerator::ClGenerator(mnd::MandelDevice& device, const std::string& source, mnd::Precision type) :
  56. MandelGenerator{ type },
  57. device{ device },
  58. context{ device.getClDevice().context }
  59. {
  60. const cl::Device& dev = device.getClDevice().device;
  61. Program::Sources sources;
  62. sources.push_back({ source.c_str(), source.length() });
  63. program = Program{ context, sources };
  64. if (program.build({ dev }) != CL_SUCCESS) {
  65. printf("code -> %s\n", source.c_str());
  66. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(dev));
  67. }
  68. #if CL_HPP_TARGET_OPENCL_VERSION >= 200
  69. /*cl_queue_properties qcp[] {
  70. CL_QUEUE_PROPERTIES, 0,
  71. // CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR,
  72. 0
  73. };
  74. int err = 0;
  75. cl_command_queue dq = clCreateCommandQueueWithProperties(
  76. this->context.get(),
  77. dev.get(),
  78. qcp,
  79. &err
  80. );
  81. if (err == CL_SUCCESS) {
  82. printf("queue success\n");
  83. queue = CommandQueue(dq);
  84. }
  85. else {
  86. printf("queue non-success\n");
  87. queue = CommandQueue(context, dev);
  88. }
  89. */
  90. queue = CommandQueue(context, dev);
  91. #else
  92. queue = CommandQueue(context, dev);
  93. #endif
  94. /*Platform p = getPlatform();
  95. device = getDevice(p, 0, true);
  96. context = Context{ device };
  97. Program::Sources sources;
  98. std::string kcode = this->getKernelCode();
  99. sources.push_back({ kcode.c_str(), kcode.length() });
  100. program = Program{ context, sources };
  101. if (program.build({ device }) != CL_SUCCESS) {
  102. std::cout << "Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << std::endl;
  103. exit(1);
  104. }
  105. queue = CommandQueue(context, device);*/
  106. }
  107. ClGenerator::~ClGenerator(void)
  108. {
  109. queue.flush();
  110. queue.finish();
  111. }
  112. mnd::MandelDevice* ClGenerator::getDevice(void)
  113. {
  114. return &device;
  115. }
  116. ClGeneratorFloat::ClGeneratorFloat(mnd::MandelDevice& device, const std::string& code) :
  117. ClGenerator{ device, code, mnd::Precision::FLOAT }
  118. {
  119. const cl::Device& dev = device.getClDevice().device;
  120. useVec = dev.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>() >= 4;
  121. // often still slower than non-vec variation
  122. useVec = false;
  123. kernel = Kernel(program, useVec ? "iterate_vec4" : "iterate");
  124. }
  125. void ClGeneratorFloat::generate(const mnd::MandelInfo& info, float* data)
  126. {
  127. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  128. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  129. float pixelScaleX = float(info.view.width / info.bWidth);
  130. float pixelScaleY = float(info.view.height / info.bHeight);
  131. kernel.setArg(0, buffer_A);
  132. kernel.setArg(1, int(info.bWidth));
  133. kernel.setArg(2, float(info.view.x));
  134. kernel.setArg(3, float(info.view.y));
  135. kernel.setArg(4, float(pixelScaleX));
  136. kernel.setArg(5, float(pixelScaleY));
  137. kernel.setArg(6, int(info.maxIter));
  138. kernel.setArg(7, int(info.smooth ? 1 : 0));
  139. kernel.setArg(8, int(info.julia ? 1 : 0));
  140. kernel.setArg(9, float(info.juliaX));
  141. kernel.setArg(10, float(info.juliaY));
  142. if (useVec) {
  143. queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight / 4));
  144. } else {
  145. queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  146. }
  147. cl::Event event;
  148. queue.enqueueReadBuffer(buffer_A, CL_FALSE, 0, bufferSize, data, nullptr, &event);
  149. queue.flush();
  150. event.wait();
  151. }
  152. std::string ClGeneratorFloat::getKernelCode(bool smooth) const
  153. {
  154. return mnd::getFloat_cl();
  155. }
  156. ClGeneratorDoubleFloat::ClGeneratorDoubleFloat(mnd::MandelDevice& device) :
  157. ClGenerator{ device, this->getKernelCode(false), mnd::Precision::DOUBLE_FLOAT }
  158. {
  159. kernel = Kernel(program, "iterate");
  160. }
  161. std::pair<float, float> twoSum(float a, float b) {
  162. float s = a + b;
  163. float v = s - a;
  164. float r = (a - (s - v)) + (b - v);
  165. return { s, r };
  166. }
  167. std::pair<float, float> split(float a) {
  168. float c = (4096 + 1) * a;
  169. float abig = c - a;
  170. float ahi = c - abig;
  171. float alo = a - ahi;
  172. return { ahi, alo };
  173. }
  174. std::pair<float, float> twoProd(float a, float b) {
  175. float x = a * b;
  176. auto aex = split(a);
  177. auto bex = split(b);
  178. float errx = x - (aex.first * bex.first);
  179. float erry = errx - (aex.second * bex.first);
  180. float errz = erry - (aex.first * bex.second);
  181. float y = (aex.second * bex.second) - errz;
  182. return { x, y };
  183. }
  184. std::pair<float, float> add(std::pair<float, float> a, std::pair<float, float> b) {
  185. float r = a.first + b.first;
  186. float s;
  187. if (fabs(a.first) >= fabs(b.first)) {
  188. s = (((a.first - r) + b.first) + b.second) + a.second;
  189. }
  190. else {
  191. s = (((b.first - r) + a.first) + a.second) + b.second;
  192. }
  193. return twoSum(r, s);
  194. }
  195. std::pair<float, float> mul(std::pair<float, float> a, std::pair<float, float> b) {
  196. auto t = twoProd(a.first, b.first);
  197. t.second += ((a.first * b.second) + (a.second * b.first));
  198. return twoSum(t.first, t.second);
  199. }
  200. std::pair<float, float> mulFloat(std::pair<float, float> a, float b) {
  201. std::pair<float, float> t = twoProd(a.first, b);
  202. float t3 = (a.second * b) + t.second;
  203. return twoSum(t.first, t.second);
  204. }
  205. void ClGeneratorDoubleFloat::generate(const mnd::MandelInfo& info, float* data)
  206. {
  207. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  208. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  209. mnd::LightDoubleFloat pixelScX = double(info.view.width / info.bWidth);
  210. mnd::LightDoubleFloat pixelScY = double(info.view.height / info.bHeight);
  211. mnd::LightDoubleFloat x = double(info.view.x);
  212. mnd::LightDoubleFloat y = double(info.view.y);
  213. mnd::LightDoubleFloat jx = double(info.juliaX);
  214. mnd::LightDoubleFloat jy = double(info.juliaY);
  215. kernel.setArg(0, buffer_A);
  216. kernel.setArg(1, int(info.bWidth));
  217. kernel.setArg(2, x[0]);
  218. kernel.setArg(3, x[1]);
  219. kernel.setArg(4, y[0]);
  220. kernel.setArg(5, y[1]);
  221. kernel.setArg(6, pixelScX[0]);
  222. kernel.setArg(7, pixelScX[1]);
  223. kernel.setArg(8, pixelScY[0]);
  224. kernel.setArg(9, pixelScY[1]);
  225. kernel.setArg(10, int(info.maxIter));
  226. kernel.setArg(11, int(info.smooth ? 1 : 0));
  227. kernel.setArg(12, int(info.julia ? 1 : 0));
  228. kernel.setArg(13, jx[0]);
  229. kernel.setArg(14, jx[1]);
  230. kernel.setArg(15, jy[0]);
  231. kernel.setArg(16, jy[1]);
  232. cl_int result = queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  233. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  234. }
  235. std::string ClGeneratorDoubleFloat::getKernelCode(bool smooth) const
  236. {
  237. return getDoubleFloat_cl();
  238. }
  239. ClGeneratorDouble::ClGeneratorDouble(mnd::MandelDevice& device, const std::string& source) :
  240. ClGenerator{ device, source, mnd::Precision::DOUBLE }
  241. {
  242. kernel = Kernel(program, "iterate");
  243. }
  244. void ClGeneratorDouble::generate(const mnd::MandelInfo& info, float* data)
  245. {
  246. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  247. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  248. double pixelScaleX = double(info.view.width / info.bWidth);
  249. double pixelScaleY = double(info.view.height / info.bHeight);
  250. kernel.setArg(0, buffer_A);
  251. kernel.setArg(1, int(info.bWidth));
  252. kernel.setArg(2, double(info.view.x));
  253. kernel.setArg(3, double(info.view.y));
  254. kernel.setArg(4, double(pixelScaleX));
  255. kernel.setArg(5, double(pixelScaleY));
  256. kernel.setArg(6, int(info.maxIter));
  257. kernel.setArg(7, int(info.smooth ? 1 : 0));
  258. kernel.setArg(8, int(info.julia ? 1 : 0));
  259. kernel.setArg(9, double(info.juliaX));
  260. kernel.setArg(10, double(info.juliaY));
  261. cl_int result = queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  262. cl::Event event;
  263. queue.enqueueReadBuffer(buffer_A, CL_FALSE, 0, bufferSize, data, nullptr, &event);
  264. queue.flush();
  265. event.wait();
  266. }
  267. std::string ClGeneratorDouble::getKernelCode(bool smooth) const
  268. {
  269. return
  270. "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
  271. "__kernel void iterate(__global float* A, const int width, double xl, double yt, double pixelScaleX, double pixelScaleY, int max, int smooth) {\n"
  272. " int index = get_global_id(0);\n"
  273. " int x = index % width;"
  274. " int y = index / width;"
  275. " double a = x * pixelScaleX + xl;"
  276. " double b = y * pixelScaleY + yt;"
  277. " double ca = a;"
  278. " double cb = b;"
  279. ""
  280. " int n = 0;"
  281. " while (n < max - 1) {"
  282. " double aa = a * a;"
  283. " double bb = b * b;"
  284. " double ab = a * b;"
  285. " if (aa + bb > 16) break;"
  286. " a = aa - bb + ca;"
  287. " b = ab + ab + cb;"
  288. " n++;"
  289. " }\n"
  290. // N + 1 - log (log |Z(N)|) / log 2
  291. " if (n >= max - 1)\n"
  292. " A[index] = max;\n"
  293. " else {"
  294. " if (smooth != 0)\n"
  295. " A[index] = ((float)n) + 1 - log(log((float)(a * a + b * b)) / 2) / log(2.0f);\n"
  296. " else\n"
  297. " A[index] = ((float)n);\n"
  298. " }"
  299. "}";
  300. }
  301. ClGeneratorDoubleDouble::ClGeneratorDoubleDouble(mnd::MandelDevice& device) :
  302. ClGenerator{ device, getDoubleDouble_cl(), mnd::Precision::DOUBLE_DOUBLE }
  303. {
  304. kernel = Kernel(program, "iterate");
  305. }
  306. void ClGeneratorDoubleDouble::generate(const mnd::MandelInfo& info, float* data)
  307. {
  308. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  309. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  310. mnd::DoubleDouble x = mnd::convert<mnd::DoubleDouble>(info.view.x);
  311. mnd::DoubleDouble y = mnd::convert<mnd::DoubleDouble>(info.view.y);
  312. mnd::DoubleDouble psx = mnd::convert<mnd::DoubleDouble>(info.view.width / info.bWidth);
  313. mnd::DoubleDouble psy = mnd::convert<mnd::DoubleDouble>(info.view.height / info.bHeight);
  314. mnd::DoubleDouble juliaX = mnd::convert<mnd::DoubleDouble>(info.juliaX);
  315. mnd::DoubleDouble juliaY = mnd::convert<mnd::DoubleDouble>(info.juliaY);
  316. kernel.setArg(0, buffer_A);
  317. kernel.setArg(1, int(info.bWidth));
  318. kernel.setArg(2, x.x[0]);
  319. kernel.setArg(3, x.x[1]);
  320. kernel.setArg(4, y.x[0]);
  321. kernel.setArg(5, y.x[1]);
  322. kernel.setArg(6, psx.x[0]);
  323. kernel.setArg(7, psx.x[1]);
  324. kernel.setArg(8, psy.x[0]);
  325. kernel.setArg(9, psy.x[1]);
  326. kernel.setArg(10, int(info.maxIter));
  327. kernel.setArg(11, int(info.smooth ? 1 : 0));
  328. kernel.setArg(12, info.julia ? 1 : 0);
  329. kernel.setArg(13, juliaX.x[0]);
  330. kernel.setArg(14, juliaX.x[1]);
  331. kernel.setArg(15, juliaY.x[0]);
  332. kernel.setArg(16, juliaY.x[1]);
  333. cl_int result = queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  334. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  335. }
  336. std::string ClGeneratorDoubleDouble::getKernelCode(bool smooth) const
  337. {
  338. return getDoubleDouble_cl();
  339. }
  340. ClGeneratorTripleDouble::ClGeneratorTripleDouble(mnd::MandelDevice& device) :
  341. ClGenerator{ device, getTripleDouble_cl(), mnd::Precision::TRIPLE_DOUBLE }
  342. {
  343. kernel = Kernel(program, "iterate");
  344. }
  345. void ClGeneratorTripleDouble::generate(const mnd::MandelInfo& info, float* data)
  346. {
  347. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  348. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  349. mnd::TripleDouble x = mnd::convert<mnd::TripleDouble>(info.view.x);
  350. mnd::TripleDouble y = mnd::convert<mnd::TripleDouble>(info.view.y);
  351. mnd::TripleDouble psx = mnd::convert<mnd::TripleDouble>(info.view.width / info.bWidth);
  352. mnd::TripleDouble psy = mnd::convert<mnd::TripleDouble>(info.view.height / info.bHeight);
  353. mnd::TripleDouble juliaX = mnd::convert<mnd::TripleDouble>(info.juliaX);
  354. mnd::TripleDouble juliaY = mnd::convert<mnd::TripleDouble>(info.juliaY);
  355. kernel.setArg(0, buffer_A);
  356. kernel.setArg(1, int(info.bWidth));
  357. kernel.setArg(2, x.x[0]);
  358. kernel.setArg(3, x.x[1]);
  359. kernel.setArg(4, x.x[2]);
  360. kernel.setArg(5, y.x[0]);
  361. kernel.setArg(6, y.x[1]);
  362. kernel.setArg(7, y.x[2]);
  363. kernel.setArg(8, psx.x[0]);
  364. kernel.setArg(9, psx.x[1]);
  365. kernel.setArg(10, psx.x[2]);
  366. kernel.setArg(11, psy.x[0]);
  367. kernel.setArg(12, psy.x[1]);
  368. kernel.setArg(13, psy.x[2]);
  369. kernel.setArg(14, int(info.maxIter));
  370. kernel.setArg(15, int(info.smooth ? 1 : 0));
  371. kernel.setArg(16, info.julia ? 1 : 0);
  372. kernel.setArg(17, juliaX.x[0]);
  373. kernel.setArg(18, juliaX.x[1]);
  374. kernel.setArg(19, juliaX.x[2]);
  375. kernel.setArg(20, juliaY.x[0]);
  376. kernel.setArg(21, juliaY.x[1]);
  377. kernel.setArg(22, juliaY.x[2]);
  378. cl_int result = queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  379. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  380. }
  381. std::string ClGeneratorTripleDouble::getKernelCode(bool smooth) const
  382. {
  383. return getTripleDouble_cl();
  384. }
  385. ClGeneratorQuadDouble::ClGeneratorQuadDouble(mnd::MandelDevice& device) :
  386. ClGenerator{ device, getQuadDouble_cl(), mnd::Precision::QUAD_DOUBLE }
  387. {
  388. kernel = Kernel(program, "iterate");
  389. }
  390. void ClGeneratorQuadDouble::generate(const mnd::MandelInfo& info, float* data)
  391. {
  392. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  393. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  394. mnd::QuadDouble x = mnd::convert<mnd::QuadDouble>(info.view.x);
  395. mnd::QuadDouble y = mnd::convert<mnd::QuadDouble>(info.view.y);
  396. mnd::QuadDouble psx = mnd::convert<mnd::QuadDouble>(info.view.width / info.bWidth);
  397. mnd::QuadDouble psy = mnd::convert<mnd::QuadDouble>(info.view.height / info.bHeight);
  398. mnd::QuadDouble jx = mnd::convert<mnd::QuadDouble>(info.juliaX);
  399. mnd::QuadDouble jy = mnd::convert<mnd::QuadDouble>(info.juliaY);
  400. kernel.setArg(0, buffer_A);
  401. kernel.setArg(1, int(info.bWidth));
  402. kernel.setArg(2, x.x[0]);
  403. kernel.setArg(3, x.x[1]);
  404. kernel.setArg(4, x.x[2]);
  405. kernel.setArg(5, x.x[3]);
  406. kernel.setArg(6, y.x[0]);
  407. kernel.setArg(7, y.x[1]);
  408. kernel.setArg(8, y.x[2]);
  409. kernel.setArg(9, y.x[3]);
  410. kernel.setArg(10, psx.x[0]);
  411. kernel.setArg(11, psx.x[1]);
  412. kernel.setArg(12, psx.x[2]);
  413. kernel.setArg(13, psx.x[3]);
  414. kernel.setArg(14, psy.x[0]);
  415. kernel.setArg(15, psy.x[1]);
  416. kernel.setArg(16, psy.x[2]);
  417. kernel.setArg(17, psy.x[3]);
  418. kernel.setArg(18, int(info.maxIter));
  419. kernel.setArg(19, int(info.smooth ? 1 : 0));
  420. kernel.setArg(20, int(info.julia ? 1 : 0));
  421. kernel.setArg(21, jx.x[0]);
  422. kernel.setArg(22, jx.x[1]);
  423. kernel.setArg(23, jx.x[2]);
  424. kernel.setArg(24, jx.x[3]);
  425. kernel.setArg(25, jy.x[0]);
  426. kernel.setArg(26, jy.x[1]);
  427. kernel.setArg(27, jy.x[2]);
  428. kernel.setArg(28, jy.x[3]);
  429. cl_int result = queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  430. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  431. }
  432. std::string ClGeneratorQuadDouble::getKernelCode(bool smooth) const
  433. {
  434. return getQuadDouble_cl();
  435. }
  436. ClGeneratorHexDouble::ClGeneratorHexDouble(mnd::MandelDevice& device) :
  437. ClGenerator{ device, getHexDouble_cl(), mnd::Precision::HEX_DOUBLE }
  438. {
  439. kernel = Kernel(program, "iterate");
  440. }
  441. void ClGeneratorHexDouble::generate(const mnd::MandelInfo& info, float* data)
  442. {
  443. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  444. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  445. mnd::HexDouble x = mnd::convert<mnd::HexDouble>(info.view.x);
  446. mnd::HexDouble y = mnd::convert<mnd::HexDouble>(info.view.y);
  447. mnd::HexDouble psx = mnd::convert<mnd::HexDouble>(info.view.width / info.bWidth);
  448. mnd::HexDouble psy = mnd::convert<mnd::HexDouble>(info.view.height / info.bHeight);
  449. mnd::HexDouble jx = mnd::convert<mnd::HexDouble>(info.juliaX);
  450. mnd::HexDouble jy = mnd::convert<mnd::HexDouble>(info.juliaY);
  451. double vals[] = {250, 250, 250, 250, 250, 250 };
  452. const size_t argBufSize = 6 * sizeof(double);
  453. Buffer xbuf(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, argBufSize, x.x);
  454. Buffer ybuf(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, argBufSize, y.x);
  455. Buffer psxbuf(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, argBufSize, psx.x);
  456. Buffer psybuf(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, argBufSize, psy.x);
  457. Buffer jxbuf(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, argBufSize, jx.x);
  458. Buffer jybuf(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, argBufSize);
  459. kernel.setArg(0, buffer_A);
  460. kernel.setArg(1, int(info.bWidth));
  461. kernel.setArg(2, xbuf);
  462. kernel.setArg(3, ybuf);
  463. kernel.setArg(4, psxbuf);
  464. kernel.setArg(5, psybuf);
  465. kernel.setArg(6, int(info.maxIter));
  466. kernel.setArg(7, int(info.smooth ? 1 : 0));
  467. kernel.setArg(8, int(info.julia ? 1 : 0));
  468. kernel.setArg(9, jxbuf);
  469. kernel.setArg(10, jybuf);
  470. cl_int result = queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  471. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  472. }
  473. std::string ClGeneratorHexDouble::getKernelCode(bool smooth) const
  474. {
  475. return getHexDouble_cl();
  476. }
  477. ClGenerator128::ClGenerator128(mnd::MandelDevice& device) :
  478. ClGenerator{ device, getFixed512_cl(), mnd::Precision::FIXED128 }
  479. {
  480. kernel = Kernel(program, "iterate");
  481. }
  482. void ClGenerator128::generate(const mnd::MandelInfo& info, float* data)
  483. {
  484. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  485. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  486. float pixelScaleX = float(info.view.width / info.bWidth);
  487. float pixelScaleY = float(info.view.height / info.bHeight);
  488. using ull = unsigned long long;
  489. ull x1 = ull(double(info.view.x) * 0x10000ULL);
  490. ull x2 = 0;
  491. ull y1 = ull(double(info.view.y) * 0x10000ULL);
  492. ull y2 = 0;
  493. ull w1 = ull(double(pixelScaleX) * 0x10000ULL);
  494. ull w2 = 0;
  495. ull h1 = ull(double(pixelScaleY) * 0x10000ULL);
  496. ull h2 = 0;
  497. ull jx1 = ull(double(info.juliaX) * 0x10000ULL);
  498. ull jx2 = 0;
  499. ull jy1 = ull(double(info.juliaY) * 0x10000ULL);
  500. ull jy2 = 0;
  501. kernel.setArg(0, buffer_A);
  502. kernel.setArg(1, int(info.bWidth));
  503. kernel.setArg(2, ull(x1));
  504. kernel.setArg(3, ull(x2));
  505. kernel.setArg(4, ull(y1));
  506. kernel.setArg(5, ull(y2));
  507. kernel.setArg(6, ull(w1));
  508. kernel.setArg(7, ull(w2));
  509. kernel.setArg(8, ull(h1));
  510. kernel.setArg(9, ull(h2));
  511. kernel.setArg(10, int(info.maxIter));
  512. kernel.setArg(11, int(info.smooth ? 1 : 0));
  513. kernel.setArg(12, int(info.julia ? 1 : 0));
  514. kernel.setArg(13, ull(jx1));
  515. kernel.setArg(14, ull(jx2));
  516. kernel.setArg(15, ull(jy1));
  517. kernel.setArg(16, ull(jy2));
  518. queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  519. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  520. }
  521. std::string ClGenerator128::getKernelCode(bool smooth) const
  522. {
  523. /*//fprintf(stderr, "starting file read\n");
  524. std::ifstream t("mandel128.cl");
  525. std::string str((std::istreambuf_iterator<char>(t)),
  526. std::istreambuf_iterator<char>());
  527. //fprintf(stderr, "%s\n", str);
  528. return str;*/
  529. return getFixed128_cl();
  530. }
  531. ClGenerator64::ClGenerator64(mnd::MandelDevice& device) :
  532. ClGenerator{ device, getFixed64_cl(), mnd::Precision::FIXED64 }
  533. {
  534. kernel = Kernel(program, "iterate");
  535. }
  536. #include "CpuGenerators.h"
  537. void ClGenerator64::generate(const mnd::MandelInfo& info, float* data)
  538. {
  539. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  540. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  541. float pixelScaleX = float(info.view.width / info.bWidth);
  542. float pixelScaleY = float(info.view.height / info.bHeight);
  543. using ull = uint64_t;
  544. ull x = ull(::round(double(info.view.x) * (1LL << 48)));
  545. ull y = ull(::round(double(info.view.y) * (1LL << 48)));
  546. ull w = ull(::round(double(pixelScaleX) * (1LL << 48)));
  547. ull h = ull(::round(double(pixelScaleY) * (1LL << 48)));
  548. ull jx = ull(::round(double(info.juliaX) * (1LL << 48)));
  549. ull jy = ull(::round(double(info.juliaY) * (1LL << 48)));
  550. //x = 0;
  551. //y = 0;
  552. kernel.setArg(0, buffer_A);
  553. kernel.setArg(1, int(info.bWidth));
  554. kernel.setArg(2, ull(x));
  555. kernel.setArg(3, ull(y));
  556. kernel.setArg(4, ull(w));
  557. kernel.setArg(5, ull(h));
  558. kernel.setArg(6, int(info.maxIter));
  559. kernel.setArg(7, int(info.smooth ? 1 : 0));
  560. kernel.setArg(8, int(info.julia ? 1 : 0));
  561. kernel.setArg(9, ull(jx));
  562. kernel.setArg(10, ull(jy));
  563. queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  564. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  565. //CpuGenerator<Fixed64> fx;
  566. //fx.generate(info, data);
  567. }
  568. std::string ClGenerator64::getKernelCode(bool smooth) const
  569. {
  570. /*//fprintf(stderr, "starting file read\n");
  571. std::ifstream t("mandel128.cl");
  572. std::string str((std::istreambuf_iterator<char>(t)),
  573. std::istreambuf_iterator<char>());
  574. //fprintf(stderr, "%s\n", str);
  575. return str;*/
  576. return getFixed64_cl();
  577. }
  578. #endif // WITH_OPENCL