ClGenerators.cpp 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704
  1. #include "ClGenerators.h"
  2. #include "doubledouble.h"
  3. #include "OpenClCode.h"
  4. #ifdef WITH_OPENCL
  5. #include <iostream>
  6. #include <iterator>
  7. #include <utility>
  8. using namespace cl;
  9. using mnd::ClGenerator;
  10. using mnd::ClGeneratorFloat;
  11. using mnd::ClGeneratorDoubleFloat;
  12. using mnd::ClGeneratorDouble;
  13. using mnd::ClGeneratorDoubleDouble;
  14. using mnd::ClGeneratorQuadDouble;
  15. using mnd::ClGenerator128;
  16. using mnd::ClGenerator64;
  17. Platform getPlatform() {
  18. /* Returns the first platform found. */
  19. std::vector<Platform> all_platforms;
  20. Platform::get(&all_platforms);
  21. if (all_platforms.size() == 0) {
  22. std::cout << "No platforms found. Check OpenCL installation!\n";
  23. exit(1);
  24. }
  25. for (auto& p : all_platforms) {
  26. std::string name = p.getInfo<CL_PLATFORM_NAME>();
  27. std::string profile = p.getInfo<CL_PLATFORM_PROFILE>();
  28. printf("Platform: %s, %s\n", name.c_str(), profile.c_str());
  29. }
  30. return all_platforms[0];
  31. }
  32. Device getDevice(Platform& platform, int i, bool display = false) {
  33. /* Returns the deviced specified by the index i on platform.
  34. * If display is true, then all of the platforms are listed.
  35. */
  36. std::vector<Device> all_devices;
  37. platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
  38. if (all_devices.size() == 0) {
  39. std::cout << "No devices found. Check OpenCL installation!\n";
  40. exit(1);
  41. }
  42. if (display) {
  43. for (::size_t j = 0; j < all_devices.size(); j++) {
  44. printf("Device %d: %s\n", int(j), all_devices[j].getInfo<CL_DEVICE_NAME>().c_str());
  45. printf("preferred float width: %d\n", all_devices[j].getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>());
  46. printf("vendor: %s\n", all_devices[j].getInfo<CL_DEVICE_VENDOR>().c_str());
  47. }
  48. }
  49. return all_devices[i];
  50. }
  51. ClGenerator::ClGenerator(cl::Device device, const mnd::Real& precision) :
  52. Generator{ precision },
  53. device{ device }
  54. {
  55. /*Platform p = getPlatform();
  56. device = getDevice(p, 0, true);
  57. context = Context{ device };
  58. Program::Sources sources;
  59. std::string kcode = this->getKernelCode();
  60. sources.push_back({ kcode.c_str(), kcode.length() });
  61. program = Program{ context, sources };
  62. if (program.build({ device }) != CL_SUCCESS) {
  63. std::cout << "Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << std::endl;
  64. exit(1);
  65. }
  66. queue = CommandQueue(context, device);*/
  67. }
  68. ClGenerator::~ClGenerator(void)
  69. {
  70. queue.flush();
  71. queue.finish();
  72. }
  73. void ClGenerator::generate(const mnd::MandelInfo& info, float* data)
  74. {
  75. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  76. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  77. float pixelScaleX = float(info.view.width / info.bWidth);
  78. float pixelScaleY = float(info.view.height / info.bHeight);
  79. bool useVec = device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>() >= 4;
  80. Kernel iterate = Kernel(program, useVec ? "iterate_vec4" : "iterate");
  81. iterate.setArg(0, buffer_A);
  82. iterate.setArg(1, int(info.bWidth));
  83. iterate.setArg(2, float(info.view.x));
  84. iterate.setArg(3, float(info.view.y));
  85. iterate.setArg(4, float(pixelScaleX));
  86. iterate.setArg(5, float(pixelScaleY));
  87. iterate.setArg(6, int(info.maxIter));
  88. iterate.setArg(7, int(info.smooth ? 1 : 0));
  89. if (useVec) {
  90. queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight / 4));
  91. } else {
  92. queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
  93. }
  94. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  95. }
  96. ClGeneratorFloat::ClGeneratorFloat(cl::Device device) :
  97. ClGenerator{ device, mnd::getPrecision<float>() }
  98. {
  99. context = Context{ device };
  100. Program::Sources sources;
  101. std::string kcode = this->getKernelCode(false);
  102. sources.push_back({ kcode.c_str(), kcode.length() });
  103. program = Program{ context, sources };
  104. if (program.build({ device }) != CL_SUCCESS) {
  105. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
  106. }
  107. queue = CommandQueue(context, device);
  108. }
  109. std::string ClGeneratorFloat::getKernelCode(bool smooth) const
  110. {
  111. return mnd::getFloat_cl();
  112. /*
  113. // "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
  114. "__kernel void iterate(__global float* A, const int width, float xl, float yt, float pixelScaleX, float pixelScaleY, int max, int smooth) {"
  115. " int index = get_global_id(0) * 4;\n"
  116. " int x = index % width;"
  117. " int y = index / width;"
  118. " float4 a = (float4) (x * pixelScaleX + xl, (x + 1) * pixelScaleX + xl, (x + 2) * pixelScaleX + xl, (x + 3) * pixelScaleX + xl);"
  119. " float4 b = (float4) (y * pixelScaleY + yt);"
  120. " float4 ca = a;"
  121. " float4 cb = b;"
  122. " float4 resa = a;"
  123. " float4 resb = b;"
  124. " int4 count = (int4)(0);"
  125. ""
  126. " int n = 0;"
  127. " while (n < max) {"
  128. // " float aa = a * a;"
  129. // " float bb = b * b;"
  130. " float4 ab = a * b;\n"
  131. " float4 cmpVal = fma(a, a, b * b);\n"
  132. " int4 cmp = isless(cmpVal, (float4)(16.0f));\n"
  133. " if (!any(cmp)) break;\n"
  134. " a = fma(a, a, -fma(b, b, -ca));\n"
  135. " b = fma(2, ab, cb);\n"
  136. " if (smooth) {\n"
  137. " resa = as_float4(as_int4(a) & cmp | (as_int4(resa) & ~cmp));"
  138. " resb = as_float4(as_int4(b) & cmp | (as_int4(resb) & ~cmp));"
  139. " }\n"
  140. " count += cmp & (int4)(1);\n"
  141. " n++;"
  142. " }\n"
  143. " for (int i = 0; i < 4 && i + x < width; i++) {"
  144. " if (smooth != 0)\n"
  145. " A[index + i] = ((float) count[i]) + 1 - log(log(fma(resa[i], resa[i], resb[i] * resb[i])) / 2) / log(2.0f);\n"
  146. " else\n"
  147. " A[index + i] = ((float) count[i]);\n"
  148. " }"
  149. "}";*/
  150. /*
  151. // "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
  152. "__kernel void iterate(__global float* A, const int width, float xl, float yt, float pixelScaleX, float pixelScaleY, int max, int smooth) {"
  153. " int index = get_global_id(0);\n"
  154. " int x = index % width;"
  155. " int y = index / width;"
  156. " float a = x * pixelScaleX + xl;"
  157. " float b = y * pixelScaleY + yt;"
  158. " float ca = a;"
  159. " float cb = b;"
  160. ""
  161. " int n = 0;"
  162. " while (n < max - 1) {"
  163. // " float aa = a * a;"
  164. // " float bb = b * b;"
  165. " float ab = a * b;"
  166. " if (fma(a, a, b * b) > 16) break;"
  167. " a = fma(a, a, -fma(b, b, -ca));"
  168. " b = fma(2, ab, cb);"
  169. " n++;"
  170. " }\n"
  171. " if (n >= max - 1)\n"
  172. " A[index] = max;\n"
  173. " else {"
  174. " if (smooth != 0)\n"
  175. " A[index] = ((float)n) + 1 - log(log(a * a + b * b) / 2) / log(2.0f);\n"
  176. " else\n"
  177. " A[index] = ((float)n);\n"
  178. " }"
  179. "}";
  180. */
  181. }
  182. ClGeneratorDoubleFloat::ClGeneratorDoubleFloat(cl::Device device) :
  183. ClGenerator{ device, mnd::getPrecision(mnd::Precision::DOUBLE_FLOAT) }
  184. {
  185. context = Context{ device };
  186. Program::Sources sources;
  187. std::string kcode = this->getKernelCode(false);
  188. sources.push_back({ kcode.c_str(), kcode.length() });
  189. program = Program{ context, sources };
  190. if (program.build({ device }) != CL_SUCCESS) {
  191. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
  192. }
  193. queue = CommandQueue(context, device);
  194. }
  /*
   * Host-side "double-float" arithmetic: a value is represented as an
   * unevaluated sum of two floats, { first = high part, second = low part }.
   */

  /** Adds a and b; returns { s, r } where s is the rounded sum and r its rounding error. */
  std::pair<float, float> twoSum(float a, float b) {
      float s = a + b;
      float v = s - a;
      float r = (a - (s - v)) + (b - v);
      return { s, r };
  }

  /**
   * Splits a into { ahi, alo } with ahi + alo == a, using the factor
   * 4097 = 2^12 + 1 so that each half fits in roughly half of a float mantissa.
   */
  std::pair<float, float> split(float a) {
      float c = (4096 + 1) * a;
      float abig = c - a;
      float ahi = c - abig;
      float alo = a - ahi;
      return { ahi, alo };
  }

  /** Multiplies a and b; returns { x, y } where x is the rounded product and y its rounding error. */
  std::pair<float, float> twoProd(float a, float b) {
      float x = a * b;
      auto aex = split(a);
      auto bex = split(b);
      // Subtract the partial products of the split halves from x one at a
      // time; what remains is the part of a * b that x could not hold.
      float errx = x - (aex.first * bex.first);
      float erry = errx - (aex.second * bex.first);
      float errz = erry - (aex.first * bex.second);
      float y = (aex.second * bex.second) - errz;
      return { x, y };
  }

  /** Adds two double-float values. */
  std::pair<float, float> add(std::pair<float, float> a, std::pair<float, float> b) {
      float r = a.first + b.first;
      float s;
      // Start the error accumulation from the operand with the larger high part.
      if (fabs(a.first) >= fabs(b.first)) {
          s = (((a.first - r) + b.first) + b.second) + a.second;
      }
      else {
          s = (((b.first - r) + a.first) + a.second) + b.second;
      }
      return twoSum(r, s);
  }

  /** Multiplies two double-float values (the low * low term is dropped). */
  std::pair<float, float> mul(std::pair<float, float> a, std::pair<float, float> b) {
      auto t = twoProd(a.first, b.first);
      t.second += ((a.first * b.second) + (a.second * b.first));
      return twoSum(t.first, t.second);
  }

  /** Multiplies a double-float value a by a plain float b. */
  std::pair<float, float> mulFloat(std::pair<float, float> a, float b) {
      std::pair<float, float> t = twoProd(a.first, b);
      // The low word of a contributes a.second * b to the low part of the
      // result; it has to be folded in before renormalising.
      float lowPart = (a.second * b) + t.second;
      return twoSum(t.first, lowPart);
  }
  239. void ClGeneratorDoubleFloat::generate(const mnd::MandelInfo& info, float* data)
  240. {
  241. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  242. auto splitDouble = [] (double x) {
  243. /*uint64_t xl = *((uint64_t*)&x);
  244. uint64_t mantissa = xl & 0x000FFFFFFFFFFFFFULL;
  245. uint64_t exp = (xl & 0x7FF0000000000000ULL) >> 53;
  246. bool sign = (xl & 0x1000000000000000ULL) != 0;
  247. uint32_t floathi = exp << 23;*/
  248. float hi = float(x);
  249. float lo = float(x - double(hi));
  250. if (abs(lo) >= 1.0e-10f) {
  251. //printf("hi: %.10ef, lo: %.10ef\n", hi, lo);
  252. //fflush(stdout);
  253. }
  254. return std::pair{ hi, lo };
  255. };
  256. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  257. double pixelScX = double(info.view.width / info.bWidth);
  258. double pixelScY = double(info.view.height / info.bHeight);
  259. auto[x1, x2] = splitDouble(double(info.view.x));
  260. auto[y1, y2] = splitDouble(double(info.view.y));
  261. auto[w1, w2] = splitDouble(pixelScX);
  262. auto[h1, h2] = splitDouble(pixelScY);
  263. /*
  264. for (int px = 0; px < info.bWidth; px++) {
  265. for (int py = 0; py < info.bHeight; py++) {
  266. std::pair<float, float> xl = { x1, x2 };
  267. std::pair<float, float> yt = { y1, y2 };
  268. std::pair<float, float> pixelScaleX = { w1, w2 };
  269. std::pair<float, float> pixelScaleY = { h1, h2 };
  270. std::pair<float, float> a = add(mulFloat(pixelScaleX, (float) px), xl); // pixelScaleX * px + xl
  271. std::pair<float, float> b = add(mulFloat(pixelScaleY, (float) py), yt); // pixelScaleY * py + yt
  272. std::pair<float, float> ca = a;
  273. std::pair<float, float> cb = b;
  274. int n = 0;
  275. while (n < info.maxIter - 1) {
  276. std::pair<float, float> aa = mul(a, a);
  277. std::pair<float, float> bb = mul(b, b);
  278. std::pair<float, float> ab = mul(a, b);
  279. if (aa.first + bb.first > 16) break;
  280. std::pair<float, float> minusbb = { -bb.first, -bb.second };
  281. a = add(add(aa, minusbb), ca);
  282. b = add(add(ab, ab), cb);
  283. n++;
  284. }
  285. // N + 1 - log (log |Z(N)|) / log 2
  286. if (n >= info.maxIter - 1)
  287. data[px + py * info.bWidth] = info.maxIter;
  288. else {
  289. if (info.smooth)
  290. data[px + py * info.bWidth] = ((float) n) + 1 - log(log(a.first * a.first + b.first * b.first ) / 2) / log(2.0f);
  291. else
  292. data[px + py * info.bWidth] = ((float)n);
  293. }
  294. }
  295. }
  296. return;
  297. */
  298. Kernel iterate = Kernel(program, "iterate");
  299. iterate.setArg(0, buffer_A);
  300. iterate.setArg(1, int(info.bWidth));
  301. iterate.setArg(2, x1);
  302. iterate.setArg(3, x2);
  303. iterate.setArg(4, y1);
  304. iterate.setArg(5, y2);
  305. iterate.setArg(6, w1);
  306. iterate.setArg(7, w2);
  307. iterate.setArg(8, h1);
  308. iterate.setArg(9, h2);
  309. iterate.setArg(10, int(info.maxIter));
  310. iterate.setArg(11, int(info.smooth ? 1 : 0));
  311. cl_int result = queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
  312. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  313. }
  314. std::string ClGeneratorDoubleFloat::getKernelCode(bool smooth) const
  315. {
  316. return getDoubleFloat_cl();
  317. }
  318. ClGeneratorDouble::ClGeneratorDouble(cl::Device device) :
  319. ClGenerator{ device, mnd::getPrecision<double>() }
  320. {
  321. context = Context{ device };
  322. Program::Sources sources;
  323. std::string kcode = this->getKernelCode(false);
  324. sources.push_back({ kcode.c_str(), kcode.length() });
  325. program = Program{ context, sources };
  326. if (program.build({ device }) != CL_SUCCESS) {
  327. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
  328. }
  329. queue = CommandQueue(context, device);
  330. }
  331. void ClGeneratorDouble::generate(const mnd::MandelInfo& info, float* data)
  332. {
  333. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  334. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  335. double pixelScaleX = double(info.view.width / info.bWidth);
  336. double pixelScaleY = double(info.view.height / info.bHeight);
  337. Kernel iterate = Kernel(program, "iterate");
  338. iterate.setArg(0, buffer_A);
  339. iterate.setArg(1, int(info.bWidth));
  340. iterate.setArg(2, double(info.view.x));
  341. iterate.setArg(3, double(info.view.y));
  342. iterate.setArg(4, double(pixelScaleX));
  343. iterate.setArg(5, double(pixelScaleY));
  344. iterate.setArg(6, int(info.maxIter));
  345. iterate.setArg(7, int(info.smooth ? 1 : 0));
  346. cl_int result = queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
  347. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  348. }
  349. std::string ClGeneratorDouble::getKernelCode(bool smooth) const
  350. {
  351. return
  352. "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
  353. "__kernel void iterate(__global float* A, const int width, double xl, double yt, double pixelScaleX, double pixelScaleY, int max, int smooth) {\n"
  354. " int index = get_global_id(0);\n"
  355. " int x = index % width;"
  356. " int y = index / width;"
  357. " double a = x * pixelScaleX + xl;"
  358. " double b = y * pixelScaleY + yt;"
  359. " double ca = a;"
  360. " double cb = b;"
  361. ""
  362. " int n = 0;"
  363. " while (n < max - 1) {"
  364. " double aa = a * a;"
  365. " double bb = b * b;"
  366. " double ab = a * b;"
  367. " if (aa + bb > 16) break;"
  368. " a = aa - bb + ca;"
  369. " b = ab + ab + cb;"
  370. " n++;"
  371. " }\n"
  372. // N + 1 - log (log |Z(N)|) / log 2
  373. " if (n >= max - 1)\n"
  374. " A[index] = max;\n"
  375. " else {"
  376. " if (smooth != 0)\n"
  377. " A[index] = ((float)n) + 1 - log(log((float)(a * a + b * b)) / 2) / log(2.0f);\n"
  378. " else\n"
  379. " A[index] = ((float)n);\n"
  380. " }"
  381. "}";
  382. }
  383. ClGeneratorDoubleDouble::ClGeneratorDoubleDouble(cl::Device device) :
  384. ClGenerator{ device, mnd::getPrecision<DoubleDouble>() }
  385. {
  386. context = Context{ device };
  387. Program::Sources sources;
  388. std::string kcode = this->getKernelCode(false);
  389. sources.push_back({ kcode.c_str(), kcode.length() });
  390. program = Program{ context, sources };
  391. if (program.build({ device }) != CL_SUCCESS) {
  392. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
  393. }
  394. queue = CommandQueue(context, device);
  395. }
  396. void ClGeneratorDoubleDouble::generate(const mnd::MandelInfo& info, float* data)
  397. {
  398. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  399. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  400. mnd::DoubleDouble x = mnd::convert<mnd::DoubleDouble>(info.view.x);
  401. mnd::DoubleDouble y = mnd::convert<mnd::DoubleDouble>(info.view.y);
  402. mnd::DoubleDouble psx = mnd::convert<mnd::DoubleDouble>(info.view.width / info.bWidth);
  403. mnd::DoubleDouble psy = mnd::convert<mnd::DoubleDouble>(info.view.height / info.bHeight);
  404. Kernel iterate = Kernel(program, "iterate");
  405. iterate.setArg(0, buffer_A);
  406. iterate.setArg(1, int(info.bWidth));
  407. iterate.setArg(2, x.x[0]);
  408. iterate.setArg(3, x.x[1]);
  409. iterate.setArg(4, y.x[0]);
  410. iterate.setArg(5, y.x[1]);
  411. iterate.setArg(6, psx.x[0]);
  412. iterate.setArg(7, psx.x[1]);
  413. iterate.setArg(8, psy.x[0]);
  414. iterate.setArg(9, psy.x[1]);
  415. iterate.setArg(10, int(info.maxIter));
  416. iterate.setArg(11, int(info.smooth ? 1 : 0));
  417. cl_int result = queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
  418. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  419. }
  420. std::string ClGeneratorDoubleDouble::getKernelCode(bool smooth) const
  421. {
  422. return (char*) doubledouble_cl;
  423. }
  424. ClGeneratorQuadDouble::ClGeneratorQuadDouble(cl::Device device) :
  425. ClGenerator{ device, mnd::getPrecision<QuadDouble>() }
  426. {
  427. context = Context{ device };
  428. Program::Sources sources;
  429. std::string kcode = this->getKernelCode(false);
  430. sources.push_back({ kcode.c_str(), kcode.length() });
  431. program = Program{ context, sources };
  432. if (program.build({ device }) != CL_SUCCESS) {
  433. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
  434. }
  435. queue = CommandQueue(context, device);
  436. }
  437. void ClGeneratorQuadDouble::generate(const mnd::MandelInfo& info, float* data)
  438. {
  439. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  440. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  441. mnd::DoubleDouble x = mnd::convert<mnd::DoubleDouble>(info.view.x);
  442. mnd::DoubleDouble y = mnd::convert<mnd::DoubleDouble>(info.view.y);
  443. mnd::DoubleDouble psx = mnd::convert<mnd::DoubleDouble>(info.view.width / info.bWidth);
  444. mnd::DoubleDouble psy = mnd::convert<mnd::DoubleDouble>(info.view.height / info.bHeight);
  445. Kernel iterate = Kernel(program, "iterate");
  446. iterate.setArg(0, buffer_A);
  447. iterate.setArg(1, int(info.bWidth));
  448. iterate.setArg(2, x.x[0]);
  449. iterate.setArg(3, x.x[1]);
  450. iterate.setArg(4, y.x[0]);
  451. iterate.setArg(5, y.x[1]);
  452. iterate.setArg(6, psx.x[0]);
  453. iterate.setArg(7, psx.x[1]);
  454. iterate.setArg(8, psy.x[0]);
  455. iterate.setArg(9, psy.x[1]);
  456. iterate.setArg(10, int(info.maxIter));
  457. iterate.setArg(11, int(info.smooth ? 1 : 0));
  458. cl_int result = queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
  459. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  460. }
  461. std::string ClGeneratorQuadDouble::getKernelCode(bool smooth) const
  462. {
  463. return (char*) doubledouble_cl;
  464. }
  465. ClGenerator128::ClGenerator128(cl::Device device) :
  466. ClGenerator{ device, mnd::getPrecision<Fixed128>() }
  467. {
  468. context = Context{ device };
  469. Program::Sources sources;
  470. std::string kcode = this->getKernelCode(false);
  471. sources.push_back({ kcode.c_str(), kcode.length() });
  472. program = Program{ context, sources };
  473. if (program.build({ device }) != CL_SUCCESS) {
  474. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
  475. }
  476. queue = CommandQueue(context, device);
  477. }
  478. void ClGenerator128::generate(const mnd::MandelInfo& info, float* data)
  479. {
  480. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  481. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  482. float pixelScaleX = float(info.view.width / info.bWidth);
  483. float pixelScaleY = float(info.view.height / info.bHeight);
  484. using ull = unsigned long long;
  485. ull x1 = ull(double(info.view.x) * 0x100000000ULL);
  486. ull x2 = 0;
  487. ull y1 = ull(double(info.view.y) * 0x100000000ULL);
  488. ull y2 = 0;
  489. ull w1 = ull(double(pixelScaleX) * 0x100000000ULL);
  490. ull w2 = 0;
  491. ull h1 = ull(double(pixelScaleY) * 0x100000000ULL);
  492. ull h2 = 0;
  493. Kernel iterate = Kernel(program, "iterate");
  494. iterate.setArg(0, buffer_A);
  495. iterate.setArg(1, int(info.bWidth));
  496. iterate.setArg(2, ull(x1));
  497. iterate.setArg(3, ull(x2));
  498. iterate.setArg(4, ull(y1));
  499. iterate.setArg(5, ull(y2));
  500. iterate.setArg(6, ull(w1));
  501. iterate.setArg(7, ull(w2));
  502. iterate.setArg(8, ull(h1));
  503. iterate.setArg(9, ull(h2));
  504. iterate.setArg(10, int(info.maxIter));
  505. iterate.setArg(11, int(info.smooth ? 1 : 0));
  506. queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
  507. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  508. }
  509. std::string ClGenerator128::getKernelCode(bool smooth) const
  510. {
  511. /*//fprintf(stderr, "starting file read\n");
  512. std::ifstream t("mandel128.cl");
  513. std::string str((std::istreambuf_iterator<char>(t)),
  514. std::istreambuf_iterator<char>());
  515. //fprintf(stderr, "%s\n", str);
  516. return str;*/
  517. return getFixed512_cl();
  518. }
  519. ClGenerator64::ClGenerator64(cl::Device device) :
  520. ClGenerator{ device, mnd::getPrecision<Fixed64>() }
  521. {
  522. context = Context{ device };
  523. Program::Sources sources;
  524. std::string kcode = this->getKernelCode(false);
  525. sources.push_back({ kcode.c_str(), kcode.length() });
  526. program = Program{ context, sources };
  527. if (program.build({ device }) != CL_SUCCESS) {
  528. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
  529. }
  530. queue = CommandQueue(context, device);
  531. }
  532. #include "CpuGenerators.h"
  533. void ClGenerator64::generate(const mnd::MandelInfo& info, float* data)
  534. {
  535. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  536. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  537. float pixelScaleX = float(info.view.width / info.bWidth);
  538. float pixelScaleY = float(info.view.height / info.bHeight);
  539. using ull = unsigned long long;
  540. ull x = ull(::round(double(info.view.x) * (1LL << 48)));
  541. ull y = ull(::round(double(info.view.y) * (1LL << 48)));
  542. ull w = ull(::round(double(pixelScaleX) * (1LL << 48)));
  543. ull h = ull(::round(double(pixelScaleY) * (1LL << 48)));
  544. //x = 0;
  545. //y = 0;
  546. Kernel iterate = Kernel(program, "iterate");
  547. iterate.setArg(0, buffer_A);
  548. iterate.setArg(1, int(info.bWidth));
  549. iterate.setArg(2, ull(x));
  550. iterate.setArg(3, ull(y));
  551. iterate.setArg(4, ull(w));
  552. iterate.setArg(5, ull(h));
  553. iterate.setArg(6, int(info.maxIter));
  554. iterate.setArg(7, int(info.smooth ? 1 : 0));
  555. queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
  556. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  557. //CpuGenerator<Fixed64> fx;
  558. //fx.generate(info, data);
  559. }
  560. std::string ClGenerator64::getKernelCode(bool smooth) const
  561. {
  562. /*//fprintf(stderr, "starting file read\n");
  563. std::ifstream t("mandel128.cl");
  564. std::string str((std::istreambuf_iterator<char>(t)),
  565. std::istreambuf_iterator<char>());
  566. //fprintf(stderr, "%s\n", str);
  567. return str;*/
  568. return getFixed64_cl();
  569. }
  570. #endif // WITH_OPENCL