// ClGenerators.cpp
  1. #include "ClGenerators.h"
  2. #include "OpenClCode.h"
  3. #ifdef WITH_OPENCL
  4. #include <iostream>
  5. #include <iterator>
  6. #include <utility>
  7. using namespace cl;
  8. using mnd::ClGenerator;
  9. using mnd::ClGeneratorFloat;
  10. using mnd::ClGeneratorDoubleFloat;
  11. using mnd::ClGeneratorDouble;
  12. using mnd::ClGeneratorDoubleDouble;
  13. using mnd::ClGeneratorQuadDouble;
  14. using mnd::ClGenerator128;
  15. using mnd::ClGenerator64;
  16. Platform getPlatform() {
  17. /* Returns the first platform found. */
  18. std::vector<Platform> all_platforms;
  19. Platform::get(&all_platforms);
  20. if (all_platforms.size() == 0) {
  21. std::cout << "No platforms found. Check OpenCL installation!\n";
  22. exit(1);
  23. }
  24. for (auto& p : all_platforms) {
  25. std::string name = p.getInfo<CL_PLATFORM_NAME>();
  26. std::string profile = p.getInfo<CL_PLATFORM_PROFILE>();
  27. printf("Platform: %s, %s\n", name.c_str(), profile.c_str());
  28. }
  29. return all_platforms[0];
  30. }
  31. Device getDevice(Platform& platform, int i, bool display = false) {
  32. /* Returns the deviced specified by the index i on platform.
  33. * If display is true, then all of the platforms are listed.
  34. */
  35. std::vector<Device> all_devices;
  36. platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
  37. if (all_devices.size() == 0) {
  38. std::cout << "No devices found. Check OpenCL installation!\n";
  39. exit(1);
  40. }
  41. if (display) {
  42. for (::size_t j = 0; j < all_devices.size(); j++) {
  43. printf("Device %d: %s\n", int(j), all_devices[j].getInfo<CL_DEVICE_NAME>().c_str());
  44. printf("preferred float width: %d\n", all_devices[j].getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>());
  45. printf("vendor: %s\n", all_devices[j].getInfo<CL_DEVICE_VENDOR>().c_str());
  46. }
  47. }
  48. return all_devices[i];
  49. }
  50. ClGenerator::ClGenerator(cl::Device device, const mnd::Real& precision) :
  51. MandelGenerator{ precision },
  52. device{ device }
  53. {
  54. /*Platform p = getPlatform();
  55. device = getDevice(p, 0, true);
  56. context = Context{ device };
  57. Program::Sources sources;
  58. std::string kcode = this->getKernelCode();
  59. sources.push_back({ kcode.c_str(), kcode.length() });
  60. program = Program{ context, sources };
  61. if (program.build({ device }) != CL_SUCCESS) {
  62. std::cout << "Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << std::endl;
  63. exit(1);
  64. }
  65. queue = CommandQueue(context, device);*/
  66. }
  67. ClGenerator::~ClGenerator(void)
  68. {
  69. queue.flush();
  70. queue.finish();
  71. }
  72. void ClGenerator::generate(const mnd::MandelInfo& info, float* data)
  73. {
  74. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  75. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  76. float pixelScaleX = float(info.view.width / info.bWidth);
  77. float pixelScaleY = float(info.view.height / info.bHeight);
  78. bool useVec = device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>() >= 4;
  79. Kernel iterate = Kernel(program, useVec ? "iterate_vec4" : "iterate");
  80. iterate.setArg(0, buffer_A);
  81. iterate.setArg(1, int(info.bWidth));
  82. iterate.setArg(2, float(info.view.x));
  83. iterate.setArg(3, float(info.view.y));
  84. iterate.setArg(4, float(pixelScaleX));
  85. iterate.setArg(5, float(pixelScaleY));
  86. iterate.setArg(6, int(info.maxIter));
  87. iterate.setArg(7, int(info.smooth ? 1 : 0));
  88. iterate.setArg(8, int(info.julia ? 1 : 0));
  89. iterate.setArg(9, float(info.juliaX));
  90. iterate.setArg(10, float(info.juliaY));
  91. if (useVec) {
  92. queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight / 4));
  93. } else {
  94. queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
  95. }
  96. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  97. }
  98. ClGeneratorFloat::ClGeneratorFloat(cl::Device device, const std::string& code) :
  99. ClGenerator{ device, mnd::getPrecision<float>() }
  100. {
  101. context = Context{ device };
  102. Program::Sources sources;
  103. sources.push_back({ code.c_str(), code.length() });
  104. program = Program{ context, sources };
  105. if (program.build({ device }) != CL_SUCCESS) {
  106. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
  107. }
  108. queue = CommandQueue(context, device);
  109. }
  110. std::string ClGeneratorFloat::getKernelCode(bool smooth) const
  111. {
  112. return mnd::getFloat_cl();
  113. }
  114. ClGeneratorDoubleFloat::ClGeneratorDoubleFloat(cl::Device device) :
  115. ClGenerator{ device, mnd::getPrecision(mnd::Precision::DOUBLE_FLOAT) }
  116. {
  117. context = Context{ device };
  118. Program::Sources sources;
  119. std::string kcode = this->getKernelCode(false);
  120. sources.push_back({ kcode.c_str(), kcode.length() });
  121. program = Program{ context, sources };
  122. if (program.build({ device }) != CL_SUCCESS) {
  123. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
  124. }
  125. queue = CommandQueue(context, device);
  126. }
  127. std::pair<float, float> twoSum(float a, float b) {
  128. float s = a + b;
  129. float v = s - a;
  130. float r = (a - (s - v)) + (b - v);
  131. return { s, r };
  132. }
  133. std::pair<float, float> split(float a) {
  134. float c = (4096 + 1) * a;
  135. float abig = c - a;
  136. float ahi = c - abig;
  137. float alo = a - ahi;
  138. return { ahi, alo };
  139. }
  140. std::pair<float, float> twoProd(float a, float b) {
  141. float x = a * b;
  142. auto aex = split(a);
  143. auto bex = split(b);
  144. float errx = x - (aex.first * bex.first);
  145. float erry = errx - (aex.second * bex.first);
  146. float errz = erry - (aex.first * bex.second);
  147. float y = (aex.second * bex.second) - errz;
  148. return { x, y };
  149. }
  150. std::pair<float, float> add(std::pair<float, float> a, std::pair<float, float> b) {
  151. float r = a.first + b.first;
  152. float s;
  153. if (fabs(a.first) >= fabs(b.first)) {
  154. s = (((a.first - r) + b.first) + b.second) + a.second;
  155. }
  156. else {
  157. s = (((b.first - r) + a.first) + a.second) + b.second;
  158. }
  159. return twoSum(r, s);
  160. }
  161. std::pair<float, float> mul(std::pair<float, float> a, std::pair<float, float> b) {
  162. auto t = twoProd(a.first, b.first);
  163. t.second += ((a.first * b.second) + (a.second * b.first));
  164. return twoSum(t.first, t.second);
  165. }
  166. std::pair<float, float> mulFloat(std::pair<float, float> a, float b) {
  167. std::pair<float, float> t = twoProd(a.first, b);
  168. float t3 = (a.second * b) + t.second;
  169. return twoSum(t.first, t.second);
  170. }
  171. void ClGeneratorDoubleFloat::generate(const mnd::MandelInfo& info, float* data)
  172. {
  173. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  174. auto splitDouble = [] (double x) {
  175. /*uint64_t xl = *((uint64_t*)&x);
  176. uint64_t mantissa = xl & 0x000FFFFFFFFFFFFFULL;
  177. uint64_t exp = (xl & 0x7FF0000000000000ULL) >> 53;
  178. bool sign = (xl & 0x1000000000000000ULL) != 0;
  179. uint32_t floathi = exp << 23;*/
  180. float hi = float(x);
  181. float lo = float(x - double(hi));
  182. if (abs(lo) >= 1.0e-10f) {
  183. //printf("hi: %.10ef, lo: %.10ef\n", hi, lo);
  184. //fflush(stdout);
  185. }
  186. return std::pair{ hi, lo };
  187. };
  188. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  189. double pixelScX = double(info.view.width / info.bWidth);
  190. double pixelScY = double(info.view.height / info.bHeight);
  191. auto[x1, x2] = splitDouble(double(info.view.x));
  192. auto[y1, y2] = splitDouble(double(info.view.y));
  193. auto[w1, w2] = splitDouble(pixelScX);
  194. auto[h1, h2] = splitDouble(pixelScY);
  195. /*
  196. for (int px = 0; px < info.bWidth; px++) {
  197. for (int py = 0; py < info.bHeight; py++) {
  198. std::pair<float, float> xl = { x1, x2 };
  199. std::pair<float, float> yt = { y1, y2 };
  200. std::pair<float, float> pixelScaleX = { w1, w2 };
  201. std::pair<float, float> pixelScaleY = { h1, h2 };
  202. std::pair<float, float> a = add(mulFloat(pixelScaleX, (float) px), xl); // pixelScaleX * px + xl
  203. std::pair<float, float> b = add(mulFloat(pixelScaleY, (float) py), yt); // pixelScaleY * py + yt
  204. std::pair<float, float> ca = a;
  205. std::pair<float, float> cb = b;
  206. int n = 0;
  207. while (n < info.maxIter - 1) {
  208. std::pair<float, float> aa = mul(a, a);
  209. std::pair<float, float> bb = mul(b, b);
  210. std::pair<float, float> ab = mul(a, b);
  211. if (aa.first + bb.first > 16) break;
  212. std::pair<float, float> minusbb = { -bb.first, -bb.second };
  213. a = add(add(aa, minusbb), ca);
  214. b = add(add(ab, ab), cb);
  215. n++;
  216. }
  217. // N + 1 - log (log |Z(N)|) / log 2
  218. if (n >= info.maxIter - 1)
  219. data[px + py * info.bWidth] = info.maxIter;
  220. else {
  221. if (info.smooth)
  222. data[px + py * info.bWidth] = ((float) n) + 1 - log(log(a.first * a.first + b.first * b.first ) / 2) / log(2.0f);
  223. else
  224. data[px + py * info.bWidth] = ((float)n);
  225. }
  226. }
  227. }
  228. return;
  229. */
  230. Kernel iterate = Kernel(program, "iterate");
  231. iterate.setArg(0, buffer_A);
  232. iterate.setArg(1, int(info.bWidth));
  233. iterate.setArg(2, x1);
  234. iterate.setArg(3, x2);
  235. iterate.setArg(4, y1);
  236. iterate.setArg(5, y2);
  237. iterate.setArg(6, w1);
  238. iterate.setArg(7, w2);
  239. iterate.setArg(8, h1);
  240. iterate.setArg(9, h2);
  241. iterate.setArg(10, int(info.maxIter));
  242. iterate.setArg(11, int(info.smooth ? 1 : 0));
  243. cl_int result = queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
  244. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  245. }
  246. std::string ClGeneratorDoubleFloat::getKernelCode(bool smooth) const
  247. {
  248. return getDoubleFloat_cl();
  249. }
  250. ClGeneratorDouble::ClGeneratorDouble(cl::Device device) :
  251. ClGenerator{ device, mnd::getPrecision<double>() }
  252. {
  253. context = Context{ device };
  254. Program::Sources sources;
  255. std::string kcode = getDouble_cl();
  256. sources.push_back({ kcode.c_str(), kcode.length() });
  257. program = Program{ context, sources };
  258. if (program.build({ device }) != CL_SUCCESS) {
  259. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
  260. }
  261. queue = CommandQueue(context, device);
  262. }
  263. void ClGeneratorDouble::generate(const mnd::MandelInfo& info, float* data)
  264. {
  265. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  266. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  267. double pixelScaleX = double(info.view.width / info.bWidth);
  268. double pixelScaleY = double(info.view.height / info.bHeight);
  269. Kernel iterate = Kernel(program, "iterate");
  270. iterate.setArg(0, buffer_A);
  271. iterate.setArg(1, int(info.bWidth));
  272. iterate.setArg(2, double(info.view.x));
  273. iterate.setArg(3, double(info.view.y));
  274. iterate.setArg(4, double(pixelScaleX));
  275. iterate.setArg(5, double(pixelScaleY));
  276. iterate.setArg(6, int(info.maxIter));
  277. iterate.setArg(7, int(info.smooth ? 1 : 0));
  278. iterate.setArg(8, int(info.julia ? 1 : 0));
  279. iterate.setArg(9, double(info.juliaX));
  280. iterate.setArg(10, double(info.juliaY));
  281. cl_int result = queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
  282. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  283. }
  284. std::string ClGeneratorDouble::getKernelCode(bool smooth) const
  285. {
  286. return
  287. "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
  288. "__kernel void iterate(__global float* A, const int width, double xl, double yt, double pixelScaleX, double pixelScaleY, int max, int smooth) {\n"
  289. " int index = get_global_id(0);\n"
  290. " int x = index % width;"
  291. " int y = index / width;"
  292. " double a = x * pixelScaleX + xl;"
  293. " double b = y * pixelScaleY + yt;"
  294. " double ca = a;"
  295. " double cb = b;"
  296. ""
  297. " int n = 0;"
  298. " while (n < max - 1) {"
  299. " double aa = a * a;"
  300. " double bb = b * b;"
  301. " double ab = a * b;"
  302. " if (aa + bb > 16) break;"
  303. " a = aa - bb + ca;"
  304. " b = ab + ab + cb;"
  305. " n++;"
  306. " }\n"
  307. // N + 1 - log (log |Z(N)|) / log 2
  308. " if (n >= max - 1)\n"
  309. " A[index] = max;\n"
  310. " else {"
  311. " if (smooth != 0)\n"
  312. " A[index] = ((float)n) + 1 - log(log((float)(a * a + b * b)) / 2) / log(2.0f);\n"
  313. " else\n"
  314. " A[index] = ((float)n);\n"
  315. " }"
  316. "}";
  317. }
  318. ClGeneratorDoubleDouble::ClGeneratorDoubleDouble(cl::Device device) :
  319. ClGenerator{ device, mnd::getPrecision<DoubleDouble>() }
  320. {
  321. context = Context{ device };
  322. Program::Sources sources;
  323. std::string kcode = this->getKernelCode(false);
  324. sources.push_back({ kcode.c_str(), kcode.length() });
  325. program = Program{ context, sources };
  326. if (program.build({ device }) != CL_SUCCESS) {
  327. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
  328. }
  329. queue = CommandQueue(context, device);
  330. }
  331. void ClGeneratorDoubleDouble::generate(const mnd::MandelInfo& info, float* data)
  332. {
  333. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  334. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  335. mnd::DoubleDouble x = mnd::convert<mnd::DoubleDouble>(info.view.x);
  336. mnd::DoubleDouble y = mnd::convert<mnd::DoubleDouble>(info.view.y);
  337. mnd::DoubleDouble psx = mnd::convert<mnd::DoubleDouble>(info.view.width / info.bWidth);
  338. mnd::DoubleDouble psy = mnd::convert<mnd::DoubleDouble>(info.view.height / info.bHeight);
  339. Kernel iterate = Kernel(program, "iterate");
  340. iterate.setArg(0, buffer_A);
  341. iterate.setArg(1, int(info.bWidth));
  342. iterate.setArg(2, x.x[0]);
  343. iterate.setArg(3, x.x[1]);
  344. iterate.setArg(4, y.x[0]);
  345. iterate.setArg(5, y.x[1]);
  346. iterate.setArg(6, psx.x[0]);
  347. iterate.setArg(7, psx.x[1]);
  348. iterate.setArg(8, psy.x[0]);
  349. iterate.setArg(9, psy.x[1]);
  350. iterate.setArg(10, int(info.maxIter));
  351. iterate.setArg(11, int(info.smooth ? 1 : 0));
  352. cl_int result = queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
  353. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  354. }
  355. std::string ClGeneratorDoubleDouble::getKernelCode(bool smooth) const
  356. {
  357. return getDoubleDouble_cl();
  358. }
  359. ClGeneratorQuadDouble::ClGeneratorQuadDouble(cl::Device device) :
  360. ClGenerator{ device, mnd::getPrecision<QuadDouble>() }
  361. {
  362. context = Context{ device };
  363. Program::Sources sources;
  364. std::string kcode = this->getKernelCode(false);
  365. sources.push_back({ kcode.c_str(), kcode.length() });
  366. program = Program{ context, sources };
  367. if (program.build({ device }) != CL_SUCCESS) {
  368. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
  369. }
  370. queue = CommandQueue(context, device);
  371. }
  372. void ClGeneratorQuadDouble::generate(const mnd::MandelInfo& info, float* data)
  373. {
  374. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  375. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  376. mnd::QuadDouble x = mnd::convert<mnd::QuadDouble>(info.view.x);
  377. mnd::QuadDouble y = mnd::convert<mnd::QuadDouble>(info.view.y);
  378. mnd::QuadDouble psx = mnd::convert<mnd::QuadDouble>(info.view.width / info.bWidth);
  379. mnd::QuadDouble psy = mnd::convert<mnd::QuadDouble>(info.view.height / info.bHeight);
  380. Kernel iterate = Kernel(program, "iterate");
  381. iterate.setArg(0, buffer_A);
  382. iterate.setArg(1, int(info.bWidth));
  383. iterate.setArg(2, x.x[0]);
  384. iterate.setArg(3, x.x[1]);
  385. iterate.setArg(4, x.x[2]);
  386. iterate.setArg(5, x.x[3]);
  387. iterate.setArg(6, y.x[0]);
  388. iterate.setArg(7, y.x[1]);
  389. iterate.setArg(8, y.x[2]);
  390. iterate.setArg(9, y.x[3]);
  391. iterate.setArg(10, psx.x[0]);
  392. iterate.setArg(11, psx.x[1]);
  393. iterate.setArg(12, psx.x[2]);
  394. iterate.setArg(13, psx.x[3]);
  395. iterate.setArg(14, psy.x[0]);
  396. iterate.setArg(15, psy.x[1]);
  397. iterate.setArg(16, psy.x[2]);
  398. iterate.setArg(17, psy.x[3]);
  399. iterate.setArg(18, int(info.maxIter));
  400. iterate.setArg(19, int(info.smooth ? 1 : 0));
  401. cl_int result = queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
  402. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  403. }
  404. std::string ClGeneratorQuadDouble::getKernelCode(bool smooth) const
  405. {
  406. return getQuadDouble_cl();
  407. }
  408. ClGenerator128::ClGenerator128(cl::Device device) :
  409. ClGenerator{ device, mnd::getPrecision<Fixed128>() }
  410. {
  411. context = Context{ device };
  412. Program::Sources sources;
  413. std::string kcode = this->getKernelCode(false);
  414. sources.push_back({ kcode.c_str(), kcode.length() });
  415. program = Program{ context, sources };
  416. if (program.build({ device }) != CL_SUCCESS) {
  417. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
  418. }
  419. queue = CommandQueue(context, device);
  420. }
  421. void ClGenerator128::generate(const mnd::MandelInfo& info, float* data)
  422. {
  423. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  424. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  425. float pixelScaleX = float(info.view.width / info.bWidth);
  426. float pixelScaleY = float(info.view.height / info.bHeight);
  427. using ull = unsigned long long;
  428. ull x1 = ull(double(info.view.x) * 0x100000000ULL);
  429. ull x2 = 0;
  430. ull y1 = ull(double(info.view.y) * 0x100000000ULL);
  431. ull y2 = 0;
  432. ull w1 = ull(double(pixelScaleX) * 0x100000000ULL);
  433. ull w2 = 0;
  434. ull h1 = ull(double(pixelScaleY) * 0x100000000ULL);
  435. ull h2 = 0;
  436. Kernel iterate = Kernel(program, "iterate");
  437. iterate.setArg(0, buffer_A);
  438. iterate.setArg(1, int(info.bWidth));
  439. iterate.setArg(2, ull(x1));
  440. iterate.setArg(3, ull(x2));
  441. iterate.setArg(4, ull(y1));
  442. iterate.setArg(5, ull(y2));
  443. iterate.setArg(6, ull(w1));
  444. iterate.setArg(7, ull(w2));
  445. iterate.setArg(8, ull(h1));
  446. iterate.setArg(9, ull(h2));
  447. iterate.setArg(10, int(info.maxIter));
  448. iterate.setArg(11, int(info.smooth ? 1 : 0));
  449. queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
  450. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  451. }
  452. std::string ClGenerator128::getKernelCode(bool smooth) const
  453. {
  454. /*//fprintf(stderr, "starting file read\n");
  455. std::ifstream t("mandel128.cl");
  456. std::string str((std::istreambuf_iterator<char>(t)),
  457. std::istreambuf_iterator<char>());
  458. //fprintf(stderr, "%s\n", str);
  459. return str;*/
  460. return getFixed512_cl();
  461. }
  462. ClGenerator64::ClGenerator64(cl::Device device) :
  463. ClGenerator{ device, mnd::getPrecision<Fixed64>() }
  464. {
  465. context = Context{ device };
  466. Program::Sources sources;
  467. std::string kcode = this->getKernelCode(false);
  468. sources.push_back({ kcode.c_str(), kcode.length() });
  469. program = Program{ context, sources };
  470. if (program.build({ device }) != CL_SUCCESS) {
  471. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
  472. }
  473. queue = CommandQueue(context, device);
  474. }
  475. #include "CpuGenerators.h"
  476. void ClGenerator64::generate(const mnd::MandelInfo& info, float* data)
  477. {
  478. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  479. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  480. float pixelScaleX = float(info.view.width / info.bWidth);
  481. float pixelScaleY = float(info.view.height / info.bHeight);
  482. using ull = unsigned long long;
  483. ull x = ull(::round(double(info.view.x) * (1LL << 48)));
  484. ull y = ull(::round(double(info.view.y) * (1LL << 48)));
  485. ull w = ull(::round(double(pixelScaleX) * (1LL << 48)));
  486. ull h = ull(::round(double(pixelScaleY) * (1LL << 48)));
  487. //x = 0;
  488. //y = 0;
  489. Kernel iterate = Kernel(program, "iterate");
  490. iterate.setArg(0, buffer_A);
  491. iterate.setArg(1, int(info.bWidth));
  492. iterate.setArg(2, ull(x));
  493. iterate.setArg(3, ull(y));
  494. iterate.setArg(4, ull(w));
  495. iterate.setArg(5, ull(h));
  496. iterate.setArg(6, int(info.maxIter));
  497. iterate.setArg(7, int(info.smooth ? 1 : 0));
  498. queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
  499. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  500. //CpuGenerator<Fixed64> fx;
  501. //fx.generate(info, data);
  502. }
  503. std::string ClGenerator64::getKernelCode(bool smooth) const
  504. {
  505. /*//fprintf(stderr, "starting file read\n");
  506. std::ifstream t("mandel128.cl");
  507. std::string str((std::istreambuf_iterator<char>(t)),
  508. std::istreambuf_iterator<char>());
  509. //fprintf(stderr, "%s\n", str);
  510. return str;*/
  511. return getFixed64_cl();
  512. }
  513. #endif // WITH_OPENCL