ClGenerators.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581
  1. #include "ClGenerators.h"
  2. #include "Mandel.h"
  3. #include "OpenClInternal.h"
  4. #include "OpenClCode.h"
  5. #ifdef WITH_OPENCL
  6. #include <iostream>
  7. #include <iterator>
  8. #include <utility>
  9. using namespace cl;
  10. using mnd::ClGenerator;
  11. using mnd::ClGeneratorFloat;
  12. using mnd::ClGeneratorDoubleFloat;
  13. using mnd::ClGeneratorDouble;
  14. using mnd::ClGeneratorDoubleDouble;
  15. using mnd::ClGeneratorQuadDouble;
  16. using mnd::ClGenerator128;
  17. using mnd::ClGenerator64;
  18. Platform getPlatform() {
  19. /* Returns the first platform found. */
  20. std::vector<Platform> all_platforms;
  21. Platform::get(&all_platforms);
  22. if (all_platforms.size() == 0) {
  23. std::cout << "No platforms found. Check OpenCL installation!\n";
  24. exit(1);
  25. }
  26. for (auto& p : all_platforms) {
  27. std::string name = p.getInfo<CL_PLATFORM_NAME>();
  28. std::string profile = p.getInfo<CL_PLATFORM_PROFILE>();
  29. printf("Platform: %s, %s\n", name.c_str(), profile.c_str());
  30. }
  31. return all_platforms[0];
  32. }
  33. Device getDevice(Platform& platform, int i, bool display = false) {
  34. /* Returns the deviced specified by the index i on platform.
  35. * If display is true, then all of the platforms are listed.
  36. */
  37. std::vector<Device> all_devices;
  38. platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
  39. if (all_devices.size() == 0) {
  40. std::cout << "No devices found. Check OpenCL installation!\n";
  41. exit(1);
  42. }
  43. if (display) {
  44. for (::size_t j = 0; j < all_devices.size(); j++) {
  45. printf("Device %d: %s\n", int(j), all_devices[j].getInfo<CL_DEVICE_NAME>().c_str());
  46. printf("preferred float width: %d\n", all_devices[j].getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>());
  47. printf("vendor: %s\n", all_devices[j].getInfo<CL_DEVICE_VENDOR>().c_str());
  48. }
  49. }
  50. return all_devices[i];
  51. }
  52. ClGenerator::ClGenerator(mnd::MandelDevice& device, const std::string& source, mnd::Precision type) :
  53. MandelGenerator{ type },
  54. device{ device },
  55. context{ device.getClDevice().context }
  56. {
  57. const cl::Device& dev = device.getClDevice().device;
  58. Program::Sources sources;
  59. sources.push_back({ source.c_str(), source.length() });
  60. program = Program{ context, sources };
  61. if (program.build({ dev }) != CL_SUCCESS) {
  62. printf("code -> %s\n", source.c_str());
  63. throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(dev));
  64. }
  65. queue = CommandQueue(context, dev);
  66. /*Platform p = getPlatform();
  67. device = getDevice(p, 0, true);
  68. context = Context{ device };
  69. Program::Sources sources;
  70. std::string kcode = this->getKernelCode();
  71. sources.push_back({ kcode.c_str(), kcode.length() });
  72. program = Program{ context, sources };
  73. if (program.build({ device }) != CL_SUCCESS) {
  74. std::cout << "Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << std::endl;
  75. exit(1);
  76. }
  77. queue = CommandQueue(context, device);*/
  78. }
  79. ClGenerator::~ClGenerator(void)
  80. {
  81. queue.flush();
  82. queue.finish();
  83. }
  84. mnd::MandelDevice* ClGenerator::getDevice(void)
  85. {
  86. return &device;
  87. }
  88. ClGeneratorFloat::ClGeneratorFloat(mnd::MandelDevice& device, const std::string& code) :
  89. ClGenerator{ device, code, mnd::Precision::FLOAT }
  90. {
  91. const cl::Device& dev = device.getClDevice().device;
  92. useVec = dev.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>() >= 4;
  93. kernel = Kernel(program, useVec ? "iterate_vec4" : "iterate");
  94. }
  95. void ClGeneratorFloat::generate(const mnd::MandelInfo& info, float* data)
  96. {
  97. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  98. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  99. float pixelScaleX = float(info.view.width / info.bWidth);
  100. float pixelScaleY = float(info.view.height / info.bHeight);
  101. kernel.setArg(0, buffer_A);
  102. kernel.setArg(1, int(info.bWidth));
  103. kernel.setArg(2, float(info.view.x));
  104. kernel.setArg(3, float(info.view.y));
  105. kernel.setArg(4, float(pixelScaleX));
  106. kernel.setArg(5, float(pixelScaleY));
  107. kernel.setArg(6, int(info.maxIter));
  108. kernel.setArg(7, int(info.smooth ? 1 : 0));
  109. kernel.setArg(8, int(info.julia ? 1 : 0));
  110. kernel.setArg(9, float(info.juliaX));
  111. kernel.setArg(10, float(info.juliaY));
  112. if (useVec) {
  113. queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight / 4));
  114. } else {
  115. queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  116. }
  117. queue.flush();
  118. queue.finish();
  119. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  120. }
  121. std::string ClGeneratorFloat::getKernelCode(bool smooth) const
  122. {
  123. return mnd::getFloat_cl();
  124. }
  125. ClGeneratorDoubleFloat::ClGeneratorDoubleFloat(mnd::MandelDevice& device) :
  126. ClGenerator{ device, this->getKernelCode(false), mnd::Precision::DOUBLE_FLOAT }
  127. {
  128. kernel = Kernel(program, "iterate");
  129. }
  /**
   * Adds two floats and also reports the rounding error of that addition.
   *
   * @return { s, r } where s is the rounded sum a + b and r is the error term
   *         recovered from the individual round-offs of a and b.
   */
  std::pair<float, float> twoSum(float a, float b) {
      const float sum = a + b;
      const float bVirtual = sum - a;
      const float aVirtual = sum - bVirtual;
      const float aRoundoff = a - aVirtual;
      const float bRoundoff = b - bVirtual;
      return std::make_pair(sum, aRoundoff + bRoundoff);
  }
  /**
   * Splits `a` into a high and a low part using the multiplier 4097
   * (2^12 + 1), so that the two parts add back up to `a`.
   *
   * @return { high, low }
   */
  std::pair<float, float> split(float a) {
      const float scaled = 4097 * a;
      const float overshoot = scaled - a;
      const float high = scaled - overshoot;
      const float low = a - high;
      return std::make_pair(high, low);
  }
  143. std::pair<float, float> twoProd(float a, float b) {
  144. float x = a * b;
  145. auto aex = split(a);
  146. auto bex = split(b);
  147. float errx = x - (aex.first * bex.first);
  148. float erry = errx - (aex.second * bex.first);
  149. float errz = erry - (aex.first * bex.second);
  150. float y = (aex.second * bex.second) - errz;
  151. return { x, y };
  152. }
  153. std::pair<float, float> add(std::pair<float, float> a, std::pair<float, float> b) {
  154. float r = a.first + b.first;
  155. float s;
  156. if (fabs(a.first) >= fabs(b.first)) {
  157. s = (((a.first - r) + b.first) + b.second) + a.second;
  158. }
  159. else {
  160. s = (((b.first - r) + a.first) + a.second) + b.second;
  161. }
  162. return twoSum(r, s);
  163. }
  164. std::pair<float, float> mul(std::pair<float, float> a, std::pair<float, float> b) {
  165. auto t = twoProd(a.first, b.first);
  166. t.second += ((a.first * b.second) + (a.second * b.first));
  167. return twoSum(t.first, t.second);
  168. }
  169. std::pair<float, float> mulFloat(std::pair<float, float> a, float b) {
  170. std::pair<float, float> t = twoProd(a.first, b);
  171. float t3 = (a.second * b) + t.second;
  172. return twoSum(t.first, t.second);
  173. }
  174. void ClGeneratorDoubleFloat::generate(const mnd::MandelInfo& info, float* data)
  175. {
  176. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  177. auto splitDouble = [] (double x) {
  178. /*uint64_t xl = *((uint64_t*)&x);
  179. uint64_t mantissa = xl & 0x000FFFFFFFFFFFFFULL;
  180. uint64_t exp = (xl & 0x7FF0000000000000ULL) >> 53;
  181. bool sign = (xl & 0x1000000000000000ULL) != 0;
  182. uint32_t floathi = exp << 23;*/
  183. float hi = float(x);
  184. float lo = float(x - double(hi));
  185. if (abs(lo) >= 1.0e-10f) {
  186. //printf("hi: %.10ef, lo: %.10ef\n", hi, lo);
  187. //fflush(stdout);
  188. }
  189. return std::pair{ hi, lo };
  190. };
  191. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  192. double pixelScX = double(info.view.width / info.bWidth);
  193. double pixelScY = double(info.view.height / info.bHeight);
  194. auto[x1, x2] = splitDouble(double(info.view.x));
  195. auto[y1, y2] = splitDouble(double(info.view.y));
  196. auto[w1, w2] = splitDouble(pixelScX);
  197. auto[h1, h2] = splitDouble(pixelScY);
  198. /*
  199. for (int px = 0; px < info.bWidth; px++) {
  200. for (int py = 0; py < info.bHeight; py++) {
  201. std::pair<float, float> xl = { x1, x2 };
  202. std::pair<float, float> yt = { y1, y2 };
  203. std::pair<float, float> pixelScaleX = { w1, w2 };
  204. std::pair<float, float> pixelScaleY = { h1, h2 };
  205. std::pair<float, float> a = add(mulFloat(pixelScaleX, (float) px), xl); // pixelScaleX * px + xl
  206. std::pair<float, float> b = add(mulFloat(pixelScaleY, (float) py), yt); // pixelScaleY * py + yt
  207. std::pair<float, float> ca = a;
  208. std::pair<float, float> cb = b;
  209. int n = 0;
  210. while (n < info.maxIter - 1) {
  211. std::pair<float, float> aa = mul(a, a);
  212. std::pair<float, float> bb = mul(b, b);
  213. std::pair<float, float> ab = mul(a, b);
  214. if (aa.first + bb.first > 16) break;
  215. std::pair<float, float> minusbb = { -bb.first, -bb.second };
  216. a = add(add(aa, minusbb), ca);
  217. b = add(add(ab, ab), cb);
  218. n++;
  219. }
  220. // N + 1 - log (log |Z(N)|) / log 2
  221. if (n >= info.maxIter - 1)
  222. data[px + py * info.bWidth] = info.maxIter;
  223. else {
  224. if (info.smooth)
  225. data[px + py * info.bWidth] = ((float) n) + 1 - log(log(a.first * a.first + b.first * b.first ) / 2) / log(2.0f);
  226. else
  227. data[px + py * info.bWidth] = ((float)n);
  228. }
  229. }
  230. }
  231. return;
  232. */
  233. kernel.setArg(0, buffer_A);
  234. kernel.setArg(1, int(info.bWidth));
  235. kernel.setArg(2, x1);
  236. kernel.setArg(3, x2);
  237. kernel.setArg(4, y1);
  238. kernel.setArg(5, y2);
  239. kernel.setArg(6, w1);
  240. kernel.setArg(7, w2);
  241. kernel.setArg(8, h1);
  242. kernel.setArg(9, h2);
  243. kernel.setArg(10, int(info.maxIter));
  244. kernel.setArg(11, int(info.smooth ? 1 : 0));
  245. cl_int result = queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  246. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  247. }
  248. std::string ClGeneratorDoubleFloat::getKernelCode(bool smooth) const
  249. {
  250. return getDoubleFloat_cl();
  251. }
  252. ClGeneratorDouble::ClGeneratorDouble(mnd::MandelDevice& device, const std::string& source) :
  253. ClGenerator{ device, source, mnd::Precision::DOUBLE }
  254. {
  255. kernel = Kernel(program, "iterate");
  256. }
  257. void ClGeneratorDouble::generate(const mnd::MandelInfo& info, float* data)
  258. {
  259. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  260. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  261. double pixelScaleX = double(info.view.width / info.bWidth);
  262. double pixelScaleY = double(info.view.height / info.bHeight);
  263. kernel.setArg(0, buffer_A);
  264. kernel.setArg(1, int(info.bWidth));
  265. kernel.setArg(2, double(info.view.x));
  266. kernel.setArg(3, double(info.view.y));
  267. kernel.setArg(4, double(pixelScaleX));
  268. kernel.setArg(5, double(pixelScaleY));
  269. kernel.setArg(6, int(info.maxIter));
  270. kernel.setArg(7, int(info.smooth ? 1 : 0));
  271. kernel.setArg(8, int(info.julia ? 1 : 0));
  272. kernel.setArg(9, double(info.juliaX));
  273. kernel.setArg(10, double(info.juliaY));
  274. cl_int result = queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  275. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  276. }
  277. std::string ClGeneratorDouble::getKernelCode(bool smooth) const
  278. {
  279. return
  280. "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
  281. "__kernel void iterate(__global float* A, const int width, double xl, double yt, double pixelScaleX, double pixelScaleY, int max, int smooth) {\n"
  282. " int index = get_global_id(0);\n"
  283. " int x = index % width;"
  284. " int y = index / width;"
  285. " double a = x * pixelScaleX + xl;"
  286. " double b = y * pixelScaleY + yt;"
  287. " double ca = a;"
  288. " double cb = b;"
  289. ""
  290. " int n = 0;"
  291. " while (n < max - 1) {"
  292. " double aa = a * a;"
  293. " double bb = b * b;"
  294. " double ab = a * b;"
  295. " if (aa + bb > 16) break;"
  296. " a = aa - bb + ca;"
  297. " b = ab + ab + cb;"
  298. " n++;"
  299. " }\n"
  300. // N + 1 - log (log |Z(N)|) / log 2
  301. " if (n >= max - 1)\n"
  302. " A[index] = max;\n"
  303. " else {"
  304. " if (smooth != 0)\n"
  305. " A[index] = ((float)n) + 1 - log(log((float)(a * a + b * b)) / 2) / log(2.0f);\n"
  306. " else\n"
  307. " A[index] = ((float)n);\n"
  308. " }"
  309. "}";
  310. }
  311. ClGeneratorDoubleDouble::ClGeneratorDoubleDouble(mnd::MandelDevice& device) :
  312. ClGenerator{ device, getDoubleDouble_cl(), mnd::Precision::DOUBLE_DOUBLE }
  313. {
  314. kernel = Kernel(program, "iterate");
  315. }
  316. void ClGeneratorDoubleDouble::generate(const mnd::MandelInfo& info, float* data)
  317. {
  318. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  319. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  320. mnd::DoubleDouble x = mnd::convert<mnd::DoubleDouble>(info.view.x);
  321. mnd::DoubleDouble y = mnd::convert<mnd::DoubleDouble>(info.view.y);
  322. mnd::DoubleDouble psx = mnd::convert<mnd::DoubleDouble>(info.view.width / info.bWidth);
  323. mnd::DoubleDouble psy = mnd::convert<mnd::DoubleDouble>(info.view.height / info.bHeight);
  324. kernel.setArg(0, buffer_A);
  325. kernel.setArg(1, int(info.bWidth));
  326. kernel.setArg(2, x.x[0]);
  327. kernel.setArg(3, x.x[1]);
  328. kernel.setArg(4, y.x[0]);
  329. kernel.setArg(5, y.x[1]);
  330. kernel.setArg(6, psx.x[0]);
  331. kernel.setArg(7, psx.x[1]);
  332. kernel.setArg(8, psy.x[0]);
  333. kernel.setArg(9, psy.x[1]);
  334. kernel.setArg(10, int(info.maxIter));
  335. kernel.setArg(11, int(info.smooth ? 1 : 0));
  336. cl_int result = queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  337. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  338. }
  339. std::string ClGeneratorDoubleDouble::getKernelCode(bool smooth) const
  340. {
  341. return getDoubleDouble_cl();
  342. }
  343. ClGeneratorQuadDouble::ClGeneratorQuadDouble(mnd::MandelDevice& device) :
  344. ClGenerator{ device, getQuadDouble_cl(), mnd::Precision::QUAD_DOUBLE }
  345. {
  346. kernel = Kernel(program, "iterate");
  347. }
  348. void ClGeneratorQuadDouble::generate(const mnd::MandelInfo& info, float* data)
  349. {
  350. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  351. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  352. mnd::QuadDouble x = mnd::convert<mnd::QuadDouble>(info.view.x);
  353. mnd::QuadDouble y = mnd::convert<mnd::QuadDouble>(info.view.y);
  354. mnd::QuadDouble psx = mnd::convert<mnd::QuadDouble>(info.view.width / info.bWidth);
  355. mnd::QuadDouble psy = mnd::convert<mnd::QuadDouble>(info.view.height / info.bHeight);
  356. kernel.setArg(0, buffer_A);
  357. kernel.setArg(1, int(info.bWidth));
  358. kernel.setArg(2, x.x[0]);
  359. kernel.setArg(3, x.x[1]);
  360. kernel.setArg(4, x.x[2]);
  361. kernel.setArg(5, x.x[3]);
  362. kernel.setArg(6, y.x[0]);
  363. kernel.setArg(7, y.x[1]);
  364. kernel.setArg(8, y.x[2]);
  365. kernel.setArg(9, y.x[3]);
  366. kernel.setArg(10, psx.x[0]);
  367. kernel.setArg(11, psx.x[1]);
  368. kernel.setArg(12, psx.x[2]);
  369. kernel.setArg(13, psx.x[3]);
  370. kernel.setArg(14, psy.x[0]);
  371. kernel.setArg(15, psy.x[1]);
  372. kernel.setArg(16, psy.x[2]);
  373. kernel.setArg(17, psy.x[3]);
  374. kernel.setArg(18, int(info.maxIter));
  375. kernel.setArg(19, int(info.smooth ? 1 : 0));
  376. cl_int result = queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  377. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  378. }
  379. std::string ClGeneratorQuadDouble::getKernelCode(bool smooth) const
  380. {
  381. return getQuadDouble_cl();
  382. }
  383. ClGenerator128::ClGenerator128(mnd::MandelDevice& device) :
  384. ClGenerator{ device, getFixed512_cl(), mnd::Precision::FIXED128 }
  385. {
  386. kernel = Kernel(program, "iterate");
  387. }
  388. void ClGenerator128::generate(const mnd::MandelInfo& info, float* data)
  389. {
  390. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  391. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  392. float pixelScaleX = float(info.view.width / info.bWidth);
  393. float pixelScaleY = float(info.view.height / info.bHeight);
  394. using ull = unsigned long long;
  395. ull x1 = ull(double(info.view.x) * 0x100000000ULL);
  396. ull x2 = 0;
  397. ull y1 = ull(double(info.view.y) * 0x100000000ULL);
  398. ull y2 = 0;
  399. ull w1 = ull(double(pixelScaleX) * 0x100000000ULL);
  400. ull w2 = 0;
  401. ull h1 = ull(double(pixelScaleY) * 0x100000000ULL);
  402. ull h2 = 0;
  403. kernel.setArg(0, buffer_A);
  404. kernel.setArg(1, int(info.bWidth));
  405. kernel.setArg(2, ull(x1));
  406. kernel.setArg(3, ull(x2));
  407. kernel.setArg(4, ull(y1));
  408. kernel.setArg(5, ull(y2));
  409. kernel.setArg(6, ull(w1));
  410. kernel.setArg(7, ull(w2));
  411. kernel.setArg(8, ull(h1));
  412. kernel.setArg(9, ull(h2));
  413. kernel.setArg(10, int(info.maxIter));
  414. kernel.setArg(11, int(info.smooth ? 1 : 0));
  415. queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  416. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  417. }
  418. std::string ClGenerator128::getKernelCode(bool smooth) const
  419. {
  420. /*//fprintf(stderr, "starting file read\n");
  421. std::ifstream t("mandel128.cl");
  422. std::string str((std::istreambuf_iterator<char>(t)),
  423. std::istreambuf_iterator<char>());
  424. //fprintf(stderr, "%s\n", str);
  425. return str;*/
  426. return getFixed512_cl();
  427. }
  428. ClGenerator64::ClGenerator64(mnd::MandelDevice& device) :
  429. ClGenerator{ device, getFixed64_cl(), mnd::Precision::FIXED64 }
  430. {
  431. kernel = Kernel(program, "iterate");
  432. }
  433. #include "CpuGenerators.h"
  434. void ClGenerator64::generate(const mnd::MandelInfo& info, float* data)
  435. {
  436. ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
  437. Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
  438. float pixelScaleX = float(info.view.width / info.bWidth);
  439. float pixelScaleY = float(info.view.height / info.bHeight);
  440. using ull = uint64_t;
  441. ull x = ull(::round(double(info.view.x) * (1LL << 48)));
  442. ull y = ull(::round(double(info.view.y) * (1LL << 48)));
  443. ull w = ull(::round(double(pixelScaleX) * (1LL << 48)));
  444. ull h = ull(::round(double(pixelScaleY) * (1LL << 48)));
  445. //x = 0;
  446. //y = 0;
  447. kernel.setArg(0, buffer_A);
  448. kernel.setArg(1, int(info.bWidth));
  449. kernel.setArg(2, ull(x));
  450. kernel.setArg(3, ull(y));
  451. kernel.setArg(4, ull(w));
  452. kernel.setArg(5, ull(h));
  453. kernel.setArg(6, int(info.maxIter));
  454. kernel.setArg(7, int(info.smooth ? 1 : 0));
  455. queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
  456. queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
  457. //CpuGenerator<Fixed64> fx;
  458. //fx.generate(info, data);
  459. }
  460. std::string ClGenerator64::getKernelCode(bool smooth) const
  461. {
  462. /*//fprintf(stderr, "starting file read\n");
  463. std::ifstream t("mandel128.cl");
  464. std::string str((std::istreambuf_iterator<char>(t)),
  465. std::istreambuf_iterator<char>());
  466. //fprintf(stderr, "%s\n", str);
  467. return str;*/
  468. return getFixed64_cl();
  469. }
  470. #endif // WITH_OPENCL