// Mandel.cpp (14 KB)
  1. #include "Mandel.h"
  2. #include "Fixed.h"
  3. #include "CpuGenerators.h"
  4. #include "ClGenerators.h"
  5. #include "OpenClInternal.h"
  6. #include "OpenClCode.h"
  7. #ifdef WITH_ASMJIT
  8. #include <asmjit/asmjit.h>
  9. #endif // WITH_ASMJIT
  10. #include <map>
  11. #include <array>
  12. using mnd::MandelDevice;
  13. using mnd::MandelContext;
  14. using mnd::MandelGenerator;
  15. using mnd::AdaptiveGenerator;
  16. MandelContext mnd::initializeContext(void)
  17. {
  18. return MandelContext();
  19. }
  20. MandelDevice::MandelDevice(mnd::ClDeviceWrapper device, const std::string& platformName) :
  21. clDevice{ std::make_unique<ClDeviceWrapper>(std::move(device)) },
  22. platformName{ platformName }
  23. {
  24. #ifdef WITH_OPENCL
  25. extensions = clDevice->device.getInfo<CL_DEVICE_EXTENSIONS>();
  26. name = clDevice->device.getInfo<CL_DEVICE_NAME>();
  27. vendor = clDevice->device.getInfo<CL_DEVICE_VENDOR>();
  28. #endif // WITH_OPENCL
  29. }
  30. mnd::MandelGenerator* MandelDevice::getGenerator(mnd::Precision type) const
  31. {
  32. auto it = mandelGenerators.find(type);
  33. if (it != mandelGenerators.end())
  34. return it->second.get();
  35. else
  36. return nullptr;
  37. }
  38. std::vector<mnd::Precision> MandelDevice::getSupportedTypes(void) const
  39. {
  40. std::vector<Precision> types;
  41. for (auto& [type, gen] : mandelGenerators) {
  42. types.push_back(type);
  43. }
  44. return types;
  45. }
  46. bool MandelDevice::supportsDouble(void) const
  47. {
  48. return extensions.find("cl_khr_fp64") != std::string::npos;
  49. }
  50. MandelContext::MandelContext(void)
  51. #ifdef WITH_ASMJIT
  52. : jitRuntime{ std::make_unique<asmjit::JitRuntime>() }
  53. #endif // WITH_ASMJIT
  54. {
  55. #if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
  56. # if defined(WITH_AVX512)
  57. if (cpuInfo.hasAvx512()) {
  58. auto fl = std::make_unique<CpuGenerator<float, mnd::X86_AVX_512, true>>();
  59. //auto db = std::make_unique<CpuGenerator<double, mnd::X86_AVX_512, true>>();
  60. cpuGenerators.insert({ std::pair{ Precision::FLOAT, CpuExtension::X86_AVX_512 }, std::move(fl) });
  61. //cpuGenerators.insert({ { Precision::DOUBLE, CpuExtension::X86_AVX_512 }, std::move(db) });
  62. }
  63. # endif
  64. if (cpuInfo.hasAvx()) {
  65. auto fl = std::make_unique<CpuGenerator<float, mnd::X86_AVX, true>>();
  66. auto db = std::make_unique<CpuGenerator<double, mnd::X86_AVX, true>>();
  67. auto ddb = std::make_unique<CpuGenerator<DoubleDouble, mnd::X86_AVX, true>>();
  68. auto tdb = std::make_unique<CpuGenerator<TripleDouble, mnd::X86_AVX, true>>();
  69. cpuGenerators.insert({ std::pair{ Precision::FLOAT, CpuExtension::X86_AVX }, std::move(fl) });
  70. cpuGenerators.insert({ std::pair{ Precision::DOUBLE, CpuExtension::X86_AVX }, std::move(db) });
  71. cpuGenerators.insert({ std::pair{ Precision::DOUBLE_DOUBLE, CpuExtension::X86_AVX }, std::move(ddb) });
  72. cpuGenerators.insert({ std::pair{ Precision::TRIPLE_DOUBLE, CpuExtension::X86_AVX }, std::move(tdb) });
  73. }
  74. if (cpuInfo.hasAvx2() && cpuInfo.hasFma()) {
  75. auto favxfma = std::make_unique<CpuGenerator<float, mnd::X86_AVX_FMA, true>>();
  76. auto davxfma = std::make_unique<CpuGenerator<double, mnd::X86_AVX_FMA, true>>();
  77. auto ddavxfma = std::make_unique<CpuGenerator<DoubleDouble, mnd::X86_AVX_FMA, true>>();
  78. auto qdavxfma = std::make_unique<CpuGenerator<QuadDouble, mnd::X86_AVX_FMA, true>>();
  79. cpuGenerators.insert({ std::pair{ Precision::FLOAT, CpuExtension::X86_AVX_FMA }, std::move(favxfma) });
  80. cpuGenerators.insert({ std::pair{ Precision::DOUBLE, CpuExtension::X86_AVX_FMA }, std::move(davxfma) });
  81. cpuGenerators.insert({ std::pair{ Precision::DOUBLE_DOUBLE, CpuExtension::X86_AVX_FMA }, std::move(ddavxfma) });
  82. cpuGenerators.insert({ std::pair{ Precision::QUAD_DOUBLE, CpuExtension::X86_AVX_FMA }, std::move(qdavxfma) });
  83. }
  84. if (cpuInfo.hasSse2()) {
  85. auto fl = std::make_unique<CpuGenerator<float, mnd::X86_SSE2, true>>();
  86. auto db = std::make_unique<CpuGenerator<double, mnd::X86_SSE2, true>>();
  87. cpuGenerators.insert({ std::pair{ Precision::FLOAT, CpuExtension::X86_SSE2 }, std::move(fl) });
  88. cpuGenerators.insert({ std::pair{ Precision::DOUBLE, CpuExtension::X86_SSE2 }, std::move(db) });
  89. }
  90. #elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
  91. if (cpuInfo.hasNeon()) {
  92. auto fl = std::make_unique<CpuGenerator<float, mnd::ARM_NEON, true>>();
  93. auto db = std::make_unique<CpuGenerator<double, mnd::ARM_NEON, true>>();
  94. auto ddb = std::make_unique<CpuGenerator<mnd::DoubleDouble, mnd::ARM_NEON, true>>();
  95. cpuGenerators.insert({ { Precision::FLOAT, CpuExtension::ARM_NEON }, std::move(fl) });
  96. cpuGenerators.insert({ { Precision::DOUBLE, CpuExtension::ARM_NEON }, std::move(db) });
  97. cpuGenerators.insert({ { Precision::DOUBLE_DOUBLE, CpuExtension::ARM_NEON }, std::move(ddb) });
  98. }
  99. #endif
  100. {
  101. auto fl = std::make_unique<CpuGenerator<float, mnd::NONE, true>>();
  102. auto db = std::make_unique<CpuGenerator<double, mnd::NONE, true>>();
  103. cpuGenerators.insert({ std::pair{ Precision::FLOAT, CpuExtension::NONE }, std::move(fl) });
  104. cpuGenerators.insert({ std::pair{ Precision::DOUBLE, CpuExtension::NONE }, std::move(db) });
  105. auto fx64 = std::make_unique<CpuGenerator<Fixed64, mnd::NONE, true>>();
  106. auto fx128 = std::make_unique<CpuGenerator<Fixed128, mnd::NONE, true>>();
  107. cpuGenerators.insert({ std::pair{ Precision::FIXED64, CpuExtension::NONE }, std::move(fx64) });
  108. cpuGenerators.insert({ std::pair{ Precision::FIXED128, CpuExtension::NONE }, std::move(fx128) });
  109. }
  110. #ifdef WITH_BOOST
  111. auto quad = std::make_unique<CpuGenerator<Float128, mnd::NONE, true>>();
  112. auto oct = std::make_unique<CpuGenerator<Float256, mnd::NONE, true>>();
  113. auto f512 = std::make_unique<CpuGenerator<Float512, mnd::NONE, true>>();
  114. cpuGenerators.insert({ std::pair{ Precision::FLOAT128, CpuExtension::NONE }, std::move(quad) });
  115. cpuGenerators.insert({ std::pair{ Precision::FLOAT256, CpuExtension::NONE }, std::move(oct) });
  116. cpuGenerators.insert({ std::pair{ Precision::FLOAT512, CpuExtension::NONE }, std::move(f512) });
  117. #endif // WITH_BOOST
  118. auto dd = std::make_unique<CpuGenerator<DoubleDouble, mnd::NONE, true>>();
  119. auto td = std::make_unique<CpuGenerator<TripleDouble, mnd::NONE, true>>();
  120. auto qd = std::make_unique<CpuGenerator<QuadDouble, mnd::NONE, true>>();
  121. auto hd = std::make_unique<CpuGenerator<HexDouble, mnd::NONE, true>>();
  122. auto od = std::make_unique<CpuGenerator<OctaDouble, mnd::NONE, true>>();
  123. cpuGenerators.insert({ std::pair{ Precision::DOUBLE_DOUBLE, CpuExtension::NONE }, std::move(dd) });
  124. cpuGenerators.insert({ std::pair{ Precision::TRIPLE_DOUBLE, CpuExtension::NONE }, std::move(td) });
  125. cpuGenerators.insert({ std::pair{ Precision::QUAD_DOUBLE, CpuExtension::NONE }, std::move(qd) });
  126. cpuGenerators.insert({ std::pair{ Precision::HEX_DOUBLE, CpuExtension::NONE }, std::move(hd) });
  127. cpuGenerators.insert({ std::pair{ Precision::OCTA_DOUBLE, CpuExtension::NONE }, std::move(od) });
  128. auto fix512 = std::make_unique<CpuGenerator<Fixed512, mnd::NONE, true>>();
  129. cpuGenerators.insert({ std::pair{ Precision::FIXED512, CpuExtension::NONE }, std::move(fix512) });
  130. devices = createDevices();
  131. adaptiveGenerator = createAdaptiveGenerator();
  132. }
  133. std::unique_ptr<mnd::AdaptiveGenerator> MandelContext::createAdaptiveGenerator(void)
  134. {
  135. std::vector<Precision> types {
  136. Precision::FLOAT,
  137. Precision::DOUBLE_FLOAT,
  138. Precision::DOUBLE,
  139. Precision::DOUBLE_DOUBLE,
  140. Precision::TRIPLE_DOUBLE,
  141. Precision::QUAD_DOUBLE,
  142. Precision::HEX_DOUBLE,
  143. Precision::OCTA_DOUBLE,
  144. Precision::FLOAT512
  145. };
  146. auto ag = std::make_unique<AdaptiveGenerator>();
  147. for (auto type : types) {
  148. MandelGenerator* chosenGen = nullptr;
  149. auto generators = getCpuGenerators(type);
  150. CpuExtension ex = CpuExtension::NONE;
  151. for (auto* generator : generators) {
  152. if (generator->getExtension() >= ex) {
  153. ex = generator->getExtension();
  154. chosenGen = generator;
  155. }
  156. }
  157. for (auto& device : getDevices()) {
  158. auto* clGen = device->getGenerator(type);
  159. if (clGen != nullptr) {
  160. chosenGen = clGen;
  161. }
  162. }
  163. if (chosenGen != nullptr) {
  164. ag->addGenerator(mnd::getPrecision(type), *chosenGen);
  165. }
  166. }
  167. return ag;
  168. }
  169. std::vector<std::unique_ptr<MandelDevice>> MandelContext::createDevices(void)
  170. {
  171. std::vector<std::unique_ptr<MandelDevice>> mandelDevices;
  172. #ifdef WITH_OPENCL
  173. std::vector<cl::Platform> platforms;
  174. cl::Platform::get(&platforms);
  175. //platforms.erase(platforms.begin() + 1);
  176. for (auto& platform : platforms) {
  177. std::string platformName = platform.getInfo<CL_PLATFORM_NAME>();
  178. std::string profile = platform.getInfo<CL_PLATFORM_PROFILE>();
  179. //printf("using opencl platform: %s\n", platformName.c_str());
  180. std::string ext = platform.getInfo<CL_PLATFORM_EXTENSIONS>();
  181. //printf("Platform extensions: %s\n", ext.c_str());
  182. //printf("Platform: %s, %s\n", platformName.c_str(), profile.c_str());
  183. std::vector<cl::Device> devices;
  184. platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
  185. auto onError = [] (const char* errinfo,
  186. const void* private_info,
  187. size_t cb,
  188. void* user_data) {
  189. printf("opencl error: %s\n", errinfo);
  190. };
  191. cl::Context context{ devices, nullptr, onError };
  192. for (auto& device : devices) {
  193. auto mandelDevice = std::make_unique<mnd::MandelDevice>(
  194. ClDeviceWrapper{ device, context }, platformName);
  195. MandelDevice& md = *mandelDevice;
  196. auto supportsDouble = md.supportsDouble();
  197. //#pragma omp parallel
  198. #pragma omp sections
  199. {
  200. #pragma omp section
  201. try {
  202. md.mandelGenerators.insert({ Precision::FLOAT, std::make_unique<ClGeneratorFloat>(md) });
  203. //md.mandelGenerators.insert({ Precision::FIXED64, std::make_unique<ClGenerator64>(md) });
  204. //md.mandelGenerators.insert({ GeneratorType::FIXED128, std::make_unique<ClGenerator128>(md) });
  205. }
  206. catch (const std::string& err) {
  207. printf("err: %s", err.c_str());
  208. }
  209. #pragma omp section
  210. try {
  211. md.mandelGenerators.insert({ Precision::DOUBLE_FLOAT, std::make_unique<ClGeneratorDoubleFloat>(md) });
  212. }
  213. catch (const std::string& err) {
  214. printf("err: %s", err.c_str());
  215. }
  216. #pragma omp section
  217. try {
  218. md.mandelGenerators.insert({ Precision::TRIPLE_FLOAT, std::make_unique<ClGeneratorTripleFloat>(md) });
  219. }
  220. catch (const std::string& err) {
  221. printf("err: %s", err.c_str());
  222. }
  223. #pragma omp section
  224. if (supportsDouble) {
  225. try {
  226. md.mandelGenerators.insert({ Precision::DOUBLE, std::make_unique<ClGeneratorDouble>(md) });
  227. md.mandelGenerators.insert({ Precision::DOUBLE_DOUBLE, std::make_unique<ClGeneratorDoubleDouble>(md) });
  228. }
  229. catch (const std::string& err) {
  230. printf("err: %s", err.c_str());
  231. fflush(stdout);
  232. }
  233. }
  234. #pragma omp section
  235. if (supportsDouble) {
  236. try {
  237. md.mandelGenerators.insert({ Precision::TRIPLE_DOUBLE, std::make_unique<ClGeneratorTripleDouble>(md) });
  238. }
  239. catch (const std::string& err) {
  240. printf("err: %s", err.c_str());
  241. fflush(stdout);
  242. }
  243. }
  244. #pragma omp section
  245. if (supportsDouble) {
  246. try {
  247. md.mandelGenerators.insert({ Precision::QUAD_DOUBLE, std::make_unique<ClGeneratorQuadDouble>(md) });
  248. }
  249. catch (const std::string& err) {
  250. printf("err: %s", err.c_str());
  251. fflush(stdout);
  252. }
  253. }
  254. #pragma omp section
  255. if (supportsDouble) {
  256. try {
  257. md.mandelGenerators.insert({ Precision::HEX_DOUBLE, std::make_unique<ClGeneratorHexDouble>(md) });
  258. }
  259. catch (const std::string& err) {
  260. printf("err: %s", err.c_str());
  261. fflush(stdout);
  262. }
  263. }
  264. #pragma omp section
  265. if (supportsDouble) {
  266. try {
  267. md.mandelGenerators.insert({ Precision::OCTA_DOUBLE, std::make_unique<ClGeneratorOctaDouble>(md) });
  268. }
  269. catch (const std::string& err) {
  270. printf("err: %s", err.c_str());
  271. fflush(stdout);
  272. }
  273. }
  274. }
  275. mandelDevices.push_back(std::move(mandelDevice));
  276. }
  277. }
  278. #endif // WITH_OPENCL
  279. return mandelDevices;
  280. }
  281. MandelContext::~MandelContext(void)
  282. {
  283. }
  284. AdaptiveGenerator& MandelContext::getDefaultGenerator(void)
  285. {
  286. return *adaptiveGenerator;
  287. }
  288. std::vector<std::unique_ptr<mnd::MandelDevice>>& MandelContext::getDevices(void)
  289. {
  290. return devices;
  291. }
  292. asmjit::JitRuntime& MandelContext::getJitRuntime(void)
  293. {
  294. return *jitRuntime;
  295. }
  296. MandelGenerator* MandelContext::getCpuGenerator(mnd::Precision type, mnd::CpuExtension ex)
  297. {
  298. auto it = cpuGenerators.find({ type, ex });
  299. if (it != cpuGenerators.end())
  300. return it->second.get();
  301. else
  302. return nullptr;
  303. }
  304. std::vector<MandelContext::GeneratorType> MandelContext::getSupportedTypes(void) const
  305. {
  306. std::vector<GeneratorType> types;
  307. for (auto& [type, gen] : cpuGenerators) {
  308. types.push_back(type);
  309. }
  310. return types;
  311. }
  312. std::vector<MandelGenerator*> MandelContext::getCpuGenerators(mnd::Precision prec) const
  313. {
  314. std::vector<MandelGenerator*> generators;
  315. for (const auto& [type, gen] : cpuGenerators) {
  316. if (type.first == prec)
  317. generators.push_back(gen.get());
  318. }
  319. return generators;
  320. }