Mandel.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. #include "Mandel.h"
  2. #include "Fixed.h"
  3. #include "CpuGenerators.h"
  4. #include "ClGenerators.h"
  5. #include "OpenClInternal.h"
  6. #include "OpenClCode.h"
  7. #ifdef WITH_ASMJIT
  8. #include <asmjit/asmjit.h>
  9. #endif // WITH_ASMJIT
  10. #include <map>
  11. #include <array>
  12. using mnd::MandelDevice;
  13. using mnd::MandelContext;
  14. using mnd::MandelGenerator;
  15. using mnd::AdaptiveGenerator;
  16. MandelContext mnd::initializeContext(void)
  17. {
  18. return MandelContext();
  19. }
  20. MandelDevice::MandelDevice(mnd::ClDeviceWrapper device, const std::string& platformName) :
  21. clDevice{ std::make_unique<ClDeviceWrapper>(std::move(device)) },
  22. platformName{ platformName }
  23. {
  24. extensions = clDevice->device.getInfo<CL_DEVICE_EXTENSIONS>();
  25. name = clDevice->device.getInfo<CL_DEVICE_NAME>();
  26. vendor = clDevice->device.getInfo<CL_DEVICE_VENDOR>();
  27. }
  28. mnd::MandelGenerator* MandelDevice::getGenerator(mnd::Precision type) const
  29. {
  30. auto it = mandelGenerators.find(type);
  31. if (it != mandelGenerators.end())
  32. return it->second.get();
  33. else
  34. return nullptr;
  35. }
  36. std::vector<mnd::Precision> MandelDevice::getSupportedTypes(void) const
  37. {
  38. std::vector<Precision> types;
  39. for (auto& [type, gen] : mandelGenerators) {
  40. types.push_back(type);
  41. }
  42. return types;
  43. }
  44. bool MandelDevice::supportsDouble(void) const
  45. {
  46. return extensions.find("cl_khr_fp64") != std::string::npos;
  47. }
  48. MandelContext::MandelContext(void)
  49. #ifdef WITH_ASMJIT
  50. : jitRuntime{ std::make_unique<asmjit::JitRuntime>() }
  51. #endif // WITH_ASMJIT
  52. {
  53. #if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
  54. # if defined(WITH_AVX512)
  55. if (cpuInfo.hasAvx512()) {
  56. auto fl = std::make_unique<CpuGenerator<float, mnd::X86_AVX_512, true>>();
  57. //auto db = std::make_unique<CpuGenerator<double, mnd::X86_AVX_512, true>>();
  58. cpuGenerators.insert({ std::pair{ Precision::FLOAT, CpuExtension::X86_AVX_512 }, std::move(fl) });
  59. //cpuGenerators.insert({ { Precision::DOUBLE, CpuExtension::X86_AVX_512 }, std::move(db) });
  60. }
  61. # endif
  62. if (cpuInfo.hasAvx()) {
  63. auto fl = std::make_unique<CpuGenerator<float, mnd::X86_AVX, true>>();
  64. auto db = std::make_unique<CpuGenerator<double, mnd::X86_AVX, true>>();
  65. auto ddb = std::make_unique<CpuGenerator<DoubleDouble, mnd::X86_AVX, true>>();
  66. auto tdb = std::make_unique<CpuGenerator<TripleDouble, mnd::X86_AVX, true>>();
  67. cpuGenerators.insert({ std::pair{ Precision::FLOAT, CpuExtension::X86_AVX }, std::move(fl) });
  68. cpuGenerators.insert({ std::pair{ Precision::DOUBLE, CpuExtension::X86_AVX }, std::move(db) });
  69. cpuGenerators.insert({ std::pair{ Precision::DOUBLE_DOUBLE, CpuExtension::X86_AVX }, std::move(ddb) });
  70. cpuGenerators.insert({ std::pair{ Precision::TRIPLE_DOUBLE, CpuExtension::X86_AVX }, std::move(tdb) });
  71. }
  72. if (cpuInfo.hasAvx2() && cpuInfo.hasFma()) {
  73. auto favxfma = std::make_unique<CpuGenerator<float, mnd::X86_AVX_FMA, true>>();
  74. auto davxfma = std::make_unique<CpuGenerator<double, mnd::X86_AVX_FMA, true>>();
  75. auto ddavxfma = std::make_unique<CpuGenerator<DoubleDouble, mnd::X86_AVX_FMA, true>>();
  76. auto qdavxfma = std::make_unique<CpuGenerator<QuadDouble, mnd::X86_AVX_FMA, true>>();
  77. cpuGenerators.insert({ std::pair{ Precision::FLOAT, CpuExtension::X86_AVX_FMA }, std::move(favxfma) });
  78. cpuGenerators.insert({ std::pair{ Precision::DOUBLE, CpuExtension::X86_AVX_FMA }, std::move(davxfma) });
  79. cpuGenerators.insert({ std::pair{ Precision::DOUBLE_DOUBLE, CpuExtension::X86_AVX_FMA }, std::move(ddavxfma) });
  80. cpuGenerators.insert({ std::pair{ Precision::QUAD_DOUBLE, CpuExtension::X86_AVX_FMA }, std::move(qdavxfma) });
  81. }
  82. if (cpuInfo.hasSse2()) {
  83. auto fl = std::make_unique<CpuGenerator<float, mnd::X86_SSE2, true>>();
  84. auto db = std::make_unique<CpuGenerator<double, mnd::X86_SSE2, true>>();
  85. cpuGenerators.insert({ std::pair{ Precision::FLOAT, CpuExtension::X86_SSE2 }, std::move(fl) });
  86. cpuGenerators.insert({ std::pair{ Precision::DOUBLE, CpuExtension::X86_SSE2 }, std::move(db) });
  87. }
  88. #elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
  89. if (cpuInfo.hasNeon()) {
  90. auto fl = std::make_unique<CpuGenerator<float, mnd::ARM_NEON, true>>();
  91. auto db = std::make_unique<CpuGenerator<double, mnd::ARM_NEON, true>>();
  92. auto ddb = std::make_unique<CpuGenerator<mnd::DoubleDouble, mnd::ARM_NEON, true>>();
  93. cpuGenerators.insert({ { Precision::FLOAT, CpuExtension::ARM_NEON }, std::move(fl) });
  94. cpuGenerators.insert({ { Precision::DOUBLE, CpuExtension::ARM_NEON }, std::move(db) });
  95. cpuGenerators.insert({ { Precision::DOUBLE_DOUBLE, CpuExtension::ARM_NEON }, std::move(ddb) });
  96. }
  97. #endif
  98. {
  99. auto fl = std::make_unique<CpuGenerator<float, mnd::NONE, true>>();
  100. auto db = std::make_unique<CpuGenerator<double, mnd::NONE, true>>();
  101. cpuGenerators.insert({ std::pair{ Precision::FLOAT, CpuExtension::NONE }, std::move(fl) });
  102. cpuGenerators.insert({ std::pair{ Precision::DOUBLE, CpuExtension::NONE }, std::move(db) });
  103. auto fx64 = std::make_unique<CpuGenerator<Fixed64, mnd::NONE, true>>();
  104. auto fx128 = std::make_unique<CpuGenerator<Fixed128, mnd::NONE, true>>();
  105. cpuGenerators.insert({ std::pair{ Precision::FIXED64, CpuExtension::NONE }, std::move(fx64) });
  106. cpuGenerators.insert({ std::pair{ Precision::FIXED128, CpuExtension::NONE }, std::move(fx128) });
  107. }
  108. #ifdef WITH_BOOST
  109. auto quad = std::make_unique<CpuGenerator<Float128, mnd::NONE, true>>();
  110. auto oct = std::make_unique<CpuGenerator<Float256, mnd::NONE, true>>();
  111. cpuGenerators.insert({ std::pair{ Precision::FLOAT128, CpuExtension::NONE }, std::move(quad) });
  112. cpuGenerators.insert({ std::pair{ Precision::FLOAT256, CpuExtension::NONE }, std::move(oct) });
  113. #endif // WITH_BOOST
  114. auto dd = std::make_unique<CpuGenerator<DoubleDouble, mnd::NONE, true>>();
  115. auto td = std::make_unique<CpuGenerator<TripleDouble, mnd::NONE, true>>();
  116. auto qd = std::make_unique<CpuGenerator<QuadDouble, mnd::NONE, true>>();
  117. auto hd = std::make_unique<CpuGenerator<HexDouble, mnd::NONE, true>>();
  118. cpuGenerators.insert({ std::pair{ Precision::DOUBLE_DOUBLE, CpuExtension::NONE }, std::move(dd) });
  119. cpuGenerators.insert({ std::pair{ Precision::TRIPLE_DOUBLE, CpuExtension::NONE }, std::move(td) });
  120. cpuGenerators.insert({ std::pair{ Precision::QUAD_DOUBLE, CpuExtension::NONE }, std::move(qd) });
  121. cpuGenerators.insert({ std::pair{ Precision::HEX_DOUBLE, CpuExtension::NONE }, std::move(hd) });
  122. auto fix512 = std::make_unique<CpuGenerator<Fixed512, mnd::NONE, true>>();
  123. cpuGenerators.insert({ std::pair{ Precision::FIXED512, CpuExtension::NONE }, std::move(fix512) });
  124. devices = createDevices();
  125. adaptiveGenerator = createAdaptiveGenerator();
  126. }
  127. std::unique_ptr<mnd::AdaptiveGenerator> MandelContext::createAdaptiveGenerator(void)
  128. {
  129. std::vector<Precision> types {
  130. Precision::FLOAT,
  131. Precision::DOUBLE_FLOAT,
  132. Precision::DOUBLE,
  133. Precision::DOUBLE_DOUBLE,
  134. Precision::TRIPLE_DOUBLE,
  135. Precision::QUAD_DOUBLE,
  136. Precision::HEX_DOUBLE,
  137. Precision::FIXED512
  138. };
  139. auto ag = std::make_unique<AdaptiveGenerator>();
  140. for (auto type : types) {
  141. MandelGenerator* chosenGen = nullptr;
  142. auto generators = getCpuGenerators(type);
  143. CpuExtension ex = CpuExtension::NONE;
  144. for (auto* generator : generators) {
  145. if (generator->getExtension() >= ex) {
  146. ex = generator->getExtension();
  147. chosenGen = generator;
  148. }
  149. }
  150. for (auto& device : getDevices()) {
  151. auto* clGen = device->getGenerator(type);
  152. if (clGen != nullptr) {
  153. chosenGen = clGen;
  154. }
  155. }
  156. if (chosenGen != nullptr) {
  157. ag->addGenerator(mnd::getPrecision(type), *chosenGen);
  158. }
  159. }
  160. return ag;
  161. }
  162. std::vector<std::unique_ptr<MandelDevice>> MandelContext::createDevices(void)
  163. {
  164. std::vector<std::unique_ptr<MandelDevice>> mandelDevices;
  165. #ifdef WITH_OPENCL
  166. std::vector<cl::Platform> platforms;
  167. cl::Platform::get(&platforms);
  168. //platforms.erase(platforms.begin() + 1);
  169. for (auto& platform : platforms) {
  170. std::string platformName = platform.getInfo<CL_PLATFORM_NAME>();
  171. std::string profile = platform.getInfo<CL_PLATFORM_PROFILE>();
  172. //printf("using opencl platform: %s\n", platformName.c_str());
  173. std::string ext = platform.getInfo<CL_PLATFORM_EXTENSIONS>();
  174. //printf("Platform extensions: %s\n", ext.c_str());
  175. //printf("Platform: %s, %s\n", platformName.c_str(), profile.c_str());
  176. std::vector<cl::Device> devices;
  177. platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
  178. auto onError = [] (const char* errinfo,
  179. const void* private_info,
  180. size_t cb,
  181. void* user_data) {
  182. printf("opencl error: %s\n", errinfo);
  183. };
  184. cl::Context context{ devices, nullptr, onError };
  185. for (auto& device : devices) {
  186. //printf("Device: %s\n", device.getInfo<CL_DEVICE_NAME>().c_str());
  187. //printf("preferred float width: %d\n", device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>());
  188. //printf("vendor: %s\n", device.getInfo<CL_DEVICE_VENDOR>().c_str());
  189. //printf("Device extensions: %s\n", ext.c_str());
  190. auto mandelDevice = std::make_unique<mnd::MandelDevice>(
  191. ClDeviceWrapper{ device, context }, platformName);
  192. MandelDevice& md = *mandelDevice;
  193. auto supportsDouble = md.supportsDouble();
  194. //printf("clock: %d", device.getInfo<CL_DEVICE_MAX_CLOCK_FREQUENCY>());
  195. //printf(" using opencl device: %s\n", md.name.c_str());
  196. try {
  197. md.mandelGenerators.insert({ Precision::FLOAT, std::make_unique<ClGeneratorFloat>(md) });
  198. //md.mandelGenerators.insert({ Precision::FIXED64, std::make_unique<ClGenerator64>(md) });
  199. //md.mandelGenerators.insert({ GeneratorType::FIXED128, std::make_unique<ClGenerator128>(md) });
  200. }
  201. catch (const std::string& err) {
  202. printf("err: %s", err.c_str());
  203. }
  204. try {
  205. md.mandelGenerators.insert({ Precision::DOUBLE_FLOAT, std::make_unique<ClGeneratorDoubleFloat>(md) });
  206. }
  207. catch (const std::string& err) {
  208. printf("err: %s", err.c_str());
  209. }
  210. if (supportsDouble) {
  211. try {
  212. md.mandelGenerators.insert({ Precision::DOUBLE, std::make_unique<ClGeneratorDouble>(md) });
  213. md.mandelGenerators.insert({ Precision::DOUBLE_DOUBLE, std::make_unique<ClGeneratorDoubleDouble>(md) });
  214. md.mandelGenerators.insert({ Precision::TRIPLE_DOUBLE, std::make_unique<ClGeneratorTripleDouble>(md) });
  215. md.mandelGenerators.insert({ Precision::QUAD_DOUBLE, std::make_unique<ClGeneratorQuadDouble>(md) });
  216. md.mandelGenerators.insert({ Precision::HEX_DOUBLE, std::make_unique<ClGeneratorHexDouble>(md) });
  217. }
  218. catch (const std::string& err) {
  219. printf("err: %s", err.c_str());
  220. fflush(stdout);
  221. }
  222. }
  223. try {
  224. //md.generator128 = std::make_unique<ClGenerator128>(device);
  225. }
  226. catch (const std::string& /*err*/) {
  227. //fprintf(stderr, "error creating 128bit cl generator: %s\n", err.c_str());
  228. }
  229. mandelDevices.push_back(std::move(mandelDevice));
  230. }
  231. }
  232. #endif // WITH_OPENCL
  233. return mandelDevices;
  234. }
  235. MandelContext::~MandelContext(void)
  236. {
  237. }
  238. AdaptiveGenerator& MandelContext::getDefaultGenerator(void)
  239. {
  240. return *adaptiveGenerator;
  241. }
  242. std::vector<std::unique_ptr<mnd::MandelDevice>>& MandelContext::getDevices(void)
  243. {
  244. return devices;
  245. }
  246. asmjit::JitRuntime& MandelContext::getJitRuntime(void)
  247. {
  248. return *jitRuntime;
  249. }
  250. MandelGenerator* MandelContext::getCpuGenerator(mnd::Precision type, mnd::CpuExtension ex)
  251. {
  252. auto it = cpuGenerators.find({ type, ex });
  253. if (it != cpuGenerators.end())
  254. return it->second.get();
  255. else
  256. return nullptr;
  257. }
  258. std::vector<MandelContext::GeneratorType> MandelContext::getSupportedTypes(void) const
  259. {
  260. std::vector<GeneratorType> types;
  261. for (auto& [type, gen] : cpuGenerators) {
  262. types.push_back(type);
  263. }
  264. return types;
  265. }
  266. std::vector<MandelGenerator*> MandelContext::getCpuGenerators(mnd::Precision prec) const
  267. {
  268. std::vector<MandelGenerator*> generators;
  269. for (const auto& [type, gen] : cpuGenerators) {
  270. if (type.first == prec)
  271. generators.push_back(gen.get());
  272. }
  273. return generators;
  274. }