// Mandel.cpp (14 KB)


  1. #include "Mandel.h"
  2. #include "Fixed.h"
  3. #include "CpuGenerators.h"
  4. #include "ClGenerators.h"
  5. #include "OpenClInternal.h"
  6. #include "OpenClCode.h"
  7. #ifdef WITH_ASMJIT
  8. #include <asmjit/asmjit.h>
  9. #endif // WITH_ASMJIT
  10. #include <map>
  11. using mnd::MandelDevice;
  12. using mnd::MandelContext;
  13. using mnd::MandelGenerator;
  14. using mnd::AdaptiveGenerator;
  /// Builds the reverse lookup table of `m`: every (key, value) entry of `m`
  /// becomes a (value, key) entry of the result.
  ///
  /// When several keys of `m` map to the same value, only the first one met
  /// while walking `m` in ascending key order is kept, because an already
  /// present key of the result is never overwritten.
  template<typename T, typename U>
  static std::map<U, T> invertMap(const std::map<T, U>& m)
  {
      std::map<U, T> inverted;
      for (const auto& [key, value] : m) {
          inverted.emplace(value, key);
      }
      return inverted;
  }
  24. static const std::map<mnd::GeneratorType, std::string> typeNames =
  25. {
  26. { mnd::GeneratorType::FLOAT, "float" },
  27. { mnd::GeneratorType::FLOAT_SSE2, "float SSE2" },
  28. { mnd::GeneratorType::FLOAT_AVX, "float AVX" },
  29. { mnd::GeneratorType::FLOAT_AVX_FMA, "float AVX+FMA" },
  30. { mnd::GeneratorType::FLOAT_AVX512, "float AVX512" },
  31. { mnd::GeneratorType::FLOAT_NEON, "float NEON" },
  32. { mnd::GeneratorType::DOUBLE_FLOAT, "double float" },
  33. { mnd::GeneratorType::DOUBLE, "double" },
  34. { mnd::GeneratorType::DOUBLE_SSE2, "double SSE2" },
  35. { mnd::GeneratorType::DOUBLE_AVX, "double AVX" },
  36. { mnd::GeneratorType::DOUBLE_AVX_FMA, "double AVX+FMA" },
  37. { mnd::GeneratorType::DOUBLE_AVX512, "double AVX512" },
  38. { mnd::GeneratorType::DOUBLE_NEON, "double NEON" },
  39. { mnd::GeneratorType::DOUBLE_DOUBLE, "double double" },
  40. { mnd::GeneratorType::DOUBLE_DOUBLE_AVX, "double double AVX" },
  41. { mnd::GeneratorType::DOUBLE_DOUBLE_AVX_FMA, "double double AVX+FMA" },
  42. { mnd::GeneratorType::DOUBLE_DOUBLE_NEON, "double double NEON" },
  43. { mnd::GeneratorType::QUAD_DOUBLE, "quad double" },
  44. { mnd::GeneratorType::QUAD_DOUBLE_AVX_FMA, "quad double AVX+FMA" },
  45. { mnd::GeneratorType::FLOAT128, "float128" },
  46. { mnd::GeneratorType::FLOAT256, "float256" },
  47. { mnd::GeneratorType::FIXED64, "fixed64" },
  48. { mnd::GeneratorType::FIXED128, "fixed128" },
  49. { mnd::GeneratorType::FIXED512, "fixed512" },
  50. };
  51. static const std::map<std::string, mnd::GeneratorType> nameTypes = invertMap(typeNames);
  52. namespace mnd
  53. {
  54. const std::string& getGeneratorName(mnd::GeneratorType type)
  55. {
  56. return typeNames.at(type);
  57. }
  58. mnd::GeneratorType getTypeFromName(const std::string& name)
  59. {
  60. return nameTypes.at(name);
  61. }
  62. }
  63. MandelContext mnd::initializeContext(void)
  64. {
  65. return MandelContext();
  66. }
  67. MandelDevice::MandelDevice(mnd::ClDeviceWrapper device, const std::string& platformName) :
  68. clDevice{ std::make_unique<ClDeviceWrapper>(std::move(device)) },
  69. platformName{ platformName }
  70. {
  71. extensions = clDevice->device.getInfo<CL_DEVICE_EXTENSIONS>();
  72. name = clDevice->device.getInfo<CL_DEVICE_NAME>();
  73. vendor = clDevice->device.getInfo<CL_DEVICE_VENDOR>();
  74. }
  75. mnd::MandelGenerator* MandelDevice::getGenerator(mnd::GeneratorType type) const
  76. {
  77. auto it = mandelGenerators.find(type);
  78. if (it != mandelGenerators.end())
  79. return it->second.get();
  80. else
  81. return nullptr;
  82. }
  83. std::vector<mnd::GeneratorType> MandelDevice::getSupportedTypes(void) const
  84. {
  85. std::vector<GeneratorType> types;
  86. for (auto& [type, gen] : mandelGenerators) {
  87. types.push_back(type);
  88. }
  89. return types;
  90. }
  91. bool MandelDevice::supportsDouble(void) const
  92. {
  93. return extensions.find("cl_khr_fp64") != std::string::npos;
  94. }
  95. MandelContext::MandelContext(void)
  96. #ifdef WITH_ASMJIT
  97. : jitRuntime{ std::make_unique<asmjit::JitRuntime>() }
  98. #endif // WITH_ASMJIT
  99. {
  100. #if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
  101. # if defined(WITH_AVX512)
  102. if (cpuInfo.hasAvx512()) {
  103. auto fl = std::make_unique<CpuGenerator<float, mnd::X86_AVX_512, true>>();
  104. auto db = std::make_unique<CpuGenerator<double, mnd::X86_AVX_512, true>>();
  105. cpuGenerators.insert({ GeneratorType::FLOAT_AVX512, std::move(fl) });
  106. cpuGenerators.insert({ GeneratorType::DOUBLE_AVX512, std::move(db) });
  107. }
  108. # endif
  109. if (cpuInfo.hasAvx()) {
  110. auto fl = std::make_unique<CpuGenerator<float, mnd::X86_AVX, true>>();
  111. auto db = std::make_unique<CpuGenerator<double, mnd::X86_AVX, true>>();
  112. auto ddb = std::make_unique<CpuGenerator<DoubleDouble, mnd::X86_AVX, true>>();
  113. cpuGenerators.insert({ GeneratorType::FLOAT_AVX, std::move(fl) });
  114. cpuGenerators.insert({ GeneratorType::DOUBLE_AVX, std::move(db) });
  115. cpuGenerators.insert({ GeneratorType::DOUBLE_DOUBLE_AVX, std::move(ddb) });
  116. }
  117. if (cpuInfo.hasAvx2() && cpuInfo.hasFma()) {
  118. auto favxfma = std::make_unique<CpuGenerator<float, mnd::X86_AVX_FMA, true>>();
  119. auto davxfma = std::make_unique<CpuGenerator<double, mnd::X86_AVX_FMA, true>>();
  120. auto ddavxfma = std::make_unique<CpuGenerator<DoubleDouble, mnd::X86_AVX_FMA, true>>();
  121. auto qdavxfma = std::make_unique<CpuGenerator<QuadDouble, mnd::X86_AVX_FMA, true>>();
  122. cpuGenerators.insert({ GeneratorType::FLOAT_AVX_FMA, std::move(favxfma) });
  123. cpuGenerators.insert({ GeneratorType::DOUBLE_AVX_FMA, std::move(davxfma) });
  124. cpuGenerators.insert({ GeneratorType::DOUBLE_DOUBLE_AVX_FMA, std::move(ddavxfma) });
  125. cpuGenerators.insert({ GeneratorType::QUAD_DOUBLE_AVX_FMA, std::move(qdavxfma) });
  126. }
  127. if (cpuInfo.hasSse2()) {
  128. auto fl = std::make_unique<CpuGenerator<float, mnd::X86_SSE2, true>>();
  129. auto db = std::make_unique<CpuGenerator<double, mnd::X86_SSE2, true>>();
  130. cpuGenerators.insert({ GeneratorType::FLOAT_SSE2, std::move(fl) });
  131. cpuGenerators.insert({ GeneratorType::DOUBLE_SSE2, std::move(db) });
  132. }
  133. #elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
  134. if (cpuInfo.hasNeon()) {
  135. auto fl = std::make_unique<CpuGenerator<float, mnd::ARM_NEON, true>>();
  136. auto db = std::make_unique<CpuGenerator<double, mnd::ARM_NEON, true>>();
  137. auto ddb = std::make_unique<CpuGenerator<mnd::DoubleDouble, mnd::ARM_NEON, true>>();
  138. cpuGenerators.insert({ GeneratorType::FLOAT_NEON, std::move(fl) });
  139. cpuGenerators.insert({ GeneratorType::DOUBLE_NEON, std::move(db) });
  140. cpuGenerators.insert({ GeneratorType::DOUBLE_DOUBLE_NEON, std::move(ddb) });
  141. }
  142. #endif
  143. {
  144. auto fl = std::make_unique<CpuGenerator<float, mnd::NONE, true>>();
  145. auto db = std::make_unique<CpuGenerator<double, mnd::NONE, true>>();
  146. cpuGenerators.insert({ GeneratorType::FLOAT, std::move(fl) });
  147. cpuGenerators.insert({ GeneratorType::DOUBLE, std::move(db) });
  148. auto fx64 = std::make_unique<CpuGenerator<Fixed64, mnd::NONE, true>>();
  149. auto fx128 = std::make_unique<CpuGenerator<Fixed128, mnd::NONE, true>>();
  150. cpuGenerators.insert({ GeneratorType::FIXED64, std::move(fx64) });
  151. cpuGenerators.insert({ GeneratorType::FIXED128, std::move(fx128) });
  152. }
  153. #ifdef WITH_BOOST
  154. auto quad = std::make_unique<CpuGenerator<Float128, mnd::NONE, true>>();
  155. auto oct = std::make_unique<CpuGenerator<Float256, mnd::NONE, true>>();
  156. cpuGenerators.insert({ GeneratorType::FLOAT128, std::move(quad) });
  157. cpuGenerators.insert({ GeneratorType::FLOAT256, std::move(oct) });
  158. #endif // WITH_BOOST
  159. auto dd = std::make_unique<CpuGenerator<DoubleDouble, mnd::NONE, true>>();
  160. auto qd = std::make_unique<CpuGenerator<QuadDouble, mnd::NONE, true>>();
  161. cpuGenerators.insert({ GeneratorType::DOUBLE_DOUBLE, std::move(dd) });
  162. cpuGenerators.insert({ GeneratorType::QUAD_DOUBLE, std::move(qd) });
  163. auto fix512 = std::make_unique<CpuGenerator<Fixed512, mnd::NONE, true>>();
  164. cpuGenerators.insert({ GeneratorType::FIXED512, std::move(fix512) });
  165. devices = createDevices();
  166. adaptiveGenerator = createAdaptiveGenerator();
  167. }
  168. std::unique_ptr<mnd::AdaptiveGenerator> MandelContext::createAdaptiveGenerator(void)
  169. {
  170. auto* floatGen = getCpuGenerator(GeneratorType::FLOAT);
  171. auto* doubleGen = getCpuGenerator(GeneratorType::DOUBLE);
  172. auto* doubleDoubleGen = getCpuGenerator(GeneratorType::DOUBLE_DOUBLE);
  173. auto* quadDoubleGen = getCpuGenerator(GeneratorType::QUAD_DOUBLE);
  174. auto* f256Gen = getCpuGenerator(GeneratorType::FLOAT256);
  175. auto* fix512 = getCpuGenerator(GeneratorType::FIXED512);
  176. if (cpuInfo.hasAvx()) {
  177. floatGen = getCpuGenerator(GeneratorType::FLOAT_AVX);
  178. doubleGen = getCpuGenerator(GeneratorType::DOUBLE_AVX);
  179. }
  180. else if (cpuInfo.hasSse2()) {
  181. floatGen = getCpuGenerator(GeneratorType::FLOAT_SSE2);
  182. doubleGen = getCpuGenerator(GeneratorType::DOUBLE_SSE2);
  183. }
  184. if (cpuInfo.hasAvx2() && cpuInfo.hasFma()) {
  185. floatGen = getCpuGenerator(GeneratorType::FLOAT_AVX_FMA);
  186. doubleGen = getCpuGenerator(GeneratorType::DOUBLE_AVX_FMA);
  187. doubleDoubleGen = getCpuGenerator(GeneratorType::DOUBLE_DOUBLE_AVX_FMA);
  188. quadDoubleGen = getCpuGenerator(GeneratorType::QUAD_DOUBLE_AVX_FMA);
  189. }
  190. if (cpuInfo.hasAvx512()) {
  191. floatGen = getCpuGenerator(GeneratorType::FLOAT_AVX512);
  192. doubleGen = getCpuGenerator(GeneratorType::DOUBLE_AVX512);
  193. }
  194. if (cpuInfo.hasNeon()) {
  195. floatGen = getCpuGenerator(GeneratorType::FLOAT_NEON);
  196. doubleGen = getCpuGenerator(GeneratorType::DOUBLE_NEON);
  197. doubleDoubleGen = getCpuGenerator(GeneratorType::DOUBLE_DOUBLE_NEON);
  198. }
  199. if (!devices.empty()) {
  200. auto& device = devices[0];
  201. auto* fGen = device->getGenerator(GeneratorType::FLOAT);
  202. auto* dGen = device->getGenerator(GeneratorType::DOUBLE);
  203. auto* ddGen = device->getGenerator(GeneratorType::DOUBLE_DOUBLE);
  204. auto* qdGen = device->getGenerator(GeneratorType::QUAD_DOUBLE);
  205. if (fGen)
  206. floatGen = fGen;
  207. if (dGen)
  208. doubleGen = dGen;
  209. if (ddGen)
  210. doubleDoubleGen = ddGen;
  211. if (qdGen)
  212. quadDoubleGen = qdGen;
  213. }
  214. auto ag = std::make_unique<AdaptiveGenerator>();
  215. ag->addGenerator(getPrecision<float>(), *floatGen);
  216. ag->addGenerator(getPrecision<double>(), *doubleGen);
  217. ag->addGenerator(getPrecision<DoubleDouble>(), *doubleDoubleGen);
  218. ag->addGenerator(getPrecision<QuadDouble>(), *quadDoubleGen);
  219. ag->addGenerator(getPrecision<Float256>(), *f256Gen);
  220. ag->addGenerator(Precision::INF_PREC, *fix512);
  221. return ag;
  222. }
  223. std::vector<std::unique_ptr<MandelDevice>> MandelContext::createDevices(void)
  224. {
  225. std::vector<std::unique_ptr<MandelDevice>> mandelDevices;
  226. #ifdef WITH_OPENCL
  227. std::vector<cl::Platform> platforms;
  228. cl::Platform::get(&platforms);
  229. //platforms.erase(platforms.begin() + 1);
  230. for (auto& platform : platforms) {
  231. std::string platformName = platform.getInfo<CL_PLATFORM_NAME>();
  232. std::string profile = platform.getInfo<CL_PLATFORM_PROFILE>();
  233. //printf("using opencl platform: %s\n", platformName.c_str());
  234. std::string ext = platform.getInfo<CL_PLATFORM_EXTENSIONS>();
  235. //printf("Platform extensions: %s\n", ext.c_str());
  236. //printf("Platform: %s, %s\n", platformName.c_str(), profile.c_str());
  237. std::vector<cl::Device> devices;
  238. platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
  239. auto onError = [] (const char* errinfo,
  240. const void* private_info,
  241. size_t cb,
  242. void* user_data) {
  243. printf("opencl error: %s\n", errinfo);
  244. };
  245. cl::Context context{ devices, nullptr, onError };
  246. for (auto& device : devices) {
  247. //printf("Device: %s\n", device.getInfo<CL_DEVICE_NAME>().c_str());
  248. //printf("preferred float width: %d\n", device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>());
  249. //printf("vendor: %s\n", device.getInfo<CL_DEVICE_VENDOR>().c_str());
  250. //printf("Device extensions: %s\n", ext.c_str());
  251. auto mandelDevice = std::make_unique<mnd::MandelDevice>(
  252. ClDeviceWrapper{ device, context }, platformName);
  253. MandelDevice& md = *mandelDevice;
  254. auto supportsDouble = md.supportsDouble();
  255. //printf("clock: %d", device.getInfo<CL_DEVICE_MAX_CLOCK_FREQUENCY>());
  256. //printf(" using opencl device: %s\n", md.name.c_str());
  257. try {
  258. md.mandelGenerators.insert({ GeneratorType::FLOAT, std::make_unique<ClGeneratorFloat>(md) });
  259. md.mandelGenerators.insert({ GeneratorType::FIXED64, std::make_unique<ClGenerator64>(md) });
  260. //md.mandelGenerators.insert({ GeneratorType::FIXED128, std::make_unique<ClGenerator128>(md) });
  261. md.mandelGenerators.insert({ GeneratorType::DOUBLE_FLOAT, std::make_unique<ClGeneratorDoubleFloat>(md) });
  262. }
  263. catch (const std::string& err) {
  264. printf("err: %s", err.c_str());
  265. }
  266. if (supportsDouble) {
  267. try {
  268. md.mandelGenerators.insert({ GeneratorType::DOUBLE, std::make_unique<ClGeneratorDouble>(md) });
  269. md.mandelGenerators.insert({ GeneratorType::DOUBLE_DOUBLE, std::make_unique<ClGeneratorDoubleDouble>(md) });
  270. md.mandelGenerators.insert({ GeneratorType::QUAD_DOUBLE, std::make_unique<ClGeneratorQuadDouble>(md) });
  271. }
  272. catch (const std::string& err) {
  273. printf("err: %s", err.c_str());
  274. fflush(stdout);
  275. }
  276. }
  277. try {
  278. //md.generator128 = std::make_unique<ClGenerator128>(device);
  279. }
  280. catch (const std::string& /*err*/) {
  281. //fprintf(stderr, "error creating 128bit cl generator: %s\n", err.c_str());
  282. }
  283. mandelDevices.push_back(std::move(mandelDevice));
  284. }
  285. }
  286. #endif // WITH_OPENCL
  287. return mandelDevices;
  288. }
  289. MandelContext::~MandelContext(void)
  290. {
  291. }
  292. AdaptiveGenerator& MandelContext::getDefaultGenerator(void)
  293. {
  294. return *adaptiveGenerator;
  295. }
  296. std::vector<std::unique_ptr<mnd::MandelDevice>>& MandelContext::getDevices(void)
  297. {
  298. return devices;
  299. }
  300. asmjit::JitRuntime& MandelContext::getJitRuntime(void)
  301. {
  302. return *jitRuntime;
  303. }
  304. MandelGenerator* MandelContext::getCpuGenerator(mnd::GeneratorType type)
  305. {
  306. auto it = cpuGenerators.find(type);
  307. if (it != cpuGenerators.end())
  308. return it->second.get();
  309. else
  310. return nullptr;
  311. }
  312. std::vector<mnd::GeneratorType> MandelContext::getSupportedTypes(void) const
  313. {
  314. std::vector<GeneratorType> types;
  315. for (auto& [type, gen] : cpuGenerators) {
  316. types.push_back(type);
  317. }
  318. return types;
  319. }