4 năm trước cách đây · d2b3b8a525
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -49,6 +49,8 @@ ENDIF()
 
															 target_include_directories(Almond PUBLIC include)
														
 
															+set(MANDEL_PLUGIN_DIR ${CMAKE_BINARY_DIR}/plugins)
														
 
															+set(CMAKE_ENABLE_EXPORTS ON)
														
 
															 add_subdirectory(libalmond)
														
 
															 target_include_directories(Almond SYSTEM PUBLIC ${FFMPEG_INCLUDE_DIRS})
														
--- a/libmandel/CMakeLists.txt
+++ b/libmandel/CMakeLists.txt
@@ -9,7 +9,7 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
 
															 else()
														
 
															     set(MANDEL_TARGET_ARCHITECTURE "x86_64" CACHE STRING "Target Architecture")
														
 
															 endif()
														
 
															-option(MANDEL_AVX512 "generate code that can make use of avx-512-instructions" ON)
														
 
															+option(MANDEL_AVX512 "generate plugin that can make use of avx-512-instructions" ON)
														
 
															 option(MANDEL_ASMJIT "use just-in-time-compilation library asmjit" ON)
														
 
															 option(MANDEL_OPENCL "use opencl to offload calculations on GPU devices" ON)
														
 
															 option(MANDEL_BUILD_NATIVE
														
@@ -44,6 +44,7 @@ SET(MandelSources
 
															     src/NaiveIRGenerator.cpp
														
 
															     src/FloatLog.cpp
														
 
															     src/Benchmark.cpp
														
 
															+    src/CalcPlugin.cpp
														
 
															 )
														
 
															 FILE(GLOB MandelHeaders include/*.h)
														
@@ -59,7 +60,7 @@ elseif(MANDEL_TARGET_ARCHITECTURE STREQUAL "aarch64")
 
															 endif()
														
 
															-# use both flags just to be sure
														
 
															+# use both flags (mtune & march) just to be sure
														
 
															 CHECK_CXX_COMPILER_FLAG("-march=native" MARCH_NATIVE_SUPPORTED)
														
 
															 CHECK_CXX_COMPILER_FLAG("-mtune=native" MTUNE_NATIVE_SUPPORTED)
														
 
															 if(MARCH_NATIVE_SUPPORTED AND MANDEL_BUILD_NATIVE)
														
@@ -72,22 +73,39 @@ endif()
 
															 add_executable(resourcec resourcec/resourcec.cpp)
														
 
															 add_custom_command(
														
 
															-    OUTPUT OpenClCode.cpp
														
 
															+    OUTPUT  ${CMAKE_CURRENT_BINARY_DIR}/OpenClCode.cpp
														
 
															     BYPRODUCTS ${CMAKE_CURRENT_BINARY_DIR}/OpenClCode.h
														
 
															-    COMMAND resourcec ARGS -n mnd::cl_src -d ${CMAKE_CURRENT_BINARY_DIR}/OpenClCode.h -o OpenClCode.cpp
														
 
															+    COMMAND resourcec ARGS -n mnd::cl_src -d ${CMAKE_CURRENT_BINARY_DIR}/OpenClCode.h -o ${CMAKE_CURRENT_BINARY_DIR}/OpenClCode.cpp
														
 
															     SOURCES ${MandelClSources}
														
 
															     WORKING_DIRECTORY ${CMAKE_PROJECT_DIR}
														
 
															     COMMENT "Packaging Opencl Resources"
														
 
															+    VERBATIM
														
 
															 )
														
 
															+set(CMAKE_ENABLE_EXPORTS ON)
														
 
															 if(OPENCL_FOUND AND MANDEL_OPENCL)
														
 
															     add_library(mandel STATIC ${MandelSources} OpenClCode.cpp)
														
 
															 else()
														
 
															     add_library(mandel STATIC ${MandelSources})
														
 
															 endif()
														
 
															+target_link_libraries(mandel PUBLIC ${CMAKE_DL_LIBS})
														
 
															+
														
 
															+
														
 
															+# Plugins are MODULE libraries: they are never linked against, only built
															+# next to each other in MANDEL_PLUGIN_DIR. The top-level CMakeLists.txt
															+# normally defines that directory; fall back to a local one so that this
															+# directory also configures when MANDEL_PLUGIN_DIR was not set (an empty
															+# value would make set_target_properties fail with a bad argument count).
															+if(NOT DEFINED MANDEL_PLUGIN_DIR)
															+    set(MANDEL_PLUGIN_DIR "${CMAKE_CURRENT_BINARY_DIR}/plugins")
															+endif()
															+
															+# mandel_add_plugin(<name> <source>)
															+#
															+# Creates the MODULE library target <name> from the single source file
															+# <source>, places the resulting library in MANDEL_PLUGIN_DIR, gives it
															+# access to the libmandel headers and makes sure it is built whenever
															+# the `mandel` target is built.
															+#
															+# Example: mandel_add_plugin(avxfma src/plugins/CpuGeneratorsAVXFMA.cpp)
															+function(mandel_add_plugin name source)
															+    add_library(${name} MODULE "${source}")
															+    set_target_properties(${name} PROPERTIES
															+        LIBRARY_OUTPUT_DIRECTORY "${MANDEL_PLUGIN_DIR}"
															+    )
															+    # PRIVATE: nothing can link to a MODULE library, so there are no consumers
															+    target_include_directories(${name} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include")
															+    # build-order only; the plugin is not linked into mandel
															+    add_dependencies(mandel ${name})
															+endfunction()
															+
															+# avx+fma plugin
															+mandel_add_plugin(avxfma src/plugins/CpuGeneratorsAVXFMA.cpp)
															+
															+# avx512 plugin
															+mandel_add_plugin(avx512 src/plugins/CpuGeneratorsAVX512.cpp)
														
 
															+
														
 
															 target_include_directories(mandel PUBLIC "include")
														
 
															 target_include_directories(mandel PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
														
@@ -104,9 +122,6 @@ if(OPENCL_FOUND AND MANDEL_OPENCL)
 
															     target_include_directories(mandel SYSTEM PUBLIC "include_cl")
														
 
															     link_directories(${OpenCL_LIBRARY})
														
 
															     target_link_libraries(mandel PUBLIC OpenCL::OpenCL)
														
 
															-
														
 
															-    #add_subdirectory(resourcec)
														
 
															-
														
 
															 else()
														
 
															 endif()
														
@@ -117,6 +132,8 @@ endif()
 
															 if(OpenMP_CXX_FOUND)
														
 
															     target_link_libraries(mandel PUBLIC OpenMP::OpenMP_CXX)
														
 
															+    target_link_libraries(avx512 PUBLIC OpenMP::OpenMP_CXX)
														
 
															+    target_link_libraries(avxfma PUBLIC OpenMP::OpenMP_CXX)
														
 
															 endif()
														
 
															 if(Boost_FOUND)
														
@@ -130,18 +147,22 @@ if (MANDEL_TARGET_ARCHITECTURE STREQUAL "x86_64" OR MANDEL_TARGET_ARCHITECTURE S
 
															         target_compile_definitions(mandel PUBLIC WITH_AVX512)
														
 
															         if (MSVC)
														
 
															             set_source_files_properties(src/CpuGeneratorsAVX512.cpp PROPERTIES COMPILE_FLAGS /arch:AVX512F)
														
 
															+            set_source_files_properties(src/plugins/CpuGeneratorsAVX512.cpp PROPERTIES COMPILE_FLAGS /arch:AVX512F)
														
 
															         else()
														
 
															             set_source_files_properties(src/CpuGeneratorsAVX512.cpp PROPERTIES COMPILE_FLAGS -mavx512f)
														
 
															+            set_source_files_properties(src/plugins/CpuGeneratorsAVX512.cpp PROPERTIES COMPILE_FLAGS -mavx512f)
														
 
															         endif(MSVC)
														
 
															     endif()
														
 
															     if (MSVC)
														
 
															         set_source_files_properties(src/CpuGeneratorsAVX.cpp PROPERTIES COMPILE_FLAGS /arch:AVX)
														
 
															         set_source_files_properties(src/CpuGeneratorsAVXFMA.cpp PROPERTIES COMPILE_FLAGS /arch:AVX2)
														
 
															+        set_source_files_properties(src/plugins/CpuGeneratorsAVXFMA.cpp PROPERTIES COMPILE_FLAGS /arch:AVX2)
														
 
															         set_source_files_properties(src/CpuGeneratorsSSE2.cpp PROPERTIES COMPILE_FLAGS /arch:SSE2)
														
 
															     else()
														
 
															         set_source_files_properties(src/CpuGeneratorsAVX.cpp PROPERTIES COMPILE_FLAGS -mavx)
														
 
															         set_source_files_properties(src/CpuGeneratorsAVXFMA.cpp PROPERTIES COMPILE_FLAGS "-mavx2 -mfma")
														
 
															+        set_source_files_properties(src/plugins/CpuGeneratorsAVXFMA.cpp PROPERTIES COMPILE_FLAGS "-mavx2 -mfma")
														
 
															         set_source_files_properties(src/CpuGeneratorsSSE2.cpp PROPERTIES COMPILE_FLAGS -msse2)
														
 
															     endif(MSVC)
														
--- a/libmandel/include/CalcPlugin.h
+++ b/libmandel/include/CalcPlugin.h
@@ -0,0 +1,51 @@
 
															+#ifndef MANDEL_CALCPLUGIN_H
														
 
															+#define MANDEL_CALCPLUGIN_H
														
 
															+
														
 
															+#include <string>
															+#include <vector>
														
 
															+
														
 
															+#if defined(__GNUC__)
														
 
															+    #define MANDEL_EXPORT __attribute__((visibility("default")))
														
 
															+    #define MANDEL_IMPORT
														
 
															+#elif defined(_MSC_VER)
														
 
															+    #define MANDEL_EXPORT __declspec(dllexport)
														
 
															+    #define MANDEL_IMPORT __declspec(dllimport)
														
 
															+#else
														
 
															+    #define MANDEL_EXPORT
														
 
															+    #define MANDEL_IMPORT
														
 
															+#endif
														
 
															+
														
 
															+namespace mnd
														
 
															+{
														
 
															+    class CalcPlugin;
														
 
															+
														
 
															+    class MandelGenerator;
														
 
															+}
														
 
															+
														
 
															+
														
 
															+///
															+/// \brief owner of a dynamically loaded calculation plugin
															+///
															+/// Holds the opaque library handle of one plugin. An object whose handle
															+/// is \c nullptr represents "no plugin loaded" (see isValid()). The class
															+/// is move-only so that each handle has exactly one owner and is released
															+/// exactly once by the destructor.
															+///
															+class MANDEL_EXPORT mnd::CalcPlugin
															+{
															+    void* handle;
															+public:
															+    ///
															+    /// \brief tries to load the plugin library located at \p path
															+    ///
															+    /// On failure the object is still constructed but isValid()
															+    /// returns false.
															+    ///
															+    CalcPlugin(const std::string& path);
															+    CalcPlugin(const CalcPlugin&) = delete;
															+
															+    ///
															+    /// \brief takes over the handle of \p other
															+    ///
															+    /// \p other is left without a handle, so its destructor does not
															+    /// release the library a second time.
															+    ///
															+    CalcPlugin(CalcPlugin&& other) noexcept :
															+        handle{ other.handle }
															+    {
															+        other.handle = nullptr;
															+    }
															+
															+    ~CalcPlugin(void);
															+    CalcPlugin& operator=(const CalcPlugin&) = delete;
															+
															+    ///
															+    /// \brief exchanges the handles of this object and \p other
															+    ///
															+    /// The handle previously held by this object ends up in \p other and
															+    /// is released when \p other is destroyed, so nothing is leaked and
															+    /// nothing is released twice.
															+    ///
															+    CalcPlugin& operator=(CalcPlugin&& other) noexcept
															+    {
															+        void* previous = handle;
															+        handle = other.handle;
															+        other.handle = previous;
															+        return *this;
															+    }
															+
															+    ///
															+    /// \brief gets the generators provided by this plugin
															+    /// \return a vector containing the generators provided by this
															+    ///         plugin. If no plugin was loaded, the vector is empty.
															+    ///
															+    /// \note The returned generators (vector incl.) have the same lifetime as
															+    ///       the plugin i.e. they are no longer valid should the plugin
															+    ///       object be destroyed.
															+    ///
															+    const std::vector<mnd::MandelGenerator*>& getGenerators(void);
															+
															+    /// \brief tells whether a plugin library is currently held
															+    inline bool isValid(void) const { return handle != nullptr; }
															+};
														
 
															+
														
 
															+
														
 
															+#endif // MANDEL_CALCPLUGIN_H
														
--- a/libmandel/include/Generators.h
+++ b/libmandel/include/Generators.h
@@ -88,7 +88,7 @@ public:
 
															     {
														
 
															     }
														
 
															-    virtual ~MandelGenerator(void);
														
 
															+    virtual ~MandelGenerator(void) = default;
														
 
															     MandelGenerator(const MandelGenerator&) = default;
														
--- a/libmandel/include/Mandel.h
+++ b/libmandel/include/Mandel.h
@@ -21,6 +21,7 @@ namespace asmjit { class JitRuntime{}; }
 
															 #include "IterationGenerator.h"
														
 
															 #include "CpuGenerators.h"
														
 
															 #include "Hardware.h"
														
 
															+#include "CalcPlugin.h"
														
 
															 namespace mnd
														
 
															 {
														
@@ -77,7 +78,20 @@ private:
 
															     CpuInfo cpuInfo;
														
 
															     std::unique_ptr<asmjit::JitRuntime> jitRuntime;
														
 
															-    std::map<GeneratorType, std::unique_ptr<MandelGenerator>> cpuGenerators;
														
 
															+    ///
														
 
															+    /// \brief list of standard mandel generators implemented in c++
														
 
															+    ///
														
 
															+    /// This is an owning list of Generators that can be used regardless of
														
 
															+    /// Cpu type as they are implemented in standard c++ and are integrated
														
 
															+    /// into libmandel.
														
 
															+    ///
														
 
															+    std::vector<std::unique_ptr<MandelGenerator>> defaultGenerators;
														
 
															+    std::vector<std::unique_ptr<CalcPlugin>> loadedPlugins;
														
 
															+
														
 
															+    ///
														
 
															+    /// \brief all cpu generators currently available
														
 
															+    ///
														
 
															+    std::map<GeneratorType, MandelGenerator*> cpuGenerators;
														
 
															     std::unique_ptr<AdaptiveGenerator> adaptiveGenerator;
														
@@ -94,6 +108,8 @@ public:
 
															     MandelContext& operator=(const MandelContext&) = delete;
														
 
															     MandelContext& operator=(MandelContext&&) = default;
														
 
															+    void loadPlugin(std::unique_ptr<CalcPlugin> cp);
														
 
															+
														
 
															     AdaptiveGenerator& getDefaultGenerator(void);
														
 
															     std::vector<std::unique_ptr<mnd::MandelDevice>>& getDevices(void);
														
--- a/libmandel/src/CalcPlugin.cpp
+++ b/libmandel/src/CalcPlugin.cpp
@@ -0,0 +1,40 @@
 
															+#include "CalcPlugin.h"
														
 
															+#include <dlfcn.h>
														
 
															+#include <stdexcept>
														
 
															+#include <vector>
														
 
															+
														
 
															+
														
 
															+using mnd::CalcPlugin;
														
 
															+
														
 
															+// Opens the shared object at `path` with lazy symbol binding. dlopen
															+// yields a null pointer on failure, which leaves the plugin in the
															+// "not valid" state instead of raising an error.
															+CalcPlugin::CalcPlugin(const std::string& path) :
															+    handle{ dlopen(path.c_str(), RTLD_LAZY) }
															+{
															+}
														
 
															+
														
 
															+
														
 
															+// Closes the library handle; an object without a handle has nothing
															+// to release.
															+CalcPlugin::~CalcPlugin(void)
															+{
															+    if (handle == nullptr) {
															+        return;
															+    }
															+    dlclose(handle);
															+}
														
 
															+
														
 
															+
														
 
															+// Looks up the plugin's `mandel_get_generators` entry point and returns
															+// the generator list it reports. When no library is loaded, or the
															+// library does not provide that symbol, a reference to an empty vector
															+// is returned.
															+const std::vector<mnd::MandelGenerator*>& CalcPlugin::getGenerators(void)
															+{
															+    // The function returns a reference, so the failure paths must hand out
															+    // an object that outlives the call; a temporary would dangle.
															+    static const std::vector<mnd::MandelGenerator*> empty = {};
															+    if (!isValid()) {
															+        return empty;
															+    }
															+
															+    using GeneratorGetter =
															+        const std::vector<mnd::MandelGenerator*>& (*)(void);
															+    // dlsym returns void*; convert it to the entry point's function type
															+    GeneratorGetter gg =
															+            reinterpret_cast<GeneratorGetter>(dlsym(handle, "mandel_get_generators"));
															+    if (gg != nullptr) {
															+        return gg();
															+    }
															+    else {
															+        return empty;
															+    }
															+}
														
--- a/libmandel/src/Generators.cpp
+++ b/libmandel/src/Generators.cpp
@@ -42,12 +42,6 @@ namespace mnd
 
															 }
														
 
															-
														
 
															-MandelGenerator::~MandelGenerator(void)
														
 
															-{
														
 
															-}
														
 
															-
														
 
															-
														
 
															 mnd::MandelDevice* MandelGenerator::getDevice(void)
														
 
															 {
														
 
															     return nullptr;
														
--- a/libmandel/src/Mandel.cpp
+++ b/libmandel/src/Mandel.cpp
@@ -74,7 +74,8 @@ MandelContext::MandelContext(void)
 
															     if (cpuInfo.hasAvx512()) {
														
 
															         auto fl = std::make_unique<CpuGenerator<float, mnd::X86_AVX_512, true>>();
														
 
															         //auto db = std::make_unique<CpuGenerator<double, mnd::X86_AVX_512, true>>();
														
 
															-        cpuGenerators.insert({ std::pair{ Precision::FLOAT, HardwareFeature::X86_AVX_512 }, std::move(fl) });
														
 
															+        cpuGenerators.insert({ std::pair{ Precision::FLOAT, HardwareFeature::X86_AVX_512 }, fl.get() });
														
 
															+        defaultGenerators.push_back(std::move(fl));
														
 
															         //cpuGenerators.insert({ { Precision::DOUBLE, CpuExtension::X86_AVX_512 }, std::move(db) });
														
 
															     }
														
 
															 #   endif
														
@@ -83,12 +84,17 @@ MandelContext::MandelContext(void)
 
															         auto db = std::make_unique<CpuGenerator<double, mnd::X86_AVX, true>>();
														
 
															         auto ddb = std::make_unique<CpuGenerator<DoubleDouble, mnd::X86_AVX, true>>();
														
 
															         auto tdb = std::make_unique<CpuGenerator<TripleDouble, mnd::X86_AVX, true>>();
														
 
															-        cpuGenerators.insert({ std::pair{ Precision::FLOAT, HardwareFeature::X86_AVX }, std::move(fl) });
														
 
															-        cpuGenerators.insert({ std::pair{ Precision::DOUBLE, HardwareFeature::X86_AVX }, std::move(db) });
														
 
															-        cpuGenerators.insert({ std::pair{ Precision::DOUBLE_DOUBLE, HardwareFeature::X86_AVX }, std::move(ddb) });
														
 
															-        cpuGenerators.insert({ std::pair{ Precision::TRIPLE_DOUBLE, HardwareFeature::X86_AVX }, std::move(tdb) });
														
 
															+        cpuGenerators.insert({ std::pair{ Precision::FLOAT, HardwareFeature::X86_AVX }, fl.get() });
														
 
															+        cpuGenerators.insert({ std::pair{ Precision::DOUBLE, HardwareFeature::X86_AVX }, db.get() });
														
 
															+        cpuGenerators.insert({ std::pair{ Precision::DOUBLE_DOUBLE, HardwareFeature::X86_AVX }, ddb.get() });
														
 
															+        cpuGenerators.insert({ std::pair{ Precision::TRIPLE_DOUBLE, HardwareFeature::X86_AVX }, tdb.get() });
														
 
															+
														
 
															+        defaultGenerators.push_back(std::move(fl));
														
 
															+        defaultGenerators.push_back(std::move(db));
														
 
															+        defaultGenerators.push_back(std::move(ddb));
														
 
															+        defaultGenerators.push_back(std::move(tdb));
														
 
															     }
														
 
															-    if (cpuInfo.hasAvx2() && cpuInfo.hasFma()) {
														
 
															+    /*if (cpuInfo.hasAvx2() && cpuInfo.hasFma()) {
														
 
															         auto favxfma = std::make_unique<CpuGenerator<float, mnd::X86_AVX_FMA, true>>();
														
 
															         auto davxfma = std::make_unique<CpuGenerator<double, mnd::X86_AVX_FMA, true>>();
														
 
															         auto ddavxfma = std::make_unique<CpuGenerator<DoubleDouble, mnd::X86_AVX_FMA, true>>();
														
@@ -115,19 +121,26 @@ MandelContext::MandelContext(void)
 
															         cpuGenerators.insert({ std::pair{ Precision::DOUBLE, CpuExtension::ARM_NEON }, std::move(db) });
														
 
															         cpuGenerators.insert({ std::pair{ Precision::DOUBLE_DOUBLE, CpuExtension::ARM_NEON }, std::move(ddb) });
														
 
															     }
														
 
															+    */
														
 
															 #endif
														
 
															     {
														
 
															         auto fl = std::make_unique<CpuGenerator<float, mnd::NONE, true>>();
														
 
															         auto db = std::make_unique<CpuGenerator<double, mnd::NONE, true>>();
														
 
															-        cpuGenerators.insert({ std::pair{ Precision::FLOAT, HardwareFeature::NONE }, std::move(fl) });
														
 
															-        cpuGenerators.insert({ std::pair{ Precision::DOUBLE, HardwareFeature::NONE }, std::move(db) });
														
 
															+        cpuGenerators.insert({ std::pair{ Precision::FLOAT, HardwareFeature::NONE }, fl.get() });
														
 
															+        cpuGenerators.insert({ std::pair{ Precision::DOUBLE, HardwareFeature::NONE }, db.get() });
														
 
															         auto fx64 = std::make_unique<CpuGenerator<Fixed64, mnd::NONE, true>>();
														
 
															         auto fx128 = std::make_unique<CpuGenerator<Fixed128, mnd::NONE, true>>();
														
 
															-        cpuGenerators.insert({ std::pair{ Precision::FIXED64, HardwareFeature::NONE }, std::move(fx64) });
														
 
															-        cpuGenerators.insert({ std::pair{ Precision::FIXED128, HardwareFeature::NONE }, std::move(fx128) });
														
 
															+        cpuGenerators.insert({ std::pair{ Precision::FIXED64, HardwareFeature::NONE }, fx64.get() });
														
 
															+        cpuGenerators.insert({ std::pair{ Precision::FIXED128, HardwareFeature::NONE }, fx128.get() });
														
 
															+
														
 
															+        defaultGenerators.push_back(std::move(fl));
														
 
															+        defaultGenerators.push_back(std::move(db));
														
 
															+        defaultGenerators.push_back(std::move(fx64));
														
 
															+        defaultGenerators.push_back(std::move(fx128));
														
 
															     }
														
 
															+    /*
														
 
															 #ifdef WITH_BOOST
														
 
															     auto quad = std::make_unique<CpuGenerator<Float128, mnd::NONE, true>>();
														
 
															     auto oct = std::make_unique<CpuGenerator<Float256, mnd::NONE, true>>();
														
@@ -151,7 +164,7 @@ MandelContext::MandelContext(void)
 
															     auto fix512 = std::make_unique<CpuGenerator<Fixed512, mnd::NONE, true>>();
														
 
															     cpuGenerators.insert({ std::pair{ Precision::FIXED512, HardwareFeature::NONE }, std::move(fix512) });
														
 
															-
														
 
															+    */
														
 
															     devices = createDevices();
														
 
															     adaptiveGenerator = createAdaptiveGenerator();
														
@@ -335,6 +348,22 @@ MandelContext::~MandelContext(void)
 
															 }
														
 
															+// Registers every generator offered by the plugin `cp` and then takes
															+// ownership of the plugin, so the registered generator pointers stay
															+// valid for as long as this context keeps the plugin loaded.
															+//
															+// A null plugin, a plugin that failed to load, or a plugin that offers
															+// no generators is reported and dropped without changing the context.
															+void MandelContext::loadPlugin(std::unique_ptr<CalcPlugin> cp)
															+{
															+    if (!cp || !cp->isValid()) {
															+        printf("ouh nouh\n");
															+        return;
															+    }
															+
															+    const auto& gens = cp->getGenerators();
															+    if (gens.empty()) {
															+        printf("ouh nouh\n");
															+        return;
															+    }
															+
															+    for (auto* gen : gens) {
															+        if (gen == nullptr) {
															+            continue;
															+        }
															+        cpuGenerators.insert({ GeneratorType{ gen->getType(), gen->getExtension() }, gen });
															+        if (adaptiveGenerator) {
															+            adaptiveGenerator->addGenerator(*gen);
															+        }
															+    }
															+
															+    // move exactly once, after the last use of cp and its generator list
															+    loadedPlugins.push_back(std::move(cp));
															+}
														
 
															+
														
 
															+
														
 
															 AdaptiveGenerator& MandelContext::getDefaultGenerator(void)
														
 
															 {
														
 
															     return *adaptiveGenerator;
														
@@ -357,7 +386,7 @@ MandelGenerator* MandelContext::getCpuGenerator(mnd::Precision type, mnd::Hardwa
 
															 {
														
 
															     auto it = cpuGenerators.find({ type, ex });
														
 
															     if (it != cpuGenerators.end())
														
 
															-        return it->second.get();
														
 
															+        return it->second;
														
 
															     else
														
 
															         return nullptr;
														
 
															 }
														
@@ -378,7 +407,7 @@ std::vector<MandelGenerator*> MandelContext::getCpuGenerators(mnd::Precision pre
 
															     std::vector<MandelGenerator*> generators;
														
 
															     for (const auto& [type, gen] : cpuGenerators) {
														
 
															         if (type.first == prec)
														
 
															-            generators.push_back(gen.get());
														
 
															+            generators.push_back(gen);
														
 
															     }
														
 
															     return generators;
														
 
															 }
														
--- a/libmandel/src/plugins/CpuGeneratorsAVX512.cpp
+++ b/libmandel/src/plugins/CpuGeneratorsAVX512.cpp
@@ -0,0 +1,30 @@
 
															+#include <immintrin.h>
														
 
															+#include <omp.h>
														
 
															+
														
 
															+#include <vector>
														
 
															+#include <cmath>
														
 
															+#include "CpuGenerators.h"
														
 
															+
														
 
															+/// Generator registered for Precision::FLOAT with the X86_AVX_512
															+/// hardware feature; offered to the host through mandel_get_generators.
															+class CpuGeneratorFloatAVX512 : public mnd::MandelGenerator
															+{
															+public:
															+    CpuGeneratorFloatAVX512(void)
															+        : MandelGenerator{ mnd::Precision::FLOAT, mnd::X86_AVX_512 }
															+    {}
															+
															+    /// fills \p data for the image described by \p info
															+    void generate(const mnd::MandelInfo& info, float* data) override;
															+};
														
 
															+
														
 
															+
														
 
															+// Plugin entry point looked up by name from the host. Both objects are
															+// function-local statics, so the returned reference and the pointer it
															+// contains stay valid while this library remains loaded.
															+extern "C" const std::vector<mnd::MandelGenerator*>& mandel_get_generators(void)
															+{
															+    static CpuGeneratorFloatAVX512 generator;
															+    static std::vector<mnd::MandelGenerator*> generators { &generator };
															+    return generators;
															+}
														
 
															+
														
 
															+
														
 
															+void CpuGeneratorFloatAVX512::generate(const mnd::MandelInfo& info, float* data)
														
 
															+{
														
 
															+}
														
--- a/libmandel/src/plugins/CpuGeneratorsAVXFMA.cpp
+++ b/libmandel/src/plugins/CpuGeneratorsAVXFMA.cpp
@@ -0,0 +1,204 @@
 
															+#include <immintrin.h>
														
 
															+#include <omp.h>
														
 
															+
														
 
															+#include <cmath>
														
 
															+#include <vector>
														
 
															+#include "CpuGenerators.h"
														
 
															+
														
 
															+#include "LightDoubleDouble.h"
														
 
															+#include "QuadDouble.h"
														
 
															+#include "HexDouble.h"
														
 
															+
														
 
															+/// Single-precision Mandelbrot generator exported by the AVX+FMA plugin.
															+///
															+/// The constructor registers the generator with its base class as
															+/// mnd::Precision::FLOAT running on the mnd::X86_AVX_FMA extension.
															+class CpuGeneratorFloatAVXFMA : public mnd::MandelGenerator
															+{
															+public:
															+    CpuGeneratorFloatAVXFMA() :
															+        MandelGenerator{ mnd::Precision::FLOAT, mnd::X86_AVX_FMA }
															+    {}
															+
															+    /// Overrides the base-class entry point; defined out of line in this file.
															+    void generate(const mnd::MandelInfo& info, float* data) override;
															+};
														
 
															+
														
 
															+
														
 
															+/// Plugin entry point (C linkage): returns the generators this plugin offers.
															+///
															+/// Both the generator and the list pointing at it are function-local statics,
															+/// so they are constructed on the first call and the same vector is handed
															+/// back by reference on every call.
															+extern "C" const std::vector<mnd::MandelGenerator*>& mandel_get_generators(void)
															+{
															+    static CpuGeneratorFloatAVXFMA generator;
															+    static std::vector<mnd::MandelGenerator*> generators { &generator };
															+    return generators;
															+}
														
 
															+
														
 
															+
														
 
															+/// Renders the view described by `info` into `data` using AVX + FMA float intrinsics.
															+///
															+/// Rows are spread over OpenMP threads when the file is compiled with OpenMP.
															+/// Within a row, 24 pixels are iterated at once as three independent 8-lane
															+/// vectors. Each lane iterates z -> z^2 + c, where c is the pixel coordinate
															+/// or (info.juliaX, info.juliaY) when info.julia is set. A lane stops counting
															+/// once |z|^2 exceeds 16.
															+///
															+/// \param info  supplies the view rectangle, the bitmap size (bWidth x bHeight),
															+///              maxIter and the `smooth` / `julia` switches read below.
															+/// \param data  output buffer; pixel (i, j) is written at index i + j * info.bWidth.
															+void CpuGeneratorFloatAVXFMA::generate(const mnd::MandelInfo& info, float* data)
															+{
															+    const bool parallel = true;
															+
															+    using T = float;
															+
															+    const auto& view = info.view;
															+    const T vx = mnd::convert<T>(view.x);
															+    const T vy = mnd::convert<T>(view.y);
															+    const T vw = mnd::convert<T>(view.width);
															+    const T vh = mnd::convert<T>(view.height);
															+
															+    const T jX = mnd::convert<T>(info.juliaX);
															+    const T jY = mnd::convert<T>(info.juliaY);
															+
															+    // Horizontal step per pixel and the left edge, broadcast to all 8 lanes.
															+    const float dppf = float(vw / info.bWidth);
															+    const float viewxf = vx;
															+    __m256 viewx = { viewxf, viewxf, viewxf, viewxf, viewxf, viewxf, viewxf, viewxf };
															+    __m256 dpp = { dppf, dppf, dppf, dppf, dppf, dppf, dppf, dppf };
															+
															+    __m256 juliaX = { jX, jX, jX, jX, jX, jX, jX, jX };
															+    __m256 juliaY = { jY, jY, jY, jY, jY, jY, jY, jY };
															+
															+#if defined(_OPENMP)
															+    if (parallel)
															+        omp_set_num_threads(omp_get_num_procs());
															+#   pragma omp parallel for schedule(static, 1) if (parallel)
															+#endif
															+    for (long j = 0; j < info.bHeight; j++) {
															+        // Vertical position of this row. The step must come from the view
															+        // *height*; using the width here distorts every non-square view.
															+        T y = vy + T(j) * vh / info.bHeight;
															+        __m256 ys = {y, y, y, y, y, y, y, y};
															+        for (long i = 0; i < info.bWidth; i += 24) {
															+            // Pixel column indices for the three 8-lane groups.
															+            __m256 pixc = { float(i), float(i + 1), float(i + 2), float(i + 3), float(i + 4), float(i + 5), float(i + 6), float(i + 7) };
															+            __m256 pixc2 = { float(i + 8), float(i + 9), float(i + 10), float(i + 11), float(i + 12), float(i + 13), float(i + 14), float(i + 15) };
															+            __m256 pixc3 = { float(i + 16), float(i + 17), float(i + 18), float(i + 19), float(i + 20), float(i + 21), float(i + 22), float(i + 23) };
															+
															+            __m256 xs = _mm256_add_ps(_mm256_mul_ps(dpp, pixc), viewx);
															+            __m256 xs2 = _mm256_add_ps(_mm256_mul_ps(dpp, pixc2), viewx);
															+            __m256 xs3 = _mm256_add_ps(_mm256_mul_ps(dpp, pixc3), viewx);
															+
															+            // counter*: iterations spent inside the threshold, kept as floats.
															+            // adder*:   1.0 while a lane is still counting; and-ing it with the
															+            //           compare mask zeroes it permanently once a lane fails.
															+            // results*: z value captured for smooth colouring.
															+            __m256 counter = { 0, 0, 0, 0, 0, 0, 0, 0 };
															+            __m256 adder = { 1, 1, 1, 1, 1, 1, 1, 1 };
															+            __m256 resultsa = { 0, 0, 0, 0, 0, 0, 0, 0 };
															+            __m256 resultsb = { 0, 0, 0, 0, 0, 0, 0, 0 };
															+
															+            __m256 counter2 = { 0, 0, 0, 0, 0, 0, 0, 0 };
															+            __m256 adder2 = { 1, 1, 1, 1, 1, 1, 1, 1 };
															+            __m256 resultsa2 = { 0, 0, 0, 0, 0, 0, 0, 0 };
															+            __m256 resultsb2 = { 0, 0, 0, 0, 0, 0, 0, 0 };
															+
															+            __m256 counter3 = { 0, 0, 0, 0, 0, 0, 0, 0 };
															+            __m256 adder3 = { 1, 1, 1, 1, 1, 1, 1, 1 };
															+            __m256 resultsa3 = { 0, 0, 0, 0, 0, 0, 0, 0 };
															+            __m256 resultsb3 = { 0, 0, 0, 0, 0, 0, 0, 0 };
															+
															+            __m256 threshold = { 16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f };
															+            __m256 two = { 2, 2, 2, 2, 2, 2, 2, 2 };
															+
															+            // z = a + b*i starts at the pixel coordinate.
															+            __m256 a = xs;
															+            __m256 a2 = xs2;
															+            __m256 a3 = xs3;
															+            __m256 b = ys;
															+            __m256 b2 = ys;
															+            __m256 b3 = ys;
															+
															+            // c is the pixel itself, or the fixed julia parameter.
															+            __m256 cx = info.julia ? juliaX : xs;
															+            __m256 cx2 = info.julia ? juliaX : xs2;
															+            __m256 cx3 = info.julia ? juliaX : xs3;
															+            __m256 cy = info.julia ? juliaY : ys;
															+
															+            if (info.smooth) {
															+                // Masks start all-true (threshold <= threshold). Inside the loop
															+                // they carry the *previous* iteration's test, so results* keep
															+                // following z until one step after a lane first fails the test.
															+                __m256 cmp = _mm256_cmp_ps(threshold, threshold, _CMP_LE_OQ);
															+                __m256 cmp2 = _mm256_cmp_ps(threshold, threshold, _CMP_LE_OQ);
															+                __m256 cmp3 = _mm256_cmp_ps(threshold, threshold, _CMP_LE_OQ);
															+                for (int k = 0; k < info.maxIter; k++) {
															+                    __m256 bb = _mm256_mul_ps(b, b);
															+                    __m256 bb2 = _mm256_mul_ps(b2, b2);
															+                    __m256 bb3 = _mm256_mul_ps(b3, b3);
															+                    __m256 ab = _mm256_mul_ps(a, b);
															+                    __m256 ab2 = _mm256_mul_ps(a2, b2);
															+                    __m256 ab3 = _mm256_mul_ps(a3, b3);
															+                    __m256 olda = a;
															+                    __m256 olda2 = a2;
															+                    __m256 olda3 = a3;
															+                    // a' = a*a - b*b + cx,  b' = 2*a*b + cy
															+                    a = _mm256_add_ps(_mm256_fmsub_ps(a, a, bb), cx);
															+                    a2 = _mm256_add_ps(_mm256_fmsub_ps(a2, a2, bb2), cx2);
															+                    a3 = _mm256_add_ps(_mm256_fmsub_ps(a3, a3, bb3), cx3);
															+                    b = _mm256_fmadd_ps(two, ab, cy);
															+                    b2 = _mm256_fmadd_ps(two, ab2, cy);
															+                    b3 = _mm256_fmadd_ps(two, ab3, cy);
															+                    resultsa = _mm256_blendv_ps(resultsa, a, cmp);
															+                    resultsb = _mm256_blendv_ps(resultsb, b, cmp);
															+                    resultsa2 = _mm256_blendv_ps(resultsa2, a2, cmp2);
															+                    resultsb2 = _mm256_blendv_ps(resultsb2, b2, cmp2);
															+                    resultsa3 = _mm256_blendv_ps(resultsa3, a3, cmp3);
															+                    resultsb3 = _mm256_blendv_ps(resultsb3, b3, cmp3);
															+                    // Test |z|^2 <= 16 on the value from before this step.
															+                    cmp = _mm256_cmp_ps(_mm256_fmadd_ps(olda, olda, bb), threshold, _CMP_LE_OQ);
															+                    cmp2 = _mm256_cmp_ps(_mm256_fmadd_ps(olda2, olda2, bb2), threshold, _CMP_LE_OQ);
															+                    cmp3 = _mm256_cmp_ps(_mm256_fmadd_ps(olda3, olda3, bb3), threshold, _CMP_LE_OQ);
															+                    adder = _mm256_and_ps(adder, cmp);
															+                    counter = _mm256_add_ps(counter, adder);
															+                    adder2 = _mm256_and_ps(adder2, cmp2);
															+                    counter2 = _mm256_add_ps(counter2, adder2);
															+                    adder3 = _mm256_and_ps(adder3, cmp3);
															+                    counter3 = _mm256_add_ps(counter3, adder3);
															+                    // Every 8th iteration: stop early once no lane in any of the
															+                    // three groups passes the threshold test any more.
															+                    if ((k & 0x7) == 0 && _mm256_testz_ps(cmp, cmp) != 0 && _mm256_testz_ps(cmp2, cmp2) != 0 && _mm256_testz_ps(cmp3, cmp3) != 0) {
															+                        break;
															+                    }
															+                }
															+            }
															+            else {
															+                for (int k = 0; k < info.maxIter; k++) {
															+                    __m256 bb = _mm256_mul_ps(b, b);
															+                    __m256 bb2 = _mm256_mul_ps(b2, b2);
															+                    __m256 bb3 = _mm256_mul_ps(b3, b3);
															+                    __m256 ab = _mm256_mul_ps(a, b);
															+                    __m256 ab2 = _mm256_mul_ps(a2, b2);
															+                    __m256 ab3 = _mm256_mul_ps(a3, b3);
															+                    __m256 cmp = _mm256_cmp_ps(_mm256_fmadd_ps(a, a, bb), threshold, _CMP_LE_OQ);
															+                    __m256 cmp2 = _mm256_cmp_ps(_mm256_fmadd_ps(a2, a2, bb2), threshold, _CMP_LE_OQ);
															+                    __m256 cmp3 = _mm256_cmp_ps(_mm256_fmadd_ps(a3, a3, bb3), threshold, _CMP_LE_OQ);
															+                    // a' = a*a - b*b + cx,  b' = 2*a*b + cy
															+                    a = _mm256_add_ps(_mm256_fmsub_ps(a, a, bb), cx);
															+                    a2 = _mm256_add_ps(_mm256_fmsub_ps(a2, a2, bb2), cx2);
															+                    a3 = _mm256_add_ps(_mm256_fmsub_ps(a3, a3, bb3), cx3);
															+                    b = _mm256_fmadd_ps(two, ab, cy);
															+                    b2 = _mm256_fmadd_ps(two, ab2, cy);
															+                    b3 = _mm256_fmadd_ps(two, ab3, cy);
															+                    adder = _mm256_and_ps(adder, cmp);
															+                    counter = _mm256_add_ps(counter, adder);
															+                    adder2 = _mm256_and_ps(adder2, cmp2);
															+                    counter2 = _mm256_add_ps(counter2, adder2);
															+                    adder3 = _mm256_and_ps(adder3, cmp3);
															+                    counter3 = _mm256_add_ps(counter3, adder3);
															+                    // Every 8th iteration: stop early once no lane in any of the
															+                    // three groups passes the threshold test any more.
															+                    if ((k & 0x7) == 0 && _mm256_testz_ps(cmp, cmp) != 0 && _mm256_testz_ps(cmp2, cmp2) != 0 && _mm256_testz_ps(cmp3, cmp3) != 0) {
															+                        break;
															+                    }
															+                }
															+            }
															+
															+            // Scratch layout: [0,24) iteration counts, [24,48) captured real
															+            // parts, [48,72) captured imaginary parts.
															+            float resData[72];
															+            float* ftRes = resData;
															+            float* resa = ftRes + 24;
															+            float* resb = resa + 24;
															+
															+            _mm256_storeu_ps(ftRes, counter);
															+            _mm256_storeu_ps(ftRes + 8, counter2);
															+            _mm256_storeu_ps(ftRes + 16, counter3);
															+            _mm256_storeu_ps(resa, resultsa);
															+            _mm256_storeu_ps(resa + 8, resultsa2);
															+            _mm256_storeu_ps(resa + 16, resultsa3);
															+            _mm256_storeu_ps(resb, resultsb);
															+            _mm256_storeu_ps(resb + 8, resultsb2);
															+            _mm256_storeu_ps(resb + 16, resultsb3);
															+            // Copy out only the lanes that fall inside the bitmap; the last
															+            // group of a row may extend past bWidth.
															+            for (int k = 0; k < 24 && i + k < info.bWidth; k++) {
															+                if (info.smooth) {
															+                    // Counts outside [0, maxIter) are clamped to maxIter; otherwise
															+                    // apply the fractional correction count + 1 - log2(ln(|z|^2) / 2).
															+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? info.maxIter :
															+                        ftRes[k] >= info.maxIter ? info.maxIter :
															+                        ((float)ftRes[k]) + 1 - ::log2(::log(resa[k] * resa[k] + resb[k] * resb[k]) / 2);
															+                }
															+                else {
															+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? info.maxIter : ftRes[k];
															+                }
															+            }
															+        }
															+    }
															+}
														
 
															+
														
--- a/libmandel/src/plugins/avx512.so
+++ b/libmandel/src/plugins/avx512.so
--- a/src/Almond.cpp
+++ b/src/Almond.cpp
@@ -16,6 +16,15 @@ Almond::Almond(QWidget* parent) :
 
															     QMainWindow{ parent, Qt::WindowFlags() },
														
 
															     mandelContext{ mnd::initializeContext() }
														
 
															 {
														
 
															+    std::unique_ptr<mnd::CalcPlugin> cp =
														
 
															+            std::make_unique<mnd::CalcPlugin>("./plugins/libavxfma.so");
														
 
															+    if (!cp->isValid()) {
														
 
															+        //exit(1);
														
 
															+    }
														
 
															+    else {
														
 
															+        mandelContext.loadPlugin(std::move(cp));
														
 
															+    }
														
 
															+
														
 
															     ui.setupUi(this);
														
 
															     fractalWidget = new FractalWidget(this);
														
 
															     fractalWidget->setGenerator(&mandelContext.getDefaultGenerator());