Преглед на файлове

asmjit and neon support

Nicolas Winkler преди 5 години
родител
ревизия
59498fc650

+ 2 - 2
CMakeLists.txt

@@ -16,7 +16,7 @@ find_package(Boost 1.65 REQUIRED)
 
 find_package(FFmpeg COMPONENTS AVCODEC AVDEVICE AVFORMAT AVUTIL SWSCALE REQUIRED)
 
-message( ${FFMPEG_INCLUDE_DIRS})
+#message(${FFMPEG_INCLUDE_DIRS})
 
 set(CMAKE_CXX_STANDARD 17)
 
@@ -34,7 +34,7 @@ add_subdirectory(libmandel)
 
 target_include_directories(Almond PUBLIC ${FFMPEG_INCLUDE_DIRS})
 
-target_link_libraries(Almond PUBLIC mandel asmjit qd)
+target_link_libraries(Almond PUBLIC mandel)
 target_link_libraries(Almond PUBLIC Qt5::Core Qt5::Widgets Qt5::OpenGL Qt5::Xml)
 target_link_libraries(Almond PUBLIC ${FFMPEG_LIBRARIES})
 target_link_libraries(Almond PUBLIC OpenGL::GL)

+ 13 - 3
libmandel/include/CpuGenerators.h

@@ -92,10 +92,9 @@ public:
 
 #else //if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) 
 template<bool parallel>
-class mnd::CpuGenerator<float, mnd::ARM_NEON, parallel> : public Generator
+class mnd::CpuGenerator<float, mnd::ARM_NEON, parallel> : public MandelGenerator
 {
 public:
-    CpuGenerator(void);
     inline CpuGenerator(void) :
         MandelGenerator{ mnd::Precision::FLOAT, mnd::ARM_NEON }
     {
@@ -104,7 +103,7 @@ public:
 };
 
 template<bool parallel>
-class mnd::CpuGenerator<double, mnd::ARM_NEON, parallel> : public Generator
+class mnd::CpuGenerator<double, mnd::ARM_NEON, parallel> : public MandelGenerator
 {
 public:
     inline CpuGenerator(void) :
@@ -113,6 +112,17 @@ public:
     }
     virtual void generate(const MandelInfo& info, float* data);
 };
+
+template<bool parallel>
+class mnd::CpuGenerator<mnd::DoubleDouble, mnd::ARM_NEON, parallel> : public MandelGenerator
+{ // CpuGenerator specialization for mnd::DoubleDouble values on the ARM_NEON extension
+public:
+    inline CpuGenerator(void) : // registers this generator as DOUBLE_DOUBLE precision / ARM_NEON
+        MandelGenerator{ mnd::Precision::DOUBLE_DOUBLE, mnd::ARM_NEON }
+    {
+    }
+    virtual void generate(const MandelInfo& info, float* data) override; // `override`: compiler verifies the base-class signature
+};
 #endif
 
 

+ 1 - 0
libmandel/include/Generators.h

@@ -99,6 +99,7 @@ enum class mnd::GeneratorType : int
     DOUBLE_DOUBLE,
     DOUBLE_DOUBLE_AVX,
     DOUBLE_DOUBLE_AVX_FMA,
+    DOUBLE_DOUBLE_NEON,
     QUAD_DOUBLE,
     FLOAT128,
     FLOAT256,

+ 2 - 0
libmandel/include/IterationGenerator.h

@@ -53,6 +53,7 @@ private:
 };
 
 
+#ifdef WITH_ASMJIT
 #if defined(__x86_64__) || defined(_M_X64)
 class mnd::CompiledGenerator : public mnd::MandelGenerator
 {
@@ -81,6 +82,7 @@ public:
     virtual void generate(const MandelInfo& info, float* data) override;
 };
 #endif
+#endif // WITH_ASMJIT
 
 
 #ifdef WITH_OPENCL

+ 5 - 0
libmandel/include/Mandel.h

@@ -5,6 +5,11 @@
 //#include <asmjit/asmjit.h>
 namespace asmjit { class JitRuntime; }
 
+#ifndef WITH_ASMJIT
+// if no asmjit, use dummy implementation
+namespace asmjit { class JitRuntime{}; }
+#endif // WITH_ASMJIT
+
 #include <vector>
 #include <map>
 #include <string>

+ 10 - 1
libmandel/src/IterationCompiler.cpp

@@ -1,8 +1,11 @@
 #include "IterationCompiler.h"
 #include "NaiveIRGenerator.h"
 
-#include "ExecData.h"
 #include "Mandel.h"
+#ifdef WITH_ASMJIT
+#include "ExecData.h"
+#endif // WITH_ASMJIT
+
 #include "OpenClInternal.h"
 #include "OpenClCode.h"
 
@@ -14,6 +17,7 @@
 using namespace std::string_literals;
 namespace mnd
 {
+#ifdef WITH_ASMJIT
     struct CompileVisitor
     {
         using Reg = asmjit::x86::Xmm;
@@ -560,6 +564,7 @@ namespace mnd
         return CompiledGeneratorVec{ std::move(ed) };
     }
 
+#endif // WITH_ASMJIT
 
     struct OpenClVisitor
     {
@@ -775,6 +780,9 @@ namespace mnd
 
         ir::Formula irf = mnd::expand(z0o, zio);
         irf.optimize();
+
+
+#ifdef WITH_ASMJIT
         printf("ir: %s\n", irf.toString().c_str()); fflush(stdout);
         auto dg = std::make_unique<CompiledGenerator>(compile(irf));
         printf("asm: %s\n", dg->dump().c_str()); fflush(stdout);
@@ -784,6 +792,7 @@ namespace mnd
             printf("asm avxvec: %s\n", dgavx->dump().c_str()); fflush(stdout);
             vec.push_back(std::move(dgavx));
         }
+#endif // WITH_ASMJIT
 
         //vec.push_back(std::make_unique<NaiveIRGenerator<mnd::DoubleDouble>>(irf));
         //vec.push_back(std::make_unique<NaiveIRGenerator<mnd::QuadDouble>>(irf));

+ 7 - 3
libmandel/src/IterationGenerator.cpp

@@ -1,5 +1,4 @@
 #include "IterationGenerator.h"
-#include "ExecData.h"
 #include "Mandel.h"
 
 #include "OpenClInternal.h"
@@ -125,10 +124,12 @@ std::complex<double> NaiveGenerator::calc(mnd::Expression& expr, std::complex<do
     return result;
 }
 
+#ifdef WITH_ASMJIT
+
+#include "ExecData.h"
+
 using mnd::CompiledGenerator;
 using mnd::CompiledGeneratorVec;
-using mnd::CompiledClGenerator;
-using mnd::CompiledClGeneratorDouble;
 
 
 CompiledGenerator::CompiledGenerator(std::unique_ptr<mnd::ExecData> execData,
@@ -236,8 +237,11 @@ void CompiledGeneratorVec::generate(const mnd::MandelInfo& info, float* data)
     }
 }
 
+#endif // WITH_ASMJIT
 
 #ifdef WITH_OPENCL
+using mnd::CompiledClGenerator;
+using mnd::CompiledClGeneratorDouble;
 CompiledClGenerator::CompiledClGenerator(mnd::MandelDevice& device, const std::string& code) :
     ClGeneratorFloat{ device, code }
 {

+ 10 - 2
libmandel/src/Mandel.cpp

@@ -6,7 +6,9 @@
 #include "OpenClInternal.h"
 #include "OpenClCode.h"
 
+#ifdef WITH_ASMJIT
 #include <asmjit/asmjit.h>
+#endif // WITH_ASMJIT
 
 #include <map>
 
@@ -44,6 +46,7 @@ static const std::map<mnd::GeneratorType, std::string> typeNames =
     { mnd::GeneratorType::DOUBLE_DOUBLE, "double double" },
     { mnd::GeneratorType::DOUBLE_DOUBLE_AVX, "double double AVX" },
     { mnd::GeneratorType::DOUBLE_DOUBLE_AVX_FMA, "double double AVX+FMA" },
+    { mnd::GeneratorType::DOUBLE_DOUBLE_NEON, "double double NEON" },
     { mnd::GeneratorType::QUAD_DOUBLE, "quad double" },
     { mnd::GeneratorType::FLOAT128, "float128" },
     { mnd::GeneratorType::FLOAT256, "float256" },
@@ -112,8 +115,10 @@ bool MandelDevice::supportsDouble(void) const
 }
 
 
-MandelContext::MandelContext(void) :
-    jitRuntime{ std::make_unique<asmjit::JitRuntime>() }
+MandelContext::MandelContext(void)
+#ifdef WITH_ASMJIT
+    : jitRuntime{ std::make_unique<asmjit::JitRuntime>() }
+#endif // WITH_ASMJIT
 {
 
 #if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) 
@@ -151,8 +156,10 @@ MandelContext::MandelContext(void) :
     if (cpuInfo.hasNeon()) {
         auto fl = std::make_unique<CpuGenerator<float, mnd::ARM_NEON, true>>();
         auto db = std::make_unique<CpuGenerator<double, mnd::ARM_NEON, true>>();
+        auto ddb = std::make_unique<CpuGenerator<mnd::DoubleDouble, mnd::ARM_NEON, true>>();
         cpuGenerators.insert({ GeneratorType::FLOAT_NEON, std::move(fl) });
         cpuGenerators.insert({ GeneratorType::DOUBLE_NEON, std::move(db) });
+        cpuGenerators.insert({ GeneratorType::DOUBLE_DOUBLE_NEON, std::move(ddb) });
     }
 #endif
     {
@@ -218,6 +225,7 @@ std::unique_ptr<mnd::AdaptiveGenerator> MandelContext::createAdaptiveGenerator(v
     if (cpuInfo.hasNeon()) {
         floatGen = getCpuGenerator(GeneratorType::FLOAT_NEON);
         doubleGen = getCpuGenerator(GeneratorType::DOUBLE_NEON);
+        doubleDoubleGen = getCpuGenerator(GeneratorType::DOUBLE_DOUBLE_NEON);
     }
 
     if (!devices.empty()) {