Преглед изворног кода

Merge branch 'master' of http://192.168.1.47/nicolas/Almond

Nicolas Winkler пре 5 година
родитељ
комит
9c41054647

+ 1 - 1
libmandel/CMakeLists.txt

@@ -40,7 +40,7 @@ FILE(GLOB MandelHeaders include/*.h)
 if (ARCH STREQUAL "X86_64" OR ARCH STREQUAL "X86")
     list(APPEND MandelSources src/CpuGeneratorsAVX.cpp src/CpuGeneratorsAVXFMA.cpp src/CpuGeneratorsSSE2.cpp)
 elseif(ARCH STREQUAL "ARM")
-    #list(APPEND MandelSources src/CpuGeneratorsNeon.cpp)
+    list(APPEND MandelSources src/CpuGeneratorsNeon.cpp)
 endif()
 
 #    message(${MandelSources})

+ 10 - 3
libmandel/include/CpuGenerators.h

@@ -93,9 +93,16 @@ public:
     virtual void generate(const MandelInfo& info, float* data);
 };
 
-#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) 
-template<typename T, bool parallel>
-class mnd::CpuGenerator<T, mnd::ARM_NEON, parallel> : public Generator
+#else //if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) 
+template<bool parallel>
+class mnd::CpuGenerator<float, mnd::ARM_NEON, parallel> : public Generator
+{
+public:
+    virtual void generate(const MandelInfo& info, float* data);
+};
+
+template<bool parallel>
+class mnd::CpuGenerator<double, mnd::ARM_NEON, parallel> : public Generator
 {
 public:
     virtual void generate(const MandelInfo& info, float* data);

+ 16 - 6
libmandel/src/CpuGeneratorsNeon.cpp

@@ -4,11 +4,20 @@
 #include <arm_neon.h>
 #include <memory>
 
-using mnd::CpuGeneratorNeonFloat;
-using mnd::CpuGeneratorNeonDouble;
+using mnd::CpuGenerator;
+
+namespace mnd
+{
+    template class CpuGenerator<float, mnd::ARM_NEON, false>;
+    template class CpuGenerator<float, mnd::ARM_NEON, true>;
+
+    template class CpuGenerator<double, mnd::ARM_NEON, false>;
+    template class CpuGenerator<double, mnd::ARM_NEON, true>;
+}
 
 
-void CpuGeneratorNeonFloat::generate(const mnd::MandelInfo& info, float* data)
+template<bool parallel>
+void CpuGenerator<float, mnd::ARM_NEON, parallel>::generate(const mnd::MandelInfo& info, float* data)
 {
     using T = float;
     const MandelViewport& view = info.view;
@@ -71,7 +80,8 @@ void CpuGeneratorNeonFloat::generate(const mnd::MandelInfo& info, float* data)
 }
 
 
-void CpuGeneratorNeonDouble::generate(const mnd::MandelInfo& info, float* data)
+template<bool parallel>
+void CpuGenerator<double, mnd::ARM_NEON, parallel>::generate(const mnd::MandelInfo& info, float* data)
 {
     using T = double;
     const MandelViewport& view = info.view;
@@ -82,8 +92,8 @@ void CpuGeneratorNeonDouble::generate(const mnd::MandelInfo& info, float* data)
         long i = 0;
         for (i; i < info.bWidth; i += 2) {
             double xsvals[] = {
-                (view.x + double(i) * view.width / info.bWidth),
-                (view.x + double(i + 1) * view.width / info.bWidth),
+                double(view.x + double(i) * view.width / info.bWidth),
+                double(view.x + double(i + 1) * view.width / info.bWidth),
             };
 
             float64x2_t xs = vld1q_f64(xsvals);

+ 1 - 1
libmandel/src/Mandel.cpp

@@ -33,7 +33,7 @@ static const std::map<mnd::GeneratorType, std::string> typeNames =
     { mnd::GeneratorType::DOUBLE_SSE2, "double SSE2" },
     { mnd::GeneratorType::DOUBLE_AVX, "double AVX" },
     { mnd::GeneratorType::DOUBLE_AVX512, "double AVX512" },
-    { mnd::GeneratorType::DOUBLE_NEON, "double Neon" },
+    { mnd::GeneratorType::DOUBLE_NEON, "double NEON" },
     { mnd::GeneratorType::DOUBLE_DOUBLE, "double double" },
     { mnd::GeneratorType::DOUBLE_DOUBLE_AVX, "double double AVX" },
     { mnd::GeneratorType::DOUBLE_DOUBLE_AVX_FMA, "double double AVX+FMA" },