Bläddra i källkod

Remove AVX instructions from the SSE generator by using ::logf only in compilation units that don't use AVX

Nicolas Winkler 5 år sedan
förälder
incheckning
60cca48f8a

+ 1 - 4
libmandel/CMakeLists.txt

@@ -42,6 +42,7 @@ endif()
 #    message(${MandelSources})
 
 add_library(mandel STATIC ${MandelSources})
+set_source_files_properties(${MandelSources} PROPERTIES COMPILE_FLAGS -march=native)
 
 FILE(GLOB QdSources qd-2.3.22/src/*.cpp)
 
@@ -130,7 +131,3 @@ endif()
 if(OpenCL_FOUND)
     target_link_libraries(mandel PUBLIC OpenCL::OpenCL)
 endif()
-if(MPFR_FOUND)
-    target_link_libraries(mandel PUBLIC MPFR_LIBRARIES)
-endif()
-

+ 3 - 3
libmandel/src/CpuGeneratorsAVX.cpp

@@ -276,7 +276,7 @@ void CpuGenerator<double, mnd::X86_AVX, parallel>::generate(const mnd::MandelInf
                 if (info.smooth)
                     data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? float(info.maxIter) :
                         ftRes[k] >= info.maxIter ? float(info.maxIter) :
-                        float(((float)ftRes[k]) + 1 - ::log(::log(resa[k] * resa[k] + resb[k] * resb[k]) / 2) / ::logf(2.0f));
+                        float(((float)ftRes[k]) + 1 - ::log(::log(resa[k] * resa[k] + resb[k] * resb[k]) / 2) / ::log(2.0f));
                 else
                     data[i + k + j * info.bWidth] = ftRes[k] > 0 ? float(ftRes[k]) : info.maxIter;
             }
@@ -289,7 +289,7 @@ void CpuGenerator<double, mnd::X86_AVX, parallel>::generate(const mnd::MandelInf
                 if (info.smooth)
                     data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? float(info.maxIter) :
                         ftRes[k] >= info.maxIter ? float(info.maxIter) :
-                        float(((float)ftRes[k]) + 1 - ::log(::log(resa[k] * resa[k] + resb[k] * resb[k]) / 2) / ::logf(2.0f));
+                        float(((float)ftRes[k]) + 1 - ::log(::log(resa[k] * resa[k] + resb[k] * resb[k]) / 2) / ::log(2.0f));
                 else
                     data[i + k + j * info.bWidth] = ftRes[k] > 0 ? float(ftRes[k]) : info.maxIter;
             }
@@ -504,7 +504,7 @@ void CpuGenerator<mnd::DoubleDouble, mnd::X86_AVX, parallel>::generate(const mnd
                 if (info.smooth)
                     data[i + k + j * info.bWidth] = float(ftRes[k] <= 0 ? info.maxIter :
                         ftRes[k] >= info.maxIter ? info.maxIter :
-                        ((float)ftRes[k]) + 1 - ::logf(::logf(float(resa[k] * resa[k] + resb[k] * resb[k])) / 2) / ::logf(2.0f));
+                        ((float)ftRes[k]) + 1 - ::log(::log(float(resa[k] * resa[k] + resb[k] * resb[k])) / 2) / ::log(2.0f));
                 else
                     data[i + k + j * info.bWidth] = ftRes[k] > 0 ? float(ftRes[k]) : info.maxIter;
             }

+ 10 - 5
libmandel/src/CpuGeneratorsAVXFMA.cpp

@@ -366,6 +366,10 @@ struct AvxDoubleDouble
         x{ a, b }
     {}
 
+    inline AvxDoubleDouble(double a, double b) :
+        x{ _mm256_set1_pd(a), _mm256_set1_pd(b) }
+    {}
+
 
     inline AvxDoubleDouble operator + (const AvxDoubleDouble& sm) const
     {
@@ -400,7 +404,7 @@ void CpuGenerator<mnd::DoubleDouble, mnd::X86_AVX_FMA, parallel>::generate(const
 {
     const MandelViewport& view = info.view;
 
-    using T = DoubleDouble;
+    using T = LightDoubleDouble;
 
     T viewx = mnd::convert<T>(view.x);
     T viewy = mnd::convert<T>(view.y);
@@ -410,8 +414,9 @@ void CpuGenerator<mnd::DoubleDouble, mnd::X86_AVX_FMA, parallel>::generate(const
 
     T jX = mnd::convert<T>(info.juliaX);
     T jY = mnd::convert<T>(info.juliaY);
-    AvxDoubleDouble juliaX = { __m256d{ jX.x[0], jX.x[0], jX.x[0], jX.x[0] }, __m256d{ jX.x[1], jX.x[1], jX.x[1], jX.x[1] } };
-    AvxDoubleDouble juliaY = { __m256d{ jY.x[0], jY.x[0], jY.x[0], jY.x[0] }, __m256d{ jY.x[1], jY.x[1], jY.x[1], jY.x[1] } };
+
+    AvxDoubleDouble juliaX = { jX[0], jX[1] };
+    AvxDoubleDouble juliaY = { jY[0], jY[1] };
 
     if constexpr(parallel)
         omp_set_num_threads(omp_get_num_procs());
@@ -428,11 +433,11 @@ void CpuGenerator<mnd::DoubleDouble, mnd::X86_AVX_FMA, parallel>::generate(const
             T x4 = x3 + wpp;
 
             __m256d x0s = {
-                x1.x[0], x2.x[0], x3.x[0], x4.x[0],
+                x1[0], x2[0], x3[0], x4[0],
             };
 
             __m256d x1s = {
-                x1.x[1], x2.x[1], x3.x[1], x4.x[1],
+                x1[1], x2[1], x3[1], x4[1],
             };
 
             AvxDoubleDouble xs{ x0s, x1s };

+ 2 - 2
libmandel/src/CpuGeneratorsSSE2.cpp

@@ -112,7 +112,7 @@ void CpuGenerator<float, mnd::X86_SSE2, parallel>::generate(const mnd::MandelInf
                 if (info.smooth)
                     data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? info.maxIter :
                     ftRes[k] >= info.maxIter ? info.maxIter :
-                    ((float)ftRes[k]) + 1 - ::log(::log(resa[k] * resa[k] + resb[k] * resb[k]) / 2) / ::log(2.0f);
+                    ((float)ftRes[k]) + 1 - ::logf(::logf(resa[k] * resa[k] + resb[k] * resb[k]) / 2) / ::logf(2.0f);
                 else
                     data[i + k + j * info.bWidth] = ftRes[k] > 0 ? float(ftRes[k]) : info.maxIter;
             }
@@ -219,7 +219,7 @@ void CpuGenerator<double, mnd::X86_SSE2, parallel>::generate(const mnd::MandelIn
                 if (info.smooth)
                     data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? info.maxIter :
                     ftRes[k] >= info.maxIter ? info.maxIter :
-                    ((float)ftRes[k]) + 1 - ::log(::log(resa[k] * resa[k] + resb[k] * resb[k]) / 2) / ::log(2.0f);
+                    ((float)ftRes[k]) + 1 - ::logf(::logf(resa[k] * resa[k] + resb[k] * resb[k]) / 2) / ::logf(2.0f);
                 else
                     data[i + k + j * info.bWidth] = ftRes[k] > 0 ? float(ftRes[k]) : info.maxIter;
             }