1
0
Pārlūkot izejas kodu

OpenMP improvements

Nicolas Winkler 5 gadi atpakaļ
vecāks
revīzija
ea90e9ff63

+ 1 - 1
CMakeLists.txt

@@ -47,7 +47,7 @@ endif(Boost_FOUND)
 
 
 if(OpenMP_CXX_FOUND)
-    target_link_libraries(mandel PUBLIC OpenMP::OpenMP_CXX)
+    target_link_libraries(Almond PUBLIC OpenMP::OpenMP_CXX)
 endif()
 
 install(TARGETS Almond RUNTIME DESTINATION "bin")

+ 1 - 2
libmandel/CMakeLists.txt

@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.9)
+cmake_minimum_required(VERSION 3.12)
 
 set(ARCH "X86_64" CACHE STRING "Target Architecture")
 
@@ -87,7 +87,6 @@ if (APPLE AND OpenCL_FOUND)
     SET(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS} -framework OpenCL")
 endif()
 
-    target_compile_definitions(mandel PUBLIC WITH_BOOST)
 if(Boost_FOUND)
     target_compile_definitions(mandel PUBLIC WITH_BOOST)
     target_include_directories(mandel PRIVATE ${Boost_INCLUDE_DIRS})

+ 6 - 2
libmandel/src/CpuGenerators.cpp

@@ -67,9 +67,11 @@ void CpuGenerator<T, mnd::NONE, parallel>::generate(const mnd::MandelInfo& info,
     T juliaX = mnd::convert<T>(info.juliaX);
     T juliaY = mnd::convert<T>(info.juliaY);
 
+#if defined(_OPENMP)
     if constexpr (parallel)
         omp_set_num_threads(omp_get_num_procs());
-#pragma omp parallel for schedule(static, 1) if (parallel)
+#   pragma omp parallel for schedule(static, 1) if (parallel)
+#endif
     for (long j = 0; j < info.bHeight; j++) {
         T y = viewy + T(double(j)) * hpp;
         for (long i = 0; i < info.bWidth; i++) {
@@ -215,9 +217,11 @@ void CpuGenerator<mnd::MpfrFloat<bits>, mnd::NONE, parallel>::generate(const mnd
     const MandelViewport& view = info.view;
     using T = mnd::MpfrFloat<bits>;
 
+#if defined(_OPENMP)
     if constexpr (parallel)
         omp_set_num_threads(2 * omp_get_num_procs());
-#pragma omp parallel for if (parallel)
+#   pragma omp parallel for if (parallel)
+#endif
     for (long j = 0; j < info.bHeight; j++) {
         T y = T(view.y) + T(j) * T(view.height / info.bHeight);
         long i = 0;

+ 9 - 3
libmandel/src/CpuGeneratorsAVX.cpp

@@ -37,9 +37,11 @@ void CpuGenerator<float, mnd::X86_AVX, parallel>::generate(const mnd::MandelInfo
     __m256 juliaX = { jX, jX, jX, jX, jX, jX, jX, jX };
     __m256 juliaY = { jY, jY, jY, jY, jY, jY, jY, jY };
 
+#if defined(_OPENMP)
     if constexpr(parallel)
         omp_set_num_threads(omp_get_num_procs());
-#pragma omp parallel for schedule(static, 1) if (parallel)
+#   pragma omp parallel for schedule(static, 1) if (parallel)
+#endif
     for (long j = 0; j < info.bHeight; j++) {
         T y = T(view.y) + T(j) * T(view.height / info.bHeight);
         __m256 ys = _mm256_set1_ps(y);
@@ -172,9 +174,11 @@ void CpuGenerator<double, mnd::X86_AVX, parallel>::generate(const mnd::MandelInf
     __m256d juliaX = { jX, jX, jX, jX };
     __m256d juliaY = { jY, jY, jY, jY };
 
+#if defined(_OPENMP)
     if constexpr(parallel)
         omp_set_num_threads(omp_get_num_procs());
-#pragma omp parallel for schedule(static, 1) if (parallel)
+#   pragma omp parallel for schedule(static, 1) if (parallel)
+#endif
     for (long j = 0; j < info.bHeight; j++) {
         T y = T(view.y + T(j) * view.height / info.bHeight);
         __m256d ys = { y, y, y, y };
@@ -430,9 +434,11 @@ void CpuGenerator<mnd::DoubleDouble, mnd::X86_AVX, parallel>::generate(const mnd
     AvxDoubleDouble juliaX = { jX[0], jX[1] };
     AvxDoubleDouble juliaY = { jY[0], jY[1] };
 
+#if defined(_OPENMP)
     if constexpr(parallel)
         omp_set_num_threads(omp_get_num_procs());
-#pragma omp parallel for schedule(static, 1) if (parallel)
+#   pragma omp parallel for schedule(static, 1) if (parallel)
+#endif
     for (long j = 0; j < info.bHeight; j++) {
         T y = viewy + T(double(j)) * hpp;
         AvxDoubleDouble ys{ y[0], y[1] };

+ 5 - 1
libmandel/src/CpuGeneratorsAVX512.cpp

@@ -29,9 +29,11 @@ void CpuGenerator<float, mnd::X86_AVX_512, parallel>::generate(const mnd::Mandel
     __m512 enumerate = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
     __m512 two = _mm512_set1_ps(2);
 
+#if defined(_OPENMP)
     if constexpr(parallel)
         omp_set_num_threads(omp_get_num_procs());
 #pragma omp parallel for schedule(static, 1) if (parallel)
+#endif
     for (long j = 0; j < info.bHeight; j++) {
         T y = T(view.y + double(j) * view.height / info.bHeight);
         __m512 ys = _mm512_set1_ps(y);
@@ -171,9 +173,11 @@ void CpuGenerator<double, mnd::X86_AVX_512, parallel>::generate(const mnd::Mande
     __m512d viewx = { viewxf, viewxf, viewxf, viewxf, viewxf, viewxf, viewxf, viewxf };
     __m512d dpp = { dppf, dppf, dppf, dppf, dppf, dppf, dppf, dppf };
 
+#if defined(_OPENMP)
     if constexpr(parallel)
         omp_set_num_threads(omp_get_num_procs());
-#pragma omp parallel for schedule(static, 1) if (parallel)
+#   pragma omp parallel for schedule(static, 1) if (parallel)
+#endif
     for (long j = 0; j < info.bHeight; j++) {
         T y = T(view.y + double(j) * view.height / info.bHeight);
         __m512d ys = { y, y, y, y, y, y, y, y };

+ 10 - 3
libmandel/src/CpuGeneratorsAVXFMA.cpp

@@ -37,9 +37,11 @@ void CpuGenerator<float, mnd::X86_AVX_FMA, parallel>::generate(const mnd::Mandel
     __m256 juliaX = { jX, jX, jX, jX, jX, jX, jX, jX };
     __m256 juliaY = { jY, jY, jY, jY, jY, jY, jY, jY };
 
+#if defined(_OPENMP)
     if constexpr(parallel)
         omp_set_num_threads(omp_get_num_procs());
-#pragma omp parallel for schedule(static, 1) if (parallel)
+#   pragma omp parallel for schedule(static, 1) if (parallel)
+#endif
     for (long j = 0; j < info.bHeight; j++) {
         T y = T(view.y) + T(j) * T(view.height / info.bHeight);
         __m256 ys = {y, y, y, y, y, y, y, y};
@@ -204,9 +206,12 @@ void CpuGenerator<double, mnd::X86_AVX_FMA, parallel>::generate(const mnd::Mande
     __m256d juliaX = { jX, jX, jX, jX };
     __m256d juliaY = { jY, jY, jY, jY };
 
+
+#if defined(_OPENMP)
     if constexpr(parallel)
         omp_set_num_threads(omp_get_num_procs());
-#pragma omp parallel for schedule(static, 1) if (parallel)
+#   pragma omp parallel for schedule(static, 1) if (parallel)
+#endif
     for (long j = 0; j < info.bHeight; j++) {
         T y = T(view.y + T(j) * view.height / info.bHeight);
         __m256d ys = { y, y, y, y };
@@ -418,9 +423,11 @@ void CpuGenerator<mnd::DoubleDouble, mnd::X86_AVX_FMA, parallel>::generate(const
     AvxDoubleDouble juliaX = { jX[0], jX[1] };
     AvxDoubleDouble juliaY = { jY[0], jY[1] };
 
+#if defined(_OPENMP)
     if constexpr(parallel)
         omp_set_num_threads(omp_get_num_procs());
-#pragma omp parallel for schedule(static, 1) if (parallel)
+#   pragma omp parallel for schedule(static, 1) if (parallel)
+#endif
     for (long j = 0; j < info.bHeight; j++) {
         T y = viewy + T(double(j)) * hpp;
         __m256d y0s = { y.x[0], y.x[0], y.x[0], y.x[0] };

+ 6 - 2
libmandel/src/CpuGeneratorsSSE2.cpp

@@ -32,9 +32,11 @@ void CpuGenerator<float, mnd::X86_SSE2, parallel>::generate(const mnd::MandelInf
     __m128 juliaX = { jX, jX, jX, jX };
     __m128 juliaY = { jY, jY, jY, jY };
 
+#if defined(_OPENMP)
     if constexpr(parallel)
         omp_set_num_threads(omp_get_num_procs());
-#pragma omp parallel for schedule(static, 1) if (parallel)
+#   pragma omp parallel for schedule(static, 1) if (parallel)
+#endif
     for (long j = 0; j < info.bHeight; j++) {
         T y = T(view.y) + T(j) * T(view.height / info.bHeight);
         __m128 ys = {y, y, y, y};
@@ -137,9 +139,11 @@ void CpuGenerator<double, mnd::X86_SSE2, parallel>::generate(const mnd::MandelIn
     __m128d juliaX = { jX, jX };
     __m128d juliaY = { jY, jY };
 
+#if defined(_OPENMP)
     if constexpr(parallel)
         omp_set_num_threads(omp_get_num_procs());
-#pragma omp parallel for schedule(static, 1) if (parallel)
+#   pragma omp parallel for schedule(static, 1) if (parallel)
+#endif
     for (long j = 0; j < info.bHeight; j++) {
         T y = T(view.y) + T(j) * T(view.height / info.bHeight);
         __m128d ys = { y, y };

+ 13 - 5
libmandel/src/IterationGenerator.cpp

@@ -51,9 +51,11 @@ void NaiveGenerator::generate(const mnd::MandelInfo& info, float* data)
     T wpp = mnd::convert<T>(view.width / info.bWidth);
     T hpp = mnd::convert<T>(view.height / info.bHeight);
 
-    if constexpr (parallel)
+#if defined(_OPENMP)
+   if constexpr (parallel)
         omp_set_num_threads(omp_get_num_procs());
-//#pragma omp parallel for schedule(static, 1) if (parallel)
+#   pragma omp parallel for schedule(static, 1) if (parallel)
+#endif
     for (long j = 0; j < info.bHeight; j++) {
         T y = viewy + T(double(j)) * hpp;
         long i = 0;
@@ -155,9 +157,11 @@ void NaiveIRGenerator<U>::generate(const mnd::MandelInfo& info, float* data)
     T wpp = mnd::convert<T>(view.width / info.bWidth);
     T hpp = mnd::convert<T>(view.height / info.bHeight);
 
+#if defined(_OPENMP)
     if constexpr (parallel)
         omp_set_num_threads(omp_get_num_procs());
-//#pragma omp parallel for schedule(static, 1) if (parallel)
+#   pragma omp parallel for schedule(static, 1) if (parallel)
+#endif
     for (long j = 0; j < info.bHeight; j++) {
         T y = viewy + T(double(j)) * hpp;
         long i = 0;
@@ -323,8 +327,10 @@ void CompiledGenerator::generate(const mnd::MandelInfo& info, float* data)
 {
     using IterFunc = int (*)(double, double, int);
 
+#if defined(_OPENMP)
     omp_set_num_threads(omp_get_num_procs());
-#pragma omp parallel for schedule(static, 1)
+#   pragma omp parallel for schedule(static, 1)
+#endif
     for (int i = 0; i < info.bHeight; i++) {
         double y = mnd::convert<double>(info.view.y + info.view.height * i / info.bHeight);
         for (int j = 0; j < info.bWidth; j++) {
@@ -365,8 +371,10 @@ void CompiledGeneratorVec::generate(const mnd::MandelInfo& info, float* data)
 
     double dx = mnd::convert<double>(info.view.width / info.bWidth);
 
+#if defined(_OPENMP)
     omp_set_num_threads(omp_get_num_procs());
-#pragma omp parallel for schedule(static, 1)
+#   pragma omp parallel for schedule(static, 1)
+#endif
     for (int i = 0; i < info.bHeight; i++) {
         double y = mnd::convert<double>(info.view.y + info.view.height * i / info.bHeight);
         for (int j = 0; j < info.bWidth; j += 8) {