Selaa lähdekoodia

Remove the OpenMP collapse(2) clause and broadcast AVX constants with _mm256_set1_ps

Nicolas Winkler 5 vuotta sitten
vanhempi
commit
b90a5bd788
1 muutettua tiedostoa jossa 4 lisäystä ja 4 poistoa
  1. 4 4
      libmandel/src/CpuGeneratorsAVX.cpp

+ 4 - 4
libmandel/src/CpuGeneratorsAVX.cpp

@@ -28,8 +28,8 @@ void CpuGenerator<float, mnd::X86_AVX, parallel>::generate(const mnd::MandelInfo
     const MandelViewport& view = info.view;
     const float dppf = float(view.width / info.bWidth);
     const float viewxf = float(view.x);
-    __m256 viewx = { viewxf, viewxf, viewxf, viewxf, viewxf, viewxf, viewxf, viewxf };
-    __m256 dpp = { dppf, dppf, dppf, dppf, dppf, dppf, dppf, dppf };
+    __m256 viewx = _mm256_set1_ps(viewxf);
+    __m256 dpp = _mm256_set1_ps(dppf);
 
     T jX = mnd::convert<T>(info.juliaX);
     T jY = mnd::convert<T>(info.juliaY);
@@ -38,10 +38,10 @@ void CpuGenerator<float, mnd::X86_AVX, parallel>::generate(const mnd::MandelInfo
 
     if constexpr(parallel)
         omp_set_num_threads(omp_get_num_procs());
-#pragma omp parallel for schedule(static, 1) collapse(2) if (parallel)
+#pragma omp parallel for schedule(static, 1) if (parallel)
     for (long j = 0; j < info.bHeight; j++) {
         T y = T(view.y) + T(j) * T(view.height / info.bHeight);
-        __m256 ys = {y, y, y, y, y, y, y, y};
+        __m256 ys = _mm256_set1_ps(y);
         for (long i = 0; i < info.bWidth; i += 16) {
             __m256 pixc = { float(i), float(i + 1), float(i + 2), float(i + 3), float(i + 4), float(i + 5), float(i + 6), float(i + 7) };
             __m256 pixc2 = { float(i + 8), float(i + 9), float(i + 10), float(i + 11), float(i + 12), float(i + 13), float(i + 14), float(i + 15) };