Browse Source

Add Julia set support to the SSE2 float and double generators

Nicolas Winkler 5 years ago
parent
commit
2ce08f7ed7
1 changed file with 27 additions and 8 deletions
  1. 27 8
      libmandel/src/CpuGeneratorsSSE2.cpp

+ 27 - 8
libmandel/src/CpuGeneratorsSSE2.cpp

@@ -58,6 +58,15 @@ void CpuGenerator<float, mnd::X86_SSE2, parallel>::generate(const mnd::MandelInf
             __m128 a2 = xs2;
             __m128 b2 = ys;
 
+            __m128 cx = xs;
+            __m128 cy = ys;
+            __m128 cx2 = xs2;
+	    if (info.julia) {
+		cx = juliaX;
+		cx2 = juliaX;
+		cy = juliaY;
+	    }
+
             __m128 resulta = { 0, 0, 0, 0 };
             __m128 resultb = { 0, 0, 0, 0 };
             __m128 resulta2 = { 0, 0, 0, 0 };
@@ -70,10 +79,10 @@ void CpuGenerator<float, mnd::X86_SSE2, parallel>::generate(const mnd::MandelInf
                 __m128 bb2 = _mm_mul_ps(b2, b2);
                 __m128 abab = _mm_mul_ps(a, b); abab = _mm_add_ps(abab, abab);
                 __m128 abab2 = _mm_mul_ps(a2, b2); abab2 = _mm_add_ps(abab2, abab2);
-                a = _mm_add_ps(_mm_sub_ps(aa, bb), xs);
-                b = _mm_add_ps(abab, ys);
-                a2 = _mm_add_ps(_mm_sub_ps(aa2, bb2), xs2);
-                b2 = _mm_add_ps(abab2, ys);
+                a = _mm_add_ps(_mm_sub_ps(aa, bb), cx);
+                b = _mm_add_ps(abab, cy);
+                a2 = _mm_add_ps(_mm_sub_ps(aa2, bb2), cx2);
+                b2 = _mm_add_ps(abab2, cy);
                 __m128 cmp = _mm_cmple_ps(_mm_add_ps(aa, bb), threshold);
                 __m128 cmp2 = _mm_cmple_ps(_mm_add_ps(aa2, bb2), threshold);
                 if (info.smooth) {
@@ -164,6 +173,16 @@ void CpuGenerator<double, mnd::X86_SSE2, parallel>::generate(const mnd::MandelIn
             __m128d b = ys;
             __m128d a2 = xs2;
             __m128d b2 = ys;
+
+            __m128d cx = xs;
+            __m128d cy = ys;
+            __m128d cx2 = xs2;
+	    if (info.julia) {
+		cx = juliaX;
+		cx2 = juliaX;
+		cy = juliaY;
+	    }
+
             __m128d resulta = { 0, 0 };
             __m128d resultb = { 0, 0 };
             __m128d resulta2 = { 0, 0 };
@@ -176,10 +195,10 @@ void CpuGenerator<double, mnd::X86_SSE2, parallel>::generate(const mnd::MandelIn
                 __m128d bb2 = _mm_mul_pd(b2, b2);
                 __m128d abab = _mm_mul_pd(a, b); abab = _mm_add_pd(abab, abab);
                 __m128d abab2 = _mm_mul_pd(a2, b2); abab2 = _mm_add_pd(abab2, abab2);
-                a = _mm_add_pd(_mm_sub_pd(aa, bb), xs);
-                b = _mm_add_pd(abab, ys);
-                a2 = _mm_add_pd(_mm_sub_pd(aa2, bb2), xs2);
-                b2 = _mm_add_pd(abab2, ys);
+                a = _mm_add_pd(_mm_sub_pd(aa, bb), cx);
+                b = _mm_add_pd(abab, cy);
+                a2 = _mm_add_pd(_mm_sub_pd(aa2, bb2), cx2);
+                b2 = _mm_add_pd(abab2, cy);
                 __m128d cmp = _mm_cmple_pd(_mm_add_pd(aa, bb), threshold);
                 __m128d cmp2 = _mm_cmple_pd(_mm_add_pd(aa2, bb2), threshold);
                 if (info.smooth) {