|
@@ -43,8 +43,7 @@ void CpuGenerator<float, mnd::X86_AVX_FMA, parallel>::generate(const mnd::Mandel
|
|
for (long j = 0; j < info.bHeight; j++) {
|
|
for (long j = 0; j < info.bHeight; j++) {
|
|
T y = T(view.y) + T(j) * T(view.height / info.bHeight);
|
|
T y = T(view.y) + T(j) * T(view.height / info.bHeight);
|
|
__m256 ys = {y, y, y, y, y, y, y, y};
|
|
__m256 ys = {y, y, y, y, y, y, y, y};
|
|
- long i = 0;
|
|
|
|
- for (i; i < info.bWidth; i += 24) {
|
|
|
|
|
|
+ for (long i = 0; i < info.bWidth; i += 24) {
|
|
__m256 pixc = { float(i), float(i + 1), float(i + 2), float(i + 3), float(i + 4), float(i + 5), float(i + 6), float(i + 7) };
|
|
__m256 pixc = { float(i), float(i + 1), float(i + 2), float(i + 3), float(i + 4), float(i + 5), float(i + 6), float(i + 7) };
|
|
__m256 pixc2 = { float(i + 8), float(i + 9), float(i + 10), float(i + 11), float(i + 12), float(i + 13), float(i + 14), float(i + 15) };
|
|
__m256 pixc2 = { float(i + 8), float(i + 9), float(i + 10), float(i + 11), float(i + 12), float(i + 13), float(i + 14), float(i + 15) };
|
|
__m256 pixc3 = { float(i + 16), float(i + 17), float(i + 18), float(i + 19), float(i + 20), float(i + 21), float(i + 22), float(i + 23) };
|
|
__m256 pixc3 = { float(i + 16), float(i + 17), float(i + 18), float(i + 19), float(i + 20), float(i + 21), float(i + 22), float(i + 23) };
|
|
@@ -211,8 +210,7 @@ void CpuGenerator<double, mnd::X86_AVX_FMA, parallel>::generate(const mnd::Mande
|
|
for (long j = 0; j < info.bHeight; j++) {
|
|
for (long j = 0; j < info.bHeight; j++) {
|
|
T y = T(view.y + T(j) * view.height / info.bHeight);
|
|
T y = T(view.y + T(j) * view.height / info.bHeight);
|
|
__m256d ys = { y, y, y, y };
|
|
__m256d ys = { y, y, y, y };
|
|
- long i = 0;
|
|
|
|
- for (i; i < info.bWidth; i += 8) {
|
|
|
|
|
|
+ for (long i = 0; i < info.bWidth; i += 8) {
|
|
__m256d pixc = { double(i), double(i + 1), double(i + 2), double(i + 3) };
|
|
__m256d pixc = { double(i), double(i + 1), double(i + 2), double(i + 3) };
|
|
__m256d pixc2 = { double(i + 4), double(i + 5), double(i + 6), double(i + 7) };
|
|
__m256d pixc2 = { double(i + 4), double(i + 5), double(i + 6), double(i + 7) };
|
|
__m256d xs = _mm256_fmadd_pd(dpp, pixc, viewx);
|
|
__m256d xs = _mm256_fmadd_pd(dpp, pixc, viewx);
|
|
@@ -423,8 +421,7 @@ void CpuGenerator<mnd::DoubleDouble, mnd::X86_AVX_FMA, parallel>::generate(const
|
|
__m256d y0s = { y.x[0], y.x[0], y.x[0], y.x[0] };
|
|
__m256d y0s = { y.x[0], y.x[0], y.x[0], y.x[0] };
|
|
__m256d y1s = { y.x[1], y.x[1], y.x[1], y.x[1] };
|
|
__m256d y1s = { y.x[1], y.x[1], y.x[1], y.x[1] };
|
|
AvxDoubleDouble ys{ y0s, y1s };
|
|
AvxDoubleDouble ys{ y0s, y1s };
|
|
- long i = 0;
|
|
|
|
- for (i; i < info.bWidth; i += 4) {
|
|
|
|
|
|
+ for (long i = 0; i < info.bWidth; i += 4) {
|
|
T x1 = viewx + T(double(i)) * wpp;
|
|
T x1 = viewx + T(double(i)) * wpp;
|
|
T x2 = x1 + wpp;
|
|
T x2 = x1 + wpp;
|
|
T x3 = x2 + wpp;
|
|
T x3 = x2 + wpp;
|