5 년 전 · 11732cff05
--- a/choosegenerators.cpp
+++ b/choosegenerators.cpp
@@ -11,12 +11,14 @@
 
				 #include <QRegExpValidator>
			
 
				 #include <QMessageBox>
			
 
				 
			
 
				+#include <cstring>
			
 
				 
			
 
				 
			
 
				 mnd::MandelViewport Benchmarker::benchViewport(void)
			
 
				 {
			
 
				     //return mnd::MandelViewport{ -1.250000598933854152929, 0.0001879894057291665530, 0.0000003839916666666565, 0.0000003839916666666565 };
			
 
				-    return mnd::MandelViewport::centerView();
			
 
				+    //return mnd::MandelViewport::centerView();
			
 
				+    return mnd::MandelViewport{ 0, 0, 0.0000003839916666666565, 0.0000003839916666666565 };
			
 
				 }
			
 
				 
			
 
				 
			
@@ -25,22 +27,19 @@ static std::vector<mnd::MandelInfo> createBenches()
 
				     std::vector<mnd::MandelInfo> vec;
			
 
				     for (int i = 0; i < 50; i++) {
			
 
				         int expo = i + 14;
			
 
				-        int whe = 5;
			
 
				-
			
 
				-        if (expo > 18)
			
 
				-            whe = 6;
			
 
				-        if (expo > 19)
			
 
				-            whe = 7;
			
 
				-        if (expo > 21)
			
 
				-            whe = 8;
			
 
				-        if (expo > 24)
			
 
				-            whe = 9;
			
 
				-        if (expo > 25)
			
 
				-            whe = 10;
			
 
				-
			
 
				-        long wh = 1L << whe;
			
 
				-        long iter = 1L << (expo - 2 * whe);
			
 
				-        vec.push_back(mnd::MandelInfo{ mnd::MandelViewport::centerView(), wh, wh, iter, false, false, 0.0, 0.0 });
			
 
				+        int w = 5;
			
 
				+        int h = 5;
			
 
				+
			
 
				+        while (int(expo * 1) - w - h > 15 && w <= 10 && h <= 10) {
			
 
				+            w++;
			
 
				+            if (int(expo * 1) - w - h > 15)
			
 
				+                h++;
			
 
				+        }
			
 
				+
			
 
				+        long wi = 1L << w;
			
 
				+        long he = 1L << h;
			
 
				+        long iter = 1L << (expo - w - h);
			
 
				+        vec.push_back(mnd::MandelInfo{ mnd::MandelViewport::centerView(), wi, he, iter, false, false, 0.0, 0.0 });
			
 
				     }
			
 
				     return vec;
			
 
				 }
			
@@ -120,10 +119,11 @@ double Benchmarker::benchmarkResult(mnd::MandelGenerator& mg) const
 
				             mg.generate(mi, bmp.pixels.get());
			
 
				             return &bmp;
			
 
				         });
			
 
				-        if (time > std::chrono::milliseconds(500)) {
			
 
				-            testIndex = i + 2;
			
 
				-            //printf("testing index %d\n", testIndex);
			
 
				-            fflush(stdout);
			
 
				+        if (time > std::chrono::milliseconds(200)) {
			
 
				+            testIndex = i + 4;
			
 
				+            printf("testing index for generator %s: %d\n", (mnd::toString(mg.getType()) + ", " + mnd::toString(mg.getExtension())).c_str(), testIndex);
			
 
				+            printf("    w: %d, h: %d, iter: %d\n", benches[testIndex].bWidth, benches[testIndex].bHeight, benches[testIndex].maxIter);
			
 
				+            fflush(stdout);fflush(stdout);fflush(stdout);fflush(stdout);fflush(stdout);fflush(stdout);fflush(stdout);
			
 
				             break;
			
 
				         }
			
 
				         else if (time < std::chrono::milliseconds(10)) {
			
@@ -131,15 +131,23 @@ double Benchmarker::benchmarkResult(mnd::MandelGenerator& mg) const
 
				         }
			
 
				     }
			
 
				 
			
 
				+    try {
			
 
				+        const mnd::MandelInfo& mi = benches[(testIndex >= benches.size()) ? (benches.size() - 1) : testIndex];
			
 
				+        Bitmap<float> bmp(mi.bWidth, mi.bHeight);
			
 
				+        auto [iters, time] = measureMips([&mg, &mi, &bmp]() {
			
 
				+            mg.generate(mi, bmp.pixels.get());
			
 
				+            return &bmp;
			
 
				+        });
			
 
				 
			
 
				-    const mnd::MandelInfo& mi = benches[(testIndex >= benches.size()) ? (benches.size() - 1) : testIndex];
			
 
				-    Bitmap<float> bmp(mi.bWidth, mi.bHeight);
			
 
				-    auto [iters, time] = measureMips([&mg, &mi, &bmp]() {
			
 
				-        mg.generate(mi, bmp.pixels.get());
			
 
				-        return &bmp;
			
 
				-    });
			
 
				-
			
 
				-    return double(iters) / time.count() * 1000;
			
 
				+        return double(iters) / time.count() * 1000;
			
 
				+    }
			
 
				+    catch(const std::string& c) {
			
 
				+        printf("error benchmarking: %s\n", c.c_str());
			
 
				+    }
			
 
				+    catch(...) {
			
 
				+        printf("error benchmarking\n");
			
 
				+    }
			
 
				+    return 0;
			
 
				 }
			
 
				 
			
 
				 
			
--- a/libmandel/CMakeLists.txt
+++ b/libmandel/CMakeLists.txt
@@ -42,7 +42,6 @@ endif()
 
				 #    message(${MandelSources})
			
 
				 
			
 
				 add_library(mandel STATIC ${MandelSources})
			
 
				-set_source_files_properties(${MandelSources} PROPERTIES COMPILE_FLAGS -march=native)
			
 
				 
			
 
				 FILE(GLOB QdSources qd-2.3.22/src/*.cpp)
			
 
				 
			
--- a/libmandel/src/ClGenerators.cpp
+++ b/libmandel/src/ClGenerators.cpp
@@ -145,9 +145,10 @@ void ClGeneratorFloat::generate(const mnd::MandelInfo& info, float* data)
 
				     } else {
			
 
				         queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
			
 
				     }
			
 
				+    cl::Event event;
			
 
				+    queue.enqueueReadBuffer(buffer_A, CL_FALSE, 0, bufferSize, data, nullptr, &event);
			
 
				     queue.flush();
			
 
				-    queue.finish();
			
 
				-    queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
			
 
				+    event.wait();
			
 
				 }
			
 
				 
			
 
				 
			
@@ -336,7 +337,10 @@ void ClGeneratorDouble::generate(const mnd::MandelInfo& info, float* data)
 
				     kernel.setArg(10, double(info.juliaY));
			
 
				 
			
 
				     cl_int result = queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
			
 
				-    queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
			
 
				+    cl::Event event;
			
 
				+    queue.enqueueReadBuffer(buffer_A, CL_FALSE, 0, bufferSize, data, nullptr, &event);
			
 
				+    queue.flush();
			
 
				+    event.wait();
			
 
				 }
			
 
				 
			
 
				 
			
--- a/libmandel/src/CpuGeneratorsAVX.cpp
+++ b/libmandel/src/CpuGeneratorsAVX.cpp
@@ -435,9 +435,7 @@ void CpuGenerator<mnd::DoubleDouble, mnd::X86_AVX, parallel>::generate(const mnd
 
				 #pragma omp parallel for schedule(static, 1) if (parallel)
			
 
				     for (long j = 0; j < info.bHeight; j++) {
			
 
				         T y = viewy + T(double(j)) * hpp;
			
 
				-        __m256d y0s = { y.x[0], y.x[0], y.x[0], y.x[0] };
			
 
				-        __m256d y1s = { y.x[1], y.x[1], y.x[1], y.x[1] };
			
 
				-        AvxDoubleDouble ys{ y0s, y1s };
			
 
				+        AvxDoubleDouble ys{ y[0], y[1] };
			
 
				         for (long i = 0; i < info.bWidth; i += 4) {
			
 
				             T x1 = viewx + T(double(i)) * wpp;
			
 
				             T x2 = x1 + wpp;
			
@@ -466,8 +464,8 @@ void CpuGenerator<mnd::DoubleDouble, mnd::X86_AVX, parallel>::generate(const mnd
 
				             AvxDoubleDouble a = xs;
			
 
				             AvxDoubleDouble b = ys;
			
 
				 
			
 
				-            __m256d resultsa;
			
 
				-            __m256d resultsb;
			
 
				+            __m256d resultsa = _mm256_set1_pd(0);
			
 
				+            __m256d resultsb = _mm256_set1_pd(0);
			
 
				 
			
 
				             for (int k = 0; k < info.maxIter; k++) {
			
 
				                 AvxDoubleDouble aa = a * a;
			
--- a/libmandel/src/CpuGeneratorsSSE2.cpp
+++ b/libmandel/src/CpuGeneratorsSSE2.cpp
@@ -188,33 +188,22 @@ void CpuGenerator<double, mnd::X86_SSE2, parallel>::generate(const mnd::MandelIn
 
				                 counter = _mm_add_pd(counter, adder);
			
 
				                 adder2 = _mm_and_pd(adder2, cmp2);
			
 
				                 counter2 = _mm_add_pd(counter2, adder2);
			
 
				-                if ((k & 0x7 == 0) && _mm_movemask_epi8(_mm_castpd_si128(cmp)) == 0 &&
			
 
				-                    _mm_movemask_epi8(_mm_castpd_si128(cmp)) == 0) {
			
 
				+                if (((k & 0x7) == 0) && _mm_movemask_epi8(_mm_castpd_si128(cmp)) == 0 &&
			
 
				+                    _mm_movemask_epi8(_mm_castpd_si128(cmp2)) == 0) {
			
 
				                     break;
			
 
				                 }
			
 
				             }
			
 
				 
			
 
				-            auto alignVec = [](double* data) -> double* {
			
 
				-                void* aligned = data;
			
 
				-                ::size_t length = 64;
			
 
				-                std::align(32, 4 * sizeof(double), aligned, length);
			
 
				-                return static_cast<double*>(aligned);
			
 
				-            };
			
 
				-
			
 
				-            double resData[24];
			
 
				-            double* ftRes = alignVec(resData);
			
 
				+            double ftRes[24];
			
 
				             double* resa = ftRes + 4;
			
 
				             double* resb = ftRes + 8;
			
 
				 
			
 
				-            _mm_store_pd(ftRes, counter);
			
 
				-            _mm_store_pd(ftRes + 2, counter2);
			
 
				-            _mm_store_pd(resa, resulta);
			
 
				-            _mm_store_pd(resa + 2, resulta2);
			
 
				-            _mm_store_pd(resb, resultb);
			
 
				-            _mm_store_pd(resb + 2, resultb2);
			
 
				-            //for (int k = 0; k < 2 && i + k < info.bWidth; k++)
			
 
				-            //    data[i + k + j * info.bWidth] = ftRes[k] > 0 ? ftRes[k] : info.maxIter;
			
 
				-
			
 
				+            _mm_storeu_pd(ftRes, counter);
			
 
				+            _mm_storeu_pd(ftRes + 2, counter2);
			
 
				+            _mm_storeu_pd(resa, resulta);
			
 
				+            _mm_storeu_pd(resa + 2, resulta2);
			
 
				+            _mm_storeu_pd(resb, resultb);
			
 
				+            _mm_storeu_pd(resb + 2, resultb2);
			
 
				             for (int k = 0; k < 4 && i + k < info.bWidth; k++) {
			
 
				                 if (info.smooth)
			
 
				                     data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? info.maxIter :