Просмотр исходного кода

improved benching and fixed cl fixed64 generator

Nicolas Winkler 5 лет назад
Родитель
Commit
0982a2c9b0
3 измененных файла: 21 добавлено и 14 удалено
  1. 15 13
      choosegenerators.cpp
  2. 1 1
      choosegenerators.h
  3. 5 0
      libmandel/src/ClGenerators.cpp

+ 15 - 13
choosegenerators.cpp

@@ -90,16 +90,17 @@ Benchmarker::~Benchmarker(void)
 }
 
 
-std::pair<long long, std::chrono::nanoseconds> Benchmarker::measureMips(const std::function<Bitmap<float>*()>& bench) const
+std::pair<long long, std::chrono::nanoseconds> Benchmarker::measureMips(
+        std::function<void(Bitmap<float>&)> bench, Bitmap<float>& bmp) const
 {
     using namespace std::chrono;
     auto before = high_resolution_clock::now();
-    auto* bitmap = bench();
+    bench(bmp);
     auto after = high_resolution_clock::now();
 
     long long sum = 0;
-    for (int i = 0; i < bitmap->width * bitmap->height; i++) {
-        sum += static_cast<long long>(std::floor(bitmap->pixels[size_t(i)]));
+    for (int i = 0; i < bmp.width * bmp.height; i++) {
+        sum += static_cast<long long>(std::floor(bmp.pixels[size_t(i)]));
     }
 
     return std::make_pair(sum, duration_cast<nanoseconds>(after - before));
@@ -112,29 +113,30 @@ double Benchmarker::benchmarkResult(mnd::MandelGenerator& mg) const
     for (size_t i = 0; i < benches.size(); i++) {
         const mnd::MandelInfo& mi = benches[i];
         Bitmap<float> bmp(mi.bWidth, mi.bHeight);
-        auto [iters, time] = measureMips([&mg, &mi, &bmp]() {
+        auto [iters, time] = measureMips([&mg, mi](Bitmap<float>& bmp) {
             mg.generate(mi, bmp.pixels.get());
-            return &bmp;
-        });
-        if (time > std::chrono::milliseconds(200)) {
-            testIndex = i + 2;
+        }, bmp);
+        if (time > std::chrono::milliseconds(120)) {
+            testIndex = i + 4;
             printf("testing index for generator %s: %d\n", (mnd::toString(mg.getType()) + ", " + mnd::toString(mg.getExtension())).c_str(), testIndex);
             printf("    w: %d, h: %d, iter: %d\n", benches[testIndex].bWidth, benches[testIndex].bHeight, benches[testIndex].maxIter);
             fflush(stdout);
             break;
         }
-        else if (time < std::chrono::milliseconds(10)) {
+        else if (time < std::chrono::milliseconds(3)) {
             i += 7;
         }
+        else if (time < std::chrono::milliseconds(20)) {
+            i += 3;
+        }
     }
 
     try {
         const mnd::MandelInfo& mi = benches[(testIndex >= benches.size()) ? (benches.size() - 1) : testIndex];
         Bitmap<float> bmp(mi.bWidth, mi.bHeight);
-        auto [iters, time] = measureMips([&mg, &mi, &bmp]() {
+        auto [iters, time] = measureMips([&mg, mi](Bitmap<float>& bmp) {
             mg.generate(mi, bmp.pixels.get());
-            return &bmp;
-        });
+        }, bmp);
 
         printf("%lld iterations in %lld microseconds\n\n", iters, time.count() / 1000);
 

+ 1 - 1
choosegenerators.h

@@ -47,7 +47,7 @@ public:
 
     static mnd::MandelViewport benchViewport(void);
 
-    std::pair<long long, std::chrono::nanoseconds> measureMips(const std::function<Bitmap<float>*()>& bench) const;
+    std::pair<long long, std::chrono::nanoseconds> measureMips(std::function<void(Bitmap<float>&)> bench, Bitmap<float>& bmp) const;
     double benchmarkResult(mnd::MandelGenerator& mg) const;
 
     void run(void) override;

+ 5 - 0
libmandel/src/ClGenerators.cpp

@@ -508,6 +508,8 @@ void ClGenerator64::generate(const mnd::MandelInfo& info, float* data)
     ull y = ull(::round(double(info.view.y) * (1LL << 48)));
     ull w = ull(::round(double(pixelScaleX) * (1LL << 48)));
     ull h = ull(::round(double(pixelScaleY) * (1LL << 48)));
+    ull jx = ull(::round(double(info.juliaX) * (1LL << 48)));
+    ull jy = ull(::round(double(info.juliaY) * (1LL << 48)));
     //x = 0;
     //y = 0;
     
@@ -519,6 +521,9 @@ void ClGenerator64::generate(const mnd::MandelInfo& info, float* data)
     kernel.setArg(5, ull(h));
     kernel.setArg(6, int(info.maxIter));
     kernel.setArg(7, int(info.smooth ? 1 : 0));
+    kernel.setArg(8, int(info.julia ? 1 : 0));
+    kernel.setArg(9, ull(jx));
+    kernel.setArg(10, ull(jy));
 
     queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
     queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);