Jelajahi Sumber

vec or no vec?

Nicolas Winkler 4 tahun lalu
induk
melakukan
12024ac187
3 mengubah file dengan 21 tambahan dan 18 penghapusan
  1. 4 6
      libmandel/src/ClGenerators.cpp
  2. 15 10
      libmandel/src/opencl/float.cl
  3. 2 2
      src/choosegenerators.cpp

+ 4 - 6
libmandel/src/ClGenerators.cpp

@@ -149,8 +149,8 @@ ClGeneratorFloat::ClGeneratorFloat(mnd::MandelDevice& device, const std::string&
     const cl::Device& dev = device.getClDevice().device;
     useVec = dev.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>() >= 4;
     // often still slower than non-vec variation
-    //useVec = false;
-    kernel = Kernel(program, useVec ? "iterate_vec4" : "iterate2");
+    useVec = false;
+    kernel = Kernel(program, useVec ? "iterate_vec4" : "iterate");
 }
 
 
@@ -177,12 +177,10 @@ void ClGeneratorFloat::generate(const mnd::MandelInfo& info, float* data)
     if (useVec) {
         queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight / 4));
     } else {
-        queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight / 2));
+        queue.enqueueNDRangeKernel(kernel, 0, NDRange(info.bWidth * info.bHeight));
     }
     cl::Event event;
-    queue.enqueueReadBuffer(buffer_A, CL_FALSE, 0, bufferSize, data, nullptr, &event);
-    queue.flush();
-    event.wait();
+    queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data, nullptr, &event);
 }
 
 

+ 15 - 10
libmandel/src/opencl/float.cl

@@ -99,6 +99,18 @@ __kernel void iterate2(__global float* A, const int width, float xl, float yt, f
 }
 
 
+float ith(float4 v, int index) { // select one component of v by a runtime index (0..3 -> s0..s3)
+    switch(index) {
+        case 0:
+        return v.s0;
+        case 1:
+        return v.s1;
+        case 2:
+        return v.s2;
+        default: // index 3; also catches out-of-range values so every path returns a value
+        return v.s3;
+    }
+}
 
 __kernel void iterate_vec4(__global float* A, const int width, float xl, float yt, float pixelScaleX, float pixelScaleY, int maxIter, int smooth, int julia, float juliaX, float juliaY) {
    int index = get_global_id(0) * 4;
@@ -141,20 +153,13 @@ __kernel void iterate_vec4(__global float* A, const int width, float xl, float y
        }
     }
 
-    float4 res;
-    if (smooth != 0) {
-        if (count.s0 >= 0)
-            res = ((float4) count) + ((float4)(1.0f, 1.0f, 1.0f, 1.0f)) - log2(log(fma(resa, resa, resb * resb)) / 2);
-    }
-
-
     for (int i = 0; i < 4 && i + x < width; i++) {
     if (smooth != 0) {
-        if (count[i] >= 0)
-            A[index + i] = ((float) count[i]) + 1 - log(log(fma(resa[i], resa[i], resb[i] * resb[i])) / 2) / log(2.0f);
+        if (ith(count, i) >= 0)
+            A[index + i] = ith(count, i) + 1 - log(log(fma(ith(resa, i), ith(resa, i), ith(resb, i) * ith(resb, i))) / 2) / log(2.0f);
     }
     else
-        A[index + i] = ((float) count[i]);
+        A[index + i] = ith(count, i);
    }
 }
 

+ 2 - 2
src/choosegenerators.cpp

@@ -74,14 +74,14 @@ const std::vector<mnd::MandelInfo> Benchmarker::benches = {
     mnd::MandelInfo{ benchViewport(), 512, 512, 2048000, false },
     mnd::MandelInfo{ benchViewport(), 512, 512, 4096000, false },
     mnd::MandelInfo{ benchViewport(), 512, 512, 8192000, false },
-    mnd::MandelInfo{ benchViewport(), 512, 512, 16384000, false },
+/*    mnd::MandelInfo{ benchViewport(), 512, 512, 16384000, false },
     mnd::MandelInfo{ benchViewport(), 512, 512, 32768000, false },
     mnd::MandelInfo{ benchViewport(), 512, 512, 65536000, false },
     mnd::MandelInfo{ benchViewport(), 512, 512, 131072000, false },
     mnd::MandelInfo{ benchViewport(), 512, 512, 262144000, false },
     mnd::MandelInfo{ benchViewport(), 512, 512, 524288000, false },
     mnd::MandelInfo{ benchViewport(), 512, 512, 1048576000, false },
-    mnd::MandelInfo{ benchViewport(), 512, 512, 2097152000, false },
+    mnd::MandelInfo{ benchViewport(), 512, 512, 2097152000, false },*/
 };