Nicolas Winkler 5 jaren geleden
bovenliggende
commit
6e83b59c45
4 gewijzigde bestanden met toevoegingen van 188 en 0 verwijderingen
  1. 12 0
      libmandel/include/OpenClCode.h
  2. 9 0
      libmandel/src/OpenClCode.cpp
  3. 49 0
      libmandel/src/opencl/fixed128.cl
  4. 118 0
      libmandel/src/opencl/fixed128.h

+ 12 - 0
libmandel/include/OpenClCode.h

@@ -0,0 +1,12 @@
+#ifndef MANDEL_OPENCLCODE_H
+#define MANDEL_OPENCLCODE_H
+
+namespace mnd
+{
+    // Embedded OpenCL kernel sources as raw byte arrays.
+    // These are declarations only (`extern`, unknown bound): the storage is
+    // defined once, in OpenClCode.cpp, which includes the opencl/*.h byte
+    // arrays inside namespace mnd. Without `extern` each line would be a
+    // definition of an array of unknown size, which is ill-formed in C++.
+    // NOTE(review): the matching length variables (e.g. fixed128_cl_len in
+    // opencl/fixed128.h) are not declared here; the arrays carry no
+    // terminating NUL, so callers need that length from elsewhere.
+    extern unsigned char fixed64_cl[];
+    extern unsigned char fixed128_cl[];
+    extern unsigned char fixed512_cl[];
+}
+
+#endif // MANDEL_OPENCLCODE_H
+

+ 9 - 0
libmandel/src/OpenClCode.cpp

@@ -0,0 +1,9 @@
+
+namespace mnd // the includes below are expanded inside this namespace, so their globals become mnd:: members
+{
+#   include "opencl/fixed64.h"
+#   include "opencl/fixed128.h" // defines fixed128_cl[] and fixed128_cl_len
+#   include "opencl/fixed512.h"
+}
+
+

+ 49 - 0
libmandel/src/opencl/fixed128.cl

@@ -0,0 +1,49 @@
+
+
+/*
+ * Fixed-point multiply for 64-bit signed values with 48 fractional bits.
+ *
+ * The full 128-bit product of a and b is formed from mul_hi (upper 64 bits)
+ * and a * b (lower 64 bits); the result is bits 48..111 of that product,
+ * i.e. the product shifted right by 48 to restore the fixed-point scale.
+ *
+ * The parameters and return value are scalar `long`: the body stores the
+ * results in scalar `long` locals and every caller passes and receives
+ * scalar `long`, so a vector (long2) signature does not fit.
+ */
+long mul(long a, long b) {
+    long upper = mul_hi(a, b);   // bits 64..127 of the product
+    long lower = a * b;          // bits 0..63 of the product
+    // upper supplies result bits 16..63; the top 16 bits of lower supply
+    // result bits 0..15 (masked because >> on a signed long sign-extends).
+    return (upper << 16) + ((lower >> 48) & 0xFFFF);
+}
+
+
+/*
+ * Escape-time iteration z -> z^2 + c, one work-item per pixel.
+ *
+ * A       output buffer, one float per pixel, indexed by the global id
+ * width   number of pixels per row, used to split the id into column/row
+ * x, y    fixed-point coordinate of pixel (0, 0), 48 fractional bits
+ * pw, ph  fixed-point step per pixel column / row
+ * max     iteration limit; pixels that never escape are written as max
+ * smooth  non-zero selects the fractional (smoothed) iteration count
+ */
+__kernel void iterate(__global float* A, const int width,
+                      ulong x, ulong y, ulong pw, ulong ph, int max, int smooth) {
+    const int index = get_global_id(0);
+    const long col = index % width;
+    const long row = index / width;
+
+    // The ulong arguments are reinterpreted as signed fixed-point values.
+    const long cRe = (long) x + (long) pw * col;
+    const long cIm = (long) y + (long) ph * row;
+
+    long zRe = cRe;
+    long zIm = cIm;
+    int n;
+    for (n = 0; n < max - 1; n++) {
+        const long reSq = mul(zRe, zRe);
+        const long imSq = mul(zIm, zIm);
+        const long reIm = mul(zRe, zIm);
+        // Stop once |z|^2 exceeds 16 (expressed in the 48-bit fixed-point scale).
+        if (reSq + imSq > (16LL << 48))
+            break;
+        zRe = reSq - imSq + cRe;
+        zIm = reIm + reIm + cIm;
+    }
+
+    // Iteration limit reached without escaping.
+    if (n >= max - 1) {
+        A[index] = max;
+        return;
+    }
+
+    // Plain integer iteration count.
+    if (smooth == 0) {
+        A[index] = ((float) n);
+        return;
+    }
+
+    // N + 1 - log (log  |Z(N)|) / log 2
+    const float toFloat = 1.0f / (1LL << 48); // 3.5527137e-15f
+    const float re = ((float) zRe) * toFloat;
+    const float im = ((float) zIm) * toFloat;
+    A[index] = ((float) n) + 1 - log(log(re * re + im * im) / 2) / log(2.0f);
+}

+ 118 - 0
libmandel/src/opencl/fixed128.h

@@ -0,0 +1,118 @@
+unsigned char fixed128_cl[] = { // ASCII text of the OpenCL kernel source with CRLF line endings; starts "long mul(long a, long b) {"; no terminating NUL
+  0x0d, 0x0a, 0x0d, 0x0a, 0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x6d, 0x75, 0x6c,
+  0x28, 0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x61, 0x2c, 0x20, 0x6c, 0x6f, 0x6e,
+  0x67, 0x20, 0x62, 0x29, 0x20, 0x7b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x75, 0x70, 0x70, 0x65, 0x72, 0x20, 0x3d,
+  0x20, 0x6d, 0x75, 0x6c, 0x5f, 0x68, 0x69, 0x28, 0x61, 0x2c, 0x20, 0x62,
+  0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6f, 0x6e, 0x67,
+  0x20, 0x6c, 0x6f, 0x77, 0x65, 0x72, 0x20, 0x3d, 0x20, 0x61, 0x20, 0x2a,
+  0x20, 0x62, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74,
+  0x75, 0x72, 0x6e, 0x20, 0x28, 0x75, 0x70, 0x70, 0x65, 0x72, 0x20, 0x3c,
+  0x3c, 0x20, 0x31, 0x36, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x28, 0x6c, 0x6f,
+  0x77, 0x65, 0x72, 0x20, 0x3e, 0x3e, 0x20, 0x34, 0x38, 0x29, 0x20, 0x26,
+  0x20, 0x30, 0x78, 0x46, 0x46, 0x46, 0x46, 0x29, 0x3b, 0x0d, 0x0a, 0x7d,
+  0x0d, 0x0a, 0x0d, 0x0a, 0x0d, 0x0a, 0x5f, 0x5f, 0x6b, 0x65, 0x72, 0x6e,
+  0x65, 0x6c, 0x20, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x69, 0x74, 0x65, 0x72,
+  0x61, 0x74, 0x65, 0x28, 0x5f, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c,
+  0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x2a, 0x20, 0x41, 0x2c, 0x20, 0x63,
+  0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x77, 0x69, 0x64,
+  0x74, 0x68, 0x2c, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x75, 0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x78, 0x2c, 0x20,
+  0x75, 0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x79, 0x2c, 0x20, 0x75, 0x6c, 0x6f,
+  0x6e, 0x67, 0x20, 0x70, 0x77, 0x2c, 0x20, 0x75, 0x6c, 0x6f, 0x6e, 0x67,
+  0x20, 0x70, 0x68, 0x2c, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x6d, 0x61, 0x78,
+  0x2c, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x73, 0x6d, 0x6f, 0x6f, 0x74, 0x68,
+  0x29, 0x20, 0x7b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74,
+  0x20, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x67, 0x65, 0x74,
+  0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x28, 0x30,
+  0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6f, 0x6e, 0x67,
+  0x20, 0x70, 0x78, 0x20, 0x3d, 0x20, 0x28, 0x69, 0x6e, 0x64, 0x65, 0x78,
+  0x20, 0x25, 0x20, 0x77, 0x69, 0x64, 0x74, 0x68, 0x29, 0x3b, 0x0d, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x70, 0x79, 0x20,
+  0x3d, 0x20, 0x28, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x2f, 0x20, 0x77,
+  0x69, 0x64, 0x74, 0x68, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x78, 0x6c, 0x20, 0x3d, 0x20,
+  0x78, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6f, 0x6e, 0x67,
+  0x20, 0x79, 0x74, 0x20, 0x3d, 0x20, 0x79, 0x3b, 0x0d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c,
+  0x53, 0x63, 0x61, 0x6c, 0x65, 0x58, 0x20, 0x3d, 0x20, 0x70, 0x77, 0x3b,
+  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x70,
+  0x69, 0x78, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x59, 0x20, 0x3d,
+  0x20, 0x70, 0x68, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x61, 0x20, 0x3d, 0x20, 0x78, 0x6c, 0x20,
+  0x2b, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x65,
+  0x58, 0x20, 0x2a, 0x20, 0x70, 0x78, 0x3b, 0x20, 0x2f, 0x2f, 0x20, 0x70,
+  0x69, 0x78, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x58, 0x20, 0x2a,
+  0x20, 0x70, 0x78, 0x20, 0x2b, 0x20, 0x78, 0x6c, 0x0d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x79,
+  0x74, 0x20, 0x2b, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x53, 0x63, 0x61,
+  0x6c, 0x65, 0x59, 0x20, 0x2a, 0x20, 0x70, 0x79, 0x3b, 0x20, 0x2f, 0x2f,
+  0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x59,
+  0x20, 0x2a, 0x20, 0x70, 0x79, 0x20, 0x2b, 0x20, 0x79, 0x74, 0x0d, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x63, 0x61, 0x20,
+  0x3d, 0x20, 0x61, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6f,
+  0x6e, 0x67, 0x20, 0x63, 0x62, 0x20, 0x3d, 0x20, 0x62, 0x3b, 0x0d, 0x0a,
+  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x6e, 0x20,
+  0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x77, 0x68,
+  0x69, 0x6c, 0x65, 0x20, 0x28, 0x6e, 0x20, 0x3c, 0x20, 0x6d, 0x61, 0x78,
+  0x20, 0x2d, 0x20, 0x31, 0x29, 0x20, 0x7b, 0x0d, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x61, 0x61,
+  0x20, 0x3d, 0x20, 0x6d, 0x75, 0x6c, 0x28, 0x61, 0x2c, 0x20, 0x61, 0x29,
+  0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c,
+  0x6f, 0x6e, 0x67, 0x20, 0x62, 0x62, 0x20, 0x3d, 0x20, 0x6d, 0x75, 0x6c,
+  0x28, 0x62, 0x2c, 0x20, 0x62, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x61, 0x62,
+  0x20, 0x3d, 0x20, 0x6d, 0x75, 0x6c, 0x28, 0x61, 0x2c, 0x20, 0x62, 0x29,
+  0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69,
+  0x66, 0x20, 0x28, 0x61, 0x61, 0x20, 0x2b, 0x20, 0x62, 0x62, 0x20, 0x3e,
+  0x20, 0x28, 0x31, 0x36, 0x4c, 0x4c, 0x20, 0x3c, 0x3c, 0x20, 0x34, 0x38,
+  0x29, 0x29, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0d, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x20, 0x3d, 0x20, 0x61,
+  0x61, 0x20, 0x2d, 0x20, 0x62, 0x62, 0x20, 0x2b, 0x20, 0x63, 0x61, 0x3b,
+  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62, 0x20,
+  0x3d, 0x20, 0x61, 0x62, 0x20, 0x2b, 0x20, 0x61, 0x62, 0x20, 0x2b, 0x20,
+  0x63, 0x62, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x6e, 0x2b, 0x2b, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d,
+  0x0d, 0x0a, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x4e,
+  0x20, 0x2b, 0x20, 0x31, 0x20, 0x2d, 0x20, 0x6c, 0x6f, 0x67, 0x20, 0x28,
+  0x6c, 0x6f, 0x67, 0x20, 0x20, 0x7c, 0x5a, 0x28, 0x4e, 0x29, 0x7c, 0x29,
+  0x20, 0x2f, 0x20, 0x6c, 0x6f, 0x67, 0x20, 0x32, 0x0d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x6e, 0x20, 0x3e, 0x3d, 0x20, 0x6d,
+  0x61, 0x78, 0x20, 0x2d, 0x20, 0x31, 0x29, 0x0d, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x41, 0x5b, 0x69, 0x6e, 0x64, 0x65, 0x78,
+  0x5d, 0x20, 0x3d, 0x20, 0x6d, 0x61, 0x78, 0x3b, 0x0d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x65, 0x6c, 0x73, 0x65, 0x20, 0x7b, 0x0d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x73, 0x6d,
+  0x6f, 0x6f, 0x74, 0x68, 0x20, 0x21, 0x3d, 0x20, 0x30, 0x29, 0x20, 0x7b,
+  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x61, 0x61, 0x70, 0x70,
+  0x72, 0x6f, 0x78, 0x20, 0x3d, 0x20, 0x28, 0x28, 0x66, 0x6c, 0x6f, 0x61,
+  0x74, 0x29, 0x20, 0x61, 0x29, 0x20, 0x2a, 0x20, 0x28, 0x31, 0x2e, 0x30,
+  0x66, 0x20, 0x2f, 0x20, 0x28, 0x31, 0x4c, 0x4c, 0x20, 0x3c, 0x3c, 0x20,
+  0x34, 0x38, 0x29, 0x29, 0x3b, 0x20, 0x2f, 0x2f, 0x20, 0x33, 0x2e, 0x35,
+  0x35, 0x32, 0x37, 0x31, 0x33, 0x37, 0x65, 0x2d, 0x31, 0x35, 0x66, 0x3b,
+  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x62, 0x61, 0x70, 0x70,
+  0x72, 0x6f, 0x78, 0x20, 0x3d, 0x20, 0x28, 0x28, 0x66, 0x6c, 0x6f, 0x61,
+  0x74, 0x29, 0x20, 0x62, 0x29, 0x20, 0x2a, 0x20, 0x28, 0x31, 0x2e, 0x30,
+  0x66, 0x20, 0x2f, 0x20, 0x28, 0x31, 0x4c, 0x4c, 0x20, 0x3c, 0x3c, 0x20,
+  0x34, 0x38, 0x29, 0x29, 0x3b, 0x20, 0x2f, 0x2f, 0x20, 0x33, 0x2e, 0x35,
+  0x35, 0x32, 0x37, 0x31, 0x33, 0x37, 0x65, 0x2d, 0x31, 0x35, 0x66, 0x3b,
+  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x41, 0x5b, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x20, 0x3d,
+  0x20, 0x28, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x20, 0x6e, 0x29,
+  0x20, 0x2b, 0x20, 0x31, 0x20, 0x2d, 0x20, 0x6c, 0x6f, 0x67, 0x28, 0x6c,
+  0x6f, 0x67, 0x28, 0x61, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x78, 0x20, 0x2a,
+  0x20, 0x61, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x78, 0x20, 0x2b, 0x20, 0x62,
+  0x61, 0x70, 0x70, 0x72, 0x6f, 0x78, 0x20, 0x2a, 0x20, 0x62, 0x61, 0x70,
+  0x70, 0x72, 0x6f, 0x78, 0x29, 0x20, 0x2f, 0x20, 0x32, 0x29, 0x20, 0x2f,
+  0x20, 0x6c, 0x6f, 0x67, 0x28, 0x32, 0x2e, 0x30, 0x66, 0x29, 0x3b, 0x0d,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0d, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x6c, 0x73, 0x65,
+  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x41, 0x5b, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x20, 0x3d,
+  0x20, 0x28, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x6e, 0x29, 0x3b,
+  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a
+};
+unsigned int fixed128_cl_len = 1380; // number of bytes in fixed128_cl