doubledouble.cl 2.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. #pragma OPENCL EXTENSION cl_khr_fp64 : enable
  2. inline double2 twoSum(double a, double b) {
  3. double s = a + b;
  4. double bb = s - a;
  5. double e = (a - (s - bb)) + (b - bb);
  6. return (double2)(s, e);
  7. }
  8. inline double2 quickTwoSum(double a, double b) {
  9. double s = a + b;
  10. double e = b - (s - a);
  11. return (double2)(s, e);
  12. }
  13. inline double2 twoProd(double a, double b) {
  14. //#ifdef QD_FMS
  15. double p = a * b;
  16. double e = fma(a, b, -p);
  17. return (double2)(p, e);
  18. //#else
  19. // double a_hi, a_lo, b_hi, b_lo;
  20. // double p = a * b;
  21. // split(a, a_hi, a_lo);
  22. // split(b, b_hi, b_lo);
  23. // err = ((a_hi * b_hi - p) + a_hi * b_lo + a_lo * b_hi) + a_lo * b_lo;
  24. // return p;
  25. //#endif
  26. }
  27. inline double2 mul(double2 a, double2 b) {
  28. double2 p = twoProd(a.s0, b.s0);
  29. p.s1 += (a.s0 * b.s1 + a.s1 * b.s0);
  30. return quickTwoSum(p.s0, p.s1);
  31. }
  32. inline double2 add(double2 a, double2 b) {
  33. double2 se = twoSum(a.s0, b.s0);
  34. se.s1 += a.s1 + b.s1;
  35. return quickTwoSum(se.s0, se.s1);
  36. }
  37. inline double2 mulDouble(double2 a, double b) {
  38. double2 p = twoProd(a.s0, b);
  39. p.s1 += a.s1 * b;
  40. return quickTwoSum(p.s0, p.s1);
  41. }
  42. __kernel void iterate(__global float* A, const int width,
  43. double x1, double x2, double y1, double y2,
  44. double pw1, double pw2, double ph1, double ph2, int max, int smooth) {
  45. int index = get_global_id(0);
  46. int px = index % width;
  47. int py = index / width;
  48. double2 xl = (double2)(x1, x2);
  49. double2 yt = (double2)(y1, y2);
  50. double2 pixelScaleX = (double2)(pw1, pw2);
  51. double2 pixelScaleY = (double2)(ph1, ph2);
  52. double2 a = add(mulDouble(pixelScaleX, (double) px), xl); // pixelScaleX * px + xl
  53. double2 b = add(mulDouble(pixelScaleY, (double) py), yt); // pixelScaleY * py + yt
  54. double2 ca = a;
  55. double2 cb = b;
  56. int n = 0;
  57. while (n < max - 1) {
  58. double2 aa = mul(a, a);
  59. double2 bb = mul(b, b);
  60. double2 ab = mul(a, b);
  61. if (aa.s0 + aa.s1 + bb.s0 + bb.s1 > 16) break;
  62. double2 minusbb = (double2)(-bb.s0, -bb.s1);
  63. a = add(add(aa, minusbb), ca);
  64. b = add(add(ab, ab), cb);
  65. n++;
  66. }
  67. // N + 1 - log (log |Z(N)|) / log 2
  68. if (n >= max - 1)
  69. A[index] = max;
  70. else {
  71. if (smooth != 0)
  72. A[index] = ((float) n) + 1 - log(log(a.s0 * a.s0 + b.s0 * b.s0) / 2) / log(2.0f);
  73. else
  74. A[index] = ((float)n);
  75. }
  76. // A[index] = ((float)n) + 1 - (a * a + b * b - 16) / (256 - 16);
  77. // A[get_global_id(0)] = 5;
  78. }