1
0

doubledouble.cl 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. #pragma OPENCL EXTENSION cl_khr_fp64 : enable
  2. inline double2 twoSum(double a, double b) {
  3. double s = a + b;
  4. double bb = s - a;
  5. double e = (a - (s - bb)) + (b - bb);
  6. return (s, e);
  7. }
  8. inline double2 quickTwoSum(double a, double b) {
  9. double s = a + b;
  10. double e = b - (s - a);
  11. return (s, e);
  12. }
  13. inline double2 twoProd(double a, double b) {
  14. //#ifdef QD_FMS
  15. double p = a * b;
  16. double e = fma(a, b, -p);
  17. return (p, e);
  18. //#else
  19. // double a_hi, a_lo, b_hi, b_lo;
  20. // double p = a * b;
  21. // split(a, a_hi, a_lo);
  22. // split(b, b_hi, b_lo);
  23. // err = ((a_hi * b_hi - p) + a_hi * b_lo + a_lo * b_hi) + a_lo * b_lo;
  24. // return p;
  25. //#endif
  26. }
  27. inline double2 mul(double2 a, double2 b) {
  28. double2 p = twoProd(a.0, b.0);
  29. p.1 += a.0 * b.1 + a.1 * b.0;
  30. return quickTwoSum(p.0, p.1);
  31. }
  32. inline double2 add(double2 a, double2 b) {
  33. double se = twoSum(a.0, b.0);
  34. se.1 += a.1 + b.1;
  35. return quickTwoSum(se.0, se.1);
  36. }
  37. inline double2 mulDouble(double2 a, double b) {
  38. double2 p = twoProd(a.0, b);
  39. p.1 += a.1 * b;
  40. return quickTwoSum(p.0, p.1);
  41. }
  42. __kernel void iterate(__global float* A, const int width,
  43. double x1, double x2, double y1, double y2,
  44. double pw1, double pw2, double ph1, double ph2, int max) {
  45. int index = get_global_id(0);
  46. int px = index % width;
  47. int py = index / width;
  48. double2 xl = (x1, x2);
  49. double2 yt = (y1, y2);
  50. double2 pixelScaleX = (pw1, pw2);
  51. double2 pixelScaleY = (ph1, ph2);
  52. double2 a = add(mulDouble(pixelScaleX, (double) px), xl); // pixelScaleX * px + xl
  53. double2 b = add(mulDouble(pixelScaleY, (double) py), yt) // pixelScaleY * py + yt
  54. double2 ca = a;
  55. double2 cb = b;
  56. int n = 0;
  57. while (n < max - 1) {
  58. double2 aa = mul(a, a);
  59. double2 bb = mul(b, b);
  60. double2 ab = mul(a, b);
  61. if (aa.0 + aa.1 + bb.0 + bb.1 > 16) break;
  62. double2 minusbb = (-bb.0, -bb.1)
  63. a = add(add(aa, minusbb), ca);
  64. double2 halfb = add(ab + cb);
  65. b = add(halfb, halfb);
  66. n++;
  67. }
  68. // N + 1 - log (log |Z(N)|) / log 2
  69. if (n >= max - 1)
  70. A[index] = max;
  71. else
  72. A[index] = ((float)n);
  73. // A[index] = ((float)n) + 1 - (a * a + b * b - 16) / (256 - 16);
  74. // A[get_global_id(0)] = 5;
  75. };