doublefloat.cl 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. // citation: Guillaume da Graçca, David Defour. Implementation of float-float operators on graphics hardware.
  2. // Real Numbers and Computers 7, Jul 2006, Nancy, France. pp.23-32. ffhal-00021443
  3. // https://hal.archives-ouvertes.fr/hal-00021443/document
  4. float2 twoSum(float a, float b) {
  5. float s = a + b;
  6. float v = s - a;
  7. float r = (a - (s - v)) + (b - v);
  8. return (float2)(s, r);
  9. }
  10. float2 split(float a) {
  11. float c = (4096 + 1) * a;
  12. float abig = c - a;
  13. float ahi = c - abig;
  14. float alo = a - ahi;
  15. return (float2)(ahi, alo);
  16. }
  17. float2 twoProd(float a, float b) {
  18. float x = a * b;
  19. float2 aex = split(a);
  20. float2 bex = split(b);
  21. float errx = x - (aex.s0 * bex.s0);
  22. float erry = errx - (aex.s1 * bex.s0);
  23. float errz = erry - (aex.s0 * bex.s1);
  24. float y = (aex.s1 * bex.s1) - errz;
  25. return (float2)(x, y);
  26. }
  27. float2 add(float2 a, float2 b) {
  28. float r = a.s0 + b.s0;
  29. float s;
  30. if (fabs(a.s0) >= fabs(b.s0)) {
  31. s = (((a.s0 - r) + b.s0) + b.s1) + a.s1;
  32. }
  33. else {
  34. s = (((b.s0 - r) + a.s0) + a.s1) + b.s1;
  35. }
  36. return twoSum(r, s);
  37. }
  38. float2 mul(float2 a, float2 b) {
  39. float2 t = twoProd(a.s0, b.s0);
  40. float t3 = ((a.s0 * b.s1) + (a.s1 * b.s0)) + t.s1;
  41. return twoSum(t.s0, t.s1);
  42. }
  43. float2 mulFloat(float2 a, float b) {
  44. float2 t = twoProd(a.s0, b);
  45. float t3 = (a.s1 * b) + t.s1;
  46. return twoSum(t.s0, t.s1);
  47. }
  48. __kernel void iterate(__global __write_only float* A, const int width,
  49. float x1, float x2, float y1, float y2,
  50. float pw1, float pw2, float ph1, float ph2, int max, int smooth) {
  51. int index = get_global_id(0);
  52. int px = index % width;
  53. int py = index / width;
  54. float2 xl = (float2)(x1, x2);
  55. float2 yt = (float2)(y1, y2);
  56. float2 pixelScaleX = (float2)(pw1, pw2);
  57. float2 pixelScaleY = (float2)(ph1, ph2);
  58. float2 a = add(mulFloat(pixelScaleX, (float) px), xl); // pixelScaleX * px + xl
  59. float2 b = add(mulFloat(pixelScaleY, (float) py), yt); // pixelScaleY * py + yt
  60. float2 ca = a;
  61. float2 cb = b;
  62. int n = 0;
  63. while (n < max - 1) {
  64. float2 aa = mul(a, a);
  65. float2 bb = mul(b, b);
  66. float2 ab = mul(a, b);
  67. if (aa.s0 + bb.s0 > 16) break;
  68. float2 minusbb = (float2)(-bb.s0, -bb.s1);
  69. a = add(add(aa, minusbb), ca);
  70. b = add(add(ab, ab), cb);
  71. n++;
  72. }
  73. // N + 1 - log (log |Z(N)|) / log 2
  74. if (n >= max - 1)
  75. A[index] = max;
  76. else {
  77. if (smooth != 0)
  78. A[index] = ((float) n) + 1 - log(log(a.s0 * a.s0 + b.s0 * b.s0) / 2) / log(2.0f);
  79. else
  80. A[index] = ((float)n);
  81. }
  82. }