fixed512.cl 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. /*uint16 add(uint16 a, uint16 b) {
  2. uint mask = 0x00FFFFFF;
  3. }*/
  4. ulong2 add(ulong2 a, ulong2 b) {
  5. ulong2 result;
  6. uchar carry = 0;
  7. for (int i = 1; i >= 0; i--) {
  8. uchar tmpcr = carry;
  9. ulong temp = a[i] + b[i];
  10. carry = (hadd(a[i], b[i]) >> 63) | (hadd(temp, (ulong) carry) >> 63);
  11. temp += tmpcr;
  12. result[i] = temp;
  13. }
  14. return result;
  15. }
  16. ulong2 sub(ulong2 a, ulong2 b) {
  17. ulong lowerSubbed = a[1] - b[1];
  18. ulong upperSubbed = a[0] - b[1] - ((lowerSubbed > a[1]) ? 1 : 0);
  19. return (ulong2)(upperSubbed, lowerSubbed);
  20. }
  21. /*
  22. ulong4 mulPart(ulong2 a, ulong b) {
  23. ulong4 result;
  24. ulong carry = 0;
  25. for (int i = 1; i >= 0; i--) {
  26. ulong lower = a[i] * b[i];
  27. ulong upper = mul_hi(a[i], b[i]);
  28. ulong c = lower + carry;
  29. ulong cc = hadd(lower, carry) >> 63;
  30. carry = upper + cc;
  31. result[i] = c;
  32. }
  33. }*/
  34. ulong2 mulu64(ulong a, ulong b) {
  35. return (ulong2)(mul_hi(a, b), a * b);
  36. }
  37. ulong2 mul(ulong2 a, ulong2 b) {
  38. //uint4 avec = (uint4)(a[0] >> 32, a[0], a[1] >> 32, a[1]);
  39. //uint4 bvec = (uint4)(b[0] >> 32, b[0], b[1] >> 32, b[1]);
  40. ulong2 uu = mulu64(a[0], b[0]);
  41. ulong2 ul = mulu64(a[0], b[1]);
  42. ulong2 lu = mulu64(a[1], b[0]);
  43. ulong2 ll = mulu64(a[1], b[1]);
  44. ulong4 res = (ulong4)(0, 0, 0, 0);
  45. res[3] = ll[1];
  46. res[2] += lu[1];
  47. res[2] += ul[1];
  48. if (res[2] < ul[1])
  49. res[1]++;
  50. res[2] += ll[0];
  51. if (res[2] < ll[0])
  52. res[1]++;
  53. res[1] += uu[1];
  54. if (res[1] < uu[1])
  55. res[0]++;
  56. res[1] += ul[0];
  57. if (res[1] < ul[0])
  58. res[0]++;
  59. res[1] += lu[0];
  60. if (res[1] < lu[0])
  61. res[0]++;
  62. res[0] += uu[0];
  63. uint4 num = (uint4) (res[0] & 0xFFFFFFFF, ((long) res[1]) >> 32, res[1] & 0xFFFFFFFF, ((long) res[2]) >> 32);
  64. return (ulong2)((num[0] << 32) + num[1], (num[2] << 32) + num[3]);
  65. }
  66. __kernel void iterate(__global float* A, const int width,
  67. ulong x1, ulong x2, ulong y1, ulong y2,
  68. ulong pw1, ulong pw2, ulong ph1, ulong ph2, int max, int smooth) {
  69. int index = get_global_id(0);
  70. ulong2 px = (ulong2) ((index % width) << 32, 0);
  71. ulong2 py = (ulong2) ((index / width) << 32, 0);
  72. ulong2 xl = (ulong2)(x1, x2);
  73. ulong2 yt = (ulong2)(y1, y2);
  74. ulong2 pixelScaleX = (ulong2)(pw1, pw2);
  75. ulong2 pixelScaleY = (ulong2)(ph1, ph2);
  76. ulong2 a = add(mul(pixelScaleX, px), xl); // pixelScaleX * px + xl
  77. ulong2 b = add(mul(pixelScaleY, py), yt); // pixelScaleY * py + yt
  78. ulong2 ca = a;
  79. ulong2 cb = b;
  80. int n = 0;
  81. while (n < max - 1) {
  82. ulong2 aa = mul(a, a);
  83. ulong2 bb = mul(b, b);
  84. ulong2 ab = mul(a, b);
  85. a = add(sub(aa, bb), ca);
  86. b = add(add(ab, ab), cb);
  87. if (aa.s0 + aa.s1 + bb.s0 + bb.s1 > 16) break;
  88. n++;
  89. }
  90. // N + 1 - log (log |Z(N)|) / log 2
  91. if (n >= max - 1)
  92. A[index] = max;
  93. else {
  94. if (smooth != 0) {
  95. float aapprox = (float) a.s0 * 2.3283064e-10f;
  96. float bapprox = (float) b.s0 * 2.3283064e-10f;
  97. A[index] = ((float) n) + 1 - log(log(aapprox * aapprox + bapprox * bapprox) / 2) / log(2.0f);
  98. }
  99. else
  100. A[index] = ((float)n);
  101. }
  102. // A[index] = ((float)n) + 1 - (a * a + b * b - 16) / (256 - 16);
  103. // A[get_global_id(0)] = 5;
  104. }