LightDoubleDouble.h 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. #ifndef MANDEL_LIGHTDOUBLEDOUBLE_H
  2. #define MANDEL_LIGHTDOUBLEDOUBLE_H
  3. namespace mnd
  4. {
  5. struct LightDoubleDouble;
  6. struct LightDoubleFloat;
  7. namespace ldd
  8. {
  9. template<typename T>
  10. struct Pair {
  11. T first;
  12. T second;
  13. };
  14. using DoublePair = Pair<double>;
  15. using FloatPair = Pair<float>;
  16. template<typename T>
  17. inline static Pair<T> quickTwoSum(T a, T b)
  18. {
  19. T s = a + b;
  20. T e = b - (s - a);
  21. return { s, e };
  22. }
  23. template<typename T>
  24. inline Pair<T> twoSum(T a, T b)
  25. {
  26. T s = a + b;
  27. T v = s - a;
  28. T e = (a - (s - v)) + (b - v);
  29. return { s, e };
  30. }
  31. inline Pair<double> split(double a)
  32. {
  33. static const double splitter = double((1ULL << 27) + 1);
  34. double t = splitter * a;
  35. double ahi = t - (t - a);
  36. double alo = a - ahi;
  37. return { ahi, alo };
  38. }
  39. template<typename T>
  40. inline Pair<T> twoProd(T a, T b)
  41. {
  42. T p = a * b;
  43. auto [ahi, alo] = split(a);
  44. auto [bhi, blo] = split(b);
  45. T e = ((ahi * bhi - p) + ahi * blo + alo * bhi) + alo * blo;
  46. return { p, e };
  47. }
  48. /*template<typename T>
  49. inline Pair<T> twoProdFma(T a, T b)
  50. {
  51. T p = a * b;
  52. T e = std::fma(a, b, -p);
  53. return { p, e };
  54. }*/
  55. }
  56. }
  57. struct mnd::LightDoubleDouble
  58. {
  59. double x[2];
  60. LightDoubleDouble(void) = default;
  61. inline LightDoubleDouble(double val) :
  62. x{ val, 0 }
  63. {}
  64. inline LightDoubleDouble(double u, double l) :
  65. x{ u, l }
  66. {}
  67. inline LightDoubleDouble(mnd::ldd::DoublePair dp) :
  68. x{ dp.first, dp.second }
  69. {}
  70. double operator[] (int i) const { return x[i]; }
  71. const double& operator[] (int i) { return x[i]; }
  72. private:
  73. };
  74. inline mnd::LightDoubleDouble operator+(const mnd::LightDoubleDouble& a,
  75. const mnd::LightDoubleDouble& b)
  76. {
  77. auto[s, e] = mnd::ldd::twoSum(a[0], b[0]);
  78. e += a[1] + b[1];
  79. return mnd::ldd::quickTwoSum(s, e);
  80. }
  81. inline mnd::LightDoubleDouble operator-(const mnd::LightDoubleDouble& a,
  82. const mnd::LightDoubleDouble& b)
  83. {
  84. auto[s, e] = mnd::ldd::twoSum(a[0], -b[0]);
  85. e += a[1] - b[1];
  86. return mnd::ldd::quickTwoSum(s, e);
  87. }
  88. inline mnd::LightDoubleDouble operator*(const mnd::LightDoubleDouble& a,
  89. const mnd::LightDoubleDouble& b)
  90. {
  91. auto[p1, p2] = mnd::ldd::twoProd(a[0], b[0]);
  92. p2 += a[0] * b[1] + a[1] * b[0];
  93. return mnd::ldd::quickTwoSum(p1, p2);
  94. }
  95. inline bool operator>(const mnd::LightDoubleDouble& a,
  96. const mnd::LightDoubleDouble& b)
  97. {
  98. return a[0] > b[0] || (a[0] == b[0] && a[1] > b[1]);
  99. }
  100. struct mnd::LightDoubleFloat
  101. {
  102. float x[2];
  103. inline LightDoubleFloat(double val)
  104. {
  105. x[0] = float(val);
  106. x[1] = float(val - double(x[0]));
  107. }
  108. inline LightDoubleFloat(float u, float l) :
  109. x{ u, l }
  110. {}
  111. inline LightDoubleFloat(mnd::ldd::FloatPair dp) :
  112. x{ dp.first, dp.second }
  113. {}
  114. float operator[] (int i) const { return x[i]; }
  115. const float& operator[] (int i) { return x[i]; }
  116. private:
  117. };
  118. inline mnd::LightDoubleFloat operator+(const mnd::LightDoubleFloat& a,
  119. const mnd::LightDoubleFloat& b)
  120. {
  121. auto[s, e] = mnd::ldd::twoSum(a[0], b[0]);
  122. e += a[1] + b[1];
  123. return mnd::ldd::quickTwoSum(s, e);
  124. }
  125. inline mnd::LightDoubleFloat operator-(const mnd::LightDoubleFloat& a,
  126. const mnd::LightDoubleFloat& b)
  127. {
  128. auto[s, e] = mnd::ldd::twoSum(a[0], -b[0]);
  129. e += a[1] - b[1];
  130. return mnd::ldd::quickTwoSum(s, e);
  131. }
  132. inline mnd::LightDoubleFloat operator*(const mnd::LightDoubleFloat& a,
  133. const mnd::LightDoubleFloat& b)
  134. {
  135. auto[p1, p2] = mnd::ldd::twoProd(a[0], b[0]);
  136. p2 += a[0] * b[1] + a[1] * b[0];
  137. return mnd::ldd::quickTwoSum(p1, p2);
  138. }
  139. #endif // MANDEL_LIGHTDOUBLEDOUBLE_H