Explorar o código

better smoothing

Nicolas Winkler hai 5 anos
pai
achega
1c8a27f250

+ 3 - 0
choosegenerators.cpp

@@ -129,8 +129,11 @@ double Benchmarker::benchmarkResult(mnd::MandelGenerator& mg) const
         else if (time < std::chrono::milliseconds(20)) {
             i += 3;
         }
+        QThread::msleep(1);
     }
 
+    QThread::msleep(10);
+
     try {
         const mnd::MandelInfo& mi = benches[(testIndex >= benches.size()) ? (benches.size() - 1) : testIndex];
         Bitmap<float> bmp(mi.bWidth, mi.bHeight);

+ 1 - 0
libalmond/CMakeLists.txt

@@ -40,6 +40,7 @@ else()
     endforeach()
     set(ZLIB_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/zlib-1.2.11 ${ZLIB_PUB_INCLUDE} )
     
+    set(SKIP_INSTALL_ALL ON)
     add_subdirectory(lpng1637)
     foreach(header ${libpng_public_hdrs})
         get_filename_component(the_incluude ${header} DIRECTORY)

+ 10 - 8
libmandel/include/OpenClCode.h

@@ -1,16 +1,18 @@
 #ifndef MANDEL_OPENCLCODE_H
 #define MANDEL_OPENCLCODE_H
 
+#include <string>
+
 namespace mnd
 {
-    const char* getFloat_cl();
-    const char* getDouble_cl();
-    const char* getDoubleFloat_cl();
-    const char* getDoubleDouble_cl();
-    const char* getQuadDouble_cl();
-    const char* getFixed64_cl();
-    const char* getFixed128_cl();
-    const char* getFixed512_cl();
+    std::string getFloat_cl();
+    std::string getDouble_cl();
+    std::string getDoubleFloat_cl();
+    std::string getDoubleDouble_cl();
+    std::string getQuadDouble_cl();
+    std::string getFixed64_cl();
+    std::string getFixed128_cl();
+    std::string getFixed512_cl();
 }
 
 #endif // MANDEL_OPENCLCODE_H

+ 14 - 7
libmandel/include/Types.h

@@ -148,16 +148,17 @@ namespace mnd
     template<>
     inline DoubleDouble convert<DoubleDouble, Real>(const Real& x)
     {
-        std::string s = x.str();
-        return DoubleDouble(s.c_str());
+        double s = static_cast<double>(x);
+        double e = static_cast<double>(x - s);
+        return DoubleDouble{ s, e };
     }
 
     template<>
     inline LightDoubleDouble convert<LightDoubleDouble, Real>(const Real& x)
     {
-        double upper = static_cast<double>(x);
-        double lower = static_cast<double>(x - upper);
-        return { upper, lower };
+        double s = static_cast<double>(x);
+        double e = static_cast<double>(x - s);
+        return LightDoubleDouble{ s, e };
     }
 
     template<>
@@ -175,8 +176,14 @@ namespace mnd
     template<>
     inline QuadDouble convert<QuadDouble, Real>(const Real& x)
     {
-        std::string s = x.str();
-        return QuadDouble(s.c_str());
+        double s = static_cast<double>(x);
+        Real tmp = x - s;
+        double e1 = static_cast<double>(tmp);
+        tmp = tmp - e1;
+        double e2 = static_cast<double>(tmp);
+        tmp = tmp - e2;
+        double e3 = static_cast<double>(tmp);
+        return QuadDouble{ s, e1, e2, e3 };
     }
 
     template<>

+ 23 - 17
libmandel/src/CpuGeneratorsAVX.cpp

@@ -54,13 +54,9 @@ void CpuGenerator<float, mnd::X86_AVX, parallel>::generate(const mnd::MandelInfo
 
             __m256 counter = _mm256_setzero_ps();
             __m256 adder = _mm256_set1_ps(1);
-            __m256 resultsa = _mm256_setzero_ps();
-            __m256 resultsb = _mm256_setzero_ps();
 
             __m256 counter2 = _mm256_setzero_ps();
             __m256 adder2 = _mm256_set1_ps(1);
-            __m256 resultsa2 = _mm256_setzero_ps();
-            __m256 resultsb2 = _mm256_setzero_ps();
 
             __m256 threshold = _mm256_set1_ps(16);
 
@@ -73,7 +69,14 @@ void CpuGenerator<float, mnd::X86_AVX, parallel>::generate(const mnd::MandelInfo
             __m256 cx2 = info.julia ? juliaX : xs2;
             __m256 cy = info.julia ? juliaY : ys;
 
+            __m256 resultsa = a;
+            __m256 resultsb = b;
+            __m256 resultsa2 = a2;
+            __m256 resultsb2 = b2;
+
             if (info.smooth) {
+                __m256 cmp = _mm256_cmp_ps(a, a, _CMP_LE_OQ);
+                __m256 cmp2 = _mm256_cmp_ps(a, a, _CMP_LE_OQ);
                 for (int k = 0; k < info.maxIter; k++) {
                     __m256 aa = _mm256_mul_ps(a, a);
                     __m256 aa2 = _mm256_mul_ps(a2, a2);
@@ -85,12 +88,12 @@ void CpuGenerator<float, mnd::X86_AVX, parallel>::generate(const mnd::MandelInfo
                     a2 = _mm256_add_ps(_mm256_sub_ps(aa2, bb2), cx2);
                     b = _mm256_add_ps(abab, cy);
                     b2 = _mm256_add_ps(abab2, cy);
-                    __m256 cmp = _mm256_cmp_ps(_mm256_add_ps(aa, bb), threshold, _CMP_LE_OQ);
-                    __m256 cmp2 = _mm256_cmp_ps(_mm256_add_ps(aa2, bb2), threshold, _CMP_LE_OQ);
                     resultsa = _mm256_or_ps(_mm256_andnot_ps(cmp, resultsa), _mm256_and_ps(cmp, a));
                     resultsb = _mm256_or_ps(_mm256_andnot_ps(cmp, resultsb), _mm256_and_ps(cmp, b));
                     resultsa2 = _mm256_or_ps(_mm256_andnot_ps(cmp2, resultsa2), _mm256_and_ps(cmp2, a2));
                     resultsb2 = _mm256_or_ps(_mm256_andnot_ps(cmp2, resultsb2), _mm256_and_ps(cmp2, b2));
+                    cmp = _mm256_cmp_ps(_mm256_add_ps(aa, bb), threshold, _CMP_LE_OQ);
+                    cmp2 = _mm256_cmp_ps(_mm256_add_ps(aa2, bb2), threshold, _CMP_LE_OQ);
                     adder = _mm256_and_ps(adder, cmp);
                     counter = _mm256_add_ps(counter, adder);
                     adder2 = _mm256_and_ps(adder2, cmp2);
@@ -145,12 +148,12 @@ void CpuGenerator<float, mnd::X86_AVX, parallel>::generate(const mnd::MandelInfo
             _mm256_store_ps(resb + 8, resultsb2);
             for (int k = 0; k < 16 && i + k < info.bWidth; k++) {
                 if (info.smooth) {
-                    data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? info.maxIter :
+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? info.maxIter :
                         ftRes[k] >= info.maxIter ? info.maxIter :
                         ((float)ftRes[k]) + 1 - ::log(::log(resa[k] * resa[k] + resb[k] * resb[k]) / 2) / ::log(2.0f);
                 }
                 else {
-                    data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? info.maxIter : ftRes[k];
+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? info.maxIter : ftRes[k];
                 }
             }
         }
@@ -211,6 +214,8 @@ void CpuGenerator<double, mnd::X86_AVX, parallel>::generate(const mnd::MandelInf
             __m256d cy = info.julia ? juliaY : ys;
 
             if (info.smooth) {
+                __m256d cmp = _mm256_cmp_pd(a, a, _CMP_LE_OQ);
+                __m256d cmp2 = _mm256_cmp_pd(a, a, _CMP_LE_OQ);
                 for (int k = 0; k < info.maxIter; k++) {
                     __m256d aa = _mm256_mul_pd(a, a);
                     __m256d aa2 = _mm256_mul_pd(a2, a2);
@@ -222,12 +227,12 @@ void CpuGenerator<double, mnd::X86_AVX, parallel>::generate(const mnd::MandelInf
                     a2 = _mm256_add_pd(_mm256_sub_pd(aa2, bb2), cx2);
                     b = _mm256_add_pd(abab, cy);
                     b2 = _mm256_add_pd(abab2, cy);
-                    __m256d cmp = _mm256_cmp_pd(_mm256_add_pd(aa, bb), threshold, _CMP_LE_OQ);
-                    __m256d cmp2 = _mm256_cmp_pd(_mm256_add_pd(aa2, bb2), threshold, _CMP_LE_OQ);
                     resultsa = _mm256_or_pd(_mm256_andnot_pd(cmp, resultsa), _mm256_and_pd(cmp, a));
                     resultsb = _mm256_or_pd(_mm256_andnot_pd(cmp, resultsb), _mm256_and_pd(cmp, b));
                     resultsa2 = _mm256_or_pd(_mm256_andnot_pd(cmp2, resultsa2), _mm256_and_pd(cmp2, a2));
                     resultsb2 = _mm256_or_pd(_mm256_andnot_pd(cmp2, resultsb2), _mm256_and_pd(cmp2, b2));
+                    cmp = _mm256_cmp_pd(_mm256_add_pd(aa, bb), threshold, _CMP_LE_OQ);
+                    cmp2 = _mm256_cmp_pd(_mm256_add_pd(aa2, bb2), threshold, _CMP_LE_OQ);
                     adder = _mm256_and_pd(adder, cmp);
                     counter = _mm256_add_pd(counter, adder);
                     adder2 = _mm256_and_pd(adder2, cmp2);
@@ -278,11 +283,11 @@ void CpuGenerator<double, mnd::X86_AVX, parallel>::generate(const mnd::MandelInf
             _mm256_store_pd(ftRes, counter);
             for (int k = 0; k < 4 && i + k < info.bWidth; k++) {
                 if (info.smooth)
-                    data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? float(info.maxIter) :
+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? float(info.maxIter) :
                         ftRes[k] >= info.maxIter ? float(info.maxIter) :
                         float(((float)ftRes[k]) + 1 - ::log(::log(resa[k] * resa[k] + resb[k] * resb[k]) / 2) / ::log(2.0f));
                 else
-                    data[i + k + j * info.bWidth] = ftRes[k] > 0 ? float(ftRes[k]) : info.maxIter;
+                    data[i + k + j * info.bWidth] = ftRes[k] >= 0 ? float(ftRes[k]) : info.maxIter;
             }
 
             resa = (double*) &resultsa2;
@@ -291,11 +296,11 @@ void CpuGenerator<double, mnd::X86_AVX, parallel>::generate(const mnd::MandelInf
             i += 4;
             for (int k = 0; k < 4 && i + k < info.bWidth; k++) {
                 if (info.smooth)
-                    data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? float(info.maxIter) :
+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? float(info.maxIter) :
                         ftRes[k] >= info.maxIter ? float(info.maxIter) :
                         float(((float)ftRes[k]) + 1 - ::log(::log(resa[k] * resa[k] + resb[k] * resb[k]) / 2) / ::log(2.0f));
                 else
-                    data[i + k + j * info.bWidth] = ftRes[k] > 0 ? float(ftRes[k]) : info.maxIter;
+                    data[i + k + j * info.bWidth] = ftRes[k] >= 0 ? float(ftRes[k]) : info.maxIter;
             }
             i -= 4;
         }
@@ -473,17 +478,18 @@ void CpuGenerator<mnd::DoubleDouble, mnd::X86_AVX, parallel>::generate(const mnd
             __m256d resultsa = _mm256_set1_pd(0);
             __m256d resultsb = _mm256_set1_pd(0);
 
+            __m256d cmp = _mm256_cmp_pd(threshold, threshold, _CMP_LE_OQ);
             for (int k = 0; k < info.maxIter; k++) {
                 AvxDoubleDouble aa = a * a;
                 AvxDoubleDouble bb = b * b;
                 AvxDoubleDouble abab = a * b; abab = abab + abab;
                 a = aa - bb + cx;
                 b = abab + cy;
-                __m256d cmp = _mm256_cmp_pd(_mm256_add_pd(aa.x[0], bb.x[0]), threshold, _CMP_LE_OQ);
                 if (info.smooth) {
                     resultsa = _mm256_or_pd(_mm256_andnot_pd(cmp, resultsa), _mm256_and_pd(cmp, a.x[0]));
                     resultsb = _mm256_or_pd(_mm256_andnot_pd(cmp, resultsb), _mm256_and_pd(cmp, b.x[0]));
                 }
+                cmp = _mm256_cmp_pd(_mm256_add_pd(aa.x[0], bb.x[0]), threshold, _CMP_LE_OQ);
                 adder = _mm256_and_pd(adder, cmp);
                 counter = _mm256_add_pd(counter, adder);
                 if (_mm256_testz_si256(_mm256_castpd_si256(cmp), _mm256_castpd_si256(cmp)) != 0) {
@@ -506,11 +512,11 @@ void CpuGenerator<mnd::DoubleDouble, mnd::X86_AVX, parallel>::generate(const mnd
 
             for (int k = 0; k < 4 && i + k < info.bWidth; k++) {
                 if (info.smooth)
-                    data[i + k + j * info.bWidth] = float(ftRes[k] <= 0 ? info.maxIter :
+                    data[i + k + j * info.bWidth] = float(ftRes[k] < 0 ? info.maxIter :
                         ftRes[k] >= info.maxIter ? info.maxIter :
                         ((float)ftRes[k]) + 1 - ::log(::log(float(resa[k] * resa[k] + resb[k] * resb[k])) / 2) / ::log(2.0f));
                 else
-                    data[i + k + j * info.bWidth] = ftRes[k] > 0 ? float(ftRes[k]) : info.maxIter;
+                    data[i + k + j * info.bWidth] = ftRes[k] >= 0 ? float(ftRes[k]) : info.maxIter;
             }
         }
     }

+ 18 - 11
libmandel/src/CpuGeneratorsAVX512.cpp

@@ -82,6 +82,8 @@ void CpuGenerator<float, mnd::X86_AVX_512, parallel>::generate(const mnd::Mandel
             //__m512 b2 = ys;
 
             if (info.smooth) {
+                __mmask16 cmp0 = 0xFFFF;
+                __mmask16 cmp1 = 0xFFFF;
                 for (int k = 0; k < info.maxIter; k++) {
                     __m512 aa0 = _mm512_mul_ps(a0, a0);
                     __m512 aa1 = _mm512_mul_ps(a1, a1);
@@ -89,24 +91,28 @@ void CpuGenerator<float, mnd::X86_AVX_512, parallel>::generate(const mnd::Mandel
                     __m512 abab0 = _mm512_mul_ps(a0, b0);
                     __m512 abab1 = _mm512_mul_ps(a1, b1);
                     //__m512 abab2 = _mm512_mul_ps(a2, b2);
-                    __mmask16 cmp0 = _mm512_cmp_ps_mask(_mm512_fmadd_ps(b0, b0, aa0), threshold, _CMP_LE_OQ);
-                    __mmask16 cmp1 = _mm512_cmp_ps_mask(_mm512_fmadd_ps(b1, b1, aa1), threshold, _CMP_LE_OQ);
-                    //__mmask16 cmp2 = _mm512_cmp_ps_mask(_mm512_fmadd_ps(b2, b2, aa2), threshold, _CMP_LE_OQ);
+
                     a0 = _mm512_sub_ps(aa0, _mm512_fmsub_ps(b0, b0, cx0));
                     a1 = _mm512_sub_ps(aa1, _mm512_fmsub_ps(b1, b1, cx1));
                     //a2 = _mm512_sub_ps(aa2, _mm512_fmsub_ps(b2, b2, xs2));
                     b0 = _mm512_fmadd_ps(two, abab0, cy);
                     b1 = _mm512_fmadd_ps(two, abab1, cy);
                     //b2 = _mm512_fmadd_ps(two, abab2, ys);
-                    counter0 = _mm512_mask_add_ps(counter0, cmp0, counter0, adder0);
-                    counter1 = _mm512_mask_add_ps(counter1, cmp1, counter1, adder1);
-                    //counter2 = _mm512_mask_add_ps(counter2, cmp2, counter2, adder2);
+
                     resultsa0 = _mm512_mask_blend_ps(cmp0, resultsa0, a0);
                     resultsa1 = _mm512_mask_blend_ps(cmp1, resultsa1, a1);
                     //resultsa2 = _mm512_mask_blend_ps(cmp2, resultsa2, a2);
                     resultsb0 = _mm512_mask_blend_ps(cmp0, resultsb0, b0);
                     resultsb1 = _mm512_mask_blend_ps(cmp1, resultsb1, b1);
                     //resultsb2 = _mm512_mask_blend_ps(cmp2, resultsb2, b2);
+
+                    cmp0 = _mm512_cmp_ps_mask(_mm512_fmadd_ps(b0, b0, aa0), threshold, _CMP_LE_OQ);
+                    cmp1 = _mm512_cmp_ps_mask(_mm512_fmadd_ps(b1, b1, aa1), threshold, _CMP_LE_OQ);
+                    //__mmask16 cmp2 = _mm512_cmp_ps_mask(_mm512_fmadd_ps(b2, b2, aa2), threshold, _CMP_LE_OQ);
+
+                    counter0 = _mm512_mask_add_ps(counter0, cmp0, counter0, adder0);
+                    counter1 = _mm512_mask_add_ps(counter1, cmp1, counter1, adder1);
+                    //counter2 = _mm512_mask_add_ps(counter2, cmp2, counter2, adder2);
                     if (cmp0 == 0 && cmp1 == 0 /*&& cmp2 == 0*/) {
                         break;
                     }
@@ -162,12 +168,12 @@ void CpuGenerator<float, mnd::X86_AVX_512, parallel>::generate(const mnd::Mandel
             }
             for (int k = 0; k < 2 * 16 && i + k < info.bWidth; k++) {
                 if (info.smooth) {
-                    data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? info.maxIter :
+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? info.maxIter :
                         ftRes[k] >= info.maxIter ? info.maxIter :
                         ((float)ftRes[k]) + 1 - ::log(::log(resa[k] * resa[k] + resb[k] * resb[k]) / 2) / ::log(2.0f);
                 }
                 else {
-                    data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? info.maxIter : ftRes[k];
+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? info.maxIter : ftRes[k];
                 }
             }
         }
@@ -222,14 +228,15 @@ void CpuGenerator<double, mnd::X86_AVX_512, parallel>::generate(const mnd::Mande
             __m512d b = ys;
 
             if (info.smooth) {
+                __mmask8 cmp = 0xFF;
                 for (int k = 0; k < info.maxIter; k++) {
                     __m512d aa = _mm512_mul_pd(a, a);
                     __m512d ab = _mm512_mul_pd(a, b);
-                    __mmask8 cmp = _mm512_cmp_pd_mask(_mm512_fmadd_pd(b, b, aa), threshold, _CMP_LE_OQ);
                     a = _mm512_sub_pd(aa, _mm512_fmsub_pd(b, b, cx));
                     b = _mm512_fmadd_pd(two, ab, cy);
                     resultsa = _mm512_mask_blend_pd(cmp, resultsa, a);
                     resultsb = _mm512_mask_blend_pd(cmp, resultsb, b);
+                    cmp = _mm512_cmp_pd_mask(_mm512_fmadd_pd(b, b, aa), threshold, _CMP_LE_OQ);
                     counter = _mm512_mask_add_pd(counter, cmp, counter, adder);
                     if (cmp == 0) {
                         break;
@@ -268,12 +275,12 @@ void CpuGenerator<double, mnd::X86_AVX_512, parallel>::generate(const mnd::Mande
             }
             for (int k = 0; k < 8 && i + k < info.bWidth; k++) {
                 if (info.smooth) {
-                    data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? info.maxIter :
+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? info.maxIter :
                         ftRes[k] >= info.maxIter ? info.maxIter :
                         ((float)ftRes[k]) + 1 - ::log(::log((float) (resa[k] * resa[k] + resb[k] * resb[k])) / 2) / ::log(2.0f);
                 }
                 else {
-                    data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? info.maxIter : ftRes[k];
+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? info.maxIter : ftRes[k];
                 }
             }
         }

+ 33 - 31
libmandel/src/CpuGeneratorsAVXFMA.cpp

@@ -88,6 +88,9 @@ void CpuGenerator<float, mnd::X86_AVX_FMA, parallel>::generate(const mnd::Mandel
             __m256 cy = info.julia ? juliaY : ys;
 
             if (info.smooth) {
+                __m256 cmp = _mm256_cmp_ps(threshold, threshold, _CMP_LE_OQ);
+                __m256 cmp2 = _mm256_cmp_ps(threshold, threshold, _CMP_LE_OQ);
+                __m256 cmp3 = _mm256_cmp_ps(threshold, threshold, _CMP_LE_OQ);
                 for (int k = 0; k < info.maxIter; k++) {
                     __m256 bb = _mm256_mul_ps(b, b);
                     __m256 bb2 = _mm256_mul_ps(b2, b2);
@@ -95,27 +98,27 @@ void CpuGenerator<float, mnd::X86_AVX_FMA, parallel>::generate(const mnd::Mandel
                     __m256 ab = _mm256_mul_ps(a, b);
                     __m256 ab2 = _mm256_mul_ps(a2, b2);
                     __m256 ab3 = _mm256_mul_ps(a3, b3);
-                    b = _mm256_fmadd_ps(two, ab, cy);
-                    b2 = _mm256_fmadd_ps(two, ab2, cy);
-                    b3 = _mm256_fmadd_ps(two, ab3, cy);
-                    __m256 cmp = _mm256_cmp_ps(_mm256_fmadd_ps(a, a, bb), threshold, _CMP_LE_OQ);
-                    __m256 cmp2 = _mm256_cmp_ps(_mm256_fmadd_ps(a2, a2, bb2), threshold, _CMP_LE_OQ);
-                    __m256 cmp3 = _mm256_cmp_ps(_mm256_fmadd_ps(a3, a3, bb3), threshold, _CMP_LE_OQ);
                     a = _mm256_add_ps(_mm256_fmsub_ps(a, a, bb), cx);
                     a2 = _mm256_add_ps(_mm256_fmsub_ps(a2, a2, bb2), cx2);
                     a3 = _mm256_add_ps(_mm256_fmsub_ps(a3, a3, bb3), cx3);
+                    b = _mm256_fmadd_ps(two, ab, cy);
+                    b2 = _mm256_fmadd_ps(two, ab2, cy);
+                    b3 = _mm256_fmadd_ps(two, ab3, cy);
                     /*resultsa = _mm256_or_ps(_mm256_andnot_ps(cmp, resultsa), _mm256_and_ps(cmp, a));
                     resultsb = _mm256_or_ps(_mm256_andnot_ps(cmp, resultsb), _mm256_and_ps(cmp, b));
                     resultsa2 = _mm256_or_ps(_mm256_andnot_ps(cmp2, resultsa2), _mm256_and_ps(cmp2, a2));
                     resultsb2 = _mm256_or_ps(_mm256_andnot_ps(cmp2, resultsb2), _mm256_and_ps(cmp2, b2));
                     resultsa3 = _mm256_or_ps(_mm256_andnot_ps(cmp3, resultsa3), _mm256_and_ps(cmp3, a3));
                     resultsb3 = _mm256_or_ps(_mm256_andnot_ps(cmp3, resultsb3), _mm256_and_ps(cmp3, b3));*/
-                    resultsa = _mm256_blendv_ps(resultsa, a, cmp); 
-                    resultsb = _mm256_blendv_ps(resultsb, b, cmp); 
-                    resultsa2 = _mm256_blendv_ps(resultsa2, a2, cmp2); 
-                    resultsb2 = _mm256_blendv_ps(resultsb2, b2, cmp2); 
-                    resultsa3 = _mm256_blendv_ps(resultsa3, a3, cmp3); 
-                    resultsb3 = _mm256_blendv_ps(resultsb3, b3, cmp3); 
+                    resultsa = _mm256_blendv_ps(resultsa, a, cmp);
+                    resultsb = _mm256_blendv_ps(resultsb, b, cmp);
+                    resultsa2 = _mm256_blendv_ps(resultsa2, a2, cmp2);
+                    resultsb2 = _mm256_blendv_ps(resultsb2, b2, cmp2);
+                    resultsa3 = _mm256_blendv_ps(resultsa3, a3, cmp3);
+                    resultsb3 = _mm256_blendv_ps(resultsb3, b3, cmp3);
+                    cmp = _mm256_cmp_ps(_mm256_fmadd_ps(a, a, bb), threshold, _CMP_LE_OQ);
+                    cmp2 = _mm256_cmp_ps(_mm256_fmadd_ps(a2, a2, bb2), threshold, _CMP_LE_OQ);
+                    cmp3 = _mm256_cmp_ps(_mm256_fmadd_ps(a3, a3, bb3), threshold, _CMP_LE_OQ);
                     adder = _mm256_and_ps(adder, cmp);
                     counter = _mm256_add_ps(counter, adder);
                     adder2 = _mm256_and_ps(adder2, cmp2);
@@ -180,12 +183,12 @@ void CpuGenerator<float, mnd::X86_AVX_FMA, parallel>::generate(const mnd::Mandel
             _mm256_store_ps(resb + 16, resultsb3);
             for (int k = 0; k < 24 && i + k < info.bWidth; k++) {
                 if (info.smooth) {
-                    data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? info.maxIter :
+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? info.maxIter :
                         ftRes[k] >= info.maxIter ? info.maxIter :
                         ((float)ftRes[k]) + 1 - ::log(::log(resa[k] * resa[k] + resb[k] * resb[k]) / 2) / ::log(2.0f);
                 }
                 else {
-                    data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? info.maxIter : ftRes[k];
+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? info.maxIter : ftRes[k];
                 }
             }
         }
@@ -248,13 +251,13 @@ void CpuGenerator<double, mnd::X86_AVX_FMA, parallel>::generate(const mnd::Mande
             __m256d cx2 = info.julia ? juliaX : xs2;
             //__m256d cy2 = info.julia ? juliaY : ys;
 
+            __m256d cmp = _mm256_cmp_pd(threshold, threshold, _CMP_LE_OQ);
+            __m256d cmp2 = _mm256_cmp_pd(threshold, threshold, _CMP_LE_OQ);
             for (int k = 0; k < info.maxIter; k++) {
                 __m256d ab = _mm256_mul_pd(a, b);
                 __m256d bb = _mm256_mul_pd(b, b);
                 __m256d ab2 = _mm256_mul_pd(a2, b2);
                 __m256d bb2 = _mm256_mul_pd(b2, b2);
-                __m256d cmp = _mm256_cmp_pd(_mm256_fmadd_pd(a, a, bb), threshold, _CMP_LE_OQ);
-                __m256d cmp2 = _mm256_cmp_pd(_mm256_fmadd_pd(a2, a2, bb2), threshold, _CMP_LE_OQ);
                 a = _mm256_fmsub_pd(a, a, bb);
                 a = _mm256_add_pd(a, cx);
                 a2 = _mm256_fmsub_pd(a2, a2, bb2);
@@ -262,16 +265,13 @@ void CpuGenerator<double, mnd::X86_AVX_FMA, parallel>::generate(const mnd::Mande
                 b = _mm256_fmadd_pd(two, ab, cy);
                 b2 = _mm256_fmadd_pd(two, ab2, cy);
                 if (info.smooth) {
-                    /*resultsa = _mm256_or_pd(_mm256_andnot_pd(cmp, resultsa), _mm256_and_pd(cmp, a));
-                    resultsb = _mm256_or_pd(_mm256_andnot_pd(cmp, resultsb), _mm256_and_pd(cmp, b));
-                    resultsa2 = _mm256_or_pd(_mm256_andnot_pd(cmp2, resultsa2), _mm256_and_pd(cmp2, a2));
-                    resultsb2 = _mm256_or_pd(_mm256_andnot_pd(cmp2, resultsb2), _mm256_and_pd(cmp2, b2));*/
-
                     resultsa = _mm256_blendv_pd(resultsa, a, cmp);
                     resultsb = _mm256_blendv_pd(resultsb, b, cmp);
                     resultsa2 = _mm256_blendv_pd(resultsa2, a2, cmp2);
                     resultsb2 = _mm256_blendv_pd(resultsb2, b2, cmp2);
                 }
+                cmp = _mm256_cmp_pd(_mm256_fmadd_pd(a, a, bb), threshold, _CMP_LE_OQ);
+                cmp2 = _mm256_cmp_pd(_mm256_fmadd_pd(a2, a2, bb2), threshold, _CMP_LE_OQ);
                 adder = _mm256_and_pd(adder, cmp);
                 adder2 = _mm256_and_pd(adder2, cmp2);
                 counter = _mm256_add_pd(counter, adder);
@@ -296,11 +296,11 @@ void CpuGenerator<double, mnd::X86_AVX_FMA, parallel>::generate(const mnd::Mande
             _mm256_store_pd(ftRes, counter);
             for (int k = 0; k < 4 && i + k < info.bWidth; k++) {
                 if (info.smooth)
-                    data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? info.maxIter :
+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? info.maxIter :
                         ftRes[k] >= info.maxIter ? info.maxIter :
                         ((float)ftRes[k]) + 1 - ::log(::log(resa[k] * resa[k] + resb[k] * resb[k]) / 2) / ::log(2.0f);
                 else
-                    data[i + k + j * info.bWidth] = ftRes[k] > 0 ? float(ftRes[k]) : info.maxIter;
+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? info.maxIter : float(ftRes[k]);
             }
 
             resa = (double*) &resultsa2;
@@ -309,11 +309,11 @@ void CpuGenerator<double, mnd::X86_AVX_FMA, parallel>::generate(const mnd::Mande
             i += 4;
             for (int k = 0; k < 4 && i + k < info.bWidth; k++) {
                 if (info.smooth)
-                    data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? info.maxIter :
+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? info.maxIter :
                         ftRes[k] >= info.maxIter ? info.maxIter :
                         ((float)ftRes[k]) + 1 - ::log(::log(resa[k] * resa[k] + resb[k] * resb[k]) / 2) / ::log(2.0f);
                 else
-                    data[i + k + j * info.bWidth] = ftRes[k] > 0 ? float(ftRes[k]) : info.maxIter;
+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? info.maxIter : float(ftRes[k]);
             }
             i -= 4;
         }
@@ -573,17 +573,18 @@ void CpuGenerator<mnd::DoubleDouble, mnd::X86_AVX_FMA, parallel>::generate(const
             __m256d resultsa;
             __m256d resultsb;
 
+            __m256d cmp = _mm256_cmp_pd(threshold, threshold, _CMP_LE_OQ);
             for (int k = 0; k < info.maxIter; k++) {
                 AvxDoubleDouble aa = a * a;
                 AvxDoubleDouble bb = b * b;
                 AvxDoubleDouble abab = a * b; abab = abab + abab;
                 a = aa - bb + cx;
                 b = abab + cy;
-                __m256d cmp = _mm256_cmp_pd(_mm256_add_pd(aa.x[0], bb.x[0]), threshold, _CMP_LE_OQ);
                 if (info.smooth) {
                     resultsa = _mm256_blendv_pd(resultsa, a.x[0], cmp);
                     resultsb = _mm256_blendv_pd(resultsb, b.x[0], cmp);
                 }
+                cmp = _mm256_cmp_pd(_mm256_add_pd(aa.x[0], bb.x[0]), threshold, _CMP_LE_OQ);
                 adder = _mm256_and_pd(adder, cmp);
                 counter = _mm256_add_pd(counter, adder);
                 if (_mm256_testz_si256(_mm256_castpd_si256(cmp), _mm256_castpd_si256(cmp)) != 0) {
@@ -606,11 +607,11 @@ void CpuGenerator<mnd::DoubleDouble, mnd::X86_AVX_FMA, parallel>::generate(const
 
             for (int k = 0; k < 4 && i + k < info.bWidth; k++) {
                 if (info.smooth)
-                    data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? info.maxIter :
+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? info.maxIter :
                         ftRes[k] >= info.maxIter ? info.maxIter :
                         ((float)ftRes[k]) + 1 - ::log(::log(resa[k] * resa[k] + resb[k] * resb[k]) / 2) / ::log(2.0f);
                 else
-                    data[i + k + j * info.bWidth] = ftRes[k] > 0 ? float(ftRes[k]) : info.maxIter;
+                    data[i + k + j * info.bWidth] = ftRes[k] >= 0 ? float(ftRes[k]) : info.maxIter;
             }
         }
     }
@@ -771,17 +772,18 @@ void CpuGenerator<mnd::QuadDouble, mnd::X86_AVX_FMA, parallel>::generate(const m
             __m256d resultsa;
             __m256d resultsb;
 
+            __m256d cmp = _mm256_cmp_pd(threshold, threshold, _CMP_LE_OQ);
             for (int k = 0; k < info.maxIter; k++) {
                 AvxQuadDouble aa = a * a;
                 AvxQuadDouble bb = b * b;
                 AvxQuadDouble abab = a * b; abab = abab + abab;
                 a = aa - bb + cx;
                 b = abab + cy;
-                __m256d cmp = _mm256_cmp_pd(_mm256_add_pd(aa.x[0], bb.x[0]), threshold, _CMP_LE_OQ);
                 if (info.smooth) {
                     resultsa = _mm256_blendv_pd(resultsa, a.x[0], cmp);
                     resultsb = _mm256_blendv_pd(resultsb, b.x[0], cmp);
                 }
+                cmp = _mm256_cmp_pd(_mm256_add_pd(aa.x[0], bb.x[0]), threshold, _CMP_LE_OQ);
                 adder = _mm256_and_pd(adder, cmp);
                 counter = _mm256_add_pd(counter, adder);
                 if (_mm256_testz_si256(_mm256_castpd_si256(cmp), _mm256_castpd_si256(cmp)) != 0) {
@@ -804,11 +806,11 @@ void CpuGenerator<mnd::QuadDouble, mnd::X86_AVX_FMA, parallel>::generate(const m
 
             for (int k = 0; k < 4 && i + k < info.bWidth; k++) {
                 if (info.smooth)
-                    data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? info.maxIter :
+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? info.maxIter :
                         ftRes[k] >= info.maxIter ? info.maxIter :
                         ((float)ftRes[k]) + 1 - ::log(::log(resa[k] * resa[k] + resb[k] * resb[k]) / 2) / ::log(2.0f);
                 else
-                    data[i + k + j * info.bWidth] = ftRes[k] > 0 ? float(ftRes[k]) : info.maxIter;
+                    data[i + k + j * info.bWidth] = ftRes[k] >= 0 ? float(ftRes[k]) : info.maxIter;
             }
         }
     }

+ 17 - 13
libmandel/src/CpuGeneratorsSSE2.cpp

@@ -72,6 +72,8 @@ void CpuGenerator<float, mnd::X86_SSE2, parallel>::generate(const mnd::MandelInf
             __m128 resulta2 = { 0, 0, 0, 0 };
             __m128 resultb2 = { 0, 0, 0, 0 };
 
+            __m128 cmp = _mm_cmple_ps(threshold, threshold);
+            __m128 cmp2 = _mm_cmple_ps(threshold, threshold);
             for (int k = 0; k < info.maxIter; k++) {
                 __m128 aa = _mm_mul_ps(a, a);
                 __m128 aa2 = _mm_mul_ps(a2, a2);
@@ -83,14 +85,14 @@ void CpuGenerator<float, mnd::X86_SSE2, parallel>::generate(const mnd::MandelInf
                 b = _mm_add_ps(abab, cy);
                 a2 = _mm_add_ps(_mm_sub_ps(aa2, bb2), cx2);
                 b2 = _mm_add_ps(abab2, cy);
-                __m128 cmp = _mm_cmple_ps(_mm_add_ps(aa, bb), threshold);
-                __m128 cmp2 = _mm_cmple_ps(_mm_add_ps(aa2, bb2), threshold);
                 if (info.smooth) {
                     resulta = _mm_or_ps(_mm_andnot_ps(cmp, resulta), _mm_and_ps(cmp, a));
                     resultb = _mm_or_ps(_mm_andnot_ps(cmp, resultb), _mm_and_ps(cmp, b));
                     resulta2 = _mm_or_ps(_mm_andnot_ps(cmp2, resulta2), _mm_and_ps(cmp2, a2));
                     resultb2 = _mm_or_ps(_mm_andnot_ps(cmp2, resultb2), _mm_and_ps(cmp2, b2));
                 }
+                cmp = _mm_cmple_ps(_mm_add_ps(aa, bb), threshold);
+                cmp2 = _mm_cmple_ps(_mm_add_ps(aa2, bb2), threshold);
                 adder = _mm_and_ps(adder, cmp);
                 counter = _mm_add_ps(counter, adder);
                 adder2 = _mm_and_ps(adder2, cmp2);
@@ -121,11 +123,11 @@ void CpuGenerator<float, mnd::X86_SSE2, parallel>::generate(const mnd::MandelInf
             _mm_store_ps(resb + 4, resultb2);
             for (int k = 0; k < 8 && i + k < info.bWidth; k++) {
                 if (info.smooth)
-                    data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? info.maxIter :
+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? info.maxIter :
                     ftRes[k] >= info.maxIter ? info.maxIter :
                     ((float)ftRes[k]) + 1 - ::logf(::logf(resa[k] * resa[k] + resb[k] * resb[k]) / 2) / ::logf(2.0f);
                 else
-                    data[i + k + j * info.bWidth] = ftRes[k] > 0 ? float(ftRes[k]) : info.maxIter;
+                    data[i + k + j * info.bWidth] = ftRes[k] >= 0 ? float(ftRes[k]) : info.maxIter;
             }
         }
     }
@@ -177,17 +179,19 @@ void CpuGenerator<double, mnd::X86_SSE2, parallel>::generate(const mnd::MandelIn
             __m128d cx = xs;
             __m128d cy = ys;
             __m128d cx2 = xs2;
-	    if (info.julia) {
-		cx = juliaX;
-		cx2 = juliaX;
-		cy = juliaY;
-	    }
+	        if (info.julia) {
+		        cx = juliaX;
+		        cx2 = juliaX;
+		        cy = juliaY;
+	        }
 
             __m128d resulta = { 0, 0 };
             __m128d resultb = { 0, 0 };
             __m128d resulta2 = { 0, 0 };
             __m128d resultb2 = { 0, 0 };
 
+            __m128d cmp = _mm_cmple_pd(threshold, threshold);
+            __m128d cmp2 = _mm_cmple_pd(threshold, threshold);
             for (int k = 0; k < info.maxIter; k++) {
                 __m128d aa = _mm_mul_pd(a, a);
                 __m128d aa2 = _mm_mul_pd(a2, a2);
@@ -199,14 +203,14 @@ void CpuGenerator<double, mnd::X86_SSE2, parallel>::generate(const mnd::MandelIn
                 b = _mm_add_pd(abab, cy);
                 a2 = _mm_add_pd(_mm_sub_pd(aa2, bb2), cx2);
                 b2 = _mm_add_pd(abab2, cy);
-                __m128d cmp = _mm_cmple_pd(_mm_add_pd(aa, bb), threshold);
-                __m128d cmp2 = _mm_cmple_pd(_mm_add_pd(aa2, bb2), threshold);
                 if (info.smooth) {
                     resulta = _mm_or_pd(_mm_andnot_pd(cmp, resulta), _mm_and_pd(cmp, a));
                     resultb = _mm_or_pd(_mm_andnot_pd(cmp, resultb), _mm_and_pd(cmp, b));
                     resulta2 = _mm_or_pd(_mm_andnot_pd(cmp2, resulta2), _mm_and_pd(cmp2, a2));
                     resultb2 = _mm_or_pd(_mm_andnot_pd(cmp2, resultb2), _mm_and_pd(cmp2, b2));
                 }
+                cmp = _mm_cmple_pd(_mm_add_pd(aa, bb), threshold);
+                cmp2 = _mm_cmple_pd(_mm_add_pd(aa2, bb2), threshold);
                 adder = _mm_and_pd(adder, cmp);
                 counter = _mm_add_pd(counter, adder);
                 adder2 = _mm_and_pd(adder2, cmp2);
@@ -229,11 +233,11 @@ void CpuGenerator<double, mnd::X86_SSE2, parallel>::generate(const mnd::MandelIn
             _mm_storeu_pd(resb + 2, resultb2);
             for (int k = 0; k < 4 && i + k < info.bWidth; k++) {
                 if (info.smooth)
-                    data[i + k + j * info.bWidth] = ftRes[k] <= 0 ? info.maxIter :
+                    data[i + k + j * info.bWidth] = ftRes[k] < 0 ? info.maxIter :
                     ftRes[k] >= info.maxIter ? info.maxIter :
                     ((float)ftRes[k]) + 1 - ::logf(::logf(resa[k] * resa[k] + resb[k] * resb[k]) / 2) / ::logf(2.0f);
                 else
-                    data[i + k + j * info.bWidth] = ftRes[k] > 0 ? float(ftRes[k]) : info.maxIter;
+                    data[i + k + j * info.bWidth] = ftRes[k] >= 0 ? float(ftRes[k]) : info.maxIter;
             }
         }
     }

+ 16 - 16
libmandel/src/OpenClCode.cpp

@@ -12,36 +12,36 @@
 
 namespace mnd
 {
-    const char* getFloat_cl() {
-        return (char*) float_cl;
+    std::string getFloat_cl() {
+        return std::string{ (char*) float_cl, float_cl_len };
     }
 
-    const char* getDouble_cl() {
-        return (char*) double_cl;
+    std::string getDouble_cl() {
+        return std::string{ (char*) double_cl, double_cl_len };
     }
 
-    const char* getDoubleFloat_cl() {
-        return (char*) doublefloat_cl;
+    std::string getDoubleFloat_cl() {
+        return std::string{ (char*) doublefloat_cl, doublefloat_cl_len };
     }
 
-    const char* getDoubleDouble_cl() {
-        return (char*) doubledouble_cl;
+    std::string getDoubleDouble_cl() {
+        return std::string{ (char*) doubledouble_cl, doubledouble_cl_len };
     }
 
-    const char* getQuadDouble_cl() {
-        return (char*) quaddouble_cl;
+    std::string getQuadDouble_cl() {
+        return std::string{ (char*) quaddouble_cl, quaddouble_cl_len };
     }
 
-    const char* getFixed64_cl() {
-        return (char*) fixed64_cl;
+    std::string getFixed64_cl() {
+        return std::string{ (char*) fixed64_cl, fixed64_cl_len };
     }
 
-    const char* getFixed128_cl() {
-        return (char*) fixed128_cl;
+    std::string getFixed128_cl() {
+        return std::string{ (char*) fixed128_cl, fixed128_cl_len };
     }
 
-    const char* getFixed512_cl() {
-        return (char*) fixed512_cl;
+    std::string getFixed512_cl() {
+        return std::string{ (char*) fixed512_cl, fixed512_cl_len };
     }
 }
 

+ 2 - 3
libmandel/src/opencl/double.cl

@@ -13,9 +13,9 @@ __kernel void iterate(__global float* A, const int width, double xl, double yt,
        double aa = a * a;
        double bb = b * b;
        double ab = a * b;
-       if (aa + bb > 16) break;
        a = aa - bb + ca;
        b = ab + ab + cb;
+       if (aa + bb > 16) break;
        n++;
    }
 // N + 1 - log (log  |Z(N)|) / log 2
@@ -27,5 +27,4 @@ __kernel void iterate(__global float* A, const int width, double xl, double yt,
        else
            A[index] = ((float)n);
    }
-}
-
+}

+ 9 - 9
libmandel/src/opencl/double.h

@@ -49,13 +49,13 @@ unsigned char double_cl[] = {
   0x62, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f,
   0x75, 0x62, 0x6c, 0x65, 0x20, 0x61, 0x62, 0x20, 0x3d, 0x20, 0x61, 0x20,
   0x2a, 0x20, 0x62, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x69, 0x66, 0x20, 0x28, 0x61, 0x61, 0x20, 0x2b, 0x20, 0x62, 0x62, 0x20,
-  0x3e, 0x20, 0x31, 0x36, 0x29, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x20, 0x3d, 0x20,
-  0x61, 0x61, 0x20, 0x2d, 0x20, 0x62, 0x62, 0x20, 0x2b, 0x20, 0x63, 0x61,
-  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62, 0x20, 0x3d,
-  0x20, 0x61, 0x62, 0x20, 0x2b, 0x20, 0x61, 0x62, 0x20, 0x2b, 0x20, 0x63,
-  0x62, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6e, 0x2b,
+  0x61, 0x20, 0x3d, 0x20, 0x61, 0x61, 0x20, 0x2d, 0x20, 0x62, 0x62, 0x20,
+  0x2b, 0x20, 0x63, 0x61, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x62, 0x20, 0x3d, 0x20, 0x61, 0x62, 0x20, 0x2b, 0x20, 0x61, 0x62,
+  0x20, 0x2b, 0x20, 0x63, 0x62, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x61, 0x61, 0x20, 0x2b, 0x20, 0x62,
+  0x62, 0x20, 0x3e, 0x20, 0x31, 0x36, 0x29, 0x20, 0x62, 0x72, 0x65, 0x61,
+  0x6b, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6e, 0x2b,
   0x2b, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x2f, 0x2f, 0x20, 0x4e,
   0x20, 0x2b, 0x20, 0x31, 0x20, 0x2d, 0x20, 0x6c, 0x6f, 0x67, 0x20, 0x28,
   0x6c, 0x6f, 0x67, 0x20, 0x20, 0x7c, 0x5a, 0x28, 0x4e, 0x29, 0x7c, 0x29,
@@ -78,6 +78,6 @@ unsigned char double_cl[] = {
   0x6c, 0x73, 0x65, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
   0x20, 0x20, 0x20, 0x41, 0x5b, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x20,
   0x3d, 0x20, 0x28, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x6e, 0x29,
-  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x7d, 0x0a, 0x0a
+  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x7d
 };
-unsigned int double_cl_len = 958;
+unsigned int double_cl_len = 956;

+ 1 - 1
libmandel/src/opencl/doubledouble.cl

@@ -76,10 +76,10 @@ __kernel void iterate(__global float* A, const int width,
         double2 aa = mul(a, a);
         double2 bb = mul(b, b);
         double2 ab = mul(a, b);
-        if (aa.s0 + bb.s0 > 16) break;
         double2 minusbb = (double2)(-bb.s0, -bb.s1);
         a = add(add(aa, minusbb), ca);
         b = add(add(ab, ab), cb);
+        if (aa.s0 + bb.s0 > 16) break;
         n++;
     }
 

+ 231 - 223
libmandel/src/opencl/doubledouble.h

@@ -2,241 +2,249 @@ unsigned char doubledouble_cl[] = {
   0x23, 0x70, 0x72, 0x61, 0x67, 0x6d, 0x61, 0x20, 0x4f, 0x50, 0x45, 0x4e,
   0x43, 0x4c, 0x20, 0x45, 0x58, 0x54, 0x45, 0x4e, 0x53, 0x49, 0x4f, 0x4e,
   0x20, 0x63, 0x6c, 0x5f, 0x6b, 0x68, 0x72, 0x5f, 0x66, 0x70, 0x36, 0x34,
-  0x20, 0x3a, 0x20, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x0a, 0x0a, 0x69,
-  0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
-  0x32, 0x20, 0x74, 0x77, 0x6f, 0x53, 0x75, 0x6d, 0x28, 0x64, 0x6f, 0x75,
-  0x62, 0x6c, 0x65, 0x20, 0x61, 0x2c, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c,
-  0x65, 0x20, 0x62, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64,
-  0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x73, 0x20, 0x3d, 0x20, 0x61, 0x20,
-  0x2b, 0x20, 0x62, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75,
-  0x62, 0x6c, 0x65, 0x20, 0x62, 0x62, 0x20, 0x3d, 0x20, 0x73, 0x20, 0x2d,
-  0x20, 0x61, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62,
-  0x6c, 0x65, 0x20, 0x65, 0x20, 0x3d, 0x20, 0x28, 0x61, 0x20, 0x2d, 0x20,
-  0x28, 0x73, 0x20, 0x2d, 0x20, 0x62, 0x62, 0x29, 0x29, 0x20, 0x2b, 0x20,
-  0x28, 0x62, 0x20, 0x2d, 0x20, 0x62, 0x62, 0x29, 0x3b, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x64, 0x6f,
-  0x75, 0x62, 0x6c, 0x65, 0x32, 0x29, 0x28, 0x73, 0x2c, 0x20, 0x65, 0x29,
-  0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20,
-  0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x71, 0x75, 0x69, 0x63,
-  0x6b, 0x54, 0x77, 0x6f, 0x53, 0x75, 0x6d, 0x28, 0x64, 0x6f, 0x75, 0x62,
-  0x6c, 0x65, 0x20, 0x61, 0x2c, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
-  0x20, 0x62, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f,
-  0x75, 0x62, 0x6c, 0x65, 0x20, 0x73, 0x20, 0x3d, 0x20, 0x61, 0x20, 0x2b,
-  0x20, 0x62, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62,
-  0x6c, 0x65, 0x20, 0x65, 0x20, 0x3d, 0x20, 0x62, 0x20, 0x2d, 0x20, 0x28,
-  0x73, 0x20, 0x2d, 0x20, 0x61, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x3a, 0x20, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x0d, 0x0a, 0x0d,
+  0x0a, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x64, 0x6f, 0x75, 0x62,
+  0x6c, 0x65, 0x32, 0x20, 0x74, 0x77, 0x6f, 0x53, 0x75, 0x6d, 0x28, 0x64,
+  0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x61, 0x2c, 0x20, 0x64, 0x6f, 0x75,
+  0x62, 0x6c, 0x65, 0x20, 0x62, 0x29, 0x20, 0x7b, 0x0d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x73, 0x20, 0x3d,
+  0x20, 0x61, 0x20, 0x2b, 0x20, 0x62, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x62, 0x62, 0x20, 0x3d,
+  0x20, 0x73, 0x20, 0x2d, 0x20, 0x61, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x65, 0x20, 0x3d, 0x20,
+  0x28, 0x61, 0x20, 0x2d, 0x20, 0x28, 0x73, 0x20, 0x2d, 0x20, 0x62, 0x62,
+  0x29, 0x29, 0x20, 0x2b, 0x20, 0x28, 0x62, 0x20, 0x2d, 0x20, 0x62, 0x62,
+  0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75,
+  0x72, 0x6e, 0x20, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x29,
+  0x28, 0x73, 0x2c, 0x20, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a,
+  0x0d, 0x0a, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x64, 0x6f, 0x75,
+  0x62, 0x6c, 0x65, 0x32, 0x20, 0x71, 0x75, 0x69, 0x63, 0x6b, 0x54, 0x77,
+  0x6f, 0x53, 0x75, 0x6d, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20,
+  0x61, 0x2c, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x62, 0x29,
+  0x20, 0x7b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62,
+  0x6c, 0x65, 0x20, 0x73, 0x20, 0x3d, 0x20, 0x61, 0x20, 0x2b, 0x20, 0x62,
+  0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c,
+  0x65, 0x20, 0x65, 0x20, 0x3d, 0x20, 0x62, 0x20, 0x2d, 0x20, 0x28, 0x73,
+  0x20, 0x2d, 0x20, 0x61, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20,
   0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x64, 0x6f, 0x75, 0x62,
-  0x6c, 0x65, 0x32, 0x29, 0x28, 0x73, 0x2c, 0x20, 0x65, 0x29, 0x3b, 0x0a,
-  0x7d, 0x0a, 0x0a, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x64, 0x6f,
-  0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x74, 0x77, 0x6f, 0x50, 0x72, 0x6f,
-  0x64, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x61, 0x2c, 0x20,
-  0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x62, 0x29, 0x20, 0x7b, 0x0a,
-  0x2f, 0x2f, 0x23, 0x69, 0x66, 0x64, 0x65, 0x66, 0x20, 0x51, 0x44, 0x5f,
-  0x46, 0x4d, 0x53, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62,
-  0x6c, 0x65, 0x20, 0x70, 0x20, 0x3d, 0x20, 0x61, 0x20, 0x2a, 0x20, 0x62,
-  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
-  0x20, 0x65, 0x20, 0x3d, 0x20, 0x66, 0x6d, 0x61, 0x28, 0x61, 0x2c, 0x20,
-  0x62, 0x2c, 0x20, 0x2d, 0x70, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x64, 0x6f, 0x75, 0x62,
-  0x6c, 0x65, 0x32, 0x29, 0x28, 0x70, 0x2c, 0x20, 0x65, 0x29, 0x3b, 0x0a,
-  0x2f, 0x2f, 0x23, 0x65, 0x6c, 0x73, 0x65, 0x0a, 0x2f, 0x2f, 0x20, 0x20,
-  0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x61, 0x5f, 0x68, 0x69, 0x2c,
-  0x20, 0x61, 0x5f, 0x6c, 0x6f, 0x2c, 0x20, 0x62, 0x5f, 0x68, 0x69, 0x2c,
-  0x20, 0x62, 0x5f, 0x6c, 0x6f, 0x3b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x64,
-  0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x70, 0x20, 0x3d, 0x20, 0x61, 0x20,
-  0x2a, 0x20, 0x62, 0x3b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x73, 0x70, 0x6c,
-  0x69, 0x74, 0x28, 0x61, 0x2c, 0x20, 0x61, 0x5f, 0x68, 0x69, 0x2c, 0x20,
-  0x61, 0x5f, 0x6c, 0x6f, 0x29, 0x3b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x73,
+  0x6c, 0x65, 0x32, 0x29, 0x28, 0x73, 0x2c, 0x20, 0x65, 0x29, 0x3b, 0x0d,
+  0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65,
+  0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x74, 0x77, 0x6f,
+  0x50, 0x72, 0x6f, 0x64, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20,
+  0x61, 0x2c, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x62, 0x29,
+  0x20, 0x7b, 0x0d, 0x0a, 0x2f, 0x2f, 0x23, 0x69, 0x66, 0x64, 0x65, 0x66,
+  0x20, 0x51, 0x44, 0x5f, 0x46, 0x4d, 0x53, 0x0d, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x70, 0x20, 0x3d, 0x20,
+  0x61, 0x20, 0x2a, 0x20, 0x62, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x65, 0x20, 0x3d, 0x20, 0x66,
+  0x6d, 0x61, 0x28, 0x61, 0x2c, 0x20, 0x62, 0x2c, 0x20, 0x2d, 0x70, 0x29,
+  0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72,
+  0x6e, 0x20, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x29, 0x28,
+  0x70, 0x2c, 0x20, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x2f, 0x2f, 0x23, 0x65,
+  0x6c, 0x73, 0x65, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x64, 0x6f, 0x75,
+  0x62, 0x6c, 0x65, 0x20, 0x61, 0x5f, 0x68, 0x69, 0x2c, 0x20, 0x61, 0x5f,
+  0x6c, 0x6f, 0x2c, 0x20, 0x62, 0x5f, 0x68, 0x69, 0x2c, 0x20, 0x62, 0x5f,
+  0x6c, 0x6f, 0x3b, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x64, 0x6f, 0x75,
+  0x62, 0x6c, 0x65, 0x20, 0x70, 0x20, 0x3d, 0x20, 0x61, 0x20, 0x2a, 0x20,
+  0x62, 0x3b, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x73, 0x70, 0x6c, 0x69,
+  0x74, 0x28, 0x61, 0x2c, 0x20, 0x61, 0x5f, 0x68, 0x69, 0x2c, 0x20, 0x61,
+  0x5f, 0x6c, 0x6f, 0x29, 0x3b, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x73,
   0x70, 0x6c, 0x69, 0x74, 0x28, 0x62, 0x2c, 0x20, 0x62, 0x5f, 0x68, 0x69,
-  0x2c, 0x20, 0x62, 0x5f, 0x6c, 0x6f, 0x29, 0x3b, 0x0a, 0x2f, 0x2f, 0x20,
-  0x20, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x20, 0x28, 0x28, 0x61, 0x5f, 0x68,
-  0x69, 0x20, 0x2a, 0x20, 0x62, 0x5f, 0x68, 0x69, 0x20, 0x2d, 0x20, 0x70,
-  0x29, 0x20, 0x2b, 0x20, 0x61, 0x5f, 0x68, 0x69, 0x20, 0x2a, 0x20, 0x62,
-  0x5f, 0x6c, 0x6f, 0x20, 0x2b, 0x20, 0x61, 0x5f, 0x6c, 0x6f, 0x20, 0x2a,
-  0x20, 0x62, 0x5f, 0x68, 0x69, 0x29, 0x20, 0x2b, 0x20, 0x61, 0x5f, 0x6c,
-  0x6f, 0x20, 0x2a, 0x20, 0x62, 0x5f, 0x6c, 0x6f, 0x3b, 0x0a, 0x2f, 0x2f,
-  0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x70, 0x3b, 0x0a,
-  0x2f, 0x2f, 0x23, 0x65, 0x6e, 0x64, 0x69, 0x66, 0x0a, 0x7d, 0x0a, 0x0a,
-  0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c,
-  0x65, 0x32, 0x20, 0x6d, 0x75, 0x6c, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c,
+  0x2c, 0x20, 0x62, 0x5f, 0x6c, 0x6f, 0x29, 0x3b, 0x0d, 0x0a, 0x2f, 0x2f,
+  0x20, 0x20, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x20, 0x28, 0x28, 0x61, 0x5f,
+  0x68, 0x69, 0x20, 0x2a, 0x20, 0x62, 0x5f, 0x68, 0x69, 0x20, 0x2d, 0x20,
+  0x70, 0x29, 0x20, 0x2b, 0x20, 0x61, 0x5f, 0x68, 0x69, 0x20, 0x2a, 0x20,
+  0x62, 0x5f, 0x6c, 0x6f, 0x20, 0x2b, 0x20, 0x61, 0x5f, 0x6c, 0x6f, 0x20,
+  0x2a, 0x20, 0x62, 0x5f, 0x68, 0x69, 0x29, 0x20, 0x2b, 0x20, 0x61, 0x5f,
+  0x6c, 0x6f, 0x20, 0x2a, 0x20, 0x62, 0x5f, 0x6c, 0x6f, 0x3b, 0x0d, 0x0a,
+  0x2f, 0x2f, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x70,
+  0x3b, 0x0d, 0x0a, 0x2f, 0x2f, 0x23, 0x65, 0x6e, 0x64, 0x69, 0x66, 0x0d,
+  0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65,
+  0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x6d, 0x75, 0x6c,
+  0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x61, 0x2c, 0x20,
+  0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x62, 0x29, 0x20, 0x7b,
+  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
+  0x32, 0x20, 0x70, 0x20, 0x3d, 0x20, 0x74, 0x77, 0x6f, 0x50, 0x72, 0x6f,
+  0x64, 0x28, 0x61, 0x2e, 0x73, 0x30, 0x2c, 0x20, 0x62, 0x2e, 0x73, 0x30,
+  0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x70, 0x2e, 0x73, 0x31,
+  0x20, 0x2b, 0x3d, 0x20, 0x28, 0x61, 0x2e, 0x73, 0x30, 0x20, 0x2a, 0x20,
+  0x62, 0x2e, 0x73, 0x31, 0x20, 0x2b, 0x20, 0x61, 0x2e, 0x73, 0x31, 0x20,
+  0x2a, 0x20, 0x62, 0x2e, 0x73, 0x30, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x71, 0x75, 0x69,
+  0x63, 0x6b, 0x54, 0x77, 0x6f, 0x53, 0x75, 0x6d, 0x28, 0x70, 0x2e, 0x73,
+  0x30, 0x2c, 0x20, 0x70, 0x2e, 0x73, 0x31, 0x29, 0x3b, 0x0d, 0x0a, 0x7d,
+  0x0d, 0x0a, 0x0d, 0x0a, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x64,
+  0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x73, 0x71, 0x28, 0x64, 0x6f,
+  0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x61, 0x29, 0x20, 0x7b, 0x0d, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20,
+  0x70, 0x20, 0x3d, 0x20, 0x74, 0x77, 0x6f, 0x50, 0x72, 0x6f, 0x64, 0x28,
+  0x61, 0x2e, 0x73, 0x30, 0x2c, 0x20, 0x61, 0x2e, 0x73, 0x30, 0x29, 0x3b,
+  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
+  0x20, 0x65, 0x20, 0x3d, 0x20, 0x61, 0x2e, 0x73, 0x30, 0x20, 0x2a, 0x20,
+  0x61, 0x2e, 0x73, 0x31, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x70,
+  0x2e, 0x73, 0x31, 0x20, 0x2b, 0x3d, 0x20, 0x65, 0x20, 0x2b, 0x20, 0x65,
+  0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72,
+  0x6e, 0x20, 0x71, 0x75, 0x69, 0x63, 0x6b, 0x54, 0x77, 0x6f, 0x53, 0x75,
+  0x6d, 0x28, 0x70, 0x2e, 0x73, 0x30, 0x2c, 0x20, 0x70, 0x2e, 0x73, 0x31,
+  0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x69, 0x6e, 0x6c,
+  0x69, 0x6e, 0x65, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20,
+  0x61, 0x64, 0x64, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20,
+  0x61, 0x2c, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x62,
+  0x29, 0x20, 0x7b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75,
+  0x62, 0x6c, 0x65, 0x32, 0x20, 0x73, 0x65, 0x20, 0x3d, 0x20, 0x74, 0x77,
+  0x6f, 0x53, 0x75, 0x6d, 0x28, 0x61, 0x2e, 0x73, 0x30, 0x2c, 0x20, 0x62,
+  0x2e, 0x73, 0x30, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x73,
+  0x65, 0x2e, 0x73, 0x31, 0x20, 0x2b, 0x3d, 0x20, 0x61, 0x2e, 0x73, 0x31,
+  0x20, 0x2b, 0x20, 0x62, 0x2e, 0x73, 0x31, 0x3b, 0x0d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x71, 0x75, 0x69,
+  0x63, 0x6b, 0x54, 0x77, 0x6f, 0x53, 0x75, 0x6d, 0x28, 0x73, 0x65, 0x2e,
+  0x73, 0x30, 0x2c, 0x20, 0x73, 0x65, 0x2e, 0x73, 0x31, 0x29, 0x3b, 0x0d,
+  0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65,
+  0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x6d, 0x75, 0x6c,
+  0x44, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c,
   0x65, 0x32, 0x20, 0x61, 0x2c, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
-  0x32, 0x20, 0x62, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64,
+  0x20, 0x62, 0x29, 0x20, 0x7b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64,
   0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x70, 0x20, 0x3d, 0x20, 0x74,
   0x77, 0x6f, 0x50, 0x72, 0x6f, 0x64, 0x28, 0x61, 0x2e, 0x73, 0x30, 0x2c,
-  0x20, 0x62, 0x2e, 0x73, 0x30, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x70, 0x2e, 0x73, 0x31, 0x20, 0x2b, 0x3d, 0x20, 0x28, 0x61, 0x2e, 0x73,
-  0x30, 0x20, 0x2a, 0x20, 0x62, 0x2e, 0x73, 0x31, 0x20, 0x2b, 0x20, 0x61,
-  0x2e, 0x73, 0x31, 0x20, 0x2a, 0x20, 0x62, 0x2e, 0x73, 0x30, 0x29, 0x3b,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20,
-  0x71, 0x75, 0x69, 0x63, 0x6b, 0x54, 0x77, 0x6f, 0x53, 0x75, 0x6d, 0x28,
-  0x70, 0x2e, 0x73, 0x30, 0x2c, 0x20, 0x70, 0x2e, 0x73, 0x31, 0x29, 0x3b,
-  0x0a, 0x7d, 0x0a, 0x0a, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x64,
-  0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x73, 0x71, 0x28, 0x64, 0x6f,
-  0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x61, 0x29, 0x20, 0x7b, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x70,
-  0x20, 0x3d, 0x20, 0x74, 0x77, 0x6f, 0x50, 0x72, 0x6f, 0x64, 0x28, 0x61,
-  0x2e, 0x73, 0x30, 0x2c, 0x20, 0x61, 0x2e, 0x73, 0x30, 0x29, 0x3b, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x65,
-  0x20, 0x3d, 0x20, 0x61, 0x2e, 0x73, 0x30, 0x20, 0x2a, 0x20, 0x61, 0x2e,
-  0x73, 0x31, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x70, 0x2e, 0x73, 0x31,
-  0x20, 0x2b, 0x3d, 0x20, 0x65, 0x20, 0x2b, 0x20, 0x65, 0x3b, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x71, 0x75,
-  0x69, 0x63, 0x6b, 0x54, 0x77, 0x6f, 0x53, 0x75, 0x6d, 0x28, 0x70, 0x2e,
-  0x73, 0x30, 0x2c, 0x20, 0x70, 0x2e, 0x73, 0x31, 0x29, 0x3b, 0x0a, 0x7d,
-  0x0a, 0x0a, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x64, 0x6f, 0x75,
-  0x62, 0x6c, 0x65, 0x32, 0x20, 0x61, 0x64, 0x64, 0x28, 0x64, 0x6f, 0x75,
-  0x62, 0x6c, 0x65, 0x32, 0x20, 0x61, 0x2c, 0x20, 0x64, 0x6f, 0x75, 0x62,
-  0x6c, 0x65, 0x32, 0x20, 0x62, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x73, 0x65, 0x20,
-  0x3d, 0x20, 0x74, 0x77, 0x6f, 0x53, 0x75, 0x6d, 0x28, 0x61, 0x2e, 0x73,
-  0x30, 0x2c, 0x20, 0x62, 0x2e, 0x73, 0x30, 0x29, 0x3b, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x73, 0x65, 0x2e, 0x73, 0x31, 0x20, 0x2b, 0x3d, 0x20, 0x61,
-  0x2e, 0x73, 0x31, 0x20, 0x2b, 0x20, 0x62, 0x2e, 0x73, 0x31, 0x3b, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x71,
-  0x75, 0x69, 0x63, 0x6b, 0x54, 0x77, 0x6f, 0x53, 0x75, 0x6d, 0x28, 0x73,
-  0x65, 0x2e, 0x73, 0x30, 0x2c, 0x20, 0x73, 0x65, 0x2e, 0x73, 0x31, 0x29,
-  0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20,
-  0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x6d, 0x75, 0x6c, 0x44,
-  0x6f, 0x75, 0x62, 0x6c, 0x65, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
-  0x32, 0x20, 0x61, 0x2c, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20,
-  0x62, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75,
-  0x62, 0x6c, 0x65, 0x32, 0x20, 0x70, 0x20, 0x3d, 0x20, 0x74, 0x77, 0x6f,
-  0x50, 0x72, 0x6f, 0x64, 0x28, 0x61, 0x2e, 0x73, 0x30, 0x2c, 0x20, 0x62,
-  0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x70, 0x2e, 0x73, 0x31, 0x20,
-  0x2b, 0x3d, 0x20, 0x61, 0x2e, 0x73, 0x31, 0x20, 0x2a, 0x20, 0x62, 0x3b,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20,
-  0x71, 0x75, 0x69, 0x63, 0x6b, 0x54, 0x77, 0x6f, 0x53, 0x75, 0x6d, 0x28,
-  0x70, 0x2e, 0x73, 0x30, 0x2c, 0x20, 0x70, 0x2e, 0x73, 0x31, 0x29, 0x3b,
-  0x0a, 0x7d, 0x0a, 0x0a, 0x5f, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c,
-  0x20, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x69, 0x74, 0x65, 0x72, 0x61, 0x74,
-  0x65, 0x28, 0x5f, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x66,
-  0x6c, 0x6f, 0x61, 0x74, 0x2a, 0x20, 0x41, 0x2c, 0x20, 0x63, 0x6f, 0x6e,
-  0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x77, 0x69, 0x64, 0x74, 0x68,
-  0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x78, 0x31, 0x2c, 0x20, 0x64,
-  0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x78, 0x32, 0x2c, 0x20, 0x64, 0x6f,
-  0x75, 0x62, 0x6c, 0x65, 0x20, 0x79, 0x31, 0x2c, 0x20, 0x64, 0x6f, 0x75,
-  0x62, 0x6c, 0x65, 0x20, 0x79, 0x32, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x62, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x70, 0x2e,
+  0x73, 0x31, 0x20, 0x2b, 0x3d, 0x20, 0x61, 0x2e, 0x73, 0x31, 0x20, 0x2a,
+  0x20, 0x62, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74,
+  0x75, 0x72, 0x6e, 0x20, 0x71, 0x75, 0x69, 0x63, 0x6b, 0x54, 0x77, 0x6f,
+  0x53, 0x75, 0x6d, 0x28, 0x70, 0x2e, 0x73, 0x30, 0x2c, 0x20, 0x70, 0x2e,
+  0x73, 0x31, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x5f,
+  0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x20, 0x76, 0x6f, 0x69, 0x64,
+  0x20, 0x69, 0x74, 0x65, 0x72, 0x61, 0x74, 0x65, 0x28, 0x5f, 0x5f, 0x67,
+  0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x2a,
+  0x20, 0x41, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e,
+  0x74, 0x20, 0x77, 0x69, 0x64, 0x74, 0x68, 0x2c, 0x0d, 0x0a, 0x20, 0x20,
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
-  0x20, 0x70, 0x77, 0x31, 0x2c, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
-  0x20, 0x70, 0x77, 0x32, 0x2c, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
-  0x20, 0x70, 0x68, 0x31, 0x2c, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
-  0x20, 0x70, 0x68, 0x32, 0x2c, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x6d, 0x61,
-  0x78, 0x2c, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x73, 0x6d, 0x6f, 0x6f, 0x74,
-  0x68, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62,
+  0x6c, 0x65, 0x20, 0x78, 0x31, 0x2c, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c,
+  0x65, 0x20, 0x78, 0x32, 0x2c, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
+  0x20, 0x79, 0x31, 0x2c, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20,
+  0x79, 0x32, 0x2c, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x69, 0x6e, 0x74, 0x20, 0x6a, 0x75, 0x6c, 0x69, 0x61, 0x2c, 0x20,
-  0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x6a, 0x78, 0x31, 0x2c, 0x20,
-  0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x6a, 0x78, 0x32, 0x2c, 0x20,
-  0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x6a, 0x79, 0x31, 0x2c, 0x20,
-  0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x6a, 0x79, 0x32, 0x29, 0x20,
-  0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x69, 0x6e,
-  0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, 0x67, 0x6c,
-  0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x28, 0x30, 0x29, 0x3b, 0x0a,
+  0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x70, 0x77,
+  0x31, 0x2c, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x70, 0x77,
+  0x32, 0x2c, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x70, 0x68,
+  0x31, 0x2c, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x20, 0x70, 0x68,
+  0x32, 0x2c, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x6d, 0x61, 0x78, 0x2c, 0x20,
+  0x69, 0x6e, 0x74, 0x20, 0x73, 0x6d, 0x6f, 0x6f, 0x74, 0x68, 0x2c, 0x0d,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69,
+  0x6e, 0x74, 0x20, 0x6a, 0x75, 0x6c, 0x69, 0x61, 0x2c, 0x20, 0x64, 0x6f,
+  0x75, 0x62, 0x6c, 0x65, 0x20, 0x6a, 0x78, 0x31, 0x2c, 0x20, 0x64, 0x6f,
+  0x75, 0x62, 0x6c, 0x65, 0x20, 0x6a, 0x78, 0x32, 0x2c, 0x20, 0x64, 0x6f,
+  0x75, 0x62, 0x6c, 0x65, 0x20, 0x6a, 0x79, 0x31, 0x2c, 0x20, 0x64, 0x6f,
+  0x75, 0x62, 0x6c, 0x65, 0x20, 0x6a, 0x79, 0x32, 0x29, 0x20, 0x7b, 0x0d,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x69, 0x6e, 0x64,
+  0x65, 0x78, 0x20, 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, 0x67, 0x6c, 0x6f,
+  0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x28, 0x30, 0x29, 0x3b, 0x0d, 0x0a,
   0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x70, 0x78, 0x20, 0x3d,
   0x20, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x25, 0x20, 0x77, 0x69, 0x64,
-  0x74, 0x68, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20,
-  0x70, 0x79, 0x20, 0x3d, 0x20, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x2f,
-  0x20, 0x77, 0x69, 0x64, 0x74, 0x68, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x78, 0x6c, 0x20,
-  0x3d, 0x20, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x29, 0x28,
-  0x78, 0x31, 0x2c, 0x20, 0x78, 0x32, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x79, 0x74, 0x20,
-  0x3d, 0x20, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x29, 0x28,
-  0x79, 0x31, 0x2c, 0x20, 0x79, 0x32, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x70, 0x69, 0x78,
-  0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x58, 0x20, 0x3d, 0x20, 0x28,
-  0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x29, 0x28, 0x70, 0x77, 0x31,
-  0x2c, 0x20, 0x70, 0x77, 0x32, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x70, 0x69, 0x78, 0x65,
-  0x6c, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x59, 0x20, 0x3d, 0x20, 0x28, 0x64,
-  0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x29, 0x28, 0x70, 0x68, 0x31, 0x2c,
-  0x20, 0x70, 0x68, 0x32, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64,
-  0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x61, 0x20, 0x3d, 0x20, 0x61,
-  0x64, 0x64, 0x28, 0x6d, 0x75, 0x6c, 0x44, 0x6f, 0x75, 0x62, 0x6c, 0x65,
-  0x28, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x58,
-  0x2c, 0x20, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x29, 0x20, 0x70,
-  0x78, 0x29, 0x2c, 0x20, 0x78, 0x6c, 0x29, 0x3b, 0x20, 0x2f, 0x2f, 0x20,
-  0x70, 0x69, 0x78, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x58, 0x20,
-  0x2a, 0x20, 0x70, 0x78, 0x20, 0x2b, 0x20, 0x78, 0x6c, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x62, 0x20,
-  0x3d, 0x20, 0x61, 0x64, 0x64, 0x28, 0x6d, 0x75, 0x6c, 0x44, 0x6f, 0x75,
-  0x62, 0x6c, 0x65, 0x28, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x53, 0x63, 0x61,
-  0x6c, 0x65, 0x59, 0x2c, 0x20, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
-  0x29, 0x20, 0x70, 0x79, 0x29, 0x2c, 0x20, 0x79, 0x74, 0x29, 0x3b, 0x20,
-  0x2f, 0x2f, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c,
-  0x65, 0x59, 0x20, 0x2a, 0x20, 0x70, 0x79, 0x20, 0x2b, 0x20, 0x79, 0x74,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32,
-  0x20, 0x63, 0x61, 0x20, 0x3d, 0x20, 0x6a, 0x75, 0x6c, 0x69, 0x61, 0x20,
-  0x21, 0x3d, 0x20, 0x30, 0x20, 0x3f, 0x20, 0x28, 0x28, 0x64, 0x6f, 0x75,
-  0x62, 0x6c, 0x65, 0x32, 0x29, 0x20, 0x28, 0x6a, 0x78, 0x31, 0x2c, 0x20,
-  0x6a, 0x78, 0x32, 0x29, 0x29, 0x20, 0x3a, 0x20, 0x61, 0x3b, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x63,
-  0x62, 0x20, 0x3d, 0x20, 0x6a, 0x75, 0x6c, 0x69, 0x61, 0x20, 0x21, 0x3d,
-  0x20, 0x30, 0x20, 0x3f, 0x20, 0x28, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c,
-  0x65, 0x32, 0x29, 0x20, 0x28, 0x6a, 0x79, 0x31, 0x2c, 0x20, 0x6a, 0x79,
-  0x32, 0x29, 0x29, 0x20, 0x3a, 0x20, 0x62, 0x3b, 0x0a, 0x0a, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x6e, 0x20, 0x3d, 0x20, 0x30,
-  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x77, 0x68, 0x69, 0x6c, 0x65, 0x20,
-  0x28, 0x6e, 0x20, 0x3c, 0x20, 0x6d, 0x61, 0x78, 0x20, 0x2d, 0x20, 0x31,
-  0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x61, 0x61, 0x20, 0x3d,
-  0x20, 0x6d, 0x75, 0x6c, 0x28, 0x61, 0x2c, 0x20, 0x61, 0x29, 0x3b, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62,
-  0x6c, 0x65, 0x32, 0x20, 0x62, 0x62, 0x20, 0x3d, 0x20, 0x6d, 0x75, 0x6c,
-  0x28, 0x62, 0x2c, 0x20, 0x62, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x74, 0x68, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74,
+  0x20, 0x70, 0x79, 0x20, 0x3d, 0x20, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20,
+  0x2f, 0x20, 0x77, 0x69, 0x64, 0x74, 0x68, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a,
   0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20,
-  0x61, 0x62, 0x20, 0x3d, 0x20, 0x6d, 0x75, 0x6c, 0x28, 0x61, 0x2c, 0x20,
-  0x62, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x69, 0x66, 0x20, 0x28, 0x61, 0x61, 0x2e, 0x73, 0x30, 0x20, 0x2b, 0x20,
-  0x62, 0x62, 0x2e, 0x73, 0x30, 0x20, 0x3e, 0x20, 0x31, 0x36, 0x29, 0x20,
-  0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x6d,
-  0x69, 0x6e, 0x75, 0x73, 0x62, 0x62, 0x20, 0x3d, 0x20, 0x28, 0x64, 0x6f,
-  0x75, 0x62, 0x6c, 0x65, 0x32, 0x29, 0x28, 0x2d, 0x62, 0x62, 0x2e, 0x73,
-  0x30, 0x2c, 0x20, 0x2d, 0x62, 0x62, 0x2e, 0x73, 0x31, 0x29, 0x3b, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x20, 0x3d, 0x20,
-  0x61, 0x64, 0x64, 0x28, 0x61, 0x64, 0x64, 0x28, 0x61, 0x61, 0x2c, 0x20,
-  0x6d, 0x69, 0x6e, 0x75, 0x73, 0x62, 0x62, 0x29, 0x2c, 0x20, 0x63, 0x61,
-  0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62,
-  0x20, 0x3d, 0x20, 0x61, 0x64, 0x64, 0x28, 0x61, 0x64, 0x64, 0x28, 0x61,
-  0x62, 0x2c, 0x20, 0x61, 0x62, 0x29, 0x2c, 0x20, 0x63, 0x62, 0x29, 0x3b,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6e, 0x2b, 0x2b,
-  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x2f, 0x2f, 0x20, 0x4e, 0x20, 0x2b, 0x20, 0x31, 0x20, 0x2d, 0x20,
-  0x6c, 0x6f, 0x67, 0x20, 0x28, 0x6c, 0x6f, 0x67, 0x20, 0x20, 0x7c, 0x5a,
-  0x28, 0x4e, 0x29, 0x7c, 0x29, 0x20, 0x2f, 0x20, 0x6c, 0x6f, 0x67, 0x20,
-  0x32, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x6e, 0x20,
-  0x3e, 0x3d, 0x20, 0x6d, 0x61, 0x78, 0x20, 0x2d, 0x20, 0x31, 0x29, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x41, 0x5b, 0x69, 0x6e,
-  0x64, 0x65, 0x78, 0x5d, 0x20, 0x3d, 0x20, 0x6d, 0x61, 0x78, 0x3b, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x65, 0x6c, 0x73, 0x65, 0x20, 0x7b, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x73,
-  0x6d, 0x6f, 0x6f, 0x74, 0x68, 0x20, 0x21, 0x3d, 0x20, 0x30, 0x29, 0x0a,
+  0x78, 0x6c, 0x20, 0x3d, 0x20, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
+  0x32, 0x29, 0x28, 0x78, 0x31, 0x2c, 0x20, 0x78, 0x32, 0x29, 0x3b, 0x0d,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32,
+  0x20, 0x79, 0x74, 0x20, 0x3d, 0x20, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c,
+  0x65, 0x32, 0x29, 0x28, 0x79, 0x31, 0x2c, 0x20, 0x79, 0x32, 0x29, 0x3b,
+  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
+  0x32, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x65,
+  0x58, 0x20, 0x3d, 0x20, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32,
+  0x29, 0x28, 0x70, 0x77, 0x31, 0x2c, 0x20, 0x70, 0x77, 0x32, 0x29, 0x3b,
+  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
+  0x32, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x65,
+  0x59, 0x20, 0x3d, 0x20, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32,
+  0x29, 0x28, 0x70, 0x68, 0x31, 0x2c, 0x20, 0x70, 0x68, 0x32, 0x29, 0x3b,
+  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
+  0x32, 0x20, 0x61, 0x20, 0x3d, 0x20, 0x61, 0x64, 0x64, 0x28, 0x6d, 0x75,
+  0x6c, 0x44, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x28, 0x70, 0x69, 0x78, 0x65,
+  0x6c, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x58, 0x2c, 0x20, 0x28, 0x64, 0x6f,
+  0x75, 0x62, 0x6c, 0x65, 0x29, 0x20, 0x70, 0x78, 0x29, 0x2c, 0x20, 0x78,
+  0x6c, 0x29, 0x3b, 0x20, 0x2f, 0x2f, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c,
+  0x53, 0x63, 0x61, 0x6c, 0x65, 0x58, 0x20, 0x2a, 0x20, 0x70, 0x78, 0x20,
+  0x2b, 0x20, 0x78, 0x6c, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f,
+  0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x61, 0x64,
+  0x64, 0x28, 0x6d, 0x75, 0x6c, 0x44, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x28,
+  0x70, 0x69, 0x78, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x59, 0x2c,
+  0x20, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x29, 0x20, 0x70, 0x79,
+  0x29, 0x2c, 0x20, 0x79, 0x74, 0x29, 0x3b, 0x20, 0x2f, 0x2f, 0x20, 0x70,
+  0x69, 0x78, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x59, 0x20, 0x2a,
+  0x20, 0x70, 0x79, 0x20, 0x2b, 0x20, 0x79, 0x74, 0x0d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x63, 0x61,
+  0x20, 0x3d, 0x20, 0x6a, 0x75, 0x6c, 0x69, 0x61, 0x20, 0x21, 0x3d, 0x20,
+  0x30, 0x20, 0x3f, 0x20, 0x28, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
+  0x32, 0x29, 0x20, 0x28, 0x6a, 0x78, 0x31, 0x2c, 0x20, 0x6a, 0x78, 0x32,
+  0x29, 0x29, 0x20, 0x3a, 0x20, 0x61, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x63, 0x62, 0x20,
+  0x3d, 0x20, 0x6a, 0x75, 0x6c, 0x69, 0x61, 0x20, 0x21, 0x3d, 0x20, 0x30,
+  0x20, 0x3f, 0x20, 0x28, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32,
+  0x29, 0x20, 0x28, 0x6a, 0x79, 0x31, 0x2c, 0x20, 0x6a, 0x79, 0x32, 0x29,
+  0x29, 0x20, 0x3a, 0x20, 0x62, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x0d, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x6e, 0x20, 0x3d, 0x20,
+  0x30, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x77, 0x68, 0x69, 0x6c,
+  0x65, 0x20, 0x28, 0x6e, 0x20, 0x3c, 0x20, 0x6d, 0x61, 0x78, 0x20, 0x2d,
+  0x20, 0x31, 0x29, 0x20, 0x7b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x61,
+  0x61, 0x20, 0x3d, 0x20, 0x6d, 0x75, 0x6c, 0x28, 0x61, 0x2c, 0x20, 0x61,
+  0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x20, 0x62, 0x62, 0x20, 0x3d,
+  0x20, 0x6d, 0x75, 0x6c, 0x28, 0x62, 0x2c, 0x20, 0x62, 0x29, 0x3b, 0x0d,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75,
+  0x62, 0x6c, 0x65, 0x32, 0x20, 0x61, 0x62, 0x20, 0x3d, 0x20, 0x6d, 0x75,
+  0x6c, 0x28, 0x61, 0x2c, 0x20, 0x62, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
+  0x32, 0x20, 0x6d, 0x69, 0x6e, 0x75, 0x73, 0x62, 0x62, 0x20, 0x3d, 0x20,
+  0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x32, 0x29, 0x28, 0x2d, 0x62,
+  0x62, 0x2e, 0x73, 0x30, 0x2c, 0x20, 0x2d, 0x62, 0x62, 0x2e, 0x73, 0x31,
+  0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x61, 0x20, 0x3d, 0x20, 0x61, 0x64, 0x64, 0x28, 0x61, 0x64, 0x64, 0x28,
+  0x61, 0x61, 0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x75, 0x73, 0x62, 0x62, 0x29,
+  0x2c, 0x20, 0x63, 0x61, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x61, 0x64, 0x64, 0x28,
+  0x61, 0x64, 0x64, 0x28, 0x61, 0x62, 0x2c, 0x20, 0x61, 0x62, 0x29, 0x2c,
+  0x20, 0x63, 0x62, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x61, 0x61, 0x2e, 0x73, 0x30,
+  0x20, 0x2b, 0x20, 0x62, 0x62, 0x2e, 0x73, 0x30, 0x20, 0x3e, 0x20, 0x31,
+  0x36, 0x29, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0d, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6e, 0x2b, 0x2b, 0x3b, 0x0d,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x2f, 0x2f, 0x20, 0x4e, 0x20, 0x2b, 0x20, 0x31, 0x20, 0x2d,
+  0x20, 0x6c, 0x6f, 0x67, 0x20, 0x28, 0x6c, 0x6f, 0x67, 0x20, 0x20, 0x7c,
+  0x5a, 0x28, 0x4e, 0x29, 0x7c, 0x29, 0x20, 0x2f, 0x20, 0x6c, 0x6f, 0x67,
+  0x20, 0x32, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28,
+  0x6e, 0x20, 0x3e, 0x3d, 0x20, 0x6d, 0x61, 0x78, 0x20, 0x2d, 0x20, 0x31,
+  0x29, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x41,
+  0x5b, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x20, 0x3d, 0x20, 0x6d, 0x61,
+  0x78, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x65, 0x6c, 0x73, 0x65,
+  0x20, 0x7b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x69, 0x66, 0x20, 0x28, 0x73, 0x6d, 0x6f, 0x6f, 0x74, 0x68, 0x20, 0x21,
+  0x3d, 0x20, 0x30, 0x29, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x41, 0x5b, 0x69, 0x6e, 0x64, 0x65,
+  0x78, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74,
+  0x29, 0x20, 0x6e, 0x29, 0x20, 0x2b, 0x20, 0x31, 0x20, 0x2d, 0x20, 0x6c,
+  0x6f, 0x67, 0x28, 0x6c, 0x6f, 0x67, 0x28, 0x61, 0x2e, 0x73, 0x30, 0x20,
+  0x2a, 0x20, 0x61, 0x2e, 0x73, 0x30, 0x20, 0x2b, 0x20, 0x62, 0x2e, 0x73,
+  0x30, 0x20, 0x2a, 0x20, 0x62, 0x2e, 0x73, 0x30, 0x29, 0x20, 0x2f, 0x20,
+  0x32, 0x29, 0x20, 0x2f, 0x20, 0x6c, 0x6f, 0x67, 0x28, 0x32, 0x2e, 0x30,
+  0x66, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x65, 0x6c, 0x73, 0x65, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x41, 0x5b, 0x69, 0x6e, 0x64,
+  0x65, 0x78, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x28, 0x66, 0x6c, 0x6f, 0x61,
+  0x74, 0x29, 0x20, 0x6e, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x7d, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x20, 0x20,
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
   0x41, 0x5b, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x20, 0x3d, 0x20, 0x28,
-  0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x20, 0x6e, 0x29, 0x20, 0x2b,
-  0x20, 0x31, 0x20, 0x2d, 0x20, 0x6c, 0x6f, 0x67, 0x28, 0x6c, 0x6f, 0x67,
-  0x28, 0x61, 0x2e, 0x73, 0x30, 0x20, 0x2a, 0x20, 0x61, 0x2e, 0x73, 0x30,
-  0x20, 0x2b, 0x20, 0x62, 0x2e, 0x73, 0x30, 0x20, 0x2a, 0x20, 0x62, 0x2e,
-  0x73, 0x30, 0x29, 0x20, 0x2f, 0x20, 0x32, 0x29, 0x20, 0x2f, 0x20, 0x6c,
-  0x6f, 0x67, 0x28, 0x32, 0x2e, 0x30, 0x66, 0x29, 0x3b, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x6c, 0x73, 0x65, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x41,
-  0x5b, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x28,
-  0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x6e, 0x29, 0x3b, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x41, 0x5b, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x20, 0x3d, 0x20,
-  0x28, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x6e, 0x29, 0x20, 0x2b,
-  0x20, 0x31, 0x20, 0x2d, 0x20, 0x28, 0x61, 0x20, 0x2a, 0x20, 0x61, 0x20,
-  0x2b, 0x20, 0x62, 0x20, 0x2a, 0x20, 0x62, 0x20, 0x2d, 0x20, 0x31, 0x36,
-  0x29, 0x20, 0x2f, 0x20, 0x28, 0x32, 0x35, 0x36, 0x20, 0x2d, 0x20, 0x31,
-  0x36, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x20,
+  0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x6e, 0x29, 0x20, 0x2b, 0x20,
+  0x31, 0x20, 0x2d, 0x20, 0x28, 0x61, 0x20, 0x2a, 0x20, 0x61, 0x20, 0x2b,
+  0x20, 0x62, 0x20, 0x2a, 0x20, 0x62, 0x20, 0x2d, 0x20, 0x31, 0x36, 0x29,
+  0x20, 0x2f, 0x20, 0x28, 0x32, 0x35, 0x36, 0x20, 0x2d, 0x20, 0x31, 0x36,
+  0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x20,
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x41, 0x5b, 0x67,
   0x65, 0x74, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64,
-  0x28, 0x30, 0x29, 0x5d, 0x20, 0x3d, 0x20, 0x35, 0x3b, 0x0a, 0x7d
+  0x28, 0x30, 0x29, 0x5d, 0x20, 0x3d, 0x20, 0x35, 0x3b, 0x0d, 0x0a, 0x7d
 };
-unsigned int doubledouble_cl_len = 2867;
+unsigned int doubledouble_cl_len = 2964;

+ 2 - 2
libmandel/src/opencl/doublefloat.cl

@@ -110,10 +110,10 @@ __kernel void iterate(__global float* A, const int width,
         float2 aa = sq(a);
         float2 bb = sq(b);
         float2 ab = mul(a, b);
-        if (aa.s0 + bb.s0 > 16) break;
         float2 minusbb = (float2)(-bb.s0, -bb.s1);
         a = add(add(aa, minusbb), ca);
         b = add(add(ab, ab), cb);
+        if (aa.s0 + bb.s0 > 16) break;
         n++;
     }
 
@@ -126,4 +126,4 @@ __kernel void iterate(__global float* A, const int width,
         else
             A[index] = ((float)n);
     }
-}
+}

+ 16 - 16
libmandel/src/opencl/doublefloat.h

@@ -259,20 +259,20 @@ unsigned char doublefloat_cl[] = {
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32,
   0x20, 0x61, 0x62, 0x20, 0x3d, 0x20, 0x6d, 0x75, 0x6c, 0x28, 0x61, 0x2c,
   0x20, 0x62, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x61, 0x61, 0x2e, 0x73, 0x30, 0x20,
-  0x2b, 0x20, 0x62, 0x62, 0x2e, 0x73, 0x30, 0x20, 0x3e, 0x20, 0x31, 0x36,
-  0x29, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0d, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32,
-  0x20, 0x6d, 0x69, 0x6e, 0x75, 0x73, 0x62, 0x62, 0x20, 0x3d, 0x20, 0x28,
-  0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x29, 0x28, 0x2d, 0x62, 0x62, 0x2e,
-  0x73, 0x30, 0x2c, 0x20, 0x2d, 0x62, 0x62, 0x2e, 0x73, 0x31, 0x29, 0x3b,
-  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x20,
-  0x3d, 0x20, 0x61, 0x64, 0x64, 0x28, 0x61, 0x64, 0x64, 0x28, 0x61, 0x61,
-  0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x75, 0x73, 0x62, 0x62, 0x29, 0x2c, 0x20,
-  0x63, 0x61, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x61, 0x64, 0x64, 0x28, 0x61, 0x64,
-  0x64, 0x28, 0x61, 0x62, 0x2c, 0x20, 0x61, 0x62, 0x29, 0x2c, 0x20, 0x63,
-  0x62, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x20, 0x6d, 0x69, 0x6e,
+  0x75, 0x73, 0x62, 0x62, 0x20, 0x3d, 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61,
+  0x74, 0x32, 0x29, 0x28, 0x2d, 0x62, 0x62, 0x2e, 0x73, 0x30, 0x2c, 0x20,
+  0x2d, 0x62, 0x62, 0x2e, 0x73, 0x31, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x20, 0x3d, 0x20, 0x61, 0x64,
+  0x64, 0x28, 0x61, 0x64, 0x64, 0x28, 0x61, 0x61, 0x2c, 0x20, 0x6d, 0x69,
+  0x6e, 0x75, 0x73, 0x62, 0x62, 0x29, 0x2c, 0x20, 0x63, 0x61, 0x29, 0x3b,
+  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62, 0x20,
+  0x3d, 0x20, 0x61, 0x64, 0x64, 0x28, 0x61, 0x64, 0x64, 0x28, 0x61, 0x62,
+  0x2c, 0x20, 0x61, 0x62, 0x29, 0x2c, 0x20, 0x63, 0x62, 0x29, 0x3b, 0x0d,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20,
+  0x28, 0x61, 0x61, 0x2e, 0x73, 0x30, 0x20, 0x2b, 0x20, 0x62, 0x62, 0x2e,
+  0x73, 0x30, 0x20, 0x3e, 0x20, 0x31, 0x36, 0x29, 0x20, 0x62, 0x72, 0x65,
+  0x61, 0x6b, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
   0x20, 0x6e, 0x2b, 0x2b, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d,
   0x0d, 0x0a, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x4e,
   0x20, 0x2b, 0x20, 0x31, 0x20, 0x2d, 0x20, 0x6c, 0x6f, 0x67, 0x20, 0x28,
@@ -297,6 +297,6 @@ unsigned char doublefloat_cl[] = {
   0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
   0x20, 0x41, 0x5b, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x20, 0x3d, 0x20,
   0x28, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x6e, 0x29, 0x3b, 0x0d,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0d, 0x0a, 0x7d
 };
-unsigned int doublefloat_cl_len = 3587;
+unsigned int doublefloat_cl_len = 3585;

+ 2 - 2
libmandel/src/opencl/fixed128.cl

@@ -73,9 +73,9 @@ __kernel void iterate(__global float* A, const int width,
         long2 aa = mul(a, a);
         long2 bb = mul(b, b);
         long2 ab = mul(a, b);
-        if (aa[0] + bb[0] > (16LL << 48)) break;
         a = add(sub(aa, bb), ca);
         b = add(add(ab, ab), cb);
+        if (aa[0] + bb[0] > (16LL << 48)) break;
         n++;
     }
 
@@ -91,4 +91,4 @@ __kernel void iterate(__global float* A, const int width,
         else
             A[index] = ((float)n);
     }
-}
+}

+ 12 - 12
libmandel/src/opencl/fixed128.h

@@ -170,16 +170,16 @@ unsigned char fixed128_cl[] = {
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6f, 0x6e, 0x67, 0x32, 0x20,
   0x61, 0x62, 0x20, 0x3d, 0x20, 0x6d, 0x75, 0x6c, 0x28, 0x61, 0x2c, 0x20,
   0x62, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x69, 0x66, 0x20, 0x28, 0x61, 0x61, 0x5b, 0x30, 0x5d, 0x20, 0x2b,
-  0x20, 0x62, 0x62, 0x5b, 0x30, 0x5d, 0x20, 0x3e, 0x20, 0x28, 0x31, 0x36,
-  0x4c, 0x4c, 0x20, 0x3c, 0x3c, 0x20, 0x34, 0x38, 0x29, 0x29, 0x20, 0x62,
-  0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x61, 0x20, 0x3d, 0x20, 0x61, 0x64, 0x64, 0x28, 0x73,
-  0x75, 0x62, 0x28, 0x61, 0x61, 0x2c, 0x20, 0x62, 0x62, 0x29, 0x2c, 0x20,
-  0x63, 0x61, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x61, 0x64, 0x64, 0x28, 0x61, 0x64,
-  0x64, 0x28, 0x61, 0x62, 0x2c, 0x20, 0x61, 0x62, 0x29, 0x2c, 0x20, 0x63,
-  0x62, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x61, 0x20, 0x3d, 0x20, 0x61, 0x64, 0x64, 0x28, 0x73, 0x75, 0x62,
+  0x28, 0x61, 0x61, 0x2c, 0x20, 0x62, 0x62, 0x29, 0x2c, 0x20, 0x63, 0x61,
+  0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x62, 0x20, 0x3d, 0x20, 0x61, 0x64, 0x64, 0x28, 0x61, 0x64, 0x64, 0x28,
+  0x61, 0x62, 0x2c, 0x20, 0x61, 0x62, 0x29, 0x2c, 0x20, 0x63, 0x62, 0x29,
+  0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69,
+  0x66, 0x20, 0x28, 0x61, 0x61, 0x5b, 0x30, 0x5d, 0x20, 0x2b, 0x20, 0x62,
+  0x62, 0x5b, 0x30, 0x5d, 0x20, 0x3e, 0x20, 0x28, 0x31, 0x36, 0x4c, 0x4c,
+  0x20, 0x3c, 0x3c, 0x20, 0x34, 0x38, 0x29, 0x29, 0x20, 0x62, 0x72, 0x65,
+  0x61, 0x6b, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
   0x20, 0x6e, 0x2b, 0x2b, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d,
   0x0d, 0x0a, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x4e,
   0x20, 0x2b, 0x20, 0x31, 0x20, 0x2d, 0x20, 0x6c, 0x6f, 0x67, 0x20, 0x28,
@@ -221,6 +221,6 @@ unsigned char fixed128_cl[] = {
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x41, 0x5b, 0x69, 0x6e,
   0x64, 0x65, 0x78, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x28, 0x66, 0x6c, 0x6f,
   0x61, 0x74, 0x29, 0x6e, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x7d, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a
+  0x7d, 0x0d, 0x0a, 0x7d
 };
-unsigned int fixed128_cl_len = 2670;
+unsigned int fixed128_cl_len = 2668;

+ 2 - 2
libmandel/src/opencl/fixed512.cl

@@ -98,9 +98,9 @@ __kernel void iterate(__global float* A, const int width,
         ulong2 aa = mul(a, a);
         ulong2 bb = mul(b, b);
         ulong2 ab = mul(a, b);
-        if (aa.s0 + aa.s1 + bb.s0 + bb.s1 > 16) break;
         a = add(sub(aa, bb), ca);
         b = add(add(ab, ab), cb);
+        if (aa.s0 + aa.s1 + bb.s0 + bb.s1 > 16) break;
         n++;
     }
 
@@ -118,4 +118,4 @@ __kernel void iterate(__global float* A, const int width,
     }
     //               A[index] = ((float)n) + 1 - (a * a + b * b - 16) / (256 - 16);
     //           A[get_global_id(0)] = 5;
-}
+}

+ 12 - 12
libmandel/src/opencl/fixed512.h

@@ -227,16 +227,16 @@ unsigned char fixed512_cl[] = {
   0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x75,
   0x6c, 0x6f, 0x6e, 0x67, 0x32, 0x20, 0x61, 0x62, 0x20, 0x3d, 0x20, 0x6d,
   0x75, 0x6c, 0x28, 0x61, 0x2c, 0x20, 0x62, 0x29, 0x3b, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x61, 0x61,
-  0x2e, 0x73, 0x30, 0x20, 0x2b, 0x20, 0x61, 0x61, 0x2e, 0x73, 0x31, 0x20,
-  0x2b, 0x20, 0x62, 0x62, 0x2e, 0x73, 0x30, 0x20, 0x2b, 0x20, 0x62, 0x62,
-  0x2e, 0x73, 0x31, 0x20, 0x3e, 0x20, 0x31, 0x36, 0x29, 0x20, 0x62, 0x72,
-  0x65, 0x61, 0x6b, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x61, 0x20, 0x3d, 0x20, 0x61, 0x64, 0x64, 0x28, 0x73, 0x75, 0x62,
-  0x28, 0x61, 0x61, 0x2c, 0x20, 0x62, 0x62, 0x29, 0x2c, 0x20, 0x63, 0x61,
-  0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62,
-  0x20, 0x3d, 0x20, 0x61, 0x64, 0x64, 0x28, 0x61, 0x64, 0x64, 0x28, 0x61,
-  0x62, 0x2c, 0x20, 0x61, 0x62, 0x29, 0x2c, 0x20, 0x63, 0x62, 0x29, 0x3b,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x20, 0x3d, 0x20, 0x61, 0x64,
+  0x64, 0x28, 0x73, 0x75, 0x62, 0x28, 0x61, 0x61, 0x2c, 0x20, 0x62, 0x62,
+  0x29, 0x2c, 0x20, 0x63, 0x61, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x61, 0x64, 0x64, 0x28,
+  0x61, 0x64, 0x64, 0x28, 0x61, 0x62, 0x2c, 0x20, 0x61, 0x62, 0x29, 0x2c,
+  0x20, 0x63, 0x62, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x61, 0x61, 0x2e, 0x73, 0x30, 0x20,
+  0x2b, 0x20, 0x61, 0x61, 0x2e, 0x73, 0x31, 0x20, 0x2b, 0x20, 0x62, 0x62,
+  0x2e, 0x73, 0x30, 0x20, 0x2b, 0x20, 0x62, 0x62, 0x2e, 0x73, 0x31, 0x20,
+  0x3e, 0x20, 0x31, 0x36, 0x29, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b,
   0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6e, 0x2b, 0x2b,
   0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20,
   0x20, 0x2f, 0x2f, 0x20, 0x4e, 0x20, 0x2b, 0x20, 0x31, 0x20, 0x2d, 0x20,
@@ -283,6 +283,6 @@ unsigned char fixed512_cl[] = {
   0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20,
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x41, 0x5b, 0x67, 0x65, 0x74,
   0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x28, 0x30,
-  0x29, 0x5d, 0x20, 0x3d, 0x20, 0x35, 0x3b, 0x0a, 0x7d, 0x0a
+  0x29, 0x5d, 0x20, 0x3d, 0x20, 0x35, 0x3b, 0x0a, 0x7d
 };
-unsigned int fixed512_cl_len = 3418;
+unsigned int fixed512_cl_len = 3417;

+ 2 - 2
libmandel/src/opencl/fixed64.cl

@@ -26,9 +26,9 @@ __kernel void iterate(__global float* A, const int width,
         long aa = mul(a, a);
         long bb = mul(b, b);
         long ab = mul(a, b);
-        if (aa + bb > (16LL << 48)) break;
         a = aa - bb + ca;
         b = ab + ab + cb;
+        if (aa + bb > (16LL << 48)) break;
         n++;
     }
 
@@ -44,4 +44,4 @@ __kernel void iterate(__global float* A, const int width,
         else
             A[index] = ((float)n);
     }
-}
+}

+ 10 - 10
libmandel/src/opencl/fixed64.h

@@ -68,14 +68,14 @@ unsigned char fixed64_cl[] = {
   0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c,
   0x6f, 0x6e, 0x67, 0x20, 0x61, 0x62, 0x20, 0x3d, 0x20, 0x6d, 0x75, 0x6c,
   0x28, 0x61, 0x2c, 0x20, 0x62, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x61, 0x61, 0x20, 0x2b,
-  0x20, 0x62, 0x62, 0x20, 0x3e, 0x20, 0x28, 0x31, 0x36, 0x4c, 0x4c, 0x20,
-  0x3c, 0x3c, 0x20, 0x34, 0x38, 0x29, 0x29, 0x20, 0x62, 0x72, 0x65, 0x61,
-  0x6b, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61,
-  0x20, 0x3d, 0x20, 0x61, 0x61, 0x20, 0x2d, 0x20, 0x62, 0x62, 0x20, 0x2b,
-  0x20, 0x63, 0x61, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x62, 0x20, 0x3d, 0x20, 0x61, 0x62, 0x20, 0x2b, 0x20, 0x61, 0x62,
-  0x20, 0x2b, 0x20, 0x63, 0x62, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x61, 0x20, 0x3d, 0x20, 0x61, 0x61, 0x20, 0x2d,
+  0x20, 0x62, 0x62, 0x20, 0x2b, 0x20, 0x63, 0x61, 0x3b, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x61, 0x62,
+  0x20, 0x2b, 0x20, 0x61, 0x62, 0x20, 0x2b, 0x20, 0x63, 0x62, 0x3b, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28,
+  0x61, 0x61, 0x20, 0x2b, 0x20, 0x62, 0x62, 0x20, 0x3e, 0x20, 0x28, 0x31,
+  0x36, 0x4c, 0x4c, 0x20, 0x3c, 0x3c, 0x20, 0x34, 0x38, 0x29, 0x29, 0x20,
+  0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
   0x20, 0x20, 0x20, 0x6e, 0x2b, 0x2b, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
   0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x4e, 0x20,
   0x2b, 0x20, 0x31, 0x20, 0x2d, 0x20, 0x6c, 0x6f, 0x67, 0x20, 0x28, 0x6c,
@@ -115,6 +115,6 @@ unsigned char fixed64_cl[] = {
   0x73, 0x65, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
   0x20, 0x20, 0x20, 0x41, 0x5b, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x20,
   0x3d, 0x20, 0x28, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x6e, 0x29,
-  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x7d, 0x0a
+  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x7d
 };
-unsigned int fixed64_cl_len = 1402;
+unsigned int fixed64_cl_len = 1401;

+ 7 - 7
libmandel/src/opencl/float.cl

@@ -13,9 +13,9 @@ __kernel void iterate(__global float* A, const int width, float xl, float yt, fl
        float aa = a * a;
        float bb = b * b;
        float ab = a * b;
-       if (aa + bb > 16) break;
        a = aa - bb + ca;
        b = ab + ab + cb;
+       if (aa + bb > 16) break;
        n++;
    }
    if (n >= max - 1) {
@@ -44,15 +44,16 @@ __kernel void iterate_vec4(__global float* A, const int width, float xl, float y
 
    int n = 0;
    if (smooth) {
+       int4 cmp = isless((float4)(16.0f), (float4)(16.0f));
        while (n < max) {
            float4 ab = a * b;
            float4 cmpVal = fma(a, a, b * b);
-           int4 cmp = isless(cmpVal, (float4)(16.0f));
-           if (!any(cmp)) break;
            a = fma(a, a, -fma(b, b, -ca));
            b = fma(2, ab, cb);
            resa = as_float4((as_int4(a) & cmp) | (as_int4(resa) & ~cmp));
            resb = as_float4((as_int4(b) & cmp) | (as_int4(resb) & ~cmp));
+           cmp = isless(cmpVal, (float4)(16.0f));
+           if (!any(cmp)) break;
            count += cmp & (int4)(1);
            n++;
        }
@@ -61,10 +62,10 @@ __kernel void iterate_vec4(__global float* A, const int width, float xl, float y
        while (n < max) {
            float4 ab = a * b;
            float4 cmpVal = fma(a, a, b * b);
-           int4 cmp = isless(cmpVal, (float4)(16.0f));
-           if (!any(cmp)) break;
            a = fma(a, a, -fma(b, b, -ca));
            b = fma(2, ab, cb);
+           int4 cmp = isless(cmpVal, (float4)(16.0f));
+           if (!any(cmp)) break;
            count += cmp & (int4)(1);
            n++;
        }
@@ -75,5 +76,4 @@ __kernel void iterate_vec4(__global float* A, const int width, float xl, float y
       else
           A[index + i] = ((float) count[i]);
    }
-}
-
+}

+ 89 - 85
libmandel/src/opencl/float.h

@@ -44,13 +44,13 @@ unsigned char float_cl[] = {
   0x62, 0x20, 0x3d, 0x20, 0x62, 0x20, 0x2a, 0x20, 0x62, 0x3b, 0x0d, 0x0a,
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74,
   0x20, 0x61, 0x62, 0x20, 0x3d, 0x20, 0x61, 0x20, 0x2a, 0x20, 0x62, 0x3b,
-  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20,
-  0x28, 0x61, 0x61, 0x20, 0x2b, 0x20, 0x62, 0x62, 0x20, 0x3e, 0x20, 0x31,
-  0x36, 0x29, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0d, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x20, 0x3d, 0x20, 0x61, 0x61,
-  0x20, 0x2d, 0x20, 0x62, 0x62, 0x20, 0x2b, 0x20, 0x63, 0x61, 0x3b, 0x0d,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62, 0x20, 0x3d, 0x20,
-  0x61, 0x62, 0x20, 0x2b, 0x20, 0x61, 0x62, 0x20, 0x2b, 0x20, 0x63, 0x62,
+  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x20, 0x3d,
+  0x20, 0x61, 0x61, 0x20, 0x2d, 0x20, 0x62, 0x62, 0x20, 0x2b, 0x20, 0x63,
+  0x61, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62,
+  0x20, 0x3d, 0x20, 0x61, 0x62, 0x20, 0x2b, 0x20, 0x61, 0x62, 0x20, 0x2b,
+  0x20, 0x63, 0x62, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x69, 0x66, 0x20, 0x28, 0x61, 0x61, 0x20, 0x2b, 0x20, 0x62, 0x62,
+  0x20, 0x3e, 0x20, 0x31, 0x36, 0x29, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b,
   0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6e, 0x2b,
   0x2b, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x7d, 0x0d, 0x0a, 0x20, 0x20,
   0x20, 0x69, 0x66, 0x20, 0x28, 0x6e, 0x20, 0x3e, 0x3d, 0x20, 0x6d, 0x61,
@@ -127,22 +127,61 @@ unsigned char float_cl[] = {
   0x0d, 0x0a, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x6e, 0x20, 0x3d,
   0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28,
   0x73, 0x6d, 0x6f, 0x6f, 0x74, 0x68, 0x29, 0x20, 0x7b, 0x0d, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x77, 0x68, 0x69, 0x6c, 0x65, 0x20,
-  0x28, 0x6e, 0x20, 0x3c, 0x20, 0x6d, 0x61, 0x78, 0x29, 0x20, 0x7b, 0x0d,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x66, 0x6c, 0x6f, 0x61, 0x74, 0x34, 0x20, 0x61, 0x62, 0x20, 0x3d, 0x20,
-  0x61, 0x20, 0x2a, 0x20, 0x62, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74,
-  0x34, 0x20, 0x63, 0x6d, 0x70, 0x56, 0x61, 0x6c, 0x20, 0x3d, 0x20, 0x66,
-  0x6d, 0x61, 0x28, 0x61, 0x2c, 0x20, 0x61, 0x2c, 0x20, 0x62, 0x20, 0x2a,
-  0x20, 0x62, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x34, 0x20, 0x63, 0x6d,
-  0x70, 0x20, 0x3d, 0x20, 0x69, 0x73, 0x6c, 0x65, 0x73, 0x73, 0x28, 0x63,
-  0x6d, 0x70, 0x56, 0x61, 0x6c, 0x2c, 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61,
-  0x74, 0x34, 0x29, 0x28, 0x31, 0x36, 0x2e, 0x30, 0x66, 0x29, 0x29, 0x3b,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x34, 0x20, 0x63,
+  0x6d, 0x70, 0x20, 0x3d, 0x20, 0x69, 0x73, 0x6c, 0x65, 0x73, 0x73, 0x28,
+  0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x34, 0x29, 0x28, 0x31, 0x36, 0x2e,
+  0x30, 0x66, 0x29, 0x2c, 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x34,
+  0x29, 0x28, 0x31, 0x36, 0x2e, 0x30, 0x66, 0x29, 0x29, 0x3b, 0x0d, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x77, 0x68, 0x69, 0x6c, 0x65,
+  0x20, 0x28, 0x6e, 0x20, 0x3c, 0x20, 0x6d, 0x61, 0x78, 0x29, 0x20, 0x7b,
+  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x34, 0x20, 0x61, 0x62, 0x20, 0x3d,
+  0x20, 0x61, 0x20, 0x2a, 0x20, 0x62, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61,
+  0x74, 0x34, 0x20, 0x63, 0x6d, 0x70, 0x56, 0x61, 0x6c, 0x20, 0x3d, 0x20,
+  0x66, 0x6d, 0x61, 0x28, 0x61, 0x2c, 0x20, 0x61, 0x2c, 0x20, 0x62, 0x20,
+  0x2a, 0x20, 0x62, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x20, 0x3d, 0x20, 0x66, 0x6d,
+  0x61, 0x28, 0x61, 0x2c, 0x20, 0x61, 0x2c, 0x20, 0x2d, 0x66, 0x6d, 0x61,
+  0x28, 0x62, 0x2c, 0x20, 0x62, 0x2c, 0x20, 0x2d, 0x63, 0x61, 0x29, 0x29,
+  0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x66, 0x6d, 0x61, 0x28, 0x32, 0x2c,
+  0x20, 0x61, 0x62, 0x2c, 0x20, 0x63, 0x62, 0x29, 0x3b, 0x0d, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65,
+  0x73, 0x61, 0x20, 0x3d, 0x20, 0x61, 0x73, 0x5f, 0x66, 0x6c, 0x6f, 0x61,
+  0x74, 0x34, 0x28, 0x28, 0x61, 0x73, 0x5f, 0x69, 0x6e, 0x74, 0x34, 0x28,
+  0x61, 0x29, 0x20, 0x26, 0x20, 0x63, 0x6d, 0x70, 0x29, 0x20, 0x7c, 0x20,
+  0x28, 0x61, 0x73, 0x5f, 0x69, 0x6e, 0x74, 0x34, 0x28, 0x72, 0x65, 0x73,
+  0x61, 0x29, 0x20, 0x26, 0x20, 0x7e, 0x63, 0x6d, 0x70, 0x29, 0x29, 0x3b,
+  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x72, 0x65, 0x73, 0x62, 0x20, 0x3d, 0x20, 0x61, 0x73, 0x5f, 0x66,
+  0x6c, 0x6f, 0x61, 0x74, 0x34, 0x28, 0x28, 0x61, 0x73, 0x5f, 0x69, 0x6e,
+  0x74, 0x34, 0x28, 0x62, 0x29, 0x20, 0x26, 0x20, 0x63, 0x6d, 0x70, 0x29,
+  0x20, 0x7c, 0x20, 0x28, 0x61, 0x73, 0x5f, 0x69, 0x6e, 0x74, 0x34, 0x28,
+  0x72, 0x65, 0x73, 0x62, 0x29, 0x20, 0x26, 0x20, 0x7e, 0x63, 0x6d, 0x70,
+  0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x63, 0x6d, 0x70, 0x20, 0x3d, 0x20, 0x69, 0x73,
+  0x6c, 0x65, 0x73, 0x73, 0x28, 0x63, 0x6d, 0x70, 0x56, 0x61, 0x6c, 0x2c,
+  0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x34, 0x29, 0x28, 0x31, 0x36,
+  0x2e, 0x30, 0x66, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x21,
+  0x61, 0x6e, 0x79, 0x28, 0x63, 0x6d, 0x70, 0x29, 0x29, 0x20, 0x62, 0x72,
+  0x65, 0x61, 0x6b, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x20, 0x2b,
+  0x3d, 0x20, 0x63, 0x6d, 0x70, 0x20, 0x26, 0x20, 0x28, 0x69, 0x6e, 0x74,
+  0x34, 0x29, 0x28, 0x31, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6e, 0x2b, 0x2b, 0x3b, 0x0d,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0d, 0x0a, 0x20,
+  0x20, 0x20, 0x7d, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x65, 0x6c, 0x73,
+  0x65, 0x20, 0x7b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x77, 0x68, 0x69, 0x6c, 0x65, 0x20, 0x28, 0x6e, 0x20, 0x3c, 0x20, 0x6d,
+  0x61, 0x78, 0x29, 0x20, 0x7b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x34,
+  0x20, 0x61, 0x62, 0x20, 0x3d, 0x20, 0x61, 0x20, 0x2a, 0x20, 0x62, 0x3b,
   0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x69, 0x66, 0x20, 0x28, 0x21, 0x61, 0x6e, 0x79, 0x28, 0x63, 0x6d,
-  0x70, 0x29, 0x29, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0d, 0x0a,
+  0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x34, 0x20, 0x63, 0x6d, 0x70, 0x56,
+  0x61, 0x6c, 0x20, 0x3d, 0x20, 0x66, 0x6d, 0x61, 0x28, 0x61, 0x2c, 0x20,
+  0x61, 0x2c, 0x20, 0x62, 0x20, 0x2a, 0x20, 0x62, 0x29, 0x3b, 0x0d, 0x0a,
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61,
   0x20, 0x3d, 0x20, 0x66, 0x6d, 0x61, 0x28, 0x61, 0x2c, 0x20, 0x61, 0x2c,
   0x20, 0x2d, 0x66, 0x6d, 0x61, 0x28, 0x62, 0x2c, 0x20, 0x62, 0x2c, 0x20,
@@ -150,34 +189,6 @@ unsigned char float_cl[] = {
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x66,
   0x6d, 0x61, 0x28, 0x32, 0x2c, 0x20, 0x61, 0x62, 0x2c, 0x20, 0x63, 0x62,
   0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x61, 0x20, 0x3d, 0x20, 0x61, 0x73,
-  0x5f, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x34, 0x28, 0x28, 0x61, 0x73, 0x5f,
-  0x69, 0x6e, 0x74, 0x34, 0x28, 0x61, 0x29, 0x20, 0x26, 0x20, 0x63, 0x6d,
-  0x70, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x61, 0x73, 0x5f, 0x69, 0x6e, 0x74,
-  0x34, 0x28, 0x72, 0x65, 0x73, 0x61, 0x29, 0x20, 0x26, 0x20, 0x7e, 0x63,
-  0x6d, 0x70, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x62, 0x20, 0x3d,
-  0x20, 0x61, 0x73, 0x5f, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x34, 0x28, 0x28,
-  0x61, 0x73, 0x5f, 0x69, 0x6e, 0x74, 0x34, 0x28, 0x62, 0x29, 0x20, 0x26,
-  0x20, 0x63, 0x6d, 0x70, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x61, 0x73, 0x5f,
-  0x69, 0x6e, 0x74, 0x34, 0x28, 0x72, 0x65, 0x73, 0x62, 0x29, 0x20, 0x26,
-  0x20, 0x7e, 0x63, 0x6d, 0x70, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x75,
-  0x6e, 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x6d, 0x70, 0x20, 0x26, 0x20,
-  0x28, 0x69, 0x6e, 0x74, 0x34, 0x29, 0x28, 0x31, 0x29, 0x3b, 0x0d, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6e,
-  0x2b, 0x2b, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x7d, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x7d, 0x0d, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x65, 0x6c, 0x73, 0x65, 0x20, 0x7b, 0x0d, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x77, 0x68, 0x69, 0x6c, 0x65, 0x20, 0x28, 0x6e,
-  0x20, 0x3c, 0x20, 0x6d, 0x61, 0x78, 0x29, 0x20, 0x7b, 0x0d, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c,
-  0x6f, 0x61, 0x74, 0x34, 0x20, 0x61, 0x62, 0x20, 0x3d, 0x20, 0x61, 0x20,
-  0x2a, 0x20, 0x62, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x34, 0x20,
-  0x63, 0x6d, 0x70, 0x56, 0x61, 0x6c, 0x20, 0x3d, 0x20, 0x66, 0x6d, 0x61,
-  0x28, 0x61, 0x2c, 0x20, 0x61, 0x2c, 0x20, 0x62, 0x20, 0x2a, 0x20, 0x62,
-  0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
   0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x34, 0x20, 0x63, 0x6d, 0x70, 0x20,
   0x3d, 0x20, 0x69, 0x73, 0x6c, 0x65, 0x73, 0x73, 0x28, 0x63, 0x6d, 0x70,
   0x56, 0x61, 0x6c, 0x2c, 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x34,
@@ -185,40 +196,33 @@ unsigned char float_cl[] = {
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69,
   0x66, 0x20, 0x28, 0x21, 0x61, 0x6e, 0x79, 0x28, 0x63, 0x6d, 0x70, 0x29,
   0x29, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0d, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x20, 0x3d,
-  0x20, 0x66, 0x6d, 0x61, 0x28, 0x61, 0x2c, 0x20, 0x61, 0x2c, 0x20, 0x2d,
-  0x66, 0x6d, 0x61, 0x28, 0x62, 0x2c, 0x20, 0x62, 0x2c, 0x20, 0x2d, 0x63,
-  0x61, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x66, 0x6d, 0x61,
-  0x28, 0x32, 0x2c, 0x20, 0x61, 0x62, 0x2c, 0x20, 0x63, 0x62, 0x29, 0x3b,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x75,
+  0x6e, 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x6d, 0x70, 0x20, 0x26, 0x20,
+  0x28, 0x69, 0x6e, 0x74, 0x34, 0x29, 0x28, 0x31, 0x29, 0x3b, 0x0d, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6e,
+  0x2b, 0x2b, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x7d, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0d, 0x0a, 0x20, 0x20,
+  0x20, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x69, 0x6e, 0x74, 0x20, 0x69, 0x20,
+  0x3d, 0x20, 0x30, 0x3b, 0x20, 0x69, 0x20, 0x3c, 0x20, 0x34, 0x20, 0x26,
+  0x26, 0x20, 0x69, 0x20, 0x2b, 0x20, 0x78, 0x20, 0x3c, 0x20, 0x77, 0x69,
+  0x64, 0x74, 0x68, 0x3b, 0x20, 0x69, 0x2b, 0x2b, 0x29, 0x20, 0x7b, 0x0d,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28,
+  0x73, 0x6d, 0x6f, 0x6f, 0x74, 0x68, 0x20, 0x21, 0x3d, 0x20, 0x30, 0x29,
   0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x6d,
-  0x70, 0x20, 0x26, 0x20, 0x28, 0x69, 0x6e, 0x74, 0x34, 0x29, 0x28, 0x31,
-  0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x6e, 0x2b, 0x2b, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x7d, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d,
-  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x69, 0x6e,
-  0x74, 0x20, 0x69, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x69, 0x20, 0x3c,
-  0x20, 0x34, 0x20, 0x26, 0x26, 0x20, 0x69, 0x20, 0x2b, 0x20, 0x78, 0x20,
-  0x3c, 0x20, 0x77, 0x69, 0x64, 0x74, 0x68, 0x3b, 0x20, 0x69, 0x2b, 0x2b,
-  0x29, 0x20, 0x7b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x69, 0x66, 0x20, 0x28, 0x73, 0x6d, 0x6f, 0x6f, 0x74, 0x68, 0x20, 0x21,
-  0x3d, 0x20, 0x30, 0x29, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x41, 0x5b, 0x69, 0x6e, 0x64, 0x65, 0x78,
-  0x20, 0x2b, 0x20, 0x69, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x28, 0x66, 0x6c,
-  0x6f, 0x61, 0x74, 0x29, 0x20, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x5b, 0x69,
-  0x5d, 0x29, 0x20, 0x2b, 0x20, 0x31, 0x20, 0x2d, 0x20, 0x6c, 0x6f, 0x67,
-  0x28, 0x6c, 0x6f, 0x67, 0x28, 0x66, 0x6d, 0x61, 0x28, 0x72, 0x65, 0x73,
-  0x61, 0x5b, 0x69, 0x5d, 0x2c, 0x20, 0x72, 0x65, 0x73, 0x61, 0x5b, 0x69,
-  0x5d, 0x2c, 0x20, 0x72, 0x65, 0x73, 0x62, 0x5b, 0x69, 0x5d, 0x20, 0x2a,
-  0x20, 0x72, 0x65, 0x73, 0x62, 0x5b, 0x69, 0x5d, 0x29, 0x29, 0x20, 0x2f,
-  0x20, 0x32, 0x29, 0x20, 0x2f, 0x20, 0x6c, 0x6f, 0x67, 0x28, 0x32, 0x2e,
-  0x30, 0x66, 0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x65, 0x6c, 0x73, 0x65, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x41, 0x5b, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20,
-  0x2b, 0x20, 0x69, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x28, 0x66, 0x6c, 0x6f,
-  0x61, 0x74, 0x29, 0x20, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x5b, 0x69, 0x5d,
-  0x29, 0x3b, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x7d, 0x0d, 0x0a, 0x7d, 0x0d,
-  0x0a, 0x0d, 0x0a
+  0x20, 0x41, 0x5b, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x2b, 0x20, 0x69,
+  0x5d, 0x20, 0x3d, 0x20, 0x28, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29,
+  0x20, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x5b, 0x69, 0x5d, 0x29, 0x20, 0x2b,
+  0x20, 0x31, 0x20, 0x2d, 0x20, 0x6c, 0x6f, 0x67, 0x28, 0x6c, 0x6f, 0x67,
+  0x28, 0x66, 0x6d, 0x61, 0x28, 0x72, 0x65, 0x73, 0x61, 0x5b, 0x69, 0x5d,
+  0x2c, 0x20, 0x72, 0x65, 0x73, 0x61, 0x5b, 0x69, 0x5d, 0x2c, 0x20, 0x72,
+  0x65, 0x73, 0x62, 0x5b, 0x69, 0x5d, 0x20, 0x2a, 0x20, 0x72, 0x65, 0x73,
+  0x62, 0x5b, 0x69, 0x5d, 0x29, 0x29, 0x20, 0x2f, 0x20, 0x32, 0x29, 0x20,
+  0x2f, 0x20, 0x6c, 0x6f, 0x67, 0x28, 0x32, 0x2e, 0x30, 0x66, 0x29, 0x3b,
+  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x6c, 0x73, 0x65,
+  0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x41, 0x5b, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x2b, 0x20, 0x69, 0x5d,
+  0x20, 0x3d, 0x20, 0x28, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x20,
+  0x63, 0x6f, 0x75, 0x6e, 0x74, 0x5b, 0x69, 0x5d, 0x29, 0x3b, 0x0d, 0x0a,
+  0x20, 0x20, 0x20, 0x7d, 0x0d, 0x0a, 0x7d
 };
-unsigned int float_cl_len = 2643;
+unsigned int float_cl_len = 2695;

+ 1 - 1
libmandel/src/opencl/quaddouble.cl

@@ -231,10 +231,10 @@ __kernel void iterate(__global float* A, const int width,
         double4 aa = sq(a);
         double4 bb = sq(b);
         double4 ab = mul(a, b);
-        if (aa.s0 + bb.s0 > 16) break;
         double4 minusbb = (double4)(-bb.s0, -bb.s1, -bb.s2, -bb.s3);
         a = add(add(aa, minusbb), ca);
         b = add(add(ab, ab), cb);
+        if (aa.s0 + bb.s0 > 16) break;
         n++;
     }
 

+ 15 - 15
libmandel/src/opencl/quaddouble.h

@@ -490,21 +490,21 @@ unsigned char quaddouble_cl[] = {
   0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f,
   0x75, 0x62, 0x6c, 0x65, 0x34, 0x20, 0x61, 0x62, 0x20, 0x3d, 0x20, 0x6d,
   0x75, 0x6c, 0x28, 0x61, 0x2c, 0x20, 0x62, 0x29, 0x3b, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x61, 0x61,
-  0x2e, 0x73, 0x30, 0x20, 0x2b, 0x20, 0x62, 0x62, 0x2e, 0x73, 0x30, 0x20,
-  0x3e, 0x20, 0x31, 0x36, 0x29, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75,
-  0x62, 0x6c, 0x65, 0x34, 0x20, 0x6d, 0x69, 0x6e, 0x75, 0x73, 0x62, 0x62,
-  0x20, 0x3d, 0x20, 0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x34, 0x29,
-  0x28, 0x2d, 0x62, 0x62, 0x2e, 0x73, 0x30, 0x2c, 0x20, 0x2d, 0x62, 0x62,
-  0x2e, 0x73, 0x31, 0x2c, 0x20, 0x2d, 0x62, 0x62, 0x2e, 0x73, 0x32, 0x2c,
-  0x20, 0x2d, 0x62, 0x62, 0x2e, 0x73, 0x33, 0x29, 0x3b, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x20, 0x3d, 0x20, 0x61, 0x64,
-  0x64, 0x28, 0x61, 0x64, 0x64, 0x28, 0x61, 0x61, 0x2c, 0x20, 0x6d, 0x69,
-  0x6e, 0x75, 0x73, 0x62, 0x62, 0x29, 0x2c, 0x20, 0x63, 0x61, 0x29, 0x3b,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62, 0x20, 0x3d,
-  0x20, 0x61, 0x64, 0x64, 0x28, 0x61, 0x64, 0x64, 0x28, 0x61, 0x62, 0x2c,
-  0x20, 0x61, 0x62, 0x29, 0x2c, 0x20, 0x63, 0x62, 0x29, 0x3b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65,
+  0x34, 0x20, 0x6d, 0x69, 0x6e, 0x75, 0x73, 0x62, 0x62, 0x20, 0x3d, 0x20,
+  0x28, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x34, 0x29, 0x28, 0x2d, 0x62,
+  0x62, 0x2e, 0x73, 0x30, 0x2c, 0x20, 0x2d, 0x62, 0x62, 0x2e, 0x73, 0x31,
+  0x2c, 0x20, 0x2d, 0x62, 0x62, 0x2e, 0x73, 0x32, 0x2c, 0x20, 0x2d, 0x62,
+  0x62, 0x2e, 0x73, 0x33, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x61, 0x20, 0x3d, 0x20, 0x61, 0x64, 0x64, 0x28, 0x61,
+  0x64, 0x64, 0x28, 0x61, 0x61, 0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x75, 0x73,
+  0x62, 0x62, 0x29, 0x2c, 0x20, 0x63, 0x61, 0x29, 0x3b, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x61, 0x64,
+  0x64, 0x28, 0x61, 0x64, 0x64, 0x28, 0x61, 0x62, 0x2c, 0x20, 0x61, 0x62,
+  0x29, 0x2c, 0x20, 0x63, 0x62, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x61, 0x61, 0x2e, 0x73,
+  0x30, 0x20, 0x2b, 0x20, 0x62, 0x62, 0x2e, 0x73, 0x30, 0x20, 0x3e, 0x20,
+  0x31, 0x36, 0x29, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0a, 0x20,
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6e, 0x2b, 0x2b, 0x3b, 0x0a,
   0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f,
   0x2f, 0x20, 0x4e, 0x20, 0x2b, 0x20, 0x31, 0x20, 0x2d, 0x20, 0x6c, 0x6f,

+ 1 - 1
mandelvid/src/main.cpp

@@ -19,7 +19,7 @@ int main() {
         mnd::Real("1.2246019034401093377903721086780361028058704962292211685926779200766324399350798858672587301860274703389823933260119617558370004128301410779021141722617e-10")
     };*/
     evi.end = mnd::MandelViewport {
-        mnd::Real("-2.0"),
+        mnd::Real("-1.0"),
         mnd::Real("-1.0"),
         mnd::Real("1.0e-3"),
         mnd::Real("1.0e-3")