Bladeren bron

doublefloat

Nicolas Winkler 5 jaren geleden
bovenliggende
commit
672cf3572e

+ 13 - 0
libmandel/include/ClGenerators.h

@@ -15,6 +15,7 @@ namespace mnd
 {
     class ClGenerator;
     class ClGeneratorFloat;
+    class ClGeneratorDoubleFloat;
     class ClGeneratorDouble;
     class ClGeneratorDoubleDouble;
     class ClGeneratorQuadDouble;
@@ -50,6 +51,18 @@ protected:
 };
 
 
+// OpenCL generator whose kernel works on float-float values, i.e. numbers
+// represented as a pair of single-precision floats (see doublefloat.cl).
+class mnd::ClGeneratorDoubleFloat : public ClGenerator
+{
+public:
+    // Builds the kernel program for `device`. Throws a std::string holding
+    // the OpenCL build log if the program fails to build.
+    ClGeneratorDoubleFloat(cl::Device device);
+    virtual ~ClGeneratorDoubleFloat(void) = default;
+
+    // Runs the kernel for the view/size given in `info` and reads
+    // info.bWidth * info.bHeight floats back into `data`.
+    virtual void generate(const MandelInfo& info, float* data);
+protected:
+    // Returns the OpenCL source of the kernel. The implementation currently
+    // ignores `smooth`; smoothing is selected through a kernel argument.
+    virtual std::string getKernelCode(bool smooth) const;
+};
+
+
 class mnd::ClGeneratorDouble : public ClGenerator
 {
 public:

+ 1 - 0
libmandel/include/Mandel.h

@@ -33,6 +33,7 @@ enum class mnd::GeneratorType
     FLOAT_AVX,
     FLOAT_AVX512,
     FLOAT_NEON,
+    DOUBLE_FLOAT,
     DOUBLE,
     DOUBLE_SSE2,
     DOUBLE_AVX,

+ 67 - 0
libmandel/src/ClGenerators.cpp

@@ -1,16 +1,19 @@
 #include "ClGenerators.h"
 #include "doubledouble.h"
+#include "doublefloat.h"
 
 #ifdef WITH_OPENCL
 
 #include <iostream>
 #include <iterator>
+#include <utility>
 
 
 using namespace cl;
 
 using mnd::ClGenerator;
 using mnd::ClGeneratorFloat;
+using mnd::ClGeneratorDoubleFloat;
 using mnd::ClGeneratorDouble;
 using mnd::ClGeneratorDoubleDouble;
 using mnd::ClGeneratorQuadDouble;
@@ -167,6 +170,70 @@ std::string ClGeneratorFloat::getKernelCode(bool smooth) const
 }
 
 
+// Creates the OpenCL context, builds the float-float kernel program and sets
+// up the command queue for `device`.
+// Throws a std::string containing the build log when the build fails.
+ClGeneratorDoubleFloat::ClGeneratorDoubleFloat(cl::Device device) :
+    ClGenerator{ device }
+{
+    // Fetch the kernel text first; it must stay alive while `programSources`
+    // refers to it.
+    const std::string kernelSource = this->getKernelCode(false);
+    Program::Sources programSources;
+    programSources.push_back({ kernelSource.c_str(), kernelSource.length() });
+
+    context = Context{ device };
+    program = Program{ context, programSources };
+
+    const cl_int buildStatus = program.build({ device });
+    if (buildStatus != CL_SUCCESS) {
+        throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
+    }
+
+    queue = CommandQueue(context, device);
+}
+
+
+// Renders the view described by `info` with the float-float kernel and
+// copies the result into `data`, which must have room for
+// info.bWidth * info.bHeight floats.
+//
+// The view origin and the per-pixel step are computed in double precision and
+// each is handed to the kernel as a (hi, lo) pair of floats.
+//
+// If launching the kernel or reading the result fails, the OpenCL error code
+// is reported on stderr; when the launch itself fails `data` is left untouched
+// instead of being filled from a buffer the kernel never wrote.
+void ClGeneratorDoubleFloat::generate(const mnd::MandelInfo& info, float* data)
+{
+    const ::size_t pixelCount = ::size_t(info.bWidth) * ::size_t(info.bHeight);
+    if (pixelCount == 0)
+        return; // nothing to render, and a zero-sized cl::Buffer is invalid
+    const ::size_t bufferSize = pixelCount * sizeof(float);
+
+    // Splits x into hi = float(x) and lo = float(x - hi), so that hi + lo
+    // approximates x more closely than a single float can.
+    auto splitDouble = [] (double x) {
+        float hi = float(x);
+        float lo = float(x - double(hi));
+        return std::pair{ hi, lo };
+    };
+
+    Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
+    double pixelScaleX = double(info.view.width / info.bWidth);
+    double pixelScaleY = double(info.view.height / info.bHeight);
+
+    auto[x1, x2] = splitDouble(double(info.view.x));
+    auto[y1, y2] = splitDouble(double(info.view.y));
+    auto[w1, w2] = splitDouble(pixelScaleX);
+    auto[h1, h2] = splitDouble(pixelScaleY);
+
+    // Argument order must match the signature of `iterate` in doublefloat.cl.
+    Kernel iterate = Kernel(program, "iterate");
+    iterate.setArg(0, buffer_A);
+    iterate.setArg(1, int(info.bWidth));
+    iterate.setArg(2, x1);
+    iterate.setArg(3, x2);
+    iterate.setArg(4, y1);
+    iterate.setArg(5, y2);
+    iterate.setArg(6, w1);
+    iterate.setArg(7, w2);
+    iterate.setArg(8, h1);
+    iterate.setArg(9, h2);
+    iterate.setArg(10, int(info.maxIter));
+    iterate.setArg(11, int(info.smooth ? 1 : 0));
+
+    // One work item per pixel.
+    cl_int result = queue.enqueueNDRangeKernel(iterate, 0, NDRange(pixelCount));
+    if (result != CL_SUCCESS) {
+        std::cerr << "ClGeneratorDoubleFloat: enqueueNDRangeKernel failed with error " << result << '\n';
+        return;
+    }
+
+    // Blocking read (CL_TRUE): returns once `data` has been filled.
+    result = queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
+    if (result != CL_SUCCESS) {
+        std::cerr << "ClGeneratorDoubleFloat: enqueueReadBuffer failed with error " << result << '\n';
+    }
+}
+
+
+// Returns the OpenCL source embedded in doublefloat.h.
+// `smooth` is unused: the kernel takes the smoothing flag as an argument.
+std::string ClGeneratorDoubleFloat::getKernelCode(bool smooth) const
+{
+    // doublefloat_cl is a raw byte array without a terminating NUL, so the
+    // string must be built from an explicit length rather than a bare char*.
+    return std::string(reinterpret_cast<const char*>(doublefloat_cl), doublefloat_cl_len);
+}
+
+
 ClGeneratorDouble::ClGeneratorDouble(cl::Device device) :
     ClGenerator{ device }
 {

+ 47 - 0
libmandel/src/CpuGenerators.cpp

@@ -94,6 +94,53 @@ void CpuGenerator<T, mnd::NONE, parallel>::generate(const mnd::MandelInfo& info,
     }
 }
 
+
+/*
+template<bool parallel>
+void CpuGenerator<double, mnd::NONE, parallel>::generate(const mnd::MandelInfo& info, float* data)
+{
+    const MandelViewport& view = info.view;
+
+    T viewx = mnd::convert<T>(view.x);
+    T viewy = mnd::convert<T>(view.y);
+    T wpp = mnd::convert<T>(view.width / info.bWidth);
+    T hpp = mnd::convert<T>(view.height / info.bHeight);
+
+    if constexpr (parallel)
+        omp_set_num_threads(omp_get_num_procs());
+#pragma omp parallel for schedule(static, 1) if (parallel)
+    for (long j = 0; j < info.bHeight; j++) {
+        T y = viewy + T(double(j)) * hpp;
+        long i = 0;
+        for (i; i < info.bWidth; i++) {
+            T x = viewx + T(double(i)) * wpp;
+
+            T a = x;
+            T b = y;
+
+            int k = 0;
+            for (k = 0; k < info.maxIter; k++) {
+                T aa = a * a;
+                T bb = b * b;
+                T ab = a * b;
+                a = aa - bb + x;
+                b = ab + ab + y;
+                if (aa + bb > T(16.0)) {
+                    break;
+                }
+            }
+            if (info.smooth) {
+                if (k >= info.maxIter)
+                    data[i + j * info.bWidth] = float(info.maxIter);
+                else
+                    data[i + j * info.bWidth] = ((float) k) + 1 - ::logf(::logf(mnd::convert<float>(a * a + b * b)) / 2) / ::logf(2.0f);
+            }
+            else
+                data[i + j * info.bWidth] = k;
+        }
+    }
+}*/
+
 /*
 #if defined(WITH_BOOST) || 1
 template<bool parallel>

+ 2 - 0
libmandel/src/Mandel.cpp

@@ -29,6 +29,7 @@ static const std::map<mnd::GeneratorType, std::string> typeNames =
     { mnd::GeneratorType::FLOAT_AVX, "float AVX" },
     { mnd::GeneratorType::FLOAT_AVX512, "float AVX512" },
     { mnd::GeneratorType::FLOAT_NEON, "float NEON" },
+    { mnd::GeneratorType::DOUBLE_FLOAT, "double float" },
     { mnd::GeneratorType::DOUBLE, "double" },
     { mnd::GeneratorType::DOUBLE_SSE2, "double SSE2" },
     { mnd::GeneratorType::DOUBLE_AVX, "double AVX" },
@@ -243,6 +244,7 @@ std::vector<MandelDevice> MandelContext::createDevices(void)
             //printf("    using opencl device: %s\n", md.name.c_str());
             try {
                 md.generators.insert({ GeneratorType::FLOAT, std::make_unique<ClGeneratorFloat>(device) });
+                md.generators.insert({ GeneratorType::DOUBLE_FLOAT, std::make_unique<ClGeneratorDoubleFloat>(device) });
             }
             catch (const std::string& err) {
                 printf("err: %s", err.c_str());

+ 95 - 0
libmandel/src/doublefloat.cl

@@ -0,0 +1,95 @@
+// citation: Guillaume Da Graça, David Defour. Implementation of float-float operators on graphics hardware.
+// Real Numbers and Computers 7, Jul 2006, Nancy, France. pp.23-32. hal-00021443
+// https://hal.archives-ouvertes.fr/hal-00021443/document
+
+// Adds two floats and returns (s, r): s is the rounded sum a + b and r is the
+// correction term recovered from the rounding of that sum.
+// The order of operations and the parentheses are essential; do not simplify.
+float2 twoSum(float a, float b) {
+    float s = a + b;
+    float v = s - a; // the part of b that actually made it into s
+    float r = (a - (s - v)) + (b - v);
+    return (float2)(s, r);
+}
+
+// Veltkamp split: returns (ahi, alo) with ahi + alo == a, where each half is
+// short enough that products of two halves are exactly representable.
+//
+// For a 24-bit float significand the splitting constant is 2^12 + 1 = 4097.
+// The previous constant 2^16 + 1 produced an uneven 8/16-bit split, so the
+// alo * blo product used by twoProd was no longer exact.
+//
+// NOTE: doublefloat.h embeds a copy of this file and must be regenerated
+// after editing it.
+float2 split(float a) {
+    float c = 4097.0f * a; // (1 << 12) + 1
+    float abig = c - a;
+    float ahi = c - abig;
+    float alo = a - ahi;
+    return (float2)(ahi, alo);
+}
+
+// Multiplies two floats and returns (x, y): x is the rounded product a * b and
+// y is the correction term for the rounding of x, reconstructed from the
+// partial products of the split halves of a and b.
+// The result is only as accurate as those partial products, i.e. it relies on
+// split() returning halves whose pairwise products are exactly representable.
+float2 twoProd(float a, float b) {
+    float x = a * b;
+    float2 aex = split(a); // (high half, low half) of a
+    float2 bex = split(b); // (high half, low half) of b
+    // Subtract the partial products from x one at a time, largest first.
+    float errx = x - (aex.s0 * bex.s0);
+    float erry = errx - (aex.s1 * bex.s0);
+    float errz = erry - (aex.s0 * bex.s1);
+    float y = (aex.s1 * bex.s1) - errz;
+    return (float2)(x, y);
+}
+
+// Float-float addition of a and b; components are (.x = leading, .y = trailing).
+// The low-order terms are accumulated starting from the operand whose leading
+// component has the larger magnitude, then the pair is renormalised by twoSum.
+float2 add(float2 a, float2 b) {
+    float head = a.x + b.x;
+    if (fabs(a.x) >= fabs(b.x)) {
+        return twoSum(head, (((a.x - head) + b.x) + b.y) + a.y);
+    }
+    return twoSum(head, (((b.x - head) + a.x) + a.y) + b.y);
+}
+
+// Float-float multiplication of a and b.
+// t holds the leading product a.s0 * b.s0 together with its correction term;
+// t3 adds the cross terms a.s0 * b.s1 and a.s1 * b.s0 to that correction.
+// The result is the renormalised pair (t.s0, t3).
+//
+// Fix: the previous version computed t3 but returned twoSum(t.s0, t.s1),
+// silently dropping the cross terms and with them the trailing components
+// of both operands.
+//
+// NOTE: doublefloat.h embeds a copy of this file and must be regenerated
+// after editing it.
+float2 mul(float2 a, float2 b) {
+    float2 t = twoProd(a.s0, b.s0);
+    float t3 = ((a.s0 * b.s1) + (a.s1 * b.s0)) + t.s1;
+    return twoSum(t.s0, t3);
+}
+
+// Multiplies the float-float value a by the plain float b.
+// t holds the leading product a.s0 * b together with its correction term;
+// t3 adds the contribution of the trailing component, a.s1 * b.
+// The result is the renormalised pair (t.s0, t3).
+//
+// Fix: the previous version computed t3 but returned twoSum(t.s0, t.s1),
+// so the trailing component of a never contributed to the product.
+//
+// NOTE: doublefloat.h embeds a copy of this file and must be regenerated
+// after editing it.
+float2 mulFloat(float2 a, float b) {
+    float2 t = twoProd(a.s0, b);
+    float t3 = (a.s1 * b) + t.s1;
+    return twoSum(t.s0, t3);
+}
+
+// Mandelbrot kernel using float-float arithmetic; one work item per pixel.
+//
+//   A            output, one float per pixel, indexed by the global id
+//   width        image width in pixels (used to turn the id into column/row)
+//   x1, x2       (hi, lo) parts of the view's x origin
+//   y1, y2       (hi, lo) parts of the view's y origin
+//   pw1, pw2     (hi, lo) parts of the horizontal step per pixel
+//   ph1, ph2     (hi, lo) parts of the vertical step per pixel
+//   max          iteration limit
+//   smooth       non-zero to write a smoothed (fractional) iteration count
+__kernel void iterate(__global float* A, const int width,
+                      float x1, float x2, float y1, float y2,
+                      float pw1, float pw2, float ph1, float ph2, int max, int smooth) {
+    const int index = get_global_id(0);
+    const int col = index % width;
+    const int row = index / width;
+
+    // c = origin + pixel index * step, evaluated in float-float
+    const float2 ca = add(mulFloat((float2)(pw1, pw2), (float) col), (float2)(x1, x2));
+    const float2 cb = add(mulFloat((float2)(ph1, ph2), (float) row), (float2)(y1, y2));
+
+    // z starts at c
+    float2 a = ca;
+    float2 b = cb;
+
+    const int limit = max - 1;
+    int n;
+    for (n = 0; n < limit; n++) {
+        float2 aa = mul(a, a);
+        float2 bb = mul(b, b);
+        float2 ab = mul(a, b);
+
+        // escape test on |z|^2; z is left at its pre-update value on break
+        if (aa.s0 + aa.s1 + bb.s0 + bb.s1 > 16)
+            break;
+
+        // z = z^2 + c  ->  re = aa - bb + ca,  im = 2ab + cb
+        float2 negbb = (float2)(-bb.s0, -bb.s1);
+        a = add(add(aa, negbb), ca);
+        b = add(add(ab, ab), cb);
+    }
+
+    float value;
+    if (n >= limit) {
+        // never escaped within the limit
+        value = max;
+    }
+    else if (smooth != 0) {
+        // n + 1 - log(log|z|) / log 2, using only the leading components of z
+        value = ((float) n) + 1 - log(log(a.s0 * a.s0 + b.s0 * b.s0) / 2) / log(2.0f);
+    }
+    else {
+        value = ((float) n);
+    }
+    A[index] = value;
+}

+ 238 - 0
libmandel/src/doublefloat.h

@@ -0,0 +1,238 @@
+unsigned char doublefloat_cl[] = {
+  0x2f, 0x2f, 0x20, 0x63, 0x69, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x3a,
+  0x20, 0x47, 0x75, 0x69, 0x6c, 0x6c, 0x61, 0x75, 0x6d, 0x65, 0x20, 0x64,
+  0x61, 0x20, 0x47, 0x72, 0x61, 0xc3, 0xa7, 0x63, 0x61, 0x2c, 0x20, 0x44,
+  0x61, 0x76, 0x69, 0x64, 0x20, 0x44, 0x65, 0x66, 0x6f, 0x75, 0x72, 0x2e,
+  0x20, 0x49, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74,
+  0x69, 0x6f, 0x6e, 0x20, 0x6f, 0x66, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74,
+  0x2d, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x6f, 0x70, 0x65, 0x72, 0x61,
+  0x74, 0x6f, 0x72, 0x73, 0x20, 0x6f, 0x6e, 0x20, 0x67, 0x72, 0x61, 0x70,
+  0x68, 0x69, 0x63, 0x73, 0x20, 0x68, 0x61, 0x72, 0x64, 0x77, 0x61, 0x72,
+  0x65, 0x2e, 0x0a, 0x2f, 0x2f, 0x20, 0x52, 0x65, 0x61, 0x6c, 0x20, 0x4e,
+  0x75, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x43,
+  0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x72, 0x73, 0x20, 0x37, 0x2c, 0x20,
+  0x4a, 0x75, 0x6c, 0x20, 0x32, 0x30, 0x30, 0x36, 0x2c, 0x20, 0x4e, 0x61,
+  0x6e, 0x63, 0x79, 0x2c, 0x20, 0x46, 0x72, 0x61, 0x6e, 0x63, 0x65, 0x2e,
+  0x20, 0x70, 0x70, 0x2e, 0x32, 0x33, 0x2d, 0x33, 0x32, 0x2e, 0x20, 0x66,
+  0x66, 0x68, 0x61, 0x6c, 0x2d, 0x30, 0x30, 0x30, 0x32, 0x31, 0x34, 0x34,
+  0x33, 0x0a, 0x2f, 0x2f, 0x20, 0x68, 0x74, 0x74, 0x70, 0x73, 0x3a, 0x2f,
+  0x2f, 0x68, 0x61, 0x6c, 0x2e, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65,
+  0x73, 0x2d, 0x6f, 0x75, 0x76, 0x65, 0x72, 0x74, 0x65, 0x73, 0x2e, 0x66,
+  0x72, 0x2f, 0x68, 0x61, 0x6c, 0x2d, 0x30, 0x30, 0x30, 0x32, 0x31, 0x34,
+  0x34, 0x33, 0x2f, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x0a,
+  0x0a, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x20, 0x74, 0x77, 0x6f, 0x53,
+  0x75, 0x6d, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x61, 0x2c, 0x20,
+  0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x62, 0x29, 0x20, 0x7b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x73, 0x20, 0x3d,
+  0x20, 0x61, 0x20, 0x2b, 0x20, 0x62, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x76, 0x20, 0x3d, 0x20, 0x73, 0x20,
+  0x2d, 0x20, 0x61, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f,
+  0x61, 0x74, 0x20, 0x72, 0x20, 0x3d, 0x20, 0x28, 0x61, 0x20, 0x2d, 0x20,
+  0x28, 0x73, 0x20, 0x2d, 0x20, 0x76, 0x29, 0x29, 0x20, 0x2b, 0x20, 0x28,
+  0x62, 0x20, 0x2d, 0x20, 0x76, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61,
+  0x74, 0x32, 0x29, 0x28, 0x73, 0x2c, 0x20, 0x72, 0x29, 0x3b, 0x0a, 0x7d,
+  0x0a, 0x0a, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x20, 0x73, 0x70, 0x6c,
+  0x69, 0x74, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x61, 0x29, 0x20,
+  0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20,
+  0x63, 0x20, 0x3d, 0x20, 0x28, 0x36, 0x35, 0x35, 0x33, 0x36, 0x20, 0x2b,
+  0x20, 0x31, 0x29, 0x20, 0x2a, 0x20, 0x61, 0x3b, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x61, 0x62, 0x69, 0x67, 0x20,
+  0x3d, 0x20, 0x63, 0x20, 0x2d, 0x20, 0x61, 0x3b, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x61, 0x68, 0x69, 0x20, 0x3d,
+  0x20, 0x63, 0x20, 0x2d, 0x20, 0x61, 0x62, 0x69, 0x67, 0x3b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x61, 0x6c, 0x6f,
+  0x20, 0x3d, 0x20, 0x61, 0x20, 0x2d, 0x20, 0x61, 0x68, 0x69, 0x3b, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28,
+  0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x29, 0x28, 0x61, 0x68, 0x69, 0x2c,
+  0x20, 0x61, 0x6c, 0x6f, 0x29, 0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x66, 0x6c,
+  0x6f, 0x61, 0x74, 0x32, 0x20, 0x74, 0x77, 0x6f, 0x50, 0x72, 0x6f, 0x64,
+  0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x61, 0x2c, 0x20, 0x66, 0x6c,
+  0x6f, 0x61, 0x74, 0x20, 0x62, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x78, 0x20, 0x3d, 0x20, 0x61,
+  0x20, 0x2a, 0x20, 0x62, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c,
+  0x6f, 0x61, 0x74, 0x32, 0x20, 0x61, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x73,
+  0x70, 0x6c, 0x69, 0x74, 0x28, 0x61, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x20, 0x62, 0x65, 0x78, 0x20,
+  0x3d, 0x20, 0x73, 0x70, 0x6c, 0x69, 0x74, 0x28, 0x62, 0x29, 0x3b, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x65, 0x72,
+  0x72, 0x78, 0x20, 0x3d, 0x20, 0x78, 0x20, 0x2d, 0x20, 0x28, 0x61, 0x65,
+  0x78, 0x2e, 0x73, 0x30, 0x20, 0x2a, 0x20, 0x62, 0x65, 0x78, 0x2e, 0x73,
+  0x30, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61,
+  0x74, 0x20, 0x65, 0x72, 0x72, 0x79, 0x20, 0x3d, 0x20, 0x65, 0x72, 0x72,
+  0x78, 0x20, 0x2d, 0x20, 0x28, 0x61, 0x65, 0x78, 0x2e, 0x73, 0x31, 0x20,
+  0x2a, 0x20, 0x62, 0x65, 0x78, 0x2e, 0x73, 0x30, 0x29, 0x3b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x65, 0x72, 0x72,
+  0x7a, 0x20, 0x3d, 0x20, 0x65, 0x72, 0x72, 0x79, 0x20, 0x2d, 0x20, 0x28,
+  0x61, 0x65, 0x78, 0x2e, 0x73, 0x30, 0x20, 0x2a, 0x20, 0x62, 0x65, 0x78,
+  0x2e, 0x73, 0x31, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c,
+  0x6f, 0x61, 0x74, 0x20, 0x79, 0x20, 0x3d, 0x20, 0x28, 0x61, 0x65, 0x78,
+  0x2e, 0x73, 0x31, 0x20, 0x2a, 0x20, 0x62, 0x65, 0x78, 0x2e, 0x73, 0x31,
+  0x29, 0x20, 0x2d, 0x20, 0x65, 0x72, 0x72, 0x7a, 0x3b, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x66, 0x6c,
+  0x6f, 0x61, 0x74, 0x32, 0x29, 0x28, 0x78, 0x2c, 0x20, 0x79, 0x29, 0x3b,
+  0x0a, 0x7d, 0x0a, 0x0a, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x20, 0x61,
+  0x64, 0x64, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x20, 0x61, 0x2c,
+  0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x20, 0x62, 0x29, 0x20, 0x7b,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x72,
+  0x20, 0x3d, 0x20, 0x61, 0x2e, 0x73, 0x30, 0x20, 0x2b, 0x20, 0x62, 0x2e,
+  0x73, 0x30, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61,
+  0x74, 0x20, 0x73, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20,
+  0x28, 0x66, 0x61, 0x62, 0x73, 0x28, 0x61, 0x2e, 0x73, 0x30, 0x29, 0x20,
+  0x3e, 0x3d, 0x20, 0x66, 0x61, 0x62, 0x73, 0x28, 0x62, 0x2e, 0x73, 0x30,
+  0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x73, 0x20, 0x3d, 0x20, 0x28, 0x28, 0x28, 0x61, 0x2e, 0x73, 0x30,
+  0x20, 0x2d, 0x20, 0x72, 0x29, 0x20, 0x2b, 0x20, 0x62, 0x2e, 0x73, 0x30,
+  0x29, 0x20, 0x2b, 0x20, 0x62, 0x2e, 0x73, 0x31, 0x29, 0x20, 0x2b, 0x20,
+  0x61, 0x2e, 0x73, 0x31, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x65, 0x6c, 0x73, 0x65, 0x20, 0x7b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x73, 0x20, 0x3d, 0x20, 0x28,
+  0x28, 0x28, 0x62, 0x2e, 0x73, 0x30, 0x20, 0x2d, 0x20, 0x72, 0x29, 0x20,
+  0x2b, 0x20, 0x61, 0x2e, 0x73, 0x30, 0x29, 0x20, 0x2b, 0x20, 0x61, 0x2e,
+  0x73, 0x31, 0x29, 0x20, 0x2b, 0x20, 0x62, 0x2e, 0x73, 0x31, 0x3b, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65,
+  0x74, 0x75, 0x72, 0x6e, 0x20, 0x74, 0x77, 0x6f, 0x53, 0x75, 0x6d, 0x28,
+  0x72, 0x2c, 0x20, 0x73, 0x29, 0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x66, 0x6c,
+  0x6f, 0x61, 0x74, 0x32, 0x20, 0x6d, 0x75, 0x6c, 0x28, 0x66, 0x6c, 0x6f,
+  0x61, 0x74, 0x32, 0x20, 0x61, 0x2c, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74,
+  0x32, 0x20, 0x62, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66,
+  0x6c, 0x6f, 0x61, 0x74, 0x32, 0x20, 0x74, 0x20, 0x3d, 0x20, 0x74, 0x77,
+  0x6f, 0x50, 0x72, 0x6f, 0x64, 0x28, 0x61, 0x2e, 0x73, 0x30, 0x2c, 0x20,
+  0x62, 0x2e, 0x73, 0x30, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66,
+  0x6c, 0x6f, 0x61, 0x74, 0x20, 0x74, 0x33, 0x20, 0x3d, 0x20, 0x28, 0x28,
+  0x61, 0x2e, 0x73, 0x30, 0x20, 0x2a, 0x20, 0x62, 0x2e, 0x73, 0x31, 0x29,
+  0x20, 0x2b, 0x20, 0x28, 0x61, 0x2e, 0x73, 0x31, 0x20, 0x2a, 0x20, 0x62,
+  0x2e, 0x73, 0x30, 0x29, 0x29, 0x20, 0x2b, 0x20, 0x74, 0x2e, 0x73, 0x31,
+  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e,
+  0x20, 0x74, 0x77, 0x6f, 0x53, 0x75, 0x6d, 0x28, 0x74, 0x2e, 0x73, 0x30,
+  0x2c, 0x20, 0x74, 0x2e, 0x73, 0x31, 0x29, 0x3b, 0x0a, 0x7d, 0x0a, 0x0a,
+  0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x20, 0x6d, 0x75, 0x6c, 0x46, 0x6c,
+  0x6f, 0x61, 0x74, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x20, 0x61,
+  0x2c, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x62, 0x29, 0x20, 0x7b,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x20,
+  0x74, 0x20, 0x3d, 0x20, 0x74, 0x77, 0x6f, 0x50, 0x72, 0x6f, 0x64, 0x28,
+  0x61, 0x2e, 0x73, 0x30, 0x2c, 0x20, 0x62, 0x29, 0x3b, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x74, 0x33, 0x20, 0x3d,
+  0x20, 0x28, 0x61, 0x2e, 0x73, 0x31, 0x20, 0x2a, 0x20, 0x62, 0x29, 0x20,
+  0x2b, 0x20, 0x74, 0x2e, 0x73, 0x31, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x74, 0x77, 0x6f, 0x53, 0x75,
+  0x6d, 0x28, 0x74, 0x2e, 0x73, 0x30, 0x2c, 0x20, 0x74, 0x2e, 0x73, 0x31,
+  0x29, 0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x5f, 0x5f, 0x6b, 0x65, 0x72, 0x6e,
+  0x65, 0x6c, 0x20, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x69, 0x74, 0x65, 0x72,
+  0x61, 0x74, 0x65, 0x28, 0x5f, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c,
+  0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x2a, 0x20, 0x41, 0x2c, 0x20, 0x63,
+  0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x77, 0x69, 0x64,
+  0x74, 0x68, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x78, 0x31, 0x2c, 0x20,
+  0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x78, 0x32, 0x2c, 0x20, 0x66, 0x6c,
+  0x6f, 0x61, 0x74, 0x20, 0x79, 0x31, 0x2c, 0x20, 0x66, 0x6c, 0x6f, 0x61,
+  0x74, 0x20, 0x79, 0x32, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x70, 0x77,
+  0x31, 0x2c, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x70, 0x77, 0x32,
+  0x2c, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x70, 0x68, 0x31, 0x2c,
+  0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x70, 0x68, 0x32, 0x2c, 0x20,
+  0x69, 0x6e, 0x74, 0x20, 0x6d, 0x61, 0x78, 0x2c, 0x20, 0x69, 0x6e, 0x74,
+  0x20, 0x73, 0x6d, 0x6f, 0x6f, 0x74, 0x68, 0x29, 0x20, 0x7b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x69, 0x6e, 0x64, 0x65, 0x78,
+  0x20, 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61,
+  0x6c, 0x5f, 0x69, 0x64, 0x28, 0x30, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x69, 0x6e, 0x74, 0x20, 0x70, 0x78, 0x20, 0x3d, 0x20, 0x69, 0x6e,
+  0x64, 0x65, 0x78, 0x20, 0x25, 0x20, 0x77, 0x69, 0x64, 0x74, 0x68, 0x3b,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x70, 0x79, 0x20,
+  0x3d, 0x20, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x2f, 0x20, 0x77, 0x69,
+  0x64, 0x74, 0x68, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c,
+  0x6f, 0x61, 0x74, 0x32, 0x20, 0x78, 0x6c, 0x20, 0x3d, 0x20, 0x28, 0x66,
+  0x6c, 0x6f, 0x61, 0x74, 0x32, 0x29, 0x28, 0x78, 0x31, 0x2c, 0x20, 0x78,
+  0x32, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61,
+  0x74, 0x32, 0x20, 0x79, 0x74, 0x20, 0x3d, 0x20, 0x28, 0x66, 0x6c, 0x6f,
+  0x61, 0x74, 0x32, 0x29, 0x28, 0x79, 0x31, 0x2c, 0x20, 0x79, 0x32, 0x29,
+  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32,
+  0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x58,
+  0x20, 0x3d, 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x29, 0x28,
+  0x70, 0x77, 0x31, 0x2c, 0x20, 0x70, 0x77, 0x32, 0x29, 0x3b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x20, 0x70, 0x69,
+  0x78, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x59, 0x20, 0x3d, 0x20,
+  0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x29, 0x28, 0x70, 0x68, 0x31,
+  0x2c, 0x20, 0x70, 0x68, 0x32, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x20, 0x61, 0x20, 0x3d, 0x20,
+  0x61, 0x64, 0x64, 0x28, 0x6d, 0x75, 0x6c, 0x46, 0x6c, 0x6f, 0x61, 0x74,
+  0x28, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x58,
+  0x2c, 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x20, 0x70, 0x78,
+  0x29, 0x2c, 0x20, 0x78, 0x6c, 0x29, 0x3b, 0x20, 0x2f, 0x2f, 0x20, 0x70,
+  0x69, 0x78, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x58, 0x20, 0x2a,
+  0x20, 0x70, 0x78, 0x20, 0x2b, 0x20, 0x78, 0x6c, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x20, 0x62, 0x20, 0x3d, 0x20,
+  0x61, 0x64, 0x64, 0x28, 0x6d, 0x75, 0x6c, 0x46, 0x6c, 0x6f, 0x61, 0x74,
+  0x28, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x59,
+  0x2c, 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x20, 0x70, 0x79,
+  0x29, 0x2c, 0x20, 0x79, 0x74, 0x29, 0x3b, 0x20, 0x2f, 0x2f, 0x20, 0x70,
+  0x69, 0x78, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x59, 0x20, 0x2a,
+  0x20, 0x70, 0x79, 0x20, 0x2b, 0x20, 0x79, 0x74, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x20, 0x63, 0x61, 0x20, 0x3d,
+  0x20, 0x61, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61,
+  0x74, 0x32, 0x20, 0x63, 0x62, 0x20, 0x3d, 0x20, 0x62, 0x3b, 0x0a, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x6e, 0x20, 0x3d, 0x20,
+  0x30, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x77, 0x68, 0x69, 0x6c, 0x65,
+  0x20, 0x28, 0x6e, 0x20, 0x3c, 0x20, 0x6d, 0x61, 0x78, 0x20, 0x2d, 0x20,
+  0x31, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x20, 0x61, 0x61, 0x20, 0x3d,
+  0x20, 0x6d, 0x75, 0x6c, 0x28, 0x61, 0x2c, 0x20, 0x61, 0x29, 0x3b, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61,
+  0x74, 0x32, 0x20, 0x62, 0x62, 0x20, 0x3d, 0x20, 0x6d, 0x75, 0x6c, 0x28,
+  0x62, 0x2c, 0x20, 0x62, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x20, 0x61, 0x62,
+  0x20, 0x3d, 0x20, 0x6d, 0x75, 0x6c, 0x28, 0x61, 0x2c, 0x20, 0x62, 0x29,
+  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66,
+  0x20, 0x28, 0x61, 0x61, 0x2e, 0x73, 0x30, 0x20, 0x2b, 0x20, 0x61, 0x61,
+  0x2e, 0x73, 0x31, 0x20, 0x2b, 0x20, 0x62, 0x62, 0x2e, 0x73, 0x30, 0x20,
+  0x2b, 0x20, 0x62, 0x62, 0x2e, 0x73, 0x31, 0x20, 0x3e, 0x20, 0x31, 0x36,
+  0x29, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x32, 0x20,
+  0x6d, 0x69, 0x6e, 0x75, 0x73, 0x62, 0x62, 0x20, 0x3d, 0x20, 0x28, 0x66,
+  0x6c, 0x6f, 0x61, 0x74, 0x32, 0x29, 0x28, 0x2d, 0x62, 0x62, 0x2e, 0x73,
+  0x30, 0x2c, 0x20, 0x2d, 0x62, 0x62, 0x2e, 0x73, 0x31, 0x29, 0x3b, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x20, 0x3d, 0x20,
+  0x61, 0x64, 0x64, 0x28, 0x61, 0x64, 0x64, 0x28, 0x61, 0x61, 0x2c, 0x20,
+  0x6d, 0x69, 0x6e, 0x75, 0x73, 0x62, 0x62, 0x29, 0x2c, 0x20, 0x63, 0x61,
+  0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62,
+  0x20, 0x3d, 0x20, 0x61, 0x64, 0x64, 0x28, 0x61, 0x64, 0x64, 0x28, 0x61,
+  0x62, 0x2c, 0x20, 0x61, 0x62, 0x29, 0x2c, 0x20, 0x63, 0x62, 0x29, 0x3b,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6e, 0x2b, 0x2b,
+  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x2f, 0x2f, 0x20, 0x4e, 0x20, 0x2b, 0x20, 0x31, 0x20, 0x2d, 0x20,
+  0x6c, 0x6f, 0x67, 0x20, 0x28, 0x6c, 0x6f, 0x67, 0x20, 0x20, 0x7c, 0x5a,
+  0x28, 0x4e, 0x29, 0x7c, 0x29, 0x20, 0x2f, 0x20, 0x6c, 0x6f, 0x67, 0x20,
+  0x32, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x6e, 0x20,
+  0x3e, 0x3d, 0x20, 0x6d, 0x61, 0x78, 0x20, 0x2d, 0x20, 0x31, 0x29, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x41, 0x5b, 0x69, 0x6e,
+  0x64, 0x65, 0x78, 0x5d, 0x20, 0x3d, 0x20, 0x6d, 0x61, 0x78, 0x3b, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x65, 0x6c, 0x73, 0x65, 0x20, 0x7b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x73,
+  0x6d, 0x6f, 0x6f, 0x74, 0x68, 0x20, 0x21, 0x3d, 0x20, 0x30, 0x29, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x41, 0x5b, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x20, 0x3d, 0x20, 0x28,
+  0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x20, 0x6e, 0x29, 0x20, 0x2b,
+  0x20, 0x31, 0x20, 0x2d, 0x20, 0x6c, 0x6f, 0x67, 0x28, 0x6c, 0x6f, 0x67,
+  0x28, 0x61, 0x2e, 0x73, 0x30, 0x20, 0x2a, 0x20, 0x61, 0x2e, 0x73, 0x30,
+  0x20, 0x2b, 0x20, 0x62, 0x2e, 0x73, 0x30, 0x20, 0x2a, 0x20, 0x62, 0x2e,
+  0x73, 0x30, 0x29, 0x20, 0x2f, 0x20, 0x32, 0x29, 0x20, 0x2f, 0x20, 0x6c,
+  0x6f, 0x67, 0x28, 0x32, 0x2e, 0x30, 0x66, 0x29, 0x3b, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x6c, 0x73, 0x65, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x41,
+  0x5b, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x28,
+  0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x6e, 0x29, 0x3b, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x41, 0x5b, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x20, 0x3d, 0x20,
+  0x28, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x6e, 0x29, 0x20, 0x2b,
+  0x20, 0x31, 0x20, 0x2d, 0x20, 0x28, 0x61, 0x20, 0x2a, 0x20, 0x61, 0x20,
+  0x2b, 0x20, 0x62, 0x20, 0x2a, 0x20, 0x62, 0x20, 0x2d, 0x20, 0x31, 0x36,
+  0x29, 0x20, 0x2f, 0x20, 0x28, 0x32, 0x35, 0x36, 0x20, 0x2d, 0x20, 0x31,
+  0x36, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x41, 0x5b, 0x67,
+  0x65, 0x74, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64,
+  0x28, 0x30, 0x29, 0x5d, 0x20, 0x3d, 0x20, 0x35, 0x3b, 0x0a, 0x7d, 0x0a
+};
+unsigned int doublefloat_cl_len = 2820;