ソースを参照

improved benchmarks

Nicolas Winkler 6 年 前
コミット
c399438643

+ 0 - 1
Almond.pro

@@ -38,7 +38,6 @@ HEADERS += \
         Almond.h \
         Bitmap.h \
         Color.h \
-        Fixed.h \
         MandelWidget.h \
         SectionManager.h \
         VideoStream.h \

+ 0 - 521
Fixed.h

@@ -1,521 +0,0 @@
-#pragma once
-
-#include <cinttypes>
-#include <cmath>
-
-struct Fixed128
-{
-    uint64_t upper;
-    uint64_t lower;
-
-    Fixed128(const Fixed128&) = default;
-    ~Fixed128() = default;
-
-
-    inline Fixed128(uint64_t upper, uint64_t lower) :
-        upper{ upper }, lower{ lower }
-    {
-    }
-
-    inline Fixed128(uint32_t a, uint32_t b, uint32_t c, uint32_t d) :
-        upper{ (uint64_t(a) << 32) | b }, lower{ (uint64_t(c) << 32) | d }
-    {
-    }
-
-    inline Fixed128(double x)
-    {
-        const double twoToThe32 = double(0x100000000ULL);
-        upper = uint64_t(int64_t(x * twoToThe32));
-        double remainder = x - double(upper) / twoToThe32;
-        lower = uint64_t(int64_t(x * twoToThe32 * twoToThe32 * twoToThe32));
-        /*int integerPart = ::floor(x);
-        double fractionalPart = x - integerPart;
-        upper = int64_t(integerPart) << 32;
-        upper |= uint64_t(fractionalPart * (1ULL << 32)) & 0xFFFFFFFFULL;
-        lower = 0;// uint64_t(fractionalPart * (1ULL << 32) * (1ULL << 63) * 2);*/
-    }
-
-    inline Fixed128 operator + (const Fixed128& other) const {
-        uint64_t lowerAdded = lower + other.lower;
-        uint64_t upperAdded = upper + other.upper + (lowerAdded < lower);
-        return Fixed128{ upperAdded, lowerAdded };
-    }
-    
-    inline Fixed128& operator +=(const Fixed128& other) {
-        uint64_t lowerAdded = lower + other.lower;
-        upper += other.upper + (lowerAdded < lower);
-        lower = lowerAdded;
-        return *this;
-    }
-
-    inline Fixed128 operator - (const Fixed128& other) const {
-        uint64_t lowerSubbed = lower - other.lower;
-        uint64_t upperSubbed = upper - other.upper - (lowerSubbed > lower);
-        return Fixed128{ upperSubbed, lowerSubbed };
-    }
-
-    inline Fixed128 operator - (void) const {
-        return this->operator~() + Fixed128{ 0, 0, 0, 1 };
-    }
-
-//private:
-    static inline std::pair<uint64_t, uint64_t> mul64(int64_t a, int64_t b) {
-        int32_t aa[2] = { a >> 32, a & 0xFFFFFFFF };
-        int32_t bb[2] = { b >> 32, b & 0xFFFFFFFF };
-
-        int32_t res[4];
-        int64_t temp = int64_t(aa[1]) * bb[1];
-        res[3] = temp & 0xFFFFFFFF;
-        int32_t carry = temp >> 32;
-        temp = int64_t(aa[0]) * bb[1] + int64_t(aa[1]) * bb[0] + carry;
-        res[2] = temp & 0xFFFFFFFF;
-        carry = temp >> 32;
-        temp = int64_t(aa[0]) * bb[0] + carry;
-        res[1] = temp & 0xFFFFFFFF;
-        res[0] = temp >> 32;
-
-        return std::make_pair(uint64_t((int64_t(res[0]) << 32) | res[1]), uint64_t((int64_t(res[2]) << 32) | res[3]));
-    }
-
-    static inline std::pair<uint64_t, uint64_t> mulu64(uint64_t a, uint64_t b) {
-        uint32_t aa[2] = { a >> 32, a & 0xFFFFFFFF };
-        uint32_t bb[2] = { b >> 32, b & 0xFFFFFFFF };
-
-        uint32_t res[4];
-        uint64_t temp = uint64_t(aa[1]) * bb[1];
-        res[3] = temp & 0xFFFFFFFF;
-        uint32_t carry = temp >> 32;
-        temp = uint64_t(aa[0]) * bb[1] + uint64_t(aa[1]) * bb[0] + carry;
-        res[2] = temp & 0xFFFFFFFF;
-        carry = temp >> 32;
-        temp = uint64_t(aa[0]) * bb[0] + carry;
-        res[1] = temp & 0xFFFFFFFF;
-        res[0] = temp >> 32;
-
-        return std::make_pair((uint64_t(res[0]) << 32) | res[1], (uint64_t(res[2]) << 32) | res[3] );
-    }
-
-public:
-    inline Fixed128 operator * (const Fixed128& other) const {
-        if (isNegative()) {
-            return -(other * this->operator-());
-        }
-        if (other.isNegative()) {
-            return -(*this * (-other));
-        }
-        auto [uuc, uu] = mulu64(upper, other.upper);
-        auto [ulc, ul] = mulu64(upper, other.lower);
-        auto [luc, lu] = mulu64(lower, other.upper);
-        auto [llc, ll] = mulu64(lower, other.lower);
-
-        uint64_t res[4] = { 0, 0, 0, 0 };
-        res[3] = ll;
-        res[2] += lu;
-        res[2] += ul;
-        if (res[2] < ul)
-            res[1]++;
-        res[2] += llc;
-        if (res[2] < llc)
-            res[1]++;
-        res[1] += uu;
-        if (res[1] < uu)
-            res[0]++;
-        res[1] += ulc;
-        if (res[1] < ulc)
-            res[0]++;
-        res[1] += luc;
-        if (res[1] < luc)
-            res[0]++;
-        res[0] += uuc;
-
-
-        return Fixed128{ uint32_t(res[0] & 0xFFFFFFFF), uint32_t(int64_t(res[1]) >> 32), uint32_t(res[1] & 0xFFFFFFFF), uint32_t(int64_t(res[2]) >> 32) };
-
-        /*if (isNegative()) {
-            return -(this->operator-() * other);
-        }
-        if (other.isNegative()) {
-            return -(*this * (-other));
-        }
-
-        bool otherNegative = other.isNegative();
-
-        uint32_t quarters[4] = {
-            (upper >> 32) & 0xFFFFFFFF,
-            upper & 0xFFFFFFFF,
-            (lower >> 32) & 0xFFFFFFFF,
-            lower & 0xFFFFFFFF
-        };
-
-        auto [a, ra] = other.mul(quarters[0]);
-        auto [b, rb] = other.mul(quarters[1]);
-        auto [c, rc] = other.mul(quarters[2]);
-        auto [d, rd] = other.mul(quarters[3]);
-        b.arshift(1);
-        c.arshift(2);
-        d.arshift(3);
-        Fixed128 carries = { uint32_t(rb), uint32_t(rc), uint32_t(rd), 0 };
-        Fixed128 result = a + b + c + d + carries;
-        return result;*/
-    }
-
-    inline std::pair<Fixed128, uint32_t> mul(uint32_t factor) const {
-        uint32_t quarters[4] = {
-            (upper >> 32) & 0xFFFFFFFF,
-            upper & 0xFFFFFFFF,
-            (lower >> 32) & 0xFFFFFFFF,
-            lower & 0xFFFFFFFF
-        };
-        uint32_t newQ[4];
-        uint32_t carry = 0;
-        for (int i = 3; i >= 0; i--) {
-            int64_t prod = int64_t(quarters[i]) * factor + carry;
-            newQ[i] = prod & 0xFFFFFFFF;
-            carry = prod >> 32;
-        }
-        /*    newQ[i] = quarters[i] * factor;
-        uint64_t tempLower = newQ[3];
-        uint64_t newLower = tempLower + (newQ[2] << 32);
-        uint64_t newUpper = (newQ[2] >> 32) + newQ[1] + (newQ[0] << 32) + (newLower < tempLower ? 1 : 0);*/
-        return std::make_pair(Fixed128{ newQ[0], newQ[1], newQ[2], newQ[3] }, carry);
-    }
-
-    /*
-    inline void arshift(int fac32) {
-        uint32_t temp = 0;
-        switch (fac32) {
-        case 0:
-            return;
-        case 1:
-            temp = upper & 0xFFFFFFFF;
-            upper = uint64_t(int64_t(upper) >> 32);
-            lower >>= 32;
-            lower |= uint64_t(temp) << 32;
-        case 2:
-            lower = upper;
-            upper = uint64_t(int64_t(upper) >> 63);
-        case 3:
-            lower = uint64_t(int64_t(upper) >> 32);
-            upper = uint64_t(int64_t(upper) >> 63);
-        default:
-            lower = uint64_t(int64_t(upper) >> 63);
-            upper = uint64_t(int64_t(upper) >> 63);
-        }
-    }*/
-    /*
-    inline Fixed128 operator * (const Fixed128& other) const {
-        int32_t quarters[4] = {
-            (upper >> 32) & 0xFFFFFFFF,
-            upper & 0xFFFFFFFF,
-            (lower >> 32) & 0xFFFFFFFF,
-            lower & 0xFFFFFFFF
-        };
-
-        int32_t otherQuarters[4] = {
-            (other.upper >> 32) & 0xFFFFFFFF,
-            other.upper & 0xFFFFFFFF,
-            (other.lower >> 32) & 0xFFFFFFFF,
-            other.lower & 0xFFFFFFFF
-        };
-
-        int64_t prods[4][4];
-        for (int i = 0; i < 4; i++) {
-            for (int j = 0; j < 4 && j + i < 5; j++) {
-                if (i == 0 || j == 0)
-                    prods[i][j] = int64_t(quarters[i]) * int64_t(otherQuarters[j]);
-                else
-                    prods[i][j] = uint64_t(uint32_t(quarters[i])) * uint64_t(uint32_t(otherQuarters[j]));
-            }
-        }
-
-        Fixed128 ret = { 0, 0 };
-        for (int i = 0; i < 4; i++) {
-            for (int j = 0; j < 4 && j + i < 5; j++) {
-                if (i == 0 || j == 0)
-                    ret.addSigned(prods[i][j], i + j);
-                else
-                    ret.add(prods[i][j], i + j);
-            }
-        }
-        return ret;
-        
-        /*
-        int64_t x00 = int64_t(quarters[0]) * int64_t(otherQuarters[0]);
-        int64_t x01 = int64_t(quarters[0]) * int64_t(otherQuarters[1]);
-        int64_t x02 = int64_t(quarters[0]) * int64_t(otherQuarters[2]);
-        int64_t x03 = int64_t(quarters[0]) * int64_t(otherQuarters[3]);
-        int64_t x10 = int64_t(quarters[1]) * int64_t(otherQuarters[0]);
-        int64_t x11 = int64_t(quarters[1]) * int64_t(otherQuarters[1]);
-        int64_t x12 = int64_t(quarters[1]) * int64_t(otherQuarters[2]);
-        int64_t x13 = int64_t(quarters[1]) * int64_t(otherQuarters[3]);
-        int64_t x20 = int64_t(quarters[2]) * int64_t(otherQuarters[0]);
-        int64_t x21 = int64_t(quarters[2]) * int64_t(otherQuarters[1]);
-        int64_t x22 = int64_t(quarters[2]) * int64_t(otherQuarters[2]);
-        int64_t x30 = int64_t(quarters[3]) * int64_t(otherQuarters[0]);
-        int64_t x31 = int64_t(quarters[3]) * int64_t(otherQuarters[1]);
-
-        Fixed128 ret = { 0, 0 };
-        /*uint32_t newQuarters[4] = {
-            x00,
-            x01 + x10,
-            x02 + x11 + x20,
-            x03 + x12 + x21 + x30,
-        };*//*
-        ret.add(x00, 0);
-        ret.add(x01 + x10, 1);
-        ret.add(x02 + x11 + x20, 2);
-        ret.add(x03 + x12 + x21 + x30, 3);
-        ret.add(x13 + x22 + x31, 4);
-
-        return ret;*/
-    /*}*/
-
-private:
-    inline void add(uint64_t val, int b32offset) {
-        switch (b32offset) {
-        case 0:
-            upper += val << 32;
-            return;
-        case 1:
-            upper += val;
-            return;
-        case 2:
-            upper += val >> 32;
-            lower += val << 32;
-            return;
-        case 3: {
-            uint64_t newLower = lower + val;
-            if (newLower < lower) upper++;
-            lower = newLower;
-            return;
-        }
-        case 4:
-            uint64_t newLower = lower + (val >> 32);
-            if (lower > newLower) upper++;
-            lower += newLower;
-            return;
-        }
-    }
-    inline void addSigned(int64_t val, int b32offset) {
-        switch (b32offset) {
-        case 0:
-            upper += val << 32;
-            return;
-        case 1:
-            upper += val;
-            return;
-        case 2:
-            upper += val >> 32;
-            lower += val << 32;
-            return;
-        case 3:
-            lower += val;
-            if (val < 0) upper--;
-            return;
-        
-        case 4: {
-            uint64_t newLower = lower + (val >> 32);
-            if (lower > newLower) upper++;
-            lower = newLower;
-            return;
-        }
-        default:
-            if (val < 0) {
-                if (lower == 0) upper--;
-                lower--;
-            }
-            return;
-        }
-    }
-public:
-
-    bool isNegative(void) const {
-        return (upper & (uint64_t(1) << 63)) != 0;
-    }
-
-    operator double(void) const {
-        const int64_t twoToThe32 = 0x100000000ULL;
-        return double(int64_t(upper)) / twoToThe32 + int64_t(lower) / twoToThe32 / twoToThe32 / twoToThe32;
-    }
-
-    inline Fixed128 operator ~ (void) const {
-        return Fixed128{ ~upper, ~lower };
-    }
-
-    inline bool operator == (const Fixed128& other) const {
-        return upper == other.upper && lower == other.lower;
-    }
-
-    inline bool operator != (const Fixed128& other) const {
-        return !operator==(other);
-    }
-
-    inline bool operator < (const Fixed128& other) const {
-        return upper < other.upper || (upper == other.upper && lower < other.lower);
-    }
-
-    inline bool operator <= (const Fixed128& other) const {
-        return operator<(other) || operator==(other);
-    }
-
-    inline bool operator > (const Fixed128& other) const {
-        return upper > other.upper || (upper == other.upper && lower > other.lower);
-    }
-
-    inline bool operator >= (const Fixed128& other) const {
-        return operator>(other) || operator==(other);
-    }
-};
-
-struct Fixed64
-{
-    bool sign;
-    uint64_t bits;
-
-    Fixed64(const Fixed64&) = default;
-    ~Fixed64() = default;
-
-
-    inline Fixed64(uint64_t bits, bool dummy) :
-        bits{ bits }
-    {
-    }
-
-    inline Fixed64(double x)
-    {
-        if (x < 0) {
-            sign = true;
-            x *= -1;
-        }
-        else {
-            sign = false;
-        }
-        int integerPart = int(x);
-        double fractionalPart = x - integerPart;
-        bits = uint64_t(integerPart) << 32;
-        bits |= uint64_t(fractionalPart * (1ULL << 32)) & 0xFFFFFFFF;
-    }
-
-    inline Fixed64 operator + (const Fixed64& other) {
-        return Fixed64{ bits + other.bits, true };
-    }
-    
-    inline Fixed64& operator +=(const Fixed64& other) {
-        bits += other.bits;
-        return *this;
-    }
-
-    inline Fixed64 operator - (const Fixed64& other) {
-        return Fixed64{ bits - other.bits, true };
-    }
-
-    inline Fixed64 operator * (const Fixed64& other) {
-        /*int32_t upper = bits >> 32;
-        uint32_t lower = uint32_t(bits & 0xFFFFFFFF);
-        int64_t upup = int64_t(upper) * int64_t(upper);
-        int64_t loup = int64_t(upper) * int64_t(lower);
-        int64_t lolo = int64_t(lower) * int64_t(lower);
-
-        int32_t newUp = upup & 0xFFFFFFFF + (loup >> 32);
-        int32_t newLo = loup & 0xFFFFFFFF + (lolo >> 32);*/
-        double d = int32_t(bits >> 32) + double(uint32_t(bits)) / (1ULL << 32);
-        double od = int32_t(other.bits >> 32) + double(uint32_t(other.bits)) / (1ULL << 32);
-        return d * od * (other.sign != sign) ? -1 : 1;
-
-        //return Fixed64{ (uint64_t(newUp) << 32) | newLo, true };
-    }
-
-    inline bool operator == (const Fixed64& other) {
-        return bits == other.bits;
-    }
-
-    inline bool operator != (const Fixed64& other) {
-        return !operator==(other);
-    }
-
-    inline bool operator < (const Fixed64& other) {
-        return bits < other.bits;
-    }
-
-    inline bool operator <= (const Fixed64& other) {
-        return operator<(other) || operator==(other);
-    }
-
-    inline bool operator > (const Fixed64& other) {
-        return bits > other.bits;
-    }
-
-    inline bool operator >= (const Fixed64& other) {
-        return operator>(other) || operator==(other);
-    }
-};
-
-struct Fixed32
-{
-    int32_t bits;
-
-    Fixed32(const Fixed32&) = default;
-    ~Fixed32() = default;
-
-
-    inline Fixed32(int32_t bits, bool dummy) :
-        bits{ bits }
-    {
-    }
-
-    inline Fixed32(double x)
-    {
-        int integerPart = ::floor(x);
-        double fractionalPart = x - integerPart;
-        /*if (x < 0) {
-            integerPart--;
-            fractionalPart = 1.0 - fractionalPart;
-        }*/
-        bits = int32_t(integerPart) << 16;
-        bits |= uint32_t(fractionalPart * (1ULL << 16)) & 0xFFFF;
-    }
-
-    inline Fixed32 operator + (const Fixed32& other) {
-        return Fixed32{ bits + other.bits, true };
-    }
-    
-    inline Fixed32& operator +=(const Fixed32& other) {
-        bits += other.bits;
-        return *this;
-    }
-
-    inline Fixed32 operator - (const Fixed32& other) {
-        return Fixed32{ bits - other.bits, true };
-    }
-
-    inline Fixed32 operator * (const Fixed32& other) {
-        int64_t prod = int64_t(bits) * int64_t(other.bits);
-        return Fixed32{ int32_t(prod >> 16), true };
-        //return Fixed32{ (uint64_t(newUp) << 32) | newLo, true };
-    }
-
-    inline bool operator == (const Fixed32& other) {
-        return bits == other.bits;
-    }
-
-    inline bool operator != (const Fixed32& other) {
-        return !operator==(other);
-    }
-
-    inline bool operator < (const Fixed32& other) {
-        return bits < other.bits;
-    }
-
-    inline bool operator <= (const Fixed32& other) {
-        return operator<(other) || operator==(other);
-    }
-
-    inline bool operator > (const Fixed32& other) {
-        return bits > other.bits;
-    }
-
-    inline bool operator >= (const Fixed32& other) {
-        return operator>(other) || operator==(other);
-    }
-};
-

+ 8 - 4
MandelWidget.cpp

@@ -82,13 +82,17 @@ void MandelView::adaptViewport(const MandelViewport& vp)
             do {
                 //static ClGenerator cpg;
                 MandelInfo mi;
-                mi.bWidth = 1024;//ql.geometry().width();
-                mi.bHeight = 1024; //ql.geometry().height();
+                mi.bWidth = 200;//ql.geometry().width();
+                mi.bHeight = 200; //ql.geometry().height();
                 mi.maxIter = 4000;
                 mi.view = toCalc;
                 auto fmap = Bitmap<float>(mi.bWidth, mi.bHeight);
                 generator.generate(mi, fmap.pixels.get());
-                auto bitmap = fmap.map<RGBColor>([](float i) { return i < 0 ? RGBColor{ 0,0,0 } : RGBColor{ uint8_t(cos(i * 0.015f) * 127 + 127), uint8_t(sin(i * 0.01f) * 127 + 127), uint8_t(i) }; });//uint8_t(::sin(i * 0.01f) * 100 + 100), uint8_t(i) }; });
+                auto bitmap = fmap.map<RGBColor>([&mi](float i) { return i > mi.maxIter ?
+                                RGBColor{ 0,0,0 } :
+                                RGBColor{ uint8_t(cos(i * 0.015f) * 127 + 127),
+                                          uint8_t(sin(i * 0.01f) * 127 + 127),
+                                          uint8_t(i) }; });//uint8_t(::sin(i * 0.01f) * 100 + 100), uint8_t(i) }; });
                 emit updated(new Bitmap<RGBColor>(std::move(bitmap)));
             } while(hasToCalc.exchange(false));
         });
@@ -103,7 +107,7 @@ void MandelView::adaptViewport(const MandelViewport& vp)
 MandelWidget::MandelWidget(mnd::MandelContext& ctxt, QWidget* parent) :
     QGLWidget{ QGLFormat(QGL::SampleBuffers), parent },
     mndContext{ ctxt },
-    mv{ ctxt.getDefaultGenerator() }
+    mv{ ctxt.getCpuGenerator128() }
 {
     this->setContentsMargins(0, 0, 0, 0);
     this->setSizePolicy(QSizePolicy::Expanding,

+ 141 - 34
benchmarkdialog.cpp

@@ -2,33 +2,14 @@
 #include <chrono>
 #include <cmath>
 
-BenchmarkDialog::BenchmarkDialog(mnd::MandelContext& mndContext, QWidget *parent) :
-    QDialog(parent),
-    mndContext{ mndContext }
-{
-    ui.setupUi(this);
-
-    auto& devices = mndContext.getDevices();
-    int nDevices = devices.size() + 1;
-    ui.tableWidget->setColumnCount(2);
-    ui.tableWidget->setRowCount(nDevices);
-    ui.tableWidget->setHorizontalHeaderLabels({"Single Precision", "Double Precision"});
-
-    QString cpuDesc = ("CPU [" + mndContext.getCpuInfo().getBrand() + "]").c_str();
-    ui.tableWidget->setVerticalHeaderItem(0, new QTableWidgetItem(cpuDesc));
-    for (int i = 0; i < devices.size(); i++) {
-        QString cpuDesc = ("GPU " + std::to_string(i + 1) + " [" + devices[i].getVendor() + " " + devices[i].getName() + "]").c_str();
-        ui.tableWidget->setVerticalHeaderItem(i + 1, new QTableWidgetItem(cpuDesc));
-    }
-}
 
-mnd::MandelViewport BenchmarkDialog::benchViewport(void) const
+// Returns the fixed viewport used for all benchmark workloads, so every
+// generator is measured on the same region.
+// NOTE(review): the four literals presumably are x, y, width, height --
+// confirm against the MandelViewport member order.
+mnd::MandelViewport Benchmarker::benchViewport(void) const
 {
     return mnd::MandelViewport{ -0.758267525104592591494, -0.066895616551111110830, 0.000000043217777777655, 0.000000043217777777655 };
 }
 
 
-double BenchmarkDialog::measureMips(const std::function<Bitmap<float>()>& bench) const
+double Benchmarker::measureMips(const std::function<Bitmap<float>()>& bench) const
 {
     using namespace std::chrono;
     auto before = high_resolution_clock::now();
@@ -41,45 +22,171 @@ double BenchmarkDialog::measureMips(const std::function<Bitmap<float>()>& bench)
     }
 
     double iterPerNanos = double(sum) / duration_cast<nanoseconds>(after - before).count();
-    printf("test took %lld nanos\n", duration_cast<nanoseconds>(after - before).count());
+    printf("test took %lld millis\n", duration_cast<milliseconds>(after - before).count());
     printf("test did %lld iters\n", sum);
     double megaItersPerSecond = iterPerNanos * 1000.0;
     return megaItersPerSecond;
 }
 
-QString BenchmarkDialog::benchmarkResult(mnd::Generator& mg, int size, int iters) const
+// Benchmarks the generator `mg` and returns the value reported by
+// measureMips() (mega-iterations per second).
+//
+// A small 250x250 calibration render is timed first. If it scores below 100
+// that score is returned directly; otherwise a second, larger workload is
+// chosen from `benches` according to the calibration score and its result is
+// returned instead.
+double Benchmarker::benchmarkResult(mnd::Generator& mg) const
 {
+    // Calibration workload: small enough to finish quickly on slow generators.
     mnd::MandelInfo mi;
-    mi.bWidth = size;
-    mi.bHeight = size;
-    mi.maxIter = iters;
+    mi.bWidth = 250;
+    mi.bHeight = 250;
+    mi.maxIter = 4000;
     mi.view = benchViewport();
-    double megaItersPerSecond = measureMips([&mg, &mi] () {
+    double testValue = measureMips([&mg, &mi] () {
         Bitmap<float> bmp(mi.bWidth, mi.bHeight);
         mg.generate(mi, bmp.pixels.get());
         return bmp;
     });
 
-    QString mips = QString::number(megaItersPerSecond, 'f', 2);
-    return mips;
+    printf("testbench: %lf\n", testValue);
+
+    // Each entry is { upper bound on the calibration score, workload }.
+    // The first entry whose bound exceeds the calibration score is run; the
+    // last bound is max() so that some entry always matches.
+    // NOTE(review): the MandelInfo initializers presumably are
+    // { view, bWidth, bHeight, maxIter } -- confirm against the struct.
+    // NOTE(review): 2000 x 1000 is the only non-square workload -- confirm
+    // that this is intended.
+    std::vector<std::pair<double, mnd::MandelInfo>> benches {
+        { 200, mnd::MandelInfo{ benchViewport(), 750, 750, 5000} },
+        { 500, mnd::MandelInfo{ benchViewport(), 2000, 1000, 7500} },
+        { 2000, mnd::MandelInfo{ benchViewport(), 2000, 2000, 15000} },
+        { 5000, mnd::MandelInfo{ benchViewport(), 3000, 3000, 30000} },
+        { 10000, mnd::MandelInfo{ benchViewport(), 4000, 4000, 75000} },
+        { 100000, mnd::MandelInfo{ benchViewport(), 6000, 6000, 750000} },
+        { std::numeric_limits<double>::max(), mnd::MandelInfo{ benchViewport(), 7000, 7000, 1000000} }
+    };
+
+    double megaItersPerSecond = 0.0;
+    if (testValue < 100) {
+        // Slow generator: report the calibration score, no second run.
+        megaItersPerSecond = testValue;
+    }
+    else {
+        for (auto& [thresh, info] : benches) {
+            // A structured binding cannot be captured by a lambda in C++17,
+            // so bind it to an ordinary reference first.
+            auto& m = info;
+            if (testValue < thresh) {
+                megaItersPerSecond = measureMips([&mg, &m] () {
+                    Bitmap<float> bmp(m.bWidth, m.bHeight);
+                    mg.generate(m, bmp.pixels.get());
+                    return bmp;
+                });
+                break;
+            }
+        }
+    }
+
+
+
+    return megaItersPerSecond;
 }
 
-void BenchmarkDialog::on_run_clicked()
+
+// Slot that runs the whole benchmark suite: the three CPU generators first,
+// then the float and double generators of every device that provides them.
+// After each measurement the accumulated BenchmarkResult is emitted through
+// update(); finished() is emitted once at the end.
+void Benchmarker::start(void)
 {
     mnd::Generator& cpuf = mndContext.getCpuGeneratorFloat();
     mnd::Generator& cpud = mndContext.getCpuGeneratorDouble();
+    mnd::Generator& cpu128 = mndContext.getCpuGenerator128();
 
-    ui.tableWidget->setItem(0, 0, new QTableWidgetItem(benchmarkResult(cpuf, 1000, 5000)));
-    ui.tableWidget->setItem(0, 1, new QTableWidgetItem(benchmarkResult(cpud, 1000, 5000)));
+    // Count the measurements up front so progress can be split evenly.
+    // The initial 3 are the CPU float, double and 128-bit generators.
+    double nTests = 3;
 
     auto& devices = mndContext.getDevices();
     for (int i = 0; i < devices.size(); i++) {
         if (mnd::Generator* gpuf; gpuf = devices[i].getGeneratorFloat()) {
-            ui.tableWidget->setItem(i + 1, 0, new QTableWidgetItem(benchmarkResult(*gpuf, 4000, 5000)));
+            nTests++;
+        }
+        if (mnd::Generator* gpud; gpud = devices[i].getGeneratorDouble()) {
+            nTests++;
+        }
+    }
+
+    // The first 10 percent are reported immediately; the remaining 90 are
+    // shared equally between the measurements.
+    double progress = 90.0 / nTests;
+
+    BenchmarkResult br;
+    br.values.push_back({});
+    br.percentage = 10;
+
+    emit update(br);
+
+    // Row 0 holds the CPU results.
+    // NOTE(review): `cpu` refers into br.values and is invalidated by the
+    // push_back calls in the device loop below; it is not used after them.
+    std::vector<double>& cpu = br.values[0];
+    cpu.push_back(benchmarkResult(cpuf));
+    br.percentage += progress;
+    emit update(br);
+    cpu.push_back(benchmarkResult(cpud));
+    br.percentage += progress;
+    emit update(br);
+    cpu.push_back(benchmarkResult(cpu128));
+    br.percentage += progress;
+    emit update(br);
+
+    // One further row per device, results appended in measurement order.
+    // NOTE(review): BenchmarkDialog::update uses the position inside the row
+    // as the table column, so for a device without a float generator the
+    // double result lands at index 0 -- confirm whether a placeholder is needed.
+    for (int i = 0; i < devices.size(); i++) {
+        br.values.push_back({});
+        std::vector<double>& gpu = br.values[br.values.size() - 1];
+        if (mnd::Generator* gpuf; gpuf = devices[i].getGeneratorFloat()) {
+            gpu.push_back(benchmarkResult(*gpuf));
+            br.percentage += progress;
+            emit update(br);
         }
         if (mnd::Generator* gpud; gpud = devices[i].getGeneratorDouble()) {
-            ui.tableWidget->setItem(i + 1, 1, new QTableWidgetItem(benchmarkResult(*gpud, 4000, 5000)));
+            gpu.push_back(benchmarkResult(*gpud));
+            br.percentage += progress;
+            emit update(br);
+        }
+    }
+    printf("benchmark finished\n");
+    emit update(br);
+    emit finished();
+}
+
+
+// Builds the benchmark dialog.
+//
+// Sets up the result table (one row for the CPU plus one per device reported
+// by `mndContext`, one column per precision) and wires the Benchmarker worker
+// to `benchThread`, so that starting the thread runs the benchmarks and the
+// worker's update() signal feeds BenchmarkDialog::update().
+//
+// @param mndContext context whose CPU info and device list label the rows
+// @param parent     parent widget, forwarded to QDialog
+BenchmarkDialog::BenchmarkDialog(mnd::MandelContext& mndContext, QWidget *parent) :
+    QDialog(parent),
+    mndContext{ mndContext },
+    benchmarker{ mndContext }
+{
+    ui.setupUi(this);
+
+    auto& devices = mndContext.getDevices();
+    // Convert once so that row indices and the loop bound share one signed type.
+    const int nDevices = static_cast<int>(devices.size());
+    ui.tableWidget->setColumnCount(3);
+    ui.tableWidget->setRowCount(nDevices + 1);
+    ui.tableWidget->setHorizontalHeaderLabels({"Single Precision", "Double Precision", "128-bit Fixed Point"});
+
+    QString cpuDesc = ("CPU [" + mndContext.getCpuInfo().getBrand() + "]").c_str();
+    ui.tableWidget->setVerticalHeaderItem(0, new QTableWidgetItem(cpuDesc));
+    for (int i = 0; i < nDevices; i++) {
+        QString gpuDesc = ("GPU " + std::to_string(i + 1) + " [" + devices[i].getVendor() + " " + devices[i].getName() + "]").c_str();
+        ui.tableWidget->setVerticalHeaderItem(i + 1, new QTableWidgetItem(gpuDesc));
+    }
+
+    // BenchmarkResult is passed through a signal between threads, so it has
+    // to be known to the meta-type system.
+    qRegisterMetaType<BenchmarkResult>();
+
+    // The worker lives on benchThread: starting the thread starts the run,
+    // and the worker's finished() signal stops the thread's event loop again.
+    // Pointer-to-member connects are checked at compile time, unlike the
+    // string-based SIGNAL/SLOT form.
+    benchmarker.moveToThread(&benchThread);
+    connect(&benchThread, &QThread::started, &benchmarker, &Benchmarker::start);
+    connect(&benchmarker, &Benchmarker::finished, &benchThread, &QThread::quit);
+    connect(&benchmarker, &Benchmarker::update, this, &BenchmarkDialog::update);
+}
+
+
+// Slot that displays a benchmark snapshot.
+//
+// Writes br.values[row][col] into the table cell (row, col) for every value
+// present and sets the progress bar to the integer part of br.percentage.
+// An empty result only updates the progress bar.
+//
+// @param br snapshot emitted by Benchmarker::update
+void BenchmarkDialog::update(BenchmarkResult br)
+{
+    for (int row = 0; row < int(br.values.size()); row++) {
+        const std::vector<double>& rowValues = br.values[row];
+        for (int col = 0; col < int(rowValues.size()); col++) {
+            // The table takes ownership of the item passed to setItem().
+            ui.tableWidget->setItem(row, col, new QTableWidgetItem(QString::number(rowValues[col])));
+        }
+    }
+    ui.progressBar->setValue(int(br.percentage));
+}
+
+
+// Slot that starts a benchmark run on benchThread.
+// Does nothing while a previous run is still in progress.
+// NOTE(review): presumably auto-connected by setupUi() to the clicked()
+// signal of the widget named `run` -- confirm in benchmarks.ui.
+void BenchmarkDialog::on_run_clicked()
+{
+    // A run is already active; ignore the request.
+    if (benchThread.isRunning())
+        return;
+
+    benchThread.start();
 }

+ 37 - 4
benchmarkdialog.h

@@ -6,6 +6,40 @@
 #include "ui_benchmarks.h"
 #include <Mandel.h>
 #include "Bitmap.h"
+#include <QThread>
+
+
+// Snapshot of the benchmark state that Benchmarker emits and
+// BenchmarkDialog displays.
+struct BenchmarkResult
+{
+    // One inner vector per table row (row 0 is the CPU, followed by one row
+    // per device); values are stored in the order they were measured.
+    std::vector<std::vector<double>> values;
+    // Progress in percent; shown in the dialog's progress bar (maximum 100).
+    double percentage = 0.0;
+};
+
+Q_DECLARE_METATYPE(BenchmarkResult)
+
+// Worker object that runs the benchmarks. BenchmarkDialog moves it to a
+// separate QThread and listens to its update()/finished() signals.
+class Benchmarker : public QObject
+{
+    Q_OBJECT
+private:
+    // Context owned by this worker; its generators and devices are benchmarked.
+    mnd::MandelContext mndContext;
+public:
+    // NOTE(review): the constructor argument is not used -- the member is
+    // initialized from mnd::initializeContext() instead. Presumably this is
+    // deliberate so the worker thread has its own context; confirm.
+    inline Benchmarker(mnd::MandelContext& mndContext) :
+        mndContext{ mnd::initializeContext() }
+    {
+    }
+
+    // Viewport that every benchmark workload renders.
+    mnd::MandelViewport benchViewport(void) const;
+
+    // Times `bench` and returns the measured mega-iterations per second.
+    double measureMips(const std::function<Bitmap<float>()>& bench) const;
+    // Benchmarks a single generator and returns its measureMips() score.
+    double benchmarkResult(mnd::Generator& mg) const;
+
+public slots:
+    // Runs all benchmarks, emitting update() after each and finished() at the end.
+    void start(void);
+signals:
+    // Emitted with the results collected so far and the current progress.
+    void update(BenchmarkResult br);
+    // Emitted once after the last benchmark has completed.
+    void finished(void);
+};
+
 
 class BenchmarkDialog : public QDialog
 {
@@ -13,17 +47,16 @@ class BenchmarkDialog : public QDialog
 private:
     Ui::BenchmarkDialog ui;
     mnd::MandelContext& mndContext;
+    QThread benchThread;
+    Benchmarker benchmarker;
 public:
     explicit BenchmarkDialog(mnd::MandelContext& mndContext, QWidget *parent = nullptr);
 
-    mnd::MandelViewport benchViewport(void) const;
-
-    double measureMips(const std::function<Bitmap<float>()>& bench) const;
-    QString benchmarkResult(mnd::Generator& mg, int size, int iters) const;
 
 signals:
 
 public slots:
+    void update(BenchmarkResult br);
 private slots:
     void on_run_clicked();
 };

+ 19 - 0
benchmarks.ui

@@ -45,6 +45,25 @@
       </widget>
      </item>
      <item>
+      <widget class="QProgressBar" name="progressBar">
+       <property name="enabled">
+        <bool>false</bool>
+       </property>
+       <property name="maximum">
+        <number>100</number>
+       </property>
+       <property name="value">
+        <number>0</number>
+       </property>
+       <property name="textVisible">
+        <bool>false</bool>
+       </property>
+       <property name="invertedAppearance">
+        <bool>false</bool>
+       </property>
+      </widget>
+     </item>
+     <item>
       <widget class="QTableWidget" name="tableWidget">
        <property name="editTriggers">
         <set>QAbstractItemView::NoEditTriggers</set>

+ 8 - 0
libmandel/include/CpuGenerators.h

@@ -7,6 +7,7 @@ namespace mnd
 {
     class CpuGeneratorFloat;
     class CpuGeneratorDouble;
+    class CpuGenerator128;
 
     class CpuGeneratorSse2Float;
     class CpuGeneratorSse2Double;
@@ -33,6 +34,13 @@ public:
 };
 
 
+// CPU generator whose iteration loop is carried out with the Fixed128
+// fixed-point type (implemented in CpuGenerators.cpp).
+class mnd::CpuGenerator128 : public Generator
+{
+public:
+    // Renders the view described by `info` into `data`, writing one float per
+    // pixel at index x + y * info.bWidth.
+    virtual void generate(const MandelInfo& info, float* data);
+};
+
+
 class mnd::CpuGeneratorSse2Float : public Generator
 {
 public:

+ 3 - 1
libmandel/include/Mandel.h

@@ -48,6 +48,7 @@ private:
 
     std::unique_ptr<Generator> cpuGeneratorFloat;
     std::unique_ptr<Generator> cpuGeneratorDouble;
+    std::unique_ptr<Generator> cpuGenerator128;
 
     std::vector<MandelDevice> devices;
 
@@ -60,7 +61,8 @@ public:
     const std::vector<MandelDevice>& getDevices(void);
 
     Generator& getCpuGeneratorFloat(void);
-    Generator& getCpuGeneratorDouble(void);
+    Generator& getCpuGeneratorDouble(void); 
+    Generator& getCpuGenerator128(void); 
 
     const CpuInfo& getCpuInfo(void) const { return cpuInfo; }
 };

+ 34 - 0
libmandel/src/CpuGenerators.cpp

@@ -1,4 +1,5 @@
 #include "CpuGenerators.h"
+#include "Fixed.h"
 
 #include <omp.h>
 
@@ -6,6 +7,7 @@
 
 using mnd::CpuGeneratorFloat;
 using mnd::CpuGeneratorDouble;
+using mnd::CpuGenerator128;
 
 
 void CpuGeneratorFloat::generate(const mnd::MandelInfo& info, float* data)
@@ -69,3 +71,35 @@ void CpuGeneratorDouble::generate(const mnd::MandelInfo& info, float* data)
     }
 }
 
+
+// Renders the Mandelbrot set for `info.view` using Fixed128 arithmetic.
+//
+// For every pixel (i, j) the iteration z -> z^2 + c is run at most
+// info.maxIter times and the index at which the loop stopped is stored in
+// data[i + j * info.bWidth]. Rows are distributed over OpenMP threads.
+//
+// @param info bitmap size, iteration limit and viewport to render
+// @param data output buffer with room for info.bWidth * info.bHeight floats
+void CpuGenerator128::generate(const mnd::MandelInfo& info, float* data)
+{
+    // Nothing to render; also avoids dividing the view extent by zero below.
+    if (info.bWidth <= 0 || info.bHeight <= 0) {
+        return;
+    }
+
+    const MandelViewport& view = info.view;
+
+    // Loop-invariant quantities, converted to fixed point once.
+    const Fixed128 originX = Fixed128(view.x);
+    const Fixed128 originY = Fixed128(view.y);
+    const Fixed128 stepX = Fixed128(view.width / info.bWidth);
+    const Fixed128 stepY = Fixed128(view.height / info.bHeight);
+    const Fixed128 escapeLimit = Fixed128(16);
+
+    omp_set_num_threads(2 * omp_get_num_procs());
+#pragma omp parallel for
+    for (long j = 0; j < info.bHeight; j++) {
+        const Fixed128 y = originY + Fixed128(j) * stepY;
+        for (long i = 0; i < info.bWidth; i++) {
+            // The origin has to be a Fixed128 before the addition: with a
+            // plain double on the left, `double + Fixed128` selects the
+            // built-in double addition via Fixed128::operator double and the
+            // coordinate would be computed in double precision only.
+            const Fixed128 x = originX + Fixed128(i) * stepX;
+
+            Fixed128 a = x;
+            Fixed128 b = y;
+
+            int k = 0;
+            for (k = 0; k < info.maxIter; k++) {
+                Fixed128 aa = a * a;
+                Fixed128 bb = b * b;
+                Fixed128 ab = a * b;
+                a = aa - bb + x;
+                b = ab + ab + y;
+                // Escape test on the squared magnitude of the previous z.
+                if (aa + bb > escapeLimit) {
+                    break;
+                }
+            }
+
+            data[i + j * info.bWidth] = float(k);
+        }
+    }
+}
+

+ 8 - 0
libmandel/src/mandel.cpp

@@ -56,6 +56,8 @@ MandelContext::MandelContext(void)
         cpuGeneratorDouble = std::make_unique<CpuGeneratorDouble>();
     }
 
+    cpuGenerator128 = std::make_unique<CpuGenerator128>();
+
     devices = createDevices();
 }
 
@@ -142,3 +144,9 @@ Generator& MandelContext::getCpuGeneratorDouble(void)
 {
     return *cpuGeneratorDouble;
 }
+
+
+// Returns the 128-bit fixed-point CPU generator owned by this context.
+// The reference stays valid only as long as the context itself.
+Generator& MandelContext::getCpuGenerator128(void)
+{
+    return *cpuGenerator128;
+}