Nicolas Winkler 5 년 전
부모
커밋
a5fa129c1b

+ 1 - 8
Almond.cpp

@@ -90,17 +90,10 @@ void Almond::on_smooth_stateChanged(int checked)
 }
 
 
-void Almond::on_runBenchmark_clicked()
-{
-    if (!benchmarkDialog)
-        benchmarkDialog = std::make_unique<BenchmarkDialog>(mandelContext, this);
-    benchmarkDialog->exec();
-}
-
-
 void Almond::on_exportImage_clicked()
 {
     ExportImageDialog dialog(this);
+    dialog.setMaxIterations(mw->getMaxIterations());
     //dialog.show();
     auto response = dialog.exec();
     if (response == 1) {

+ 2 - 3
Almond.h

@@ -8,7 +8,7 @@
 #include "exportdialogs.h"
 #include "gradientchoosedialog.h"
 #include "choosegenerators.h"
-#include "benchmarkdialog.h"
+//#include "benchmarkdialog.h"
 
 #include <memory>
 
@@ -18,7 +18,7 @@ class Almond : public QMainWindow
 private:
     mnd::MandelContext mandelContext;
     std::unique_ptr<MandelWidget> mw;
-    std::unique_ptr<BenchmarkDialog> benchmarkDialog;
+    //std::unique_ptr<BenchmarkDialog> benchmarkDialog;
     std::unique_ptr<ChooseGenerators> generatorsDialog;
     mnd::Generator* currentGenerator;
     GradientChooseDialog gcd;
@@ -33,7 +33,6 @@ private slots:
     void on_chooseGradient_clicked();
     void on_exportVideo_clicked();
     void on_smooth_stateChanged(int arg1);
-    void on_runBenchmark_clicked();
     void on_exportImage_clicked();
     void on_resetZoom_clicked();
 

+ 2 - 5
Almond.pro

@@ -35,7 +35,6 @@ SOURCES += \
         MandelVideoGenerator.cpp \
         MandelWidget.cpp \
         VideoStream.cpp \
-        benchmarkdialog.cpp \
         choosegenerators.cpp \
         exportdialogs.cpp \
         gradientchoosedialog.cpp \
@@ -51,14 +50,12 @@ HEADERS += \
         MandelVideoGenerator.h \
         MandelWidget.h \
         VideoStream.h \
-        benchmarkdialog.h \
         choosegenerators.h \
         exportdialogs.h \
         gradientchoosedialog.h
 
 FORMS += \
         Almond.ui \
-        benchmarks.ui \
         choosegenerators.ui \
         exportimagedialog.ui \
         exportvideodialog.ui \
@@ -131,8 +128,8 @@ RESOURCES += Almond.qrc
 
 unix|win32: LIBS += -L$$PWD/libmandel/ -lmandel -lqd
 
-INCLUDEPATH += $$PWD/libmandel/include
-DEPENDPATH += $$PWD/libmandel/include
+INCLUDEPATH += $$PWD/libmandel/include $$PWD/libmandel/qd-2.3.22/include
+DEPENDPATH += $$PWD/libmandel/include $$PWD/libmandel/qd-2.3.22/include
 
 win32:!win32-g++: PRE_TARGETDEPS += $$PWD/libmandel/mandel.lib  $$PWD/libmandel/qd.lib
 else:unix|win32-g++: PRE_TARGETDEPS += $$PWD/libmandel/libmandel.a $$PWD/libmandel/libqd.a

+ 0 - 7
Almond.ui

@@ -166,13 +166,6 @@
         </spacer>
        </item>
        <item>
-        <widget class="QPushButton" name="runBenchmark">
-         <property name="text">
-          <string>Run Benchmark</string>
-         </property>
-        </widget>
-       </item>
-       <item>
         <widget class="QPushButton" name="chooseGenerator">
          <property name="text">
           <string>Select Generators</string>

+ 0 - 260
benchmarkdialog.cpp

@@ -1,260 +0,0 @@
-#include "benchmarkdialog.h"
-#include <chrono>
-#include <cmath>
-
-
-mnd::MandelViewport Benchmarker::benchViewport(void)
-{
-    return mnd::MandelViewport{ -1.250000598933854152929, 0.0001879894057291665530, 0.0000003839916666666565, 0.0000003839916666666565 };
-}
-
-const std::vector<mnd::MandelInfo> Benchmarker::benches {
-    mnd::MandelInfo{ benchViewport(), 50, 50, 250, false },
-    mnd::MandelInfo{ benchViewport(), 50, 50, 500, false },
-    mnd::MandelInfo{ benchViewport(), 50, 100, 500, false },
-    mnd::MandelInfo{ benchViewport(), 100, 100, 500, false },
-    mnd::MandelInfo{ benchViewport(), 100, 100, 1000, false },
-    mnd::MandelInfo{ benchViewport(), 100, 200, 1000, false },
-    mnd::MandelInfo{ benchViewport(), 200, 200, 1000, false },
-    mnd::MandelInfo{ benchViewport(), 200, 200, 2000, false },
-    mnd::MandelInfo{ benchViewport(), 200, 400, 2000, false },
-    mnd::MandelInfo{ benchViewport(), 400, 400, 2000, false },
-    mnd::MandelInfo{ benchViewport(), 400, 400, 4000, false },
-    mnd::MandelInfo{ benchViewport(), 400, 800, 4000, false },
-    mnd::MandelInfo{ benchViewport(), 800, 800, 4000, false },
-    mnd::MandelInfo{ benchViewport(), 800, 800, 8000, false },
-    mnd::MandelInfo{ benchViewport(), 800, 800, 16000, false },
-    mnd::MandelInfo{ benchViewport(), 800, 1600, 16000, false },
-    mnd::MandelInfo{ benchViewport(), 1600, 1600, 16000, false },
-    mnd::MandelInfo{ benchViewport(), 1600, 1600, 32000, false },
-    mnd::MandelInfo{ benchViewport(), 1600, 1600, 64000, false },
-    mnd::MandelInfo{ benchViewport(), 1600, 3200, 64000, false },
-    mnd::MandelInfo{ benchViewport(), 3200, 3200, 64000, false },
-    mnd::MandelInfo{ benchViewport(), 3200, 3200, 128000, false },
-    mnd::MandelInfo{ benchViewport(), 3200, 3200, 256000, false },
-    mnd::MandelInfo{ benchViewport(), 3200, 3200, 512000, false },
-    mnd::MandelInfo{ benchViewport(), 3200, 3200, 1024000, false },
-    mnd::MandelInfo{ benchViewport(), 3200, 3200, 2048000, false },
-    mnd::MandelInfo{ benchViewport(), 3200, 6400, 2048000, false },
-    mnd::MandelInfo{ benchViewport(), 6400, 6400, 2048000, false },
-    mnd::MandelInfo{ benchViewport(), 6400, 6400, 4096000, false },
-    mnd::MandelInfo{ benchViewport(), 6400, 6400, 8192000, false },
-    mnd::MandelInfo{ benchViewport(), 6400, 6400, 16384000, false },
-    mnd::MandelInfo{ benchViewport(), 6400, 6400, 32768000, false },
-    mnd::MandelInfo{ benchViewport(), 6400, 6400, 65536000, false },
-    mnd::MandelInfo{ benchViewport(), 6400, 6400, 131072000, false },
-    mnd::MandelInfo{ benchViewport(), 6400, 6400, 262144000, false },
-    mnd::MandelInfo{ benchViewport(), 6400, 6400, 524288000, false },
-    mnd::MandelInfo{ benchViewport(), 6400, 6400, 1048576000, false },
-    mnd::MandelInfo{ benchViewport(), 6400, 6400, 2097152000, false },
-};
-
-
-std::pair<long long, std::chrono::nanoseconds> Benchmarker::measureMips(const std::function<Bitmap<float>*()>& bench) const
-{
-    using namespace std::chrono;
-    auto before = high_resolution_clock::now();
-    auto* bitmap = bench();
-    auto after = high_resolution_clock::now();
-
-    long long sum = 0;
-    for (int i = 0; i < bitmap->width * bitmap->height; i++) {
-        sum += static_cast<long long>(std::floor(bitmap->pixels[size_t(i)]));
-    }
-
-    return std::make_pair(sum, duration_cast<nanoseconds>(after - before));
-}
-
-double Benchmarker::benchmarkResult(mnd::Generator& mg) const
-{
-    size_t testIndex = 0;
-
-    for (size_t i = 0; i < benches.size(); i++) {
-        const mnd::MandelInfo& mi = benches[i];
-        Bitmap<float> bmp(mi.bWidth, mi.bHeight);
-        auto [iters, time] = measureMips([&mg, &mi, &bmp]() {
-            mg.generate(mi, bmp.pixels.get());
-            return &bmp;
-        });
-        if (time > std::chrono::milliseconds(500)) {
-            testIndex = i + 0;
-            //printf("testing index %d\n", testIndex);
-            fflush(stdout);
-            break;
-        }
-    }
-
-
-    const mnd::MandelInfo& mi = benches[(testIndex >= benches.size()) ? (benches.size() - 1) : testIndex];
-    Bitmap<float> bmp(mi.bWidth, mi.bHeight);
-    auto [iters, time] = measureMips([&mg, &mi, &bmp]() {
-        mg.generate(mi, bmp.pixels.get());
-        return &bmp;
-    });
-
-    return double(iters) / time.count() * 1000;
-}
-
-
-void Benchmarker::start(void)
-{
-    /*
-    mnd::Generator& cpuf = mndContext.getCpuGeneratorFloat();
-    mnd::Generator& cpud = mndContext.getCpuGeneratorDouble();
-    mnd::Generator* cpudd = mndContext.getCpuGeneratorDD();
-    mnd::Generator* cpuqd = mndContext.getCpuGeneratorQD();
-    mnd::Generator* cpu128 = mndContext.getCpuGeneratorQuad();
-    mnd::Generator* cpu256 = mndContext.getCpuGeneratorOct();
-
-    double nTests = 2;
-
-    if (cpudd)
-        nTests++;
-    if (cpuqd)
-        nTests++;
-    if (cpu128)
-        nTests++;
-    if (cpu256)
-        nTests++;
-
-    auto& devices = mndContext.getDevices();
-    for (size_t i = 0; i < devices.size(); i++) {
-        if (mnd::Generator* gpuf; (gpuf = devices[i].getGeneratorFloat())) {
-            nTests++;
-        }
-        if (mnd::Generator* gpud; (gpud = devices[i].getGeneratorDouble())) {
-            nTests++;
-        }
-        if (mnd::Generator* gpudd; (gpudd = devices[i].getGeneratorDoubleDouble())) {
-            nTests++;
-        }
-    }
-
-    double progress = 90.0 / nTests;
-
-    BenchmarkResult br;
-    br.values.push_back({});
-    br.percentage = 10;
-
-    emit update(br);
-
-    std::vector<double>& cpu = br.values[0];
-    cpu.push_back(benchmarkResult(cpuf));
-    br.percentage += progress;
-    emit update(br);
-    cpu.push_back(benchmarkResult(cpud));
-    br.percentage += progress;
-    emit update(br);
-
-    if (cpudd) {
-        cpu.push_back(benchmarkResult(*cpudd));
-        br.percentage += progress;
-        emit update(br);
-    }
-    if (cpuqd) {
-        cpu.push_back(benchmarkResult(*cpuqd));
-        br.percentage += progress;
-        emit update(br);
-    }
-    if (cpu128) {
-        cpu.push_back(benchmarkResult(*cpu128));
-        br.percentage += progress;
-        emit update(br);
-    }
-    if (cpu256) {
-        cpu.push_back(benchmarkResult(*cpu256));
-        br.percentage += progress;
-        emit update(br);
-    }
-
-    for (size_t i = 0; i < devices.size(); i++) {
-        br.values.push_back({});
-        std::vector<double>& gpu = br.values[br.values.size() - 1];
-        if (mnd::Generator* gpuf; (gpuf = devices[i].getGeneratorFloat())) {
-            gpu.push_back(benchmarkResult(*gpuf));
-            br.percentage += progress;
-            emit update(br);
-        }
-        if (mnd::Generator* gpud; (gpud = devices[i].getGeneratorDouble())) {
-            gpu.push_back(benchmarkResult(*gpud));
-            br.percentage += progress;
-            emit update(br);
-        }
-        if (mnd::Generator* gpudd; (gpudd = devices[i].getGeneratorDoubleDouble())) {
-            gpu.push_back(benchmarkResult(*gpudd));
-            br.percentage += progress;
-            emit update(br);
-        }
-    }
-    emit update(br);
-    */
-    emit finished();
-}
-
-
-BenchmarkDialog::BenchmarkDialog(mnd::MandelContext& mndContext, QWidget *parent) :
-    QDialog{ parent },
-    mndContext{ mndContext },
-    benchmarker{ mndContext }
-{
-    ui.setupUi(this);
-    //printf("bench!\n"); fflush(stdout);
-
-    auto& devices = mndContext.getDevices();
-    size_t nDevices = devices.size() + 1;
-    ui.tableWidget->setColumnCount(6);
-    ui.tableWidget->setRowCount(int(nDevices));
-    ui.tableWidget->setHorizontalHeaderLabels({"Single Precision", "Double Precision", "Double-Double Precision", "Quad-Double Precision", "Quad Precision", "Oct Precision"});
-
-    QString cpuDesc = ("CPU [" + mndContext.getCpuInfo().getBrand() + "]").c_str();
-    ui.tableWidget->setVerticalHeaderItem(0, new QTableWidgetItem(cpuDesc));
-    for (size_t i = 0; i < devices.size(); i++) {
-        std::string cpuDescS = std::string("GPU ") + std::to_string(i + 1) + " [" + devices[i].getName().c_str() + "]";
-        QString cpuDesc = QString::fromLatin1(cpuDescS.c_str());
-        /*printf("brand [%d]: --> %s <--\n", (int) cpuDescS.size(), cpuDescS.c_str());
-        for (int x = 0; x < cpuDescS.size(); x++) {
-            printf("%d\n", cpuDescS[x]);
-        }
-        printf("\n");*/
-        auto label = new QTableWidgetItem(cpuDesc);
-        label->setStatusTip(QString::fromLatin1(devices[i].getName().c_str()));
-        ui.tableWidget->setVerticalHeaderItem(int(i + 1), label);
-    }
-
-    qRegisterMetaType<BenchmarkResult>();
-
-    benchmarker.moveToThread(&benchThread);
-    connect(&benchThread, &QThread::started, &benchmarker, &Benchmarker::start);
-    connect(&benchmarker, SIGNAL (finished()), &benchThread, SLOT (quit()));
-    connect(&benchmarker, SIGNAL (update(BenchmarkResult)), this, SLOT (update(BenchmarkResult)));
-
-    ui.tableWidget->horizontalHeader()->setSectionResizeMode(QHeaderView::Stretch);
-}
-
-
-void BenchmarkDialog::update(BenchmarkResult br)
-{
-    std::vector<double> cpu = br.values[0];
-    for (size_t j = 0; j < br.values.size(); j++) {
-        for (size_t i = 0; i < br.values[j].size(); i++) {
-            ui.tableWidget->setItem(int(j), int(i), new QTableWidgetItem(QString::number(br.values[j][i])));
-        }
-    }
-    ui.progressBar->setValue(int(br.percentage));
-}
-
-
-void BenchmarkDialog::on_run_clicked()
-{
-    if (!benchThread.isRunning()) {
-        /*for (int i = 0; i < ui.tableWidget->columnCount(); i++) {
-            for (int j = 0; j < ui.tableWidget->rowCount(); j++) {
-                ui.tableWidget->setItem(j, i, new QTableWidgetItem(""));
-            }
-        }*/
-
-        benchThread.start();
-    }
-
-//    ui.tableWidget->setItem(0, 1, new QTableWidgetItem(benchmarkResult(clg, 4000, 10000)));
-}

+ 0 - 65
benchmarkdialog.h

@@ -1,65 +0,0 @@
-#ifndef BENCHMARKDIALOG_H
-#define BENCHMARKDIALOG_H
-
-#include <QDialog>
-#include <functional>
-#include "ui_benchmarks.h"
-#include <Mandel.h>
-#include "Bitmap.h"
-#include <QThread>
-
-
-struct BenchmarkResult
-{
-    std::vector<std::vector<double>> values;
-    double percentage = 0.0;
-};
-
-Q_DECLARE_METATYPE(BenchmarkResult)
-
-class Benchmarker : public QObject
-{
-    Q_OBJECT
-private:
-    mnd::MandelContext mndContext;
-    static const std::vector<mnd::MandelInfo> benches;
-public:
-    inline Benchmarker(mnd::MandelContext& mndContext) :
-        mndContext{ mnd::initializeContext() }
-    {
-    }
-
-    static mnd::MandelViewport benchViewport(void);
-
-    std::pair<long long, std::chrono::nanoseconds> measureMips(const std::function<Bitmap<float>*()>& bench) const;
-    double benchmarkResult(mnd::Generator& mg) const;
-
-public slots:
-    void start(void);
-signals:
-    void update(BenchmarkResult br);
-    void finished(void);
-};
-
-
-class BenchmarkDialog : public QDialog
-{
-    Q_OBJECT
-private:
-    Ui::BenchmarkDialog ui;
-    mnd::MandelContext& mndContext;
-    QThread benchThread;
-    Benchmarker benchmarker;
-public:
-    explicit BenchmarkDialog(mnd::MandelContext& mndContext, QWidget *parent = nullptr);
-
-
-signals:
-
-public slots:
-    void update(BenchmarkResult br);
-private slots:
-    void on_run_clicked();
-};
-
-#endif // BENCHMARKDIALOG_H

+ 0 - 95
benchmarks.ui

@@ -1,95 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<ui version="4.0">
- <class>BenchmarkDialog</class>
- <widget class="QDialog" name="BenchmarkDialog">
-  <property name="geometry">
-   <rect>
-    <x>0</x>
-    <y>0</y>
-    <width>1215</width>
-    <height>550</height>
-   </rect>
-  </property>
-  <property name="windowTitle">
-   <string>Dialog</string>
-  </property>
-  <layout class="QVBoxLayout" name="verticalLayout_2">
-   <item>
-    <layout class="QVBoxLayout" name="verticalLayout">
-     <item>
-      <widget class="Line" name="line_2">
-       <property name="orientation">
-        <enum>Qt::Horizontal</enum>
-       </property>
-      </widget>
-     </item>
-     <item>
-      <widget class="QLabel" name="label">
-       <property name="sizePolicy">
-        <sizepolicy hsizetype="Minimum" vsizetype="Minimum">
-         <horstretch>0</horstretch>
-         <verstretch>0</verstretch>
-        </sizepolicy>
-       </property>
-       <property name="text">
-        <string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Measure the preformance of Almond on this machine. To obtain good results, run the benchmarks while no other compute-heavy tasks are running on the computer.&lt;/p&gt;&lt;p&gt;The benchmarking consists of several renders of the Mandelbrot fractal each using a different datatype for the underlying operations. The more precision the type allows for, the slower it performs (normally). If you have a video card, Almond will try to use it to do calculations as the tasks are exceptionally well suited for parallel execution.&lt;/p&gt;&lt;p&gt;All results are measured in mega-iterations per second.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
-       </property>
-       <property name="wordWrap">
-        <bool>true</bool>
-       </property>
-      </widget>
-     </item>
-     <item>
-      <widget class="Line" name="line">
-       <property name="orientation">
-        <enum>Qt::Horizontal</enum>
-       </property>
-      </widget>
-     </item>
-     <item>
-      <layout class="QHBoxLayout" name="horizontalLayout">
-       <item>
-        <widget class="QPushButton" name="run">
-         <property name="text">
-          <string>Run</string>
-         </property>
-        </widget>
-       </item>
-       <item>
-        <widget class="QProgressBar" name="progressBar">
-         <property name="enabled">
-          <bool>false</bool>
-         </property>
-         <property name="maximum">
-          <number>100</number>
-         </property>
-         <property name="value">
-          <number>0</number>
-         </property>
-         <property name="textVisible">
-          <bool>false</bool>
-         </property>
-         <property name="invertedAppearance">
-          <bool>false</bool>
-         </property>
-        </widget>
-       </item>
-      </layout>
-     </item>
-     <item>
-      <widget class="QTableWidget" name="tableWidget">
-       <property name="editTriggers">
-        <set>QAbstractItemView::NoEditTriggers</set>
-       </property>
-       <attribute name="horizontalHeaderMinimumSectionSize">
-        <number>130</number>
-       </attribute>
-      </widget>
-     </item>
-    </layout>
-   </item>
-  </layout>
- </widget>
- <resources/>
- <connections/>
-</ui>

+ 191 - 0
choosegenerators.cpp

@@ -6,6 +6,110 @@
 #include <QComboBox>
 #include <QRegExp>
 #include <QRegExpValidator>
+#include <QMessageBox>
+
+
+
+// Returns the fixed viewport that every entry of `benches` renders.
+mnd::MandelViewport Benchmarker::benchViewport(void)
+{
+    mnd::MandelViewport view{ -1.250000598933854152929, 0.0001879894057291665530, 0.0000003839916666666565, 0.0000003839916666666565 };
+    return view;
+}
+
+
+// Benchmark workloads ordered from cheapest to most expensive.
+// benchmarkResult() walks this table from the front; every entry renders
+// benchViewport().
+// NOTE(review): the three integers are presumably bWidth, bHeight and the
+// maximum iteration count, and the trailing flag the smoothing switch --
+// confirm against mnd::MandelInfo.
+const std::vector<mnd::MandelInfo> Benchmarker::benches {
+    mnd::MandelInfo{ benchViewport(), 50, 50, 250, false },
+    mnd::MandelInfo{ benchViewport(), 50, 50, 500, false },
+    mnd::MandelInfo{ benchViewport(), 50, 100, 500, false },
+    mnd::MandelInfo{ benchViewport(), 100, 100, 500, false },
+    mnd::MandelInfo{ benchViewport(), 100, 100, 1000, false },
+    mnd::MandelInfo{ benchViewport(), 100, 200, 1000, false },
+    mnd::MandelInfo{ benchViewport(), 200, 200, 1000, false },
+    mnd::MandelInfo{ benchViewport(), 200, 200, 2000, false },
+    mnd::MandelInfo{ benchViewport(), 200, 400, 2000, false },
+    mnd::MandelInfo{ benchViewport(), 400, 400, 2000, false },
+    mnd::MandelInfo{ benchViewport(), 400, 400, 4000, false },
+    mnd::MandelInfo{ benchViewport(), 400, 800, 4000, false },
+    mnd::MandelInfo{ benchViewport(), 800, 800, 4000, false },
+    mnd::MandelInfo{ benchViewport(), 800, 800, 8000, false },
+    mnd::MandelInfo{ benchViewport(), 800, 800, 16000, false },
+    mnd::MandelInfo{ benchViewport(), 800, 1600, 16000, false },
+    mnd::MandelInfo{ benchViewport(), 1600, 1600, 16000, false },
+    mnd::MandelInfo{ benchViewport(), 1600, 1600, 32000, false },
+    mnd::MandelInfo{ benchViewport(), 1600, 1600, 64000, false },
+    mnd::MandelInfo{ benchViewport(), 1600, 1600, 128000, false },
+    mnd::MandelInfo{ benchViewport(), 1600, 1600, 256000, false },
+    mnd::MandelInfo{ benchViewport(), 1600, 1600, 512000, false },
+    mnd::MandelInfo{ benchViewport(), 1600, 1600, 1024000, false },
+    // Keeps the iteration count doubling uniformly between 1024000 and 4096000.
+    mnd::MandelInfo{ benchViewport(), 1600, 1600, 2048000, false },
+    mnd::MandelInfo{ benchViewport(), 1600, 1600, 4096000, false },
+    mnd::MandelInfo{ benchViewport(), 1600, 1600, 8192000, false },
+    mnd::MandelInfo{ benchViewport(), 1600, 1600, 16384000, false },
+    mnd::MandelInfo{ benchViewport(), 1600, 1600, 32768000, false },
+    mnd::MandelInfo{ benchViewport(), 1600, 1600, 65536000, false },
+    mnd::MandelInfo{ benchViewport(), 1600, 1600, 131072000, false },
+    mnd::MandelInfo{ benchViewport(), 1600, 1600, 262144000, false },
+    mnd::MandelInfo{ benchViewport(), 1600, 1600, 524288000, false },
+    mnd::MandelInfo{ benchViewport(), 1600, 1600, 1048576000, false },
+    mnd::MandelInfo{ benchViewport(), 1600, 1600, 2097152000, false },
+};
+
+
+// Nothing to release; defined out of line in this translation unit.
+Benchmarker::~Benchmarker(void) = default;
+
+
+// Times a single invocation of `bench` and sums the floored value of every
+// pixel in the bitmap it returns.
+//
+// Only the call to bench() is timed; the summation runs afterwards.
+// Returns { sum of floored pixel values, elapsed time of bench() }.
+// NOTE(review): callers treat the sum as an iteration count -- confirm that
+// the generators write per-pixel iteration counts.
+std::pair<long long, std::chrono::nanoseconds> Benchmarker::measureMips(const std::function<Bitmap<float>*()>& bench) const
+{
+    // steady_clock is monotonic; high_resolution_clock may alias the system
+    // clock, which can be adjusted while the benchmark is running.
+    using Clock = std::chrono::steady_clock;
+
+    const auto before = Clock::now();
+    Bitmap<float>* bitmap = bench();
+    const auto after = Clock::now();
+
+    // Widen before multiplying so the pixel count is not computed in int.
+    const size_t pixelCount = static_cast<size_t>(bitmap->width) * static_cast<size_t>(bitmap->height);
+
+    long long sum = 0;
+    for (size_t i = 0; i < pixelCount; i++) {
+        sum += static_cast<long long>(std::floor(bitmap->pixels[i]));
+    }
+
+    return std::make_pair(sum, std::chrono::duration_cast<std::chrono::nanoseconds>(after - before));
+}
+
+// Measures the throughput of `mg`.
+//
+// Calibration: walks `benches` from the cheapest workload upwards until one
+// run takes longer than 500 ms. The reported measurement then uses the entry
+// two positions further up the table, clamped to the last entry. If no
+// workload exceeds 500 ms, the first entry is measured.
+//
+// Returns the summed pixel values per nanosecond multiplied by 1000, or 0 if
+// no positive elapsed time was recorded.
+double Benchmarker::benchmarkResult(mnd::Generator& mg) const
+{
+    // Renders one workload into a scratch bitmap and times the render.
+    auto measure = [this, &mg](const mnd::MandelInfo& mi) {
+        Bitmap<float> bmp(mi.bWidth, mi.bHeight);
+        return measureMips([&mg, &mi, &bmp]() {
+            mg.generate(mi, bmp.pixels.get());
+            return &bmp;
+        });
+    };
+
+    size_t testIndex = 0;
+    for (size_t i = 0; i < benches.size(); i++) {
+        if (measure(benches[i]).second > std::chrono::milliseconds(500)) {
+            testIndex = i + 2;
+            break;
+        }
+    }
+
+    const size_t lastIndex = benches.size() - 1;
+    const auto [iters, time] = measure(benches[(testIndex > lastIndex) ? lastIndex : testIndex]);
+
+    // Avoid dividing by a zero (or negative) duration.
+    if (time.count() <= 0)
+        return 0.0;
+
+    return double(iters) / time.count() * 1000;
+}
+
+
+// QRunnable entry point: benchmarks the stored generator and reports the
+// result, together with the stored row and percentage, through finished().
+void Benchmarker::run(void)
+{
+    emit finished(row, percentage, benchmarkResult(generator));
+}
+
 
 ChooseGenerators::ChooseGenerators(mnd::MandelContext& mndCtxt, QWidget *parent) :
     QDialog{ parent },
@@ -14,16 +118,42 @@ ChooseGenerators::ChooseGenerators(mnd::MandelContext& mndCtxt, QWidget *parent)
     tableContent{}
 {
     ui->setupUi(this);
+    ui->progressBar->setRange(0, 1000);
+    benchmarker.setMaxThreadCount(1);
 
     QRegExp floatingpoint{ "^[-+]?(\\d*\\.?\\d+|\\d+\\.?\\d*)([eE][-+]\\d+)?$" };
     floatValidator = std::make_unique<QRegExpValidator>(floatingpoint, this);
 
+    auto genName = [] (mnd::GeneratorType type) {
+        static const std::map<mnd::GeneratorType, QString> names {
+            { mnd::GeneratorType::FLOAT, "float" },
+            { mnd::GeneratorType::FLOAT_SSE2, "float SSE2" },
+            { mnd::GeneratorType::FLOAT_AVX, "float AVX" },
+            { mnd::GeneratorType::FLOAT_AVX512, "float AVX512" },
+            { mnd::GeneratorType::FLOAT_NEON, "float Neon" },
+            { mnd::GeneratorType::DOUBLE, "double" },
+            { mnd::GeneratorType::DOUBLE_SSE2, "double SSE2" },
+            { mnd::GeneratorType::DOUBLE_AVX, "double AVX" },
+            { mnd::GeneratorType::DOUBLE_AVX512, "double AVX512" },
+            { mnd::GeneratorType::DOUBLE_NEON, "double Neon" },
+            { mnd::GeneratorType::DOUBLE_DOUBLE, "double double" },
+            { mnd::GeneratorType::DOUBLE_DOUBLE_AVX, "double double AVX" },
+            { mnd::GeneratorType::QUAD_DOUBLE, "quad double" },
+            { mnd::GeneratorType::FLOAT128, "float128" },
+            { mnd::GeneratorType::FLOAT256, "float256" },
+            { mnd::GeneratorType::FIXED512, "fixed512" },
+        };
+
+        return names.at(type);
+    };
+
     generators = std::map<QString, mnd::Generator*> {
         { "float", mndCtxt.getCpuGenerator(mnd::GeneratorType::FLOAT) },
         { "double", mndCtxt.getCpuGenerator(mnd::GeneratorType::DOUBLE) },
         { "double double", mndCtxt.getCpuGenerator(mnd::GeneratorType::DOUBLE_DOUBLE) },
         { "quad double", mndCtxt.getCpuGenerator(mnd::GeneratorType::QUAD_DOUBLE) },
         { "float256", mndCtxt.getCpuGenerator(mnd::GeneratorType::FLOAT256) },
+        { "fixed512", mndCtxt.getCpuGenerator(mnd::GeneratorType::FIXED512) },
     };
 
     if (mndCtxt.getCpuInfo().hasSse2()) {
@@ -33,6 +163,9 @@ ChooseGenerators::ChooseGenerators(mnd::MandelContext& mndCtxt, QWidget *parent)
     if (mndCtxt.getCpuInfo().hasAvx()) {
         generators.insert({ "float AVX", mndCtxt.getCpuGenerator(mnd::GeneratorType::FLOAT_AVX) });
         generators.insert({ "double AVX", mndCtxt.getCpuGenerator(mnd::GeneratorType::DOUBLE_AVX) });
+        if (mndCtxt.getCpuInfo().hasFma()) {
+            generators.insert({ "double double AVX", mndCtxt.getCpuGenerator(mnd::GeneratorType::DOUBLE_DOUBLE_AVX) });
+        }
     }
     if (mndCtxt.getCpuInfo().hasNeon()) {
         generators.insert({ "float Neon", mndCtxt.getCpuGenerator(mnd::GeneratorType::FLOAT_NEON) });
@@ -75,6 +208,23 @@ ChooseGenerators::ChooseGenerators(mnd::MandelContext& mndCtxt, QWidget *parent)
     }
     ui->table->resizeColumnsToContents();
 
+    std::vector<mnd::GeneratorType> generatorTypes = mndCtxt.getSupportedTypes();
+    for (size_t i = 0; i < generatorTypes.size(); i++) {
+        ui->generatorTable->insertRow(ui->generatorTable->rowCount());
+        ui->generatorTable->setItem(ui->generatorTable->rowCount() - 1, 0, new QTableWidgetItem);
+        ui->generatorTable->item(ui->generatorTable->rowCount() - 1, 0)->setText(genName(generatorTypes[i]));
+        actualGenerators.push_back(mndCtxt.getCpuGenerator(generatorTypes[i]));
+    }
+
+    for (auto& device : mndCtxt.getDevices()) {
+        std::vector<mnd::GeneratorType> generatorTypes = device.getSupportedTypes();
+        for (size_t i = 0; i < generatorTypes.size(); i++) {
+            ui->generatorTable->insertRow(ui->generatorTable->rowCount());
+            ui->generatorTable->setItem(ui->generatorTable->rowCount() - 1, 0, new QTableWidgetItem);
+            ui->generatorTable->item(ui->generatorTable->rowCount() - 1, 0)->setText(genName(generatorTypes[i]) + " [" + QString::fromStdString(device.getName()) + "]");
+            actualGenerators.push_back(device.getGenerator(generatorTypes[i]));
+        }
+    }
 
     //ui->addRow->setIcon(ui->addRow->style()->standardIcon(QStyle::SP_));
     //ui->moveRowUp->setIcon(ui->moveRowUp->style()->standardIcon(QStyle::SP_ArrowUp));
@@ -107,6 +257,14 @@ QLineEdit* ChooseGenerators::createFloatText(void)
 }
 
 
+// Shows a finished benchmark: writes `result` into column 1 of `row` in the
+// generator table and advances the progress bar. The bar's range is 0..1000,
+// so the percentage is scaled by 10.
+void ChooseGenerators::setBenchmarkResult(int row, float percentage, double result)
+{
+    QTableWidgetItem* cell = new QTableWidgetItem;
+    ui->generatorTable->setItem(row, 1, cell);
+    cell->setText(QString::number(result));
+
+    const int progress = int(percentage * 10.0f);
+    ui->progressBar->setValue(progress);
+}
+
+
 void ChooseGenerators::on_buttonBox_accepted()
 {
     if (!createdGenerator)
@@ -131,3 +289,36 @@ void ChooseGenerators::on_buttonBox_accepted()
     }
 }
 
+
+// Resets the progress bar and queues one benchmark task for every table row
+// that has a generator. Each task carries the progress value to display once
+// it finishes; the tasks are handed to the `benchmarker` thread pool.
+void ChooseGenerators::on_run_clicked()
+{
+    ui->progressBar->setValue(0);
+
+    const int rowCount = ui->generatorTable->rowCount();
+    for (int row = 0; row < rowCount; row++) {
+        mnd::Generator* generator = actualGenerators.at(row);
+        if (generator == nullptr)
+            continue;
+
+        const float percentage = 100.0f * (row + 1) / rowCount;
+        Benchmarker* task = new Benchmarker(mndCtxt, *generator, row, percentage);
+        QObject::connect(task, &Benchmarker::finished, this, &ChooseGenerators::setBenchmarkResult);
+        benchmarker.start(task);
+    }
+}
+
+
+// Double-clicking a cell in column 1 asks the user whether to benchmark that
+// row's generator and, on "Yes", queues a single benchmark task that reports
+// 100 percent when it finishes.
+void ChooseGenerators::on_generatorTable_cellDoubleClicked(int row, int column)
+{
+    if (column != 1)
+        return;
+
+    QMessageBox confirmation{ this };
+    confirmation.setText("Would you like to benchmark this generator?");
+    confirmation.setStandardButtons(QMessageBox::Yes | QMessageBox::No);
+    if (confirmation.exec() != QMessageBox::Yes)
+        return;
+
+    mnd::Generator* generator = actualGenerators.at(row);
+    if (generator == nullptr)
+        return;
+
+    ui->progressBar->setValue(0);
+    Benchmarker* task = new Benchmarker(mndCtxt, *generator, row, 100.0f);
+    QObject::connect(task, &Benchmarker::finished, this, &ChooseGenerators::setBenchmarkResult);
+    benchmarker.start(task);
+}

+ 44 - 1
choosegenerators.h

@@ -2,12 +2,17 @@
 #define CHOOSEGENERATORS_H
 #include "ui_choosegenerators.h"
 
-#include "Mandel.h"
+#include <Mandel.h>
+
+#include "Bitmap.h"
 
 #include <QDialog>
 #include <QValidator>
 #include <QLineEdit>
 #include <QComboBox>
+#include <QRunnable>
+#include <QThread>
+#include <QThreadPool>
 #include <memory>
 #include <map>
 
@@ -17,16 +22,51 @@ namespace Ui
 }
 
 
+// Benchmark task for a single generator. It is a QObject so it can emit
+// finished(), and a QRunnable so its work is done in run().
+class Benchmarker : public QObject, public QRunnable
+{
+    Q_OBJECT
+public:
+    // `row` and `percentage` are not interpreted by the task; run() hands them
+    // back unchanged through finished() together with the measured result.
+    Benchmarker(mnd::MandelContext& mndContext, mnd::Generator& generator, int row, float percentage) :
+        mndContext{ mndContext },
+        generator{ generator },
+        row{ row },
+        percentage{ percentage }
+    {
+    }
+
+    ~Benchmarker() override;
+
+    // Viewport used by the benchmark workloads.
+    static mnd::MandelViewport benchViewport();
+
+    // Times one call of `bench` and sums the pixels of the bitmap it returns.
+    std::pair<long long, std::chrono::nanoseconds> measureMips(const std::function<Bitmap<float>*()>& bench) const;
+
+    // Benchmarks `mg` and returns its throughput figure.
+    double benchmarkResult(mnd::Generator& mg) const;
+
+    // Benchmarks the stored generator and emits finished().
+    void run() override;
+
+signals:
+    void finished(int row, float percentage, double mips);
+
+private:
+    mnd::MandelContext& mndContext;
+    mnd::Generator& generator;
+    int row;
+    float percentage;
+    static const std::vector<mnd::MandelInfo> benches;
+};
+
+
 class ChooseGenerators : public QDialog
 {
     Q_OBJECT
 private:
+    Ui::ChooseGenerators* sadfgsdfg;
     std::unique_ptr<Ui::ChooseGenerators> ui;
     mnd::MandelContext& mndCtxt;
     std::map<QString, mnd::Generator*> generators;
     std::vector<std::pair<QLineEdit*, QComboBox*>> tableContent;
     std::unique_ptr<QValidator> floatValidator;
     std::unique_ptr<mnd::AdaptiveGenerator> createdGenerator;
+    std::vector<mnd::Generator*> actualGenerators;
+    QThreadPool benchmarker;
 public:
     ChooseGenerators(mnd::MandelContext& mndCtxt, QWidget* parent = nullptr);
     ~ChooseGenerators();
@@ -39,7 +79,10 @@ private:
 public slots:
 
 private slots:
+    void setBenchmarkResult(int row, float percentage, double mips);
     void on_buttonBox_accepted();
+    void on_run_clicked();
+    void on_generatorTable_cellDoubleClicked(int row, int column);
 };
 
 #endif // CHOOSEGENERATORS_H

+ 31 - 10
choosegenerators.ui

@@ -6,12 +6,12 @@
    <rect>
     <x>0</x>
     <y>0</y>
-    <width>647</width>
-    <height>377</height>
+    <width>976</width>
+    <height>493</height>
    </rect>
   </property>
   <property name="windowTitle">
-   <string>Dialog</string>
+   <string>Select Generators</string>
   </property>
   <layout class="QVBoxLayout" name="verticalLayout">
    <item>
@@ -19,7 +19,7 @@
      <item>
       <widget class="QTabWidget" name="tabWidget">
        <property name="currentIndex">
-        <number>1</number>
+        <number>0</number>
        </property>
        <widget class="QWidget" name="tab">
         <attribute name="title">
@@ -27,17 +27,32 @@
         </attribute>
         <layout class="QHBoxLayout" name="horizontalLayout_3">
          <item>
-          <widget class="QTableWidget" name="tableWidget_2">
+          <widget class="QTableWidget" name="generatorTable">
            <property name="editTriggers">
             <set>QAbstractItemView::NoEditTriggers</set>
            </property>
-           <property name="columnCount">
-            <number>1</number>
+           <property name="alternatingRowColors">
+            <bool>true</bool>
            </property>
+           <attribute name="horizontalHeaderDefaultSectionSize">
+            <number>200</number>
+           </attribute>
+           <attribute name="horizontalHeaderMinimumSectionSize">
+            <number>100</number>
+           </attribute>
            <attribute name="verticalHeaderVisible">
             <bool>false</bool>
            </attribute>
-           <column/>
+           <column>
+            <property name="text">
+             <string>Generator Type</string>
+            </property>
+           </column>
+           <column>
+            <property name="text">
+             <string>Performance</string>
+            </property>
+           </column>
           </widget>
          </item>
          <item>
@@ -58,7 +73,7 @@
               </sizepolicy>
              </property>
              <property name="text">
-              <string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Measure the preformance of Almond on this machine. To obtain good results, run the benchmarks while no other compute-heavy tasks are running on the computer.&lt;/p&gt;&lt;p&gt;The benchmarking consists of several renders of the Mandelbrot fractal each using a different datatype for the underlying operations. The more precision the type allows for, the slower it performs (normally). If you have a video card, Almond will try to use it to do calculations as the tasks are exceptionally well suited for parallel execution.&lt;/p&gt;&lt;p&gt;All results are measured in mega-iterations per second.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
+              <string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Measure the performance of Almond on this machine. To obtain good results, run the benchmarks while no other compute-heavy tasks are running on the computer.&lt;/p&gt;&lt;p&gt;The benchmarking consists of several renders of the Mandelbrot fractal using the specified datatype for the underlying operations. The more precision the type allows for, the slower it performs (normally). If you have a video card, Almond will try to use it to do calculations as the tasks are exceptionally well suited for parallel execution.&lt;/p&gt;&lt;p&gt;All results are measured in mega-iterations per second.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
              </property>
              <property name="wordWrap">
               <bool>true</bool>
@@ -76,8 +91,14 @@
             <layout class="QHBoxLayout" name="horizontalLayout_2">
              <item>
               <widget class="QPushButton" name="run">
+               <property name="sizePolicy">
+                <sizepolicy hsizetype="Preferred" vsizetype="Fixed">
+                 <horstretch>0</horstretch>
+                 <verstretch>0</verstretch>
+                </sizepolicy>
+               </property>
                <property name="text">
-                <string>Run</string>
+                <string>Run All Benchmarks</string>
                </property>
               </widget>
              </item>

+ 6 - 0
exportdialogs.cpp

@@ -20,6 +20,12 @@ ExportImageDialog::ExportImageDialog(QWidget* parent) :
 }
 
 
+void ExportImageDialog::setMaxIterations(int mi)
+{
+    // Put the decimal representation of mi into the maxIterations widget.
+    const QString iterationText = QString::number(mi);
+    eid.maxIterations->setText(iterationText);
+}
+
+
 int ExportImageDialog::getMaxIterations(void) const
 {
     return std::stoi(eid.maxIterations->text().toStdString());

+ 1 - 0
exportdialogs.h

@@ -18,6 +18,7 @@ private:
 public:
     ExportImageDialog(QWidget* parent);
 
+    void setMaxIterations(int mi);
     int getMaxIterations(void) const;
     int getWidth(void) const;
     int getHeight(void) const;

+ 8 - 0
libmandel/include/CpuGenerators.h

@@ -10,6 +10,7 @@ namespace mnd
         NONE,
         X86_SSE2,
         X86_AVX,
+        X86_AVX_FMA,
         ARM_NEON,
     };
 
@@ -95,6 +96,13 @@ public:
 #endif
 
 template<bool parallel>
+class mnd::CpuGenerator<mnd::DoubleDouble, mnd::X86_AVX_FMA, parallel> : public Generator // partial specialisation: DoubleDouble values, X86_AVX_FMA extension
+{
+public:
+    virtual void generate(const MandelInfo& info, float* data); // defined in CpuGeneratorsAVX.cpp; writes one float per pixel into data
+};
+
+template<bool parallel>
 class mnd::CpuGenerator<Fixed128, mnd::NONE, parallel> : public Generator
 {
 public:

+ 3 - 3
libmandel/include/Fixed.h

@@ -29,12 +29,12 @@ struct Fixed512
 
     inline Fixed512(const Float256& val)
     {
-        body = Once{ val * boost::multiprecision::pow(Float256{2}, 512 - 32) };
+        body = Once{ val * boost::multiprecision::pow(Float256{ 2 }, 512 - 32) };
     }
 
     inline Fixed512(double val)
     {
-        body = Once{ boost::multiprecision::pow(Float256{2}, 512 - 32) * val };
+        body = Once{ boost::multiprecision::pow(Float256{ 2 }, 512 - 32) * val };
     }
 
     inline operator Float256(void) const {
@@ -61,7 +61,7 @@ struct Fixed512
 
     inline Fixed512 operator * (const Fixed512& other) const {
         auto prod = Twice{ this->body } * other.body;
-        return Fixed512{ Once{ prod >> (512 - 64) } };
+        return Fixed512{ Once{ prod >> (512 - 32) } };
     }
 
     inline Fixed512& operator *= (const Fixed512& other) {

+ 2 - 0
libmandel/include/Hardware.h

@@ -26,6 +26,7 @@ private:
 
     bool sse2;
     bool avx;
+    bool fma;
     bool avx512;
     bool neon;
 public:
@@ -38,6 +39,7 @@ public:
 //#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
     inline bool hasSse2(void) const { return sse2; };
     inline bool hasAvx(void) const { return avx; };
+    inline bool hasFma(void) const { return fma; };
     inline bool hasAvx512(void) const { return avx512; };
 //#elif defined(__arm__) || defined(__aarch64__)
     inline bool hasNeon(void) const { return neon; };

+ 6 - 1
libmandel/include/Mandel.h

@@ -36,9 +36,11 @@ enum class mnd::GeneratorType
     DOUBLE_AVX512,
     DOUBLE_NEON,
     DOUBLE_DOUBLE,
+    DOUBLE_DOUBLE_AVX,
     QUAD_DOUBLE,
     FLOAT128,
-    FLOAT256
+    FLOAT256,
+    FIXED512
 };
 
 
@@ -63,6 +65,8 @@ public:
     inline const std::string& getName(void) const { return name; }
 
     Generator* getGenerator(GeneratorType type) const;
+
+    std::vector<GeneratorType> getSupportedTypes(void) const;
 };
 
 
@@ -93,6 +97,7 @@ public:
     const std::vector<MandelDevice>& getDevices(void);
 
     Generator* getCpuGenerator(mnd::GeneratorType type);
+    std::vector<GeneratorType> getSupportedTypes(void) const;
 
     const CpuInfo& getCpuInfo(void) const { return cpuInfo; }
 };

+ 2 - 0
libmandel/include/Types.h

@@ -6,6 +6,8 @@
 #include <string>
 #include "Fixed.h"
 
+#define WITH_QD
+
 #ifdef WITH_BOOST
 #   include <boost/multiprecision/cpp_bin_float.hpp>
 #   if defined(__GNUC__) || defined(__INTEL_COMPILER)

+ 2 - 2
libmandel/src/CpuGenerators.cpp

@@ -60,7 +60,7 @@ void CpuGenerator<T, mnd::NONE, parallel>::generate(const mnd::MandelInfo& info,
     T hpp = mnd::convert<T>(view.height / info.bHeight);
 
     if constexpr (parallel)
-        omp_set_num_threads(2 * omp_get_num_procs());
+        omp_set_num_threads(omp_get_num_procs());
 #pragma omp parallel for if (parallel)
     for (long j = 0; j < info.bHeight; j++) {
         T y = viewy + T(double(j)) * hpp;
@@ -78,7 +78,7 @@ void CpuGenerator<T, mnd::NONE, parallel>::generate(const mnd::MandelInfo& info,
                 T ab = a * b;
                 a = aa - bb + x;
                 b = ab + ab + y;
-                if (aa + bb > T(16)) {
+                if (aa + bb > T(16.0)) {
                     break;
                 }
             }

+ 180 - 2
libmandel/src/CpuGeneratorsAVX.cpp

@@ -4,6 +4,7 @@
 #include <omp.h>
 #include <cmath>
 
+#include <utility>
 #include <memory>
 
 using mnd::CpuGenerator;
@@ -15,6 +16,9 @@ namespace mnd
 
     template class CpuGenerator<double, mnd::X86_AVX, false>;
     template class CpuGenerator<double, mnd::X86_AVX, true>;
+
+    template class CpuGenerator<DoubleDouble, mnd::X86_AVX_FMA, false>;
+    template class CpuGenerator<DoubleDouble, mnd::X86_AVX_FMA, true>;
 }
 
 template<bool parallel>
@@ -28,6 +32,7 @@ void CpuGenerator<float, mnd::X86_AVX, parallel>::generate(const mnd::MandelInfo
 #pragma omp parallel for schedule(static, 1) if (parallel)
     for (long j = 0; j < info.bHeight; j++) {
         T y = T(view.y) + T(j) * T(view.height / info.bHeight);
+        __m256 ys = {y, y, y, y, y, y, y, y};
         long i = 0;
         for (i; i < info.bWidth; i += 8) {
             __m256 xs = {
@@ -48,7 +53,6 @@ void CpuGenerator<float, mnd::X86_AVX, parallel>::generate(const mnd::MandelInfo
 
             __m256 threshold = {16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f};
 
-            __m256 ys = {y, y, y, y, y, y, y, y};
             __m256 a = xs;
             __m256 b = ys;
 
@@ -109,6 +113,7 @@ void CpuGenerator<double, mnd::X86_AVX, parallel>::generate(const mnd::MandelInf
 #pragma omp parallel for schedule(static, 1) if (parallel)
     for (long j = 0; j < info.bHeight; j++) {
         T y = T(view.y + T(j) * view.height / info.bHeight);
+        __m256d ys = { y, y, y, y };
         long i = 0;
         for (i; i < info.bWidth; i += 4) {
             __m256d xs = {
@@ -124,7 +129,6 @@ void CpuGenerator<double, mnd::X86_AVX, parallel>::generate(const mnd::MandelInf
             __m256d counter = { 0, 0, 0, 0 };
             __m256d adder = { 1, 1, 1, 1 };
 
-            __m256d ys = { y, y, y, y };
             __m256d a = xs;
             __m256d b = ys;
 
@@ -162,3 +166,177 @@ void CpuGenerator<double, mnd::X86_AVX, parallel>::generate(const mnd::MandelInf
     }
 }
 
+static inline std::pair<__m256d, __m256d> quickTwoSum(__m256d a, __m256d b) // lane-wise a + b, returned as { rounded sum, correction term }
+{
+    __m256d s = _mm256_add_pd(a, b); // s = a + b, rounded
+    __m256d e = _mm256_sub_pd(b, _mm256_sub_pd(s, a)); // e = b - (s - a): the part of b that did not end up in s
+    return { s, e }; // NOTE(review): presumably exact only when |a| >= |b| in every lane (fast two-sum) — confirm callers guarantee this
+}
+
+static inline std::pair<__m256d, __m256d> quickTwoDiff(__m256d a, __m256d b) // lane-wise a - b, returned as { rounded difference, correction term }
+{
+    __m256d s = _mm256_sub_pd(a, b); // s = a - b, rounded
+    __m256d e = _mm256_sub_pd(_mm256_sub_pd(a, s), b); // e = (a - s) - b
+    return { s, e }; // NOTE(review): presumably exact only when |a| >= |b| in every lane — confirm before using
+}
+
+static inline std::pair<__m256d, __m256d> twoSum(__m256d a, __m256d b) // lane-wise a + b, returned as { rounded sum, correction term }
+{
+    __m256d s = _mm256_add_pd(a, b); // s = a + b, rounded
+    __m256d bb = _mm256_sub_pd(s, a); // bb = s - a: the amount of b that is represented in s
+    __m256d e = _mm256_add_pd(_mm256_sub_pd(a, _mm256_sub_pd(s, bb)), _mm256_sub_pd(b, bb)); // e = (a - (s - bb)) + (b - bb); unlike quickTwoSum, both operands are corrected
+    return { s, e };
+}
+
+static inline std::pair<__m256d, __m256d> twoDiff(__m256d a, __m256d b) // lane-wise a - b, returned as { rounded difference, correction term }
+{
+    __m256d s = _mm256_sub_pd(a, b); // s = a - b, rounded
+    __m256d bb = _mm256_sub_pd(s, a); // bb = s - a: the amount of -b that is represented in s
+    __m256d e = _mm256_sub_pd(_mm256_sub_pd(a, _mm256_sub_pd(s, bb)), _mm256_add_pd(b, bb)); // e = (a - (s - bb)) - (b + bb)
+    return { s, e };
+}
+
+/*
+static inline std::pair<__m256d, __m256d> split(__m256d a)
+{
+    static const __m256d SPLIT_THRESH = { 6.69692879491417e+299, 6.69692879491417e+299, 6.69692879491417e+299, 6.69692879491417e+299 };
+    static const __m256d MINUS_SPLIT_THRESH = { -6.69692879491417e+299, -6.69692879491417e+299, -6.69692879491417e+299, -6.69692879491417e+299 };
+    static const __m256d SPLITTER = { 134217729.0, 134217729.0, 134217729.0, 134217729.0};
+    __m256d temp;
+    __m256i cmp1 = _mm256_castpd_si256(_mm256_cmp_pd(a, SPLIT_THRESH, _CMP_GT_OQ));
+    __m256i cmp2 = _mm256_castpd_si256(_mm256_cmp_pd(a, MINUS_SPLIT_THRESH, _CMP_LT_OQ));
+    __m256i cmp = _mm256_or_si256
+}*/
+
+// Lane-wise product of a and b, returned as { p, e }: p is the rounded
+// product and e is the rounding error a * b - p, obtained from one fused
+// multiply-subtract. Requires FMA support; there is no non-FMA fallback yet.
+static inline std::pair<__m256d, __m256d> twoProd(__m256d a, __m256d b)
+{
+//#ifdef CPUID_FMA
+    __m256d p = _mm256_mul_pd(a, b);
+    // e = a * b - p with a single rounding (same as fma(a, b, -p) in
+    // quaddouble.cl). _mm256_fmadd_pd would yield a * b + p, i.e. about 2p,
+    // which is not an error term and corrupts every product built on it.
+    __m256d e = _mm256_fmsub_pd(a, b, p);
+    return { p, e };
+//#else
+/*    double a_hi, a_lo, b_hi, b_lo;
+    __m256d p = _mm256_mul_ps(a, b);
+    split(a, a_hi, a_lo);
+    split(b, b_hi, b_lo);
+    err = ((a_hi * b_hi - p) + a_hi * b_lo + a_lo * b_hi) + a_lo * b_lo;
+    return p;*/
+//#endif
+}
+
+struct AvxDoubleDouble // four two-component (double-double) values held as a pair of AVX vectors
+{
+    __m256d x[2]; // x[0]: leading component of each lane (used by twoProd and the escape test); x[1]: second component
+
+    inline AvxDoubleDouble(__m256d a, __m256d b) :
+        x{ a, b }
+    {}
+
+
+    inline AvxDoubleDouble operator + (const AvxDoubleDouble& sm) const // lane-wise sum
+    {
+        auto[s, e] = twoSum(x[0], sm.x[0]); // add the leading components, keeping the correction term
+        e = _mm256_add_pd(e, _mm256_add_pd(x[1], sm.x[1])); // fold both second components into the correction
+        auto[r1, r2] = quickTwoSum(s, e); // recombine into a { sum, correction } pair
+        return AvxDoubleDouble{ r1, r2 };
+    }
+
+    inline AvxDoubleDouble operator - (const AvxDoubleDouble& sm) const // lane-wise difference
+    {
+        auto[s, e] = twoDiff(x[0], sm.x[0]); // subtract the leading components, keeping the correction term
+        e = _mm256_add_pd(e, x[1]); // add our second component ...
+        e = _mm256_sub_pd(e, sm.x[1]); // ... and subtract the other operand's
+        auto[r1, r2] = quickTwoSum(s, e);
+        return AvxDoubleDouble{ r1, r2 };
+    }
+
+    inline AvxDoubleDouble operator * (const AvxDoubleDouble& sm) const // lane-wise product
+    {
+        auto[p1, p2] = twoProd(this->x[0], sm.x[0]); // product of the leading components plus its error term
+        p2 = _mm256_add_pd(p2,
+            _mm256_add_pd(_mm256_mul_pd(x[0], sm.x[1]), _mm256_mul_pd(x[1], sm.x[0])) ); // add the two cross terms; x[1] * sm.x[1] is not included
+        auto[r1, r2] = quickTwoSum(p1, p2);
+        return AvxDoubleDouble{ r1, r2 };
+    }
+};
+
+template<bool parallel>
+void CpuGenerator<mnd::DoubleDouble, mnd::X86_AVX_FMA, parallel>::generate(const mnd::MandelInfo& info, float* data) // writes one value per pixel into data, iterating 4 horizontally adjacent pixels per AVX vector
+{
+    const MandelViewport& view = info.view;
+
+    using T = DoubleDouble;
+
+    T viewx = mnd::convert<T>(view.x); // x coordinate of pixel column 0
+    T viewy = mnd::convert<T>(view.y); // y coordinate of pixel row 0
+    T wpp = mnd::convert<T>(view.width / info.bWidth); // view width per pixel column
+    T hpp = mnd::convert<T>(view.height / info.bHeight); // view height per pixel row
+
+    if constexpr(parallel)
+        omp_set_num_threads(2 * omp_get_num_procs()); // NOTE(review): this commit changes CpuGenerators.cpp to omp_get_num_procs(); confirm the factor 2 is intended here
+#pragma omp parallel for schedule(static, 1) if (parallel)
+    for (long j = 0; j < info.bHeight; j++) {
+        T y = viewy + T(double(j)) * hpp; // y coordinate of row j
+        __m256d y0s = { y.x[0], y.x[0], y.x[0], y.x[0] }; // broadcast both components of y to all 4 lanes
+        __m256d y1s = { y.x[1], y.x[1], y.x[1], y.x[1] };
+        AvxDoubleDouble ys{ y0s, y1s };
+        long i = 0;
+        for (i; i < info.bWidth; i += 4) { // the leading "i;" is a no-op expression
+            T x1 = viewx + T(double(i)) * wpp; // x coordinates of the 4 pixels handled in this step
+            T x2 = viewx + T(double(i + 1)) * wpp;
+            T x3 = viewx + T(double(i + 2)) * wpp;
+            T x4 = viewx + T(double(i + 3)) * wpp;
+
+            __m256d x0s = {
+                x1.x[0], x2.x[0], x3.x[0], x4.x[0],
+            };
+
+            __m256d x1s = {
+                x1.x[1], x2.x[1], x3.x[1], x4.x[1],
+            };
+
+            AvxDoubleDouble xs{ x0s, x1s };
+
+            int itRes[4] = { 0, 0, 0, 0 }; // not read anywhere in this function
+
+            __m256d threshold = { 16.0, 16.0, 16.0, 16.0 }; // escape bound compared against aa + bb
+            __m256d counter = { 0, 0, 0, 0 }; // per-lane iteration count, kept as doubles
+            __m256d adder = { 1, 1, 1, 1 }; // per-lane increment; masked to 0 once a lane fails the bound test
+
+            AvxDoubleDouble a = xs;
+            AvxDoubleDouble b = ys;
+
+            for (int k = 0; k < info.maxIter; k++) {
+                AvxDoubleDouble aa = a * a;
+                AvxDoubleDouble bb = b * b;
+                AvxDoubleDouble abab = a * b; abab = abab + abab; // 2ab
+                a = aa - bb + xs; // real part of z^2 + c
+                b = abab + ys; // imaginary part of z^2 + c
+                __m256i cmp = _mm256_castpd_si256(_mm256_cmp_pd(_mm256_add_pd(aa.x[0], bb.x[0]), threshold, _CMP_LE_OQ)); // lane mask: aa + bb <= 16, using the leading components only
+                /*if (info.smooth) {
+                    resultsa = _mm256_or_pd(_mm256_andnot_ps(cmp, resultsa), _mm256_and_ps(cmp, a));
+                    resultsb = _mm256_or_ps(_mm256_andnot_ps(cmp, resultsb), _mm256_and_ps(cmp, b));
+                }*/
+                adder = _mm256_and_pd(adder, _mm256_castsi256_pd(cmp)); // a lane's increment is cleared the first time its mask is false and stays cleared
+                counter = _mm256_add_pd(counter, adder);
+                if ((k & 0x7) == 0 && _mm256_testz_si256(cmp, cmp) != 0) { // only every 8th iteration: stop when the mask is zero in all lanes
+                    break;
+                }
+            }
+
+            auto alignVec = [](double* data) -> double* { // returns a 32-byte aligned pointer inside the given buffer
+                void* aligned = data;
+                ::size_t length = 64;
+                std::align(32, 4 * sizeof(double), aligned, length);
+                return static_cast<double*>(aligned);
+            };
+
+            double resData[8]; // 64 bytes, so an aligned run of 4 doubles always fits
+            double* ftRes = alignVec(resData);
+            _mm256_store_pd(ftRes, counter); // aligned store of the 4 lane counters
+            for (int k = 0; k < 4 && i + k < info.bWidth; k++) // skip lanes beyond the right image edge
+                data[i + k + j * info.bWidth] = ftRes[k] > 0 ? float(ftRes[k]) : info.maxIter; // a counter of 0 is written as maxIter
+        }
+    }
+}
+

+ 1 - 0
libmandel/src/Generators.cpp

@@ -47,6 +47,7 @@ void AdaptiveGenerator::generate(const mnd::MandelInfo& info, float* data)
     auto firstSmaller = generators.lower_bound(neededPrecision);
     if (firstSmaller != generators.end()) {
         //printf("use generator with precision: %s\n", mnd::toString(firstSmaller->first).c_str());
+        //printf("gen: %p\n", firstSmaller->second);fflush(stdout);
         firstSmaller->second->generate(info, data);
     }
     else {

+ 2 - 0
libmandel/src/Hardware.cpp

@@ -20,6 +20,7 @@ using mnd::CpuInfo;
 CpuInfo::CpuInfo(void) :
     sse2{ false },
     avx{ false },
+    fma{ false },
     avx512{ false },
     neon{ false }
 {
@@ -106,6 +107,7 @@ CpuInfo::CpuInfo(void) :
 
     sse2 = edx1[26];
     avx = ecx1[28];
+    fma = ecx1[12];
     avx512 = ebx7[16];
 }
 

+ 32 - 7
libmandel/src/Mandel.cpp

@@ -33,32 +33,44 @@ mnd::Generator* MandelDevice::getGenerator(mnd::GeneratorType type) const
 }
 
 
+// Returns the GeneratorType key of every entry in this device's generators
+// container, in the container's iteration order.
+std::vector<mnd::GeneratorType> MandelDevice::getSupportedTypes(void) const
+{
+    std::vector<GeneratorType> types;
+    // The final size is known up front, so allocate once instead of growing.
+    types.reserve(generators.size());
+    for (const auto& [type, gen] : generators) {
+        types.push_back(type);
+    }
+    return types;
+}
+
+
 MandelContext::MandelContext(void)
 {
 
 #if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) 
     if (cpuInfo.hasAvx()) {
         //auto fl = std::make_unique<CpuGenerator<float, mnd::X86_AVX, true>>();
-        auto fl = std::make_unique<CpuGenerator<Fixed512, mnd::NONE, true>>();
+        auto fl = std::make_unique<CpuGenerator<float, mnd::X86_AVX, true>>();
         auto db = std::make_unique<CpuGenerator<double, mnd::X86_AVX, true>>();
         cpuGenerators.insert({ GeneratorType::FLOAT_AVX, std::move(fl) });
-        cpuGenerators.insert({ GeneratorType::DOUBLE_AVX512, std::move(db) });
+        cpuGenerators.insert({ GeneratorType::DOUBLE_AVX, std::move(db) });
+        if (cpuInfo.hasFma()) {
+            auto ddavx = std::make_unique<CpuGenerator<DoubleDouble, mnd::X86_AVX_FMA, true>>();
+            cpuGenerators.insert({ GeneratorType::DOUBLE_DOUBLE_AVX, std::move(ddavx) });
+        }
     }
-    else if (cpuInfo.hasSse2()) {
+    if (cpuInfo.hasSse2()) {
         auto fl = std::make_unique<CpuGenerator<float, mnd::X86_SSE2, true>>();
         auto db = std::make_unique<CpuGenerator<double, mnd::X86_SSE2, true>>();
         cpuGenerators.insert({ GeneratorType::FLOAT_SSE2, std::move(fl) });
         cpuGenerators.insert({ GeneratorType::DOUBLE_SSE2, std::move(db) });
     }
-    else
 #elif defined(__aarch64__)
-    if (true) {
+    if (cpuInfo.hasNeon()) {
         auto fl = std::make_unique<CpuGenerator<float, mnd::ARM_NEON, true>>();
         auto db = std::make_unique<CpuGenerator<double, mnd::ARM_NEON, true>>();
         cpuGenerators.insert({ GeneratorType::FLOAT_NEON, std::move(fl) });
         cpuGenerators.insert({ GeneratorType::DOUBLE_NEON, std::move(db) });
     }
-    else
 #endif
     {
         auto fl = std::make_unique<CpuGenerator<float, mnd::NONE, true>>();
@@ -78,9 +90,12 @@ MandelContext::MandelContext(void)
     auto dd = std::make_unique<CpuGenerator<DoubleDouble, mnd::NONE, true>>();
     auto qd = std::make_unique<CpuGenerator<QuadDouble, mnd::NONE, true>>();
     cpuGenerators.insert({ GeneratorType::DOUBLE_DOUBLE, std::move(dd) });
-    cpuGenerators.insert({ GeneratorType::DOUBLE_DOUBLE, std::move(qd) });
+    cpuGenerators.insert({ GeneratorType::QUAD_DOUBLE, std::move(qd) });
 #endif // WITH_QD
 
+    auto fix512 = std::make_unique<CpuGenerator<Fixed512, mnd::NONE, true>>();
+    cpuGenerators.insert({ GeneratorType::FIXED512, std::move(fix512) });
+
     devices = createDevices();
 
     adaptiveGenerator = createAdaptiveGenerator();
@@ -217,3 +232,13 @@ Generator* MandelContext::getCpuGenerator(mnd::GeneratorType type)
     else
         return nullptr;
 }
+
+
+// Returns the GeneratorType key of every entry in cpuGenerators, in the
+// container's iteration order.
+std::vector<mnd::GeneratorType> MandelContext::getSupportedTypes(void) const
+{
+    std::vector<GeneratorType> types;
+    // The final size is known up front, so allocate once instead of growing.
+    types.reserve(cpuGenerators.size());
+    for (const auto& [type, gen] : cpuGenerators) {
+        types.push_back(type);
+    }
+    return types;
+}

+ 89 - 0
libmandel/src/quaddouble.cl

@@ -0,0 +1,89 @@
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+inline double2 twoSum(double a, double b) { // a + b as (rounded sum, correction term)
+    double s = a + b; // rounded sum
+    double bb = s - a; // the amount of b that is represented in s
+    double e = (a - (s - bb)) + (b - bb); // what a and b each lost to rounding
+    return (double2)(s, e);
+}
+
+inline double2 quickTwoSum(double a, double b) { // a + b as (rounded sum, correction term), with fewer operations than twoSum
+    double s = a + b; // rounded sum
+    double e = b - (s - a); // the part of b that did not end up in s
+    return (double2)(s, e); // NOTE(review): presumably exact only when |a| >= |b| — confirm callers guarantee this
+}
+
+inline double2 twoProd(double a, double b) { // a * b as (rounded product, rounding error)
+//#ifdef QD_FMS
+    double p = a * b; // rounded product
+    double e = fma(a, b, -p); // a * b - p evaluated with a single rounding
+    return (double2)(p, e);
+//#else
+//  double a_hi, a_lo, b_hi, b_lo;
+//  double p = a * b;
+//  split(a, a_hi, a_lo);
+//  split(b, b_hi, b_lo);
+//  err = ((a_hi * b_hi - p) + a_hi * b_lo + a_lo * b_hi) + a_lo * b_lo;
+//  return p;
+//#endif
+}
+
+inline double2 mul(double2 a, double2 b) { // product of two two-component values (s0 leading, s1 second component)
+    double2 p = twoProd(a.s0, b.s0); // product of the leading components plus its error term
+    p.s1 += (a.s0 * b.s1 + a.s1 * b.s0); // add the two cross terms; a.s1 * b.s1 is not included
+    return quickTwoSum(p.s0, p.s1); // recombine into a (value, correction) pair
+}
+
+inline double2 add(double2 a, double2 b) { // sum of two two-component values
+    double2 se = twoSum(a.s0, b.s0); // add the leading components, keeping the correction term
+    se.s1 += a.s1 + b.s1; // fold both second components into the correction
+    return quickTwoSum(se.s0, se.s1); // recombine into a (value, correction) pair
+}
+
+inline double2 mulDouble(double2 a, double b) { // product of a two-component value and a plain double
+    double2 p = twoProd(a.s0, b); // leading component times b, plus its error term
+    p.s1 += a.s1 * b; // second component times b goes into the correction
+    return quickTwoSum(p.s0, p.s1);
+}
+
+__kernel void iterate(__global float* A, const int width, // one work-item per pixel; writes its result to A[get_global_id(0)]
+                      double x1, double x2, double y1, double y2, // (x1, x2) and (y1, y2): the two components of the left and top coordinate
+                      double pw1, double pw2, double ph1, double ph2, int max, int smooth) { // (pw1, pw2), (ph1, ph2): two-component per-pixel step in x and y
+    int index = get_global_id(0);
+    int px = index % width; // pixel column
+    int py = index / width; // pixel row
+
+    double2 xl = (double2)(x1, x2); // pack each argument pair into one double2
+    double2 yt = (double2)(y1, y2);
+    double2 pixelScaleX = (double2)(pw1, pw2);
+    double2 pixelScaleY = (double2)(ph1, ph2);
+
+    double2 a = add(mulDouble(pixelScaleX, (double) px), xl); // pixelScaleX * px + xl
+    double2 b = add(mulDouble(pixelScaleY, (double) py), yt); // pixelScaleY * py + yt
+    double2 ca = a; // keep the starting point; it is added back in every iteration
+    double2 cb = b;
+
+    int n = 0;
+    while (n < max - 1) { // at most max - 1 iterations
+        double2 aa = mul(a, a);
+        double2 bb = mul(b, b);
+        double2 ab = mul(a, b);
+        if (aa.s0 + aa.s1 + bb.s0 + bb.s1 > 16) break; // escape test on a^2 + b^2, done before a and b are updated
+        double2 minusbb = (double2)(-bb.s0, -bb.s1); // negate both components so add() can subtract
+        a = add(add(aa, minusbb), ca); // a^2 - b^2 + ca
+        b = add(add(ab, ab), cb); // 2ab + cb
+        n++;
+    }
+
+    // N + 1 - log (log  |Z(N)|) / log 2
+    if (n >= max - 1) // loop ran to its limit without passing the escape test
+        A[index] = max;
+    else {
+        if (smooth != 0)
+            A[index] = ((float) n) + 1 - log(log(a.s0 * a.s0 + b.s0 * b.s0) / 2) / log(2.0f); // smoothed count; uses only the leading components of a and b
+        else
+            A[index] = ((float)n); // plain iteration count
+    }
+    //               A[index] = ((float)n) + 1 - (a * a + b * b - 16) / (256 - 16);
+    //           A[get_global_id(0)] = 5;
+}