Nicolas Winkler 6 éve
szülő
commit
22697b66a1

+ 10 - 7
Almond.cpp

@@ -5,12 +5,13 @@
 #include "benchmarkdialog.h"
 
 Almond::Almond(QWidget *parent) :
-    QMainWindow(parent)
+    QMainWindow(parent),
+    mandelContext(mnd::initializeContext())
 {
     ui.setupUi(this);
     printf("not yet created!\n");
-    mw = std::make_unique<MandelWidget>(ui.centralWidget);
-    qRegisterMetaType<MandelWidget>("MandelWidget");
+    mw = std::make_unique<MandelWidget>(mandelContext, ui.centralWidget);
+    //qRegisterMetaType<MandelWidget>("MandelWidget");
     printf("created!\n");
     ui.verticalLayout_left->addWidget(mw.get());
     //ui.verticalLayout_left->addWidget(new MyGLWidget(ui.centralWidget));
@@ -22,14 +23,16 @@ void Almond::on_pushButton_clicked()
     ExportImageDialog dialog(this);
     auto response = dialog.exec();
     if (response == 1) {
-        MandelInfo mi;
+        mnd::MandelInfo mi;
         mi.maxIter = dialog.getMaxIterations();
         mi.view = mw->getViewport();
         mi.bWidth = dialog.getWidth();
         mi.bHeight = dialog.getHeight();
         mi.view.adjustAspectRatio(mi.bWidth, mi.bHeight);
-        CpuGenerator<double> cpg;
-        auto bitmap = cpg.generate(mi);
+        mnd::Generator& g = mandelContext.getDefaultGenerator();
+        auto fmap = Bitmap<float>(mi.bWidth, mi.bHeight);
+        g.generate(mi, fmap.pixels.get());
+        auto bitmap = fmap.map<RGBColor>([](float i) { return i < 0 ? RGBColor{ 0,0,0 } : RGBColor{ uint8_t(cos(i * 0.015f) * 127 + 127), uint8_t(sin(i * 0.01f) * 127 + 127), uint8_t(i) }; });//uint8_t(::sin(i * 0.01f) * 100 + 100), uint8_t(i) }; });
         QImage img((unsigned char*)bitmap.pixels.get(), bitmap.width, bitmap.height, bitmap.width * 3, QImage::Format_RGB888);
         img.save(dialog.getPath());
     }
@@ -90,6 +93,6 @@ void ExportImageDialog::on_buttonBox_accepted()
 
 void Almond::on_pushButton_2_clicked()
 {
-    BenchmarkDialog bd(this);
+    BenchmarkDialog bd(mandelContext, this);
     bd.exec();
 }

+ 2 - 0
Almond.h

@@ -5,6 +5,7 @@
 #include "ui_Almond.h"
 #include "ui_exportimagedialog.h"
 
+#include <Mandel.h>
 #include "MandelWidget.h"
 
 #include <memory>
@@ -13,6 +14,7 @@ class Almond : public QMainWindow
 {
     Q_OBJECT
 private:
+    mnd::MandelContext mandelContext;
     std::unique_ptr<MandelWidget> mw;
 public:
     Almond(QWidget *parent = Q_NULLPTR);

+ 23 - 17
Almond.pro

@@ -28,9 +28,7 @@ CONFIG += c++17
 SOURCES += \
         Almond.cpp \
         Bitmap.cpp \
-        Generators.cpp \
         MandelWidget.cpp \
-        QueueManager.cpp \
         SectionManager.cpp \
         VideoStream.cpp \
         benchmarkdialog.cpp \
@@ -41,10 +39,7 @@ HEADERS += \
         Bitmap.h \
         Color.h \
         Fixed.h \
-        Generators.h \
-        GenericMandelbrot.h \
         MandelWidget.h \
-        QueueManager.h \
         SectionManager.h \
         VideoStream.h \
         benchmarkdialog.h
@@ -63,15 +58,13 @@ else: unix:!android: target.path = /opt/$${TARGET}/bin
 win32:LIBS += -lopengl32
 else:LIBS += -lOpenGL
 
-win32:QMAKE_CXXFLAGS+= -openmp
-else:unix:QMAKE_CXXFLAGS+= -fopenmp
-win32:QMAKE_LFLAGS +=  -openmp
-else:unix:QMAKE_LFLAGS+= -fopenmp
-LIBS += -fopenmp
+#win32:QMAKE_CXXFLAGS+= -openmp
+#else:unix:QMAKE_CXXFLAGS+= -fopenmp
+#win32:QMAKE_LFLAGS +=  -openmp
+#else:unix:QMAKE_LFLAGS+= -fopenmp
+#LIBS += -fopenmp
 unix:LIBS += -lm -latomic
 
-QMAKE_CXXFLAGS += -mavx
-
 win32:CONFIG(release, debug|release): LIBS += -L$$PWD/../libs/ffmpeg-4.1.1-win32-dev/lib/ -lavcodec
 else:win32:CONFIG(debug, debug|release): LIBS += -L$$PWD/../libs/ffmpeg-4.1.1-win32-dev/lib/ -lavcodec
 else:unix: LIBS += -L$$PWD/../libs/ffmpeg-4.1.1-win32-dev/lib/ -lavcodec
@@ -80,12 +73,12 @@ INCLUDEPATH += $$PWD/../libs/ffmpeg-4.1.1-win32-dev/include
 DEPENDPATH += $$PWD/../libs/ffmpeg-4.1.1-win32-dev/include
 
 
-win32:CONFIG(release, debug|release): LIBS += -L$$PWD/'../../../../../Program Files (x86)/AMD APP SDK/3.0/lib/x86/' -lOpenCL
-else:win32:CONFIG(debug, debug|release): LIBS += -L$$PWD/'../../../../../Program Files (x86)/AMD APP SDK/3.0/lib/x86/' -lOpenCL
-else:unix: LIBS += -lOpenCL
+#win32:CONFIG(release, debug|release): LIBS += -L$$PWD/'../../../../../Program Files (x86)/AMD APP SDK/3.0/lib/x86/' -lOpenCL
+#else:win32:CONFIG(debug, debug|release): LIBS += -L$$PWD/'../../../../../Program Files (x86)/AMD APP SDK/3.0/lib/x86/' -lOpenCL
+#else:unix: LIBS += -lOpenCL
 
-win32:INCLUDEPATH += $$PWD/'../../../../../Program Files (x86)/AMD APP SDK/3.0/include'
-win32:DEPENDPATH += $$PWD/'../../../../../Program Files (x86)/AMD APP SDK/3.0/include'
+#win32:INCLUDEPATH += $$PWD/'../../../../../Program Files (x86)/AMD APP SDK/3.0/include'
+#win32:DEPENDPATH += $$PWD/'../../../../../Program Files (x86)/AMD APP SDK/3.0/include'
 
 win32:CONFIG(release, debug|release): LIBS += -L$$PWD/../libs/ffmpeg-4.1.1-win32-dev/lib/ -lavformat
 else:win32:CONFIG(debug, debug|release): LIBS += -L$$PWD/../libs/ffmpeg-4.1.1-win32-dev/lib/ -lavformat
@@ -124,3 +117,16 @@ DEPENDPATH += $$PWD/libmandel/include
 
 win32:!win32-g++: PRE_TARGETDEPS += $$PWD/libmandel/mandel.lib
 else:unix|win32-g++: PRE_TARGETDEPS += $$PWD/libmandel/libmandel.a
+
+
+win32:CONFIG(release, debug|release): LIBS += -L$$PWD/'../../../../../Program Files (x86)/OCL_SDK_Light/lib/x86/' -lopencl
+else:win32:CONFIG(debug, debug|release): LIBS += -L$$PWD/'../../../../../Program Files (x86)/OCL_SDK_Light/lib/x86/' -lopencl
+else:unix: LIBS += -L$$PWD/'../../../../../Program Files (x86)/OCL_SDK_Light/lib/x86/' -lopencl
+
+INCLUDEPATH += $$PWD/'../../../../../Program Files (x86)/OCL_SDK_Light/include'
+DEPENDPATH += $$PWD/'../../../../../Program Files (x86)/OCL_SDK_Light/include'
+
+win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$PWD/'../../../../../Program Files (x86)/OCL_SDK_Light/lib/x86/libopencl.a'
+else:win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$PWD/'../../../../../Program Files (x86)/OCL_SDK_Light/lib/x86/libopencl.a'
+else:win32:!win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$PWD/'../../../../../Program Files (x86)/OCL_SDK_Light/lib/x86/opencl.lib'
+else:win32:!win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$PWD/'../../../../../Program Files (x86)/OCL_SDK_Light/lib/x86/opencl.lib'

+ 0 - 254
Generators.cpp

@@ -1,254 +0,0 @@
-#include "Generators.h"
-
-#include "GenericMandelbrot.h"
-#include "Fixed.h"
-
-#include <iostream>
-#include <iterator>
-
-
-using namespace cl;
-
-Platform getPlatform() {
-    /* Returns the first platform found. */
-    std::vector<Platform> all_platforms;
-    Platform::get(&all_platforms);
-
-    if (all_platforms.size()==0) {
-        std::cout << "No platforms found. Check OpenCL installation!\n";
-        exit(1);
-    }
-    return all_platforms[0];
-}
-
-
-Device getDevice(Platform platform, int i, bool display = false) {
-    /* Returns the deviced specified by the index i on platform.
-    * If display is true, then all of the platforms are listed.
-    */
-    std::vector<Device> all_devices;
-    platform.getDevices(CL_DEVICE_TYPE_GPU, &all_devices);
-    if (all_devices.size() == 0) {
-        std::cout << "No devices found. Check OpenCL installation!\n";
-        exit(1);
-    }
-
-    if (display) {
-        for (::size_t j = 0; j < all_devices.size(); j++) {
-            printf("Device %d: %s\n", int(j), all_devices[j].getInfo<CL_DEVICE_NAME>().c_str());
-            printf("preferred float width: %d\n", all_devices[j].getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>());
-            printf("vendor: %s\n", all_devices[j].getInfo<CL_DEVICE_VENDOR>().c_str());
-        }
-    }
-    return all_devices[i];
-}
-
-
-ClGenerator::ClGenerator(void)
-{
-    Platform p = getPlatform();
-    device = getDevice(p, 0, true);
-    context = Context{ device };
-    Program::Sources sources;
-
-    std::string kcode;
- 
-
-    // TODO check for overflow
-    if (false && device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>() == 4) {
-        kcode =
-            "__kernel void iterate(__global float* A, const int width, float xl, float yt, float pixelScaleX, float pixelScaleY, int max) {\n"
-            "   int index = get_global_id(0) * 4;\n"
-            "   int x = index % (width);\n"
-            "   int y = index / (width);\n"
-            "   float4 av = (float4)(x * pixelScaleX + xl, (x + 1) * pixelScaleX + xl, (x + 2) * pixelScaleX + xl, (x + 3) * pixelScaleX + xl);\n"
-    //                        "(x + 4) * pixelScale + xl, (x + 5) * pixelScale + xl, (x + 6) * pixelScale + xl, (x + 7) * pixelScale + xl);\n"
-            "   float4 bv = (float4)(y * pixelScaleY + yt);\n"
-            "   float4 ca = av;\n"
-            "   float4 cb = bv;\n"
-            ""
-            "   int4 counter = (int4) 1;"
-            "   float4 threshold = (float4) 16;"
-            "   int n = 0;\n"
-            "   while (n < max) {\n"
-            "       float4 aa = av * av;\n"
-            "       float4 bb = bv * bv;\n"
-            "       float4 ab = av * bv;\n"
-            "       av = aa - bb + ca;\n"
-            "       bv = 2 * ab + cb;\n"
-            "       counter += -(threshold > (aa + bb));\n"
-            "       if(all(threshold < (aa + bb))) break;\n"
-            "       //if (aa + bb > 16) break;\n"
-            "       n++;\n"
-            "   }\n\n"
-            "   A[index] = (float) counter[0];\n"
-            "   A[index + 1] = (float) counter[1];\n"
-            "   A[index + 2] = (float) counter[2];\n"
-            "   A[index + 3] = (float) counter[3];\n"
-    /*        "   A[index + 4] = (float) counter[4];\n"
-            "   A[index + 5] = (float) counter[5];\n"
-            "   A[index + 6] = (float) counter[6];\n"
-            "   A[index + 7] = (float) counter[7];\n"*/
-    //        "   A[get_global_id(0)] = 1;\n"
-            "}\n";
-    }
-    else {
-
-        kcode =
-    //        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
-            "__kernel void iterate(__global float* A, const int width, float xl, float yt, float pixelScaleX, float pixelScaleY, int max) {"
-            "   int index = get_global_id(0);\n"
-            "   int x = index % width;"
-            "   int y = index / width;"
-            "   float a = x * pixelScaleX + xl;"
-            "   float b = y * pixelScaleY + yt;"
-            "   float ca = a;"
-            "   float cb = b;"
-            ""
-            "   int n = 0;"
-            "   while (n < max) {"
-            "       float aa = a * a;"
-            "       float bb = b * b;"
-            "       float ab = a * b;"
-            "       if (aa + bb > 16) break;"
-            "       a = aa - bb + ca;"
-            "       b = 2 * ab + cb;"
-            "       n++;"
-            "   }\n"
-                // N + 1 - log (log  |Z(N)|) / log 2
-            "   A[index] = ((float)n) + 1 - log(log(a * a + b * b) / 2) / log(2.0f);\n"
-//            "   A[index] = ((float)n) + 1 - (a * a + b * b - 16) / (256 - 16);\n"
-    //        "   A[get_global_id(0)] = 5;"
-            "}";
-    }
-
-
-    sources.push_back({ kcode.c_str(), kcode.length() });
-
-    program = Program{ context, sources };
-    if (program.build({ device }) != CL_SUCCESS) {
-        std::cout << "Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << std::endl;
-        exit(1);
-    }
-
-    queue = CommandQueue(context, device);
-}
-
-/*Bitmap<RGBColor> ClGenerator::generate(const MandelInfo& info)
-{
-    return enqueueMandelbrot(info.bWidth, info.bHeight, info.view.x, info.view.y, info.view.width).get();
-}*/
-
-Bitmap<float> ClGenerator::generateRaw(const MandelInfo& info)
-{
-    ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
-    
-    Bitmap<float> bitmap{ info.bWidth, info.bHeight };
-    Buffer buffer_A(context, CL_MEM_READ_WRITE, bufferSize);
-    float pixelScaleX = info.view.width / info.bWidth;
-    float pixelScaleY = info.view.height / info.bHeight;
-
-    Kernel iterate = Kernel(program, "iterate");
-    iterate.setArg(0, buffer_A);
-    iterate.setArg(1, int(info.bWidth));
-    iterate.setArg(2, float(info.view.x));
-    iterate.setArg(3, float(info.view.y));
-    iterate.setArg(4, float(pixelScaleX));
-    iterate.setArg(5, float(pixelScaleY));
-    iterate.setArg(6, int(info.maxIter));
-
-    // TODO check for overflow
-    if (false && device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>() == 4) {
-        queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight / 4));
-    } else {
-        queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
-    }
-    queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, bitmap.pixels.get());
-
-    return bitmap;
-}
-
-std::future<Bitmap<RGBColor>> ClGenerator::enqueueMandelbrot(long width, long height, float x, float y, float fwidth)
-{
-    x = x - fwidth / 2;
-    y = y - fwidth * height / width / 2;
-    auto mandelCreator = [width, height, x, y, fwidth, this] () -> Bitmap<RGBColor> {
-        ::size_t bufferSize = width * height * sizeof(float);
-        Bitmap<float> bitmap{ width, height };
-        Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
-        //CommandQueue queue(context, device);
-        //queue.enqueueWriteBuffer(buffer_A, CL_TRUE, 0, bufferSize, A);
-
-        /*float x = -2.3;
-        float y = -1.5;*/
-        float pixelScale = fwidth / width;
-
-        Kernel iterate = Kernel(program, "iterate");
-        iterate.setArg(0, buffer_A);
-        iterate.setArg(1, width);
-        iterate.setArg(2, x);
-        iterate.setArg(3, y);
-        iterate.setArg(4, pixelScale);
-
-        queue.enqueueNDRangeKernel(iterate, NullRange, NDRange(width * height), NDRange(32));
-
-
-        queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, bitmap.pixels.get());
-
-        auto converted = bitmap.map<RGBColor>([](float i) { return i < 0 ? RGBColor{ 0,0,0 } : RGBColor{ uint8_t(cos(i * 0.015f) * 127 + 127), uint8_t(sin(i * 0.01f) * 127 + 127), uint8_t(i) }; });//uint8_t(::sin(i * 0.01f) * 100 + 100), uint8_t(i) }; });
-        return converted;
-    };
-    //return std::future<Bitmap<RGBColor>(mandelCreator(), );
-    return std::async(/*std::launch::deferred,*/ mandelCreator);
-}
-
-
-/*
-std::future<Bitmap<RGBColor>> createMandelbrot()
-{
-    auto mandelCreator = [] () -> Bitmap<RGBColor> {
-        Bitmap<int> bitmap{1024, 1024};
-        calculateMandel(bitmap);
-        return bitmap.map<RGBColor>([](int x) { return RGBColor{ unsigned char(x), unsigned char(x), unsigned char(x) }; });
-    };
-    return std::async(mandelCreator);
-}
-
-*/
-
-std::future<Bitmap<RGBColor>> createHPM()
-{
-    /*auto mandelCreator = [] () -> Bitmap<RGBColor> {
-        Fixed128 smallFact { 10000ULL, 0 };
-        Bitmap<float> bitmap{ 128, 128 };
-        for (::size_t y = 0; y < bitmap.height; y++) {
-            for (::size_t x = 0; x < bitmap.width; x++) {
-                Fixed128 a = Fixed128(x) * smallFact;
-                Fixed128 b = Fixed128(y) * smallFact;
-                bitmap.get(x, y) = iterate<Fixed128>(a, b, 250);
-            }
-        }
-        return bitmap.map<RGBColor>([](float i) { return i < 0 ? RGBColor{ 0,0,0 } : RGBColor{ uint8_t(cos(i * 0.015f) * 127 + 127), uint8_t(sin(i * 0.01f) * 127 + 127), uint8_t(i) }; });//uint8_t(::sin(i * 0.01f) * 100 + 100), uint8_t(i) }; });
-    };*/
-    double xx = -10.6;
-    double yy = 4.7;
-    Fixed128 x = xx;
-    Fixed128 y = yy;
-
-    std::cout << double(-x) << " * " << double(-y) << " = " << double(x * y) << " --> " << (xx * yy) << std::endl;
-    //exit(0);
-
-    auto mandelCreator = [] () -> Bitmap<RGBColor> {
-        Bitmap<float> bitmap{ 512, 512 };
-        for (::size_t y = 0; y < bitmap.height; y++) {
-            for (::size_t x = 0; x < bitmap.width; x++) {
-                Fixed128 a = x * 2.0 / bitmap.width - 1;
-                Fixed128 b = y * 2.0 / bitmap.height - 1;
-                bitmap.get(x, y) = iterate<Fixed128>(a, b, 250);
-            }
-        }
-        return bitmap.map<RGBColor>([](float i) { return i < 0 ? RGBColor{ 0,0,0 } : RGBColor{ uint8_t(cos(i * 0.015f) * 127 + 127), uint8_t(sin(i * 0.01f) * 127 + 127), uint8_t(i) }; });//uint8_t(::sin(i * 0.01f) * 100 + 100), uint8_t(i) }; });
-    };
-
-    return std::async(mandelCreator);
-}

+ 0 - 196
Generators.h

@@ -1,196 +0,0 @@
-#pragma once
-#ifndef MANDELQUEUE_H_
-#define MANDELQUEUE_H_
-
-#include "QueueManager.h"
-#include "GenericMandelbrot.h"
-#include <omp.h>
-#include <cmath>
-#include <future>
-#include <cstdlib>
-
-#ifdef __APPLE__
-#include <OpenCL/cl.hpp>
-#else
-#include <CL/cl.hpp>
-#endif
-
-#include <immintrin.h>
-
-
-class ClGenerator : public MandelGenerator
-{
-    cl::Device device;
-    cl::Context context;
-    cl::Program program;
-    cl::CommandQueue queue;
-public:
-    ClGenerator(void);
-    ~ClGenerator(void) = default;
-
-    //Bitmap<RGBColor> generate(const MandelInfo& info);
-    Bitmap<float> generateRaw(const MandelInfo& info);
-
-    std::future<Bitmap<RGBColor>> enqueueMandelbrot(long width, long height, float x, float y, float fwidth);
-};
-
-
-std::future<Bitmap<RGBColor>> createHPM();
-
-
-template<typename T>
-class CpuGenerator : public MandelGenerator
-{
-public:
-
-    Bitmap<float> generateRaw(const MandelInfo& info)
-    {
-        const MandelViewport& view = info.view;
-        Bitmap<float> res{ info.bWidth, info.bHeight };
-
-        omp_set_num_threads(2 * omp_get_num_procs());
-#pragma omp parallel for
-        for (int j = 0; j < res.height; j++) {
-            T y = T(view.y) + T(j) * view.height / res.height;
-            for (::size_t i = 0; i < res.width; i++) {
-                T x = T(view.x) + T(i) * view.width / res.width;
-                res.get(i, j) = iterate<T>(x, y, info.maxIter);
-            }
-        }
-        return res;
-    }
-};
-
-
-template<>
-inline Bitmap<float> CpuGenerator<double>::generateRaw(const MandelInfo& info)
-{
-    using T = double;
-    const MandelViewport& view = info.view;
-    Bitmap<float> res{ info.bWidth, info.bHeight };
-
-    omp_set_num_threads(2 * omp_get_num_procs());
-#pragma omp parallel for
-    for (long j = 0; j < res.height; j++) {
-        T y = T(view.y) + T(j) * view.height / res.height;
-        long i = 0;
-        for (i; i < res.width; i += 4) {
-            __m256d xs = {
-                double(view.x) + double(i) * view.width / res.width,
-                double(view.x) + double(i + 1) * view.width / res.width,
-                double(view.x) + double(i + 2) * view.width / res.width,
-                double(view.x) + double(i + 3) * view.width / res.width
-            };
-
-            int itRes[4] = {0, 0, 0, 0};
-
-            __m256d threshold = {16.0, 16.0, 16.0, 16.0};
-            __m256d counter = {0, 0, 0, 0};
-            __m256d adder = {1, 1, 1, 1};
-
-            __m256d ys = {y, y, y, y};
-            __m256d a = xs;
-            __m256d b = ys;
-
-            for (int k = 0; k < info.maxIter; k++) {
-                __m256d aa = _mm256_mul_pd(a, a);
-                __m256d bb = _mm256_mul_pd(b, b);
-                __m256d abab = _mm256_mul_pd(a, b); abab = _mm256_add_pd(abab, abab);
-                a = _mm256_add_pd(_mm256_sub_pd(aa, bb), xs);
-                b = _mm256_add_pd(abab, ys);
-                __m256i cmp = _mm256_castpd_si256(_mm256_cmp_pd(_mm256_add_pd(aa, bb), threshold, _CMP_LE_OQ));
-                adder = _mm256_and_pd(adder, _mm256_castsi256_pd(cmp));
-                counter = _mm256_add_pd(counter, adder);
-                if (_mm256_testz_si256(cmp, cmp) != 0) {
-                    break;
-                }
-            }
-
-            auto alignVec = [](double* data) -> double* {
-                void* aligned = data;
-                ::size_t length = 64;
-                std::align(32, 4 * sizeof(double), aligned, length);
-                return static_cast<double*>(aligned);
-            };
-
-            double resData[8];
-            double* ftRes = alignVec(resData);
-            _mm256_store_pd(ftRes, counter);
-            for (int k = 0; k < 4 && i + k < res.width; k++)
-                res.get(i + k, j) = ftRes[k] > 0 ? float(ftRes[k]) : info.maxIter;
-        }
-    }
-
-    return res;
-}
-
-
-template<>
-inline Bitmap<float> CpuGenerator<float>::generateRaw(const MandelInfo& info)
-{
-    using T = float;
-    const MandelViewport& view = info.view;
-    Bitmap<float> res{ info.bWidth, info.bHeight };
-
-    omp_set_num_threads(2 * omp_get_num_procs());
-#pragma omp parallel for
-    for (long j = 0; j < res.height; j++) {
-        T y = T(view.y) + T(j) * view.height / res.height;
-        long i = 0;
-        for (i; i < res.width; i += 8) {
-            __m256 xs = {
-                float(view.x + double(i) * view.width / res.width),
-                float(view.x + double(i + 1) * view.width / res.width),
-                float(view.x + double(i + 2) * view.width / res.width),
-                float(view.x + double(i + 3) * view.width / res.width),
-                float(view.x + double(i + 4) * view.width / res.width),
-                float(view.x + double(i + 5) * view.width / res.width),
-                float(view.x + double(i + 6) * view.width / res.width),
-                float(view.x + double(i + 7) * view.width / res.width)
-            };
-
-            __m256 counter = {0, 0, 0, 0, 0, 0, 0, 0};
-            __m256 adder = {1, 1, 1, 1, 1, 1, 1, 1};
-
-            __m256 threshold = {16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f};
-
-            __m256 ys = {y, y, y, y, y, y, y, y};
-            __m256 a = xs;
-            __m256 b = ys;
-
-            for (int k = 0; k < info.maxIter; k++) {
-                __m256 aa = _mm256_mul_ps(a, a);
-                __m256 bb = _mm256_mul_ps(b, b);
-                __m256 abab = _mm256_mul_ps(a, b); abab = _mm256_add_ps(abab, abab);
-                a = _mm256_add_ps(_mm256_sub_ps(aa, bb), xs);
-                b = _mm256_add_ps(abab, ys);
-                __m256i cmp = _mm256_castps_si256(_mm256_cmp_ps(_mm256_add_ps(aa, bb), threshold, _CMP_LE_OQ));
-                adder = _mm256_and_ps(adder, _mm256_castsi256_ps(cmp));
-                counter = _mm256_add_ps(counter, adder);
-                if (_mm256_testz_si256(cmp, cmp) != 0) {
-                    break;
-                }
-            }
-
-            auto alignVec = [](float* data) -> float* {
-                void* aligned = data;
-                ::size_t length = 64;
-                std::align(32, 8 * sizeof(float), aligned, length);
-                return static_cast<float*>(aligned);
-            };
-
-            float resData[16];
-            float* ftRes = alignVec(resData);
-
-            _mm256_store_ps(ftRes, counter);
-            for (int k = 0; k < 8 && i + k < res.width; k++)
-                res.get(i + k, j) = ftRes[k] > 0 ? ftRes[k] : info.maxIter;
-        }
-    }
-
-    return res;
-}
-
-
-
-#endif // MANDELQUEUE_H_

+ 0 - 24
GenericMandelbrot.h

@@ -1,24 +0,0 @@
-#pragma once
-
-
-template<typename T>
-float iterate(const T& ca, const T& cb, int maxIterations)
-{
-    T a = ca;
-    T b = cb;
-
-    int n;
-    for(n = 0; n < maxIterations; n++) {
-        T aa = a * a;
-        T bb = b * b;
-        T abab = a * b; abab += abab;
-
-        a = aa - bb + ca;
-        b = abab + cb;
-        if (aa + bb > T(16)) {
-            break;
-        }
-    }
-
-    return float(n);
-}

+ 10 - 6
MandelWidget.cpp

@@ -1,5 +1,7 @@
 #include "MandelWidget.h"
 
+using namespace mnd;
+
 #include <QOpenGLVertexArrayObject>
 
 Texture::Texture(const Bitmap<RGBColor>& bitmap)
@@ -77,16 +79,16 @@ void MandelView::adaptViewport(const MandelViewport& vp)
         hasToCalc = true;
         calc = std::async([this] () {
             do {
-                CpuGenerator<double> cpg;
                 //static ClGenerator cpg;
                 MandelInfo mi;
                 mi.bWidth = 1024;//ql.geometry().width();
                 mi.bHeight = 1024; //ql.geometry().height();
                 mi.maxIter = 4000;
                 mi.view = toCalc;
-                Bitmap<RGBColor>* bmp = //new Bitmap<RGBColor>(1, 1);
-                                        new Bitmap<RGBColor>(cpg.generate(mi));
-                emit updated(bmp);
+                auto fmap = Bitmap<float>(mi.bWidth, mi.bHeight);
+                generator.generate(mi, fmap.pixels.get());
+                auto bitmap = fmap.map<RGBColor>([](float i) { return i < 0 ? RGBColor{ 0,0,0 } : RGBColor{ uint8_t(cos(i * 0.015f) * 127 + 127), uint8_t(sin(i * 0.01f) * 127 + 127), uint8_t(i) }; });//uint8_t(::sin(i * 0.01f) * 100 + 100), uint8_t(i) }; });
+                emit updated(new Bitmap(std::move(bitmap)));
             } while(hasToCalc.exchange(false));
         });
     }
@@ -97,8 +99,10 @@ void MandelView::adaptViewport(const MandelViewport& vp)
 }
 
 
-MandelWidget::MandelWidget(QWidget* parent) :
-    QGLWidget{ QGLFormat(QGL::SampleBuffers), parent }
+MandelWidget::MandelWidget(mnd::MandelContext& ctxt, QWidget* parent) :
+    QGLWidget{ QGLFormat(QGL::SampleBuffers), parent },
+    mndContext{ ctxt },
+    mv{ *ctxt.getDevices()[0].getGeneratorDouble() }
 {
     this->setContentsMargins(0, 0, 0, 0);
     this->setSizePolicy(QSizePolicy::Expanding,

+ 21 - 9
MandelWidget.h

@@ -8,8 +8,11 @@
 #include <qlabel.h>
 #include <qevent.h>
 #include <qrubberband.h>
-#include "Generators.h"
 
+#include "Bitmap.h"
+#include <Mandel.h>
+
+#include <future>
 #include <atomic>
 
 class Texture
@@ -29,11 +32,16 @@ class MandelView : public QObject
     Q_OBJECT
 private:
     std::future<void> calc;
-    std::atomic<MandelViewport> toCalc;
+    std::atomic<mnd::MandelViewport> toCalc;
     std::atomic_bool hasToCalc;
+    mnd::Generator& generator;
 public:
+    inline MandelView(mnd::Generator& generator) :
+        generator{ generator }
+    {
+    }
 public slots:
-    void adaptViewport(const MandelViewport& vp);
+    void adaptViewport(const mnd::MandelViewport& vp);
 signals:
     void updated(const Bitmap<RGBColor>* bitmap);
 };
@@ -44,6 +52,7 @@ class MandelWidget : public QGLWidget
 private:
     //QScrollArea qsa;
     //QLabel ql;
+    mnd::MandelContext& mndContext;
 
     bool initialized = false;
 
@@ -51,15 +60,18 @@ private:
     QRectF rubberband;
 
     std::unique_ptr<Texture> tex;
-    MandelViewport viewport;
+    mnd::MandelViewport viewport;
     MandelView mv;
 public:
-    MandelWidget(QWidget* parent = nullptr);
+    MandelWidget(mnd::MandelContext& ctxt, QWidget* parent = nullptr);
     ~MandelWidget(void) override;
 
 
-    inline MandelWidget(const MandelWidget& other) {
-    }
+    /*inline MandelWidget(const MandelWidget& other) :
+        mndContext{ other.mndContext },
+        mv{ other.mndContext }
+    {
+    }*/
 
     void initializeGL(void) override;
 
@@ -76,9 +88,9 @@ public:
     void mouseMoveEvent(QMouseEvent* me) override;
     void mouseReleaseEvent(QMouseEvent* me) override;
 
-    inline const MandelViewport& getViewport(void) const { return viewport; }
+    inline const mnd::MandelViewport& getViewport(void) const { return viewport; }
 signals:
-    void needsUpdate(const MandelViewport& vp);
+    void needsUpdate(const mnd::MandelViewport& vp);
 public slots:
     void viewUpdated(const Bitmap<RGBColor>* bitmap);
 };

+ 0 - 54
QueueManager.cpp

@@ -1,54 +0,0 @@
-#include "QueueManager.h"
-#include <cmath>
-
-void MandelViewport::adjustAspectRatio(double nwidth, double nheight)
-{
-    double otherRatio = nwidth / nheight;
-    if (width < height * otherRatio)
-        width = height * otherRatio;
-    else if (height < width / otherRatio)
-            height = width / otherRatio;
-}
-
-
-void MandelViewport::normalize(void)
-{
-    if (width < 0) {
-        x += width;
-        width = -width;
-    }
-    if (height < 0) {
-        y += height;
-        height = -height;
-    }
-}
-
-
-MandelGenerator::~MandelGenerator(void)
-{
-}
-
-
-Bitmap<RGBColor> MandelGenerator::generate(const MandelInfo& mandelInfo)
-{
-    auto converter = [max = mandelInfo.maxIter](float i) {
-        return i >= max ?
-            RGBColor{ 0,0,0 } :
-            RGBColor{
-                uint8_t(cos(i * 0.15f) * 127 + 127),
-                uint8_t(sin(i * 0.03f) * 127 + 127),
-                uint8_t(cos(i * 0.04f) * 127 + 127)
-            };
-    };
-    return generateRaw(mandelInfo).map<RGBColor>(converter);
-}
-
-
-QueueManager::QueueManager()
-{
-}
-
-
-QueueManager::~QueueManager()
-{
-}

+ 0 - 73
QueueManager.h

@@ -1,73 +0,0 @@
-#pragma once
-#ifndef QUEUEMANAGER_H_
-#define QUEUEMANAGER_H_
-
-#include <cinttypes>
-#include <vector>
-#include <future>
-#include "Bitmap.h"
-
-struct MandelViewport
-{
-    /// real part of the top left corner
-    double x = -2.1;
-
-    /// imaginary part of the top left corner
-    double y = -1.5;
-
-    /// real-part span of the picture to be generated
-    double width = 3;
-
-    /// imaginary-part span of the picture to be generated
-    double height = 3;
-
-    /*!
-     * \brief adjusts the aspect ratio of the viewport, making sure
-     *        the updated viewport contains all of the original one.
-     */
-    void adjustAspectRatio(double nwidth, double nheight);
-
-    /*!
-     * \brief make sure width and height are positive
-     */
-    void normalize(void);
-};
-
-struct MandelInfo
-{
-    /// viewport
-    MandelViewport view;
-
-    /// width of the bitmap to be generated
-    long bWidth;
-
-    /// height of the bitmap to be generated
-    long bHeight;
-    
-    /// maximum iterations
-    int maxIter;
-};
-
-
-class MandelGenerator
-{
-public:
-    MandelGenerator(void) = default;
-    virtual ~MandelGenerator(void);
-
-    virtual Bitmap<RGBColor> generate(const MandelInfo& mandelInfo);
-    virtual Bitmap<float> generateRaw(const MandelInfo& info) = 0;
-};
-
-
-class QueueManager
-{
-public:
-    QueueManager(void);
-    ~QueueManager(void);
-
-    std::future<Bitmap<RGBColor>> generate(const MandelInfo& mandelInfo);
-};
-
-#endif // QUEUEMANAGER_H_
-

+ 0 - 1
SectionManager.h

@@ -2,7 +2,6 @@
 #ifndef SECTIONMANAGER_H
 #define SECTIONMANAGER_H
 
-#include "QueueManager.h"
 
 class SectionManager
 {

+ 41 - 12
benchmarkdialog.cpp

@@ -1,14 +1,29 @@
 #include "benchmarkdialog.h"
 #include <chrono>
 
-BenchmarkDialog::BenchmarkDialog(QWidget *parent) : QDialog(parent)
+BenchmarkDialog::BenchmarkDialog(mnd::MandelContext& mndContext, QWidget *parent) :
+    QDialog(parent),
+    mndContext{ mndContext }
 {
     ui.setupUi(this);
+
+    auto& devices = mndContext.getDevices();
+    int nDevices = devices.size() + 1;
+    ui.tableWidget->setColumnCount(2);
+    ui.tableWidget->setRowCount(nDevices);
+    ui.tableWidget->setHorizontalHeaderLabels({"Single Precision", "Double Precision"});
+
+    QString cpuDesc = ("CPU [" + mndContext.getCpuInfo().getBrand() + "]").c_str();
+    ui.tableWidget->setVerticalHeaderItem(0, new QTableWidgetItem(cpuDesc));
+    for (int i = 0; i < devices.size(); i++) {
+        QString cpuDesc = ("GPU " + std::to_string(i + 1) + " [" + devices[i].getVendor() + " " + devices[i].getName() + "]").c_str();
+        ui.tableWidget->setVerticalHeaderItem(i + 1, new QTableWidgetItem(cpuDesc));
+    }
 }
 
-MandelViewport BenchmarkDialog::benchViewport(void) const
+mnd::MandelViewport BenchmarkDialog::benchViewport(void) const
 {
-    return MandelViewport{ -0.758267525104592591494, -0.066895616551111110830, 0.000000043217777777655, 0.000000043217777777655 };
+    return mnd::MandelViewport{ -0.758267525104592591494, -0.066895616551111110830, 0.000000043217777777655, 0.000000043217777777655 };
 }
 
 
@@ -31,25 +46,39 @@ double BenchmarkDialog::measureMips(const std::function<Bitmap<float>()>& bench)
     return megaItersPerSecond;
 }
 
-QString BenchmarkDialog::benchmarkResult(MandelGenerator& mg, int size, int iters) const
 // Benchmarks generator `mg` on a size x size bitmap of the benchmark viewport
 // with `iters` as the iteration limit, and returns the figure reported by
 // measureMips() formatted with two decimal places.
+QString BenchmarkDialog::benchmarkResult(mnd::Generator& mg, int size, int iters) const
 {
-    MandelInfo mi;
+    mnd::MandelInfo mi;
     mi.bWidth = size;
     mi.bHeight = size;
     mi.maxIter = iters;
     mi.view = benchViewport();
-    double megaItersPerSecond = measureMips([&mg, &mi] () { return mg.generateRaw(mi); });
+    double megaItersPerSecond = measureMips([&mg, &mi] () {
         // NOTE(review): the bitmap is allocated inside the callback, so the
         // allocation is presumably part of the measured time - confirm
         // against measureMips().
+        Bitmap<float> bmp(mi.bWidth, mi.bHeight);
+        mg.generate(mi, bmp.pixels.get());
+        return bmp;
+    });
+
     QString mips = QString::number(megaItersPerSecond, 'f', 2);
     return mips;
 }
 
 void BenchmarkDialog::on_run_clicked()
 {
-    CpuGenerator<double> cpg;
-    CpuGenerator<float> cpgf;
-    ClGenerator clg;
+    mnd::Generator& cpuf = mndContext.getCpuGeneratorFloat();
+    mnd::Generator& cpud = mndContext.getCpuGeneratorDouble();
+
+    ui.tableWidget->setItem(0, 0, new QTableWidgetItem(benchmarkResult(cpuf, 1000, 5000)));
+    ui.tableWidget->setItem(0, 1, new QTableWidgetItem(benchmarkResult(cpud, 1000, 5000)));
 
-    ui.tableWidget->setItem(1, 0, new QTableWidgetItem(benchmarkResult(cpg, 1000, 5000)));
-    ui.tableWidget->setItem(0, 0, new QTableWidgetItem(benchmarkResult(cpgf, 1000, 5000)));
-    ui.tableWidget->setItem(0, 1, new QTableWidgetItem(benchmarkResult(clg, 4000, 10000)));
+    auto& devices = mndContext.getDevices();
+    for (int i = 0; i < devices.size(); i++) {
+        if (mnd::Generator* gpuf; gpuf = devices[i].getGeneratorFloat()) {
+            ui.tableWidget->setItem(i + 1, 0, new QTableWidgetItem(benchmarkResult(*gpuf, 4000, 5000)));
+        }
+        if (mnd::Generator* gpud; gpud = devices[i].getGeneratorDouble()) {
+            ui.tableWidget->setItem(i + 1, 1, new QTableWidgetItem(benchmarkResult(*gpud, 4000, 5000)));
+        }
+    }
+//    ui.tableWidget->setItem(0, 1, new QTableWidgetItem(benchmarkResult(clg, 4000, 10000)));
 }

+ 6 - 4
benchmarkdialog.h

@@ -4,20 +4,22 @@
 #include <QDialog>
 #include <functional>
 #include "ui_benchmarks.h"
-#include "Generators.h"
+#include <Mandel.h>
+#include "Bitmap.h"
 
 class BenchmarkDialog : public QDialog
 {
     Q_OBJECT
 private:
     Ui::BenchmarkDialog ui;
+    mnd::MandelContext& mndContext;
 public:
-    explicit BenchmarkDialog(QWidget *parent = nullptr);
+    explicit BenchmarkDialog(mnd::MandelContext& mndContext, QWidget *parent = nullptr);
 
-    MandelViewport benchViewport(void) const;
+    mnd::MandelViewport benchViewport(void) const;
 
     double measureMips(const std::function<Bitmap<float>()>& bench) const;
-    QString benchmarkResult(MandelGenerator& mg, int size, int iters) const;
+    QString benchmarkResult(mnd::Generator& mg, int size, int iters) const;
 
 signals:
 

+ 10 - 29
benchmarks.ui

@@ -6,8 +6,8 @@
    <rect>
     <x>0</x>
     <y>0</y>
-    <width>416</width>
-    <height>312</height>
+    <width>545</width>
+    <height>264</height>
    </rect>
   </property>
   <property name="windowTitle">
@@ -52,37 +52,18 @@
        <property name="alternatingRowColors">
         <bool>true</bool>
        </property>
+       <property name="verticalScrollMode">
+        <enum>QAbstractItemView::ScrollPerPixel</enum>
+       </property>
+       <property name="horizontalScrollMode">
+        <enum>QAbstractItemView::ScrollPerPixel</enum>
+       </property>
        <property name="rowCount">
-        <number>3</number>
+        <number>0</number>
        </property>
        <property name="columnCount">
-        <number>2</number>
+        <number>0</number>
        </property>
-       <row>
-        <property name="text">
-         <string>Single precision</string>
-        </property>
-       </row>
-       <row>
-        <property name="text">
-         <string>Double precision</string>
-        </property>
-       </row>
-       <row>
-        <property name="text">
-         <string>128-bit fixed-point</string>
-        </property>
-       </row>
-       <column>
-        <property name="text">
-         <string>CPU</string>
-        </property>
-       </column>
-       <column>
-        <property name="text">
-         <string>GPU</string>
-        </property>
-       </column>
       </widget>
      </item>
     </layout>

+ 7 - 1
libmandel/CMakeLists.txt

@@ -4,6 +4,8 @@ cmake_minimum_required(VERSION 3.9)
 project(mandel VERSION 1.0.0 DESCRIPTION "library for mandelbrot calculations")
 
 find_package(OpenCL REQUIRED)
+find_package(OpenMP)
+
 
 FILE(GLOB MandelSources src/*.cpp
 FILE(GLOB MandelHeaders include/*.h))
@@ -22,4 +24,8 @@ endif(MSVC)
 add_library(mandel STATIC ${MandelSources})
 
 
-target_link_libraries(mandel OpenCL::OpenCL)
+# OpenCL is always linked; OpenMP is added only when find_package(OpenMP)
+# located a C++ implementation. Both calls use the keyword signature, because
+# CMake rejects mixing keyword and plain signatures on the same target.
+target_link_libraries(mandel PUBLIC OpenCL::OpenCL)
+if(OpenMP_CXX_FOUND)
+    target_link_libraries(mandel PUBLIC OpenMP::OpenMP_CXX)
+endif()

+ 16 - 4
libmandel/include/ClGenerators.h

@@ -13,6 +13,7 @@ namespace mnd
 {
     class ClGenerator;
     class ClGeneratorFloat;
+    class ClGeneratorDouble;
 }
 
 
@@ -24,8 +25,8 @@ protected:
     cl::Program program;
     cl::CommandQueue queue;
 public:
-    ClGenerator(void);
-    ~ClGenerator(void);
+    ClGenerator(cl::Device device);
+    virtual ~ClGenerator(void);
 
     virtual void generate(const MandelInfo& info, float* data);
 
@@ -37,12 +38,23 @@ protected:
 class mnd::ClGeneratorFloat : public ClGenerator
 {
 public:
-    ClGeneratorFloat(void) = default;
-    ~ClGeneratorFloat(void) = default;
+    ClGeneratorFloat(cl::Device device);
+    virtual ~ClGeneratorFloat(void) = default;
 
 protected:
     virtual std::string getKernelCode(void) const;
 };
 
 
 // OpenCL generator whose kernel iterates in double precision. Results are
 // still delivered as floats through generate().
+class mnd::ClGeneratorDouble : public ClGenerator
+{
+public:
     // Builds the kernel for `device`; the implementation throws the build log
     // as a std::string when the build fails.
+    ClGeneratorDouble(cl::Device device);
+    virtual ~ClGeneratorDouble(void) = default;
+
     // Replaces the base implementation so that the view parameters are passed
     // to the kernel as doubles.
+    virtual void generate(const MandelInfo& info, float* data);
+protected:
     // Returns the OpenCL C source of the double-precision kernel.
+    virtual std::string getKernelCode(void) const;
+};
+
 #endif // MANDEL_CLGENERATORS_H

+ 4 - 2
libmandel/include/CpuGeneratorsAVX.h

@@ -9,13 +9,15 @@ namespace mnd
     class CpuGeneratorAvxDouble;
 }
 
-class mnd::CpuGeneratorAvxFloat
+
 // CPU generator that MandelContext installs as its float generator when
 // CpuInfo::hasAvx() reports true.
+class mnd::CpuGeneratorAvxFloat : public Generator
 {
 public:
     virtual void generate(const MandelInfo& info, float* data);
 };
 
-class mnd::CpuGeneratorAvxDouble
+
+class mnd::CpuGeneratorAvxDouble : public Generator
 {
 public:
     virtual void generate(const MandelInfo& info, float* data);

+ 43 - 7
libmandel/include/Mandel.h

@@ -2,31 +2,67 @@
 #define MANDEL_MANDEL_H
 
 #include <vector>
+#include <string>
 #include <memory>
 
 #include "MandelUtil.h"
 #include "Generators.h"
+#include "Hardware.h"
 
-namespace mnd 
+namespace mnd
 {
     class MandelContext;
-
-    struct MandelViewport;
-    struct MandelInfo;
-
+    class MandelDevice;
 
     extern MandelContext initializeContext(void);
 }
 
 
 // Describes one compute device found by MandelContext: its vendor and name
 // plus the generators that could be built for it.
+class mnd::MandelDevice
+{
+private:
     // MandelContext fills in the private members when it creates the devices.
+    friend class MandelContext;
+
+    std::string vendor;
+    std::string name;
+
     // Either generator may be null when it could not be created for the device.
+    std::unique_ptr<Generator> floatGenerator;
+    std::unique_ptr<Generator> doubleGenerator;
     // Private: instances are created by MandelContext only.
+    MandelDevice(void);
+public:
+
+    inline const std::string& getVendor(void) const { return vendor; }
+    const std::string& getName(void) const;
+
     // Non-owning pointers; nullptr when the device has no such generator.
+    Generator* getGeneratorFloat(void) const;
+    Generator* getGeneratorDouble(void) const;
+};
+
+
 // Owns the CPU generators and the list of compute devices. Instances are
 // obtained through mnd::initializeContext(); the constructor is private.
 class mnd::MandelContext
 {
 private:
     friend MandelContext initializeContext(void);
 
-    std::vector<std::unique_ptr<Generator>> generators;
-    MandelContext(void) = default;
+    CpuInfo cpuInfo;
+
     // Set by the constructor.
+    std::unique_ptr<Generator> cpuGeneratorFloat;
+    std::unique_ptr<Generator> cpuGeneratorDouble;
+
+    std::vector<MandelDevice> devices;
+
+    MandelContext(void);
+
     // Builds the device list stored in `devices`.
+    std::vector<MandelDevice> createDevices(void);
 public:
+
+    Generator& getDefaultGenerator(void);
+    const std::vector<MandelDevice>& getDevices(void);
+
+    Generator& getCpuGeneratorFloat(void);
+    Generator& getCpuGeneratorDouble(void);
+
+    const CpuInfo& getCpuInfo(void) const { return cpuInfo; }
 };
 
 

+ 115 - 9
libmandel/src/ClGenerators.cpp

@@ -8,6 +8,7 @@ using namespace cl;
 
 using mnd::ClGenerator;
 using mnd::ClGeneratorFloat;
+using mnd::ClGeneratorDouble;
 
 Platform getPlatform() {
     /* Returns the first platform found. */
@@ -18,16 +19,21 @@ Platform getPlatform() {
         std::cout << "No platforms found. Check OpenCL installation!\n";
         exit(1);
     }
+    for (auto& p : all_platforms) {
+        std::string name = p.getInfo<CL_PLATFORM_NAME>();
+        std::string profile = p.getInfo<CL_PLATFORM_PROFILE>();
+        printf("Platform: %s, %s\n", name.c_str(), profile.c_str());
+    }
     return all_platforms[0];
 }
 
 
-Device getDevice(Platform platform, int i, bool display = false) {
+Device getDevice(Platform& platform, int i, bool display = false) {
     /* Returns the deviced specified by the index i on platform.
     * If display is true, then all of the platforms are listed.
     */
     std::vector<Device> all_devices;
-    platform.getDevices(CL_DEVICE_TYPE_GPU, &all_devices);
+    platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
     if (all_devices.size() == 0) {
         std::cout << "No devices found. Check OpenCL installation!\n";
         exit(1);
@@ -44,14 +50,15 @@ Device getDevice(Platform platform, int i, bool display = false) {
 }
 
 
-ClGenerator::ClGenerator(void)
+ClGenerator::ClGenerator(cl::Device device) :
+    device{ device }
 {
-    Platform p = getPlatform();
+    /*Platform p = getPlatform();
     device = getDevice(p, 0, true);
     context = Context{ device };
     Program::Sources sources;
 
-    std::string kcode = getKernelCode();
+    std::string kcode = this->getKernelCode();
 
     sources.push_back({ kcode.c_str(), kcode.length() });
 
@@ -61,7 +68,7 @@ ClGenerator::ClGenerator(void)
         exit(1);
     }
 
-    queue = CommandQueue(context, device);
+    queue = CommandQueue(context, device);*/
 }
 
 
@@ -75,8 +82,8 @@ ClGenerator::~ClGenerator(void)
 void ClGenerator::generate(const mnd::MandelInfo& info, float* data)
 {
     ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
-    
-    Buffer buffer_A(context, CL_MEM_READ_WRITE, bufferSize);
+
+    Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
     float pixelScaleX = info.view.width / info.bWidth;
     float pixelScaleY = info.view.height / info.bHeight;
 
@@ -99,6 +106,27 @@ void ClGenerator::generate(const mnd::MandelInfo& info, float* data)
 }
 
 
+// Creates the OpenCL context for `device`, compiles the kernel returned by
+// getKernelCode() and sets up the command queue.
+// Throws the build log as a std::string when the kernel fails to build.
+ClGeneratorFloat::ClGeneratorFloat(cl::Device device) :
+    ClGenerator{ device }
+{
+    context = Context{ device };
+
+    const std::string kernelSource = this->getKernelCode();
+    Program::Sources programSources;
+    programSources.push_back({ kernelSource.c_str(), kernelSource.length() });
+    program = Program{ context, programSources };
+
+    const auto buildStatus = program.build({ device });
+    if (buildStatus != CL_SUCCESS) {
+        throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
+    }
+
+    queue = CommandQueue(context, device);
+}
+
+
 std::string ClGeneratorFloat::getKernelCode(void) const
 {
     if (false && device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>() == 4) {
@@ -162,9 +190,87 @@ std::string ClGeneratorFloat::getKernelCode(void) const
             "       n++;"
             "   }\n"
                 // N + 1 - log (log  |Z(N)|) / log 2
-            "   A[index] = ((float)n) + 1 - log(log(a * a + b * b) / 2) / log(2.0f);\n"
+            "   if (n >= max)\n"
+            "       A[index] = max;\n"
+            "   else"
+            "       A[index] = ((float)n) + 1 - log(log(a * a + b * b) / 2) / log(2.0f);\n"
 //            "   A[index] = ((float)n) + 1 - (a * a + b * b - 16) / (256 - 16);\n"
     //        "   A[get_global_id(0)] = 5;"
             "}";
     }
 }
+
+
+// Creates the OpenCL context for `device`, compiles the kernel returned by
+// getKernelCode() and sets up the command queue.
+// Throws the build log as a std::string when the kernel fails to build.
+ClGeneratorDouble::ClGeneratorDouble(cl::Device device) :
+    ClGenerator{ device }
+{
+    context = Context{ device };
+
+    const std::string kernelSource = this->getKernelCode();
+    Program::Sources programSources;
+    programSources.push_back({ kernelSource.c_str(), kernelSource.length() });
+    program = Program{ context, programSources };
+
+    const auto buildStatus = program.build({ device });
+    if (buildStatus != CL_SUCCESS) {
+        throw std::string(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
+    }
+
+    queue = CommandQueue(context, device);
+}
+
+
+// Renders the view described by `info` with the double-precision kernel and
+// copies the per-pixel results into `data`.
+//
+// info  bitmap size, view rectangle and iteration limit.
+// data  output buffer; must have room for info.bWidth * info.bHeight floats.
+void ClGeneratorDouble::generate(const mnd::MandelInfo& info, float* data)
+{
+    ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
+
+    Buffer buffer_A(context, CL_MEM_WRITE_ONLY, bufferSize);
+    // Keep the pixel scale in double: rounding it to float first would throw
+    // away exactly the precision this generator exists to provide.
+    double pixelScaleX = info.view.width / info.bWidth;
+    double pixelScaleY = info.view.height / info.bHeight;
+
+    Kernel iterate = Kernel(program, "iterate");
+    iterate.setArg(0, buffer_A);
+    iterate.setArg(1, int(info.bWidth));
+    iterate.setArg(2, double(info.view.x));
+    iterate.setArg(3, double(info.view.y));
+    iterate.setArg(4, pixelScaleX);
+    iterate.setArg(5, pixelScaleY);
+    iterate.setArg(6, int(info.maxIter));
+
+    // One work item per pixel; CL_TRUE makes the read blocking, so `data` is
+    // complete when this function returns.
+    queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
+    queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
+}
+
+
 // Returns the OpenCL C source of the "iterate" kernel. Each work item handles
 // the pixel at its global id: it iterates in double precision until
 // a*a + b*b exceeds 16 or `max` iterations are reached, then stores `max` for
 // points that never escaped and a smoothed iteration count otherwise.
+std::string ClGeneratorDouble::getKernelCode(void) const
+{
+    return 
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+        "__kernel void iterate(__global float* A, const int width, double xl, double yt, double pixelScaleX, double pixelScaleY, int max) {\n"
+        "   int index = get_global_id(0);\n"
+        "   int x = index % width;"
+        "   int y = index / width;"
+        "   double a = x * pixelScaleX + xl;"
+        "   double b = y * pixelScaleY + yt;"
+        "   double ca = a;"
+        "   double cb = b;"
+        ""
+        "   int n = 0;"
+        "   while (n < max) {"
+        "       double aa = a * a;"
+        "       double bb = b * b;"
+        "       double ab = a * b;"
+        "       if (aa + bb > 16) break;"
+        "       a = aa - bb + ca;"
+        "       b = 2 * ab + cb;"
+        "       n++;"
+        "   }\n"
+        // N + 1 - log (log  |Z(N)|) / log 2
+        "   if (n >= max)\n"
+        "       A[index] = max;\n"
+        "   else"
+        "       A[index] = ((float)n) + 1 - log(log(a * a + b * b) / 2) / log(2.0f);\n"
+        //            "   A[index] = ((float)n) + 1 - (a * a + b * b - 16) / (256 - 16);\n"
+        //        "   A[get_global_id(0)] = 5;"
+        "}";
+}

+ 135 - 4
libmandel/src/mandel.cpp

@@ -1,10 +1,141 @@
 #include "Mandel.h"
 
+#include "CpuGenerators.h"
+#include "CpuGeneratorsAVX.h"
+#include "ClGenerators.h"
 
-namespace mnd
+using mnd::MandelDevice;
+using mnd::MandelContext;
+using mnd::Generator;
+
+
+
+// Creates and returns a fully initialised MandelContext. This is the only
+// public way to obtain one, because the constructor is private.
+MandelContext mnd::initializeContext(void)
+{
+    return MandelContext();
+}
+
+
+// Creates a device record with no generators: both unique_ptr members
+// default-construct to null.
+MandelDevice::MandelDevice(void) = default;
+
+
+// Returns the single-precision generator of this device, or nullptr when the
+// device has none. The pointer is non-owning.
+mnd::Generator* MandelDevice::getGeneratorFloat(void) const
+{
+    // unique_ptr::get() already yields nullptr when nothing is owned.
+    return floatGenerator.get();
+}
+
+
+// Returns the double-precision generator of this device, or nullptr when the
+// device has none. The pointer is non-owning.
+mnd::Generator* MandelDevice::getGeneratorDouble(void) const
 {
-    MandelContext initializeContext(void)
-    {
-        return MandelContext();
+    // unique_ptr::get() already yields nullptr when nothing is owned.
+    return doubleGenerator.get();
+}
+
+
 // Picks the CPU generators according to what cpuInfo reports and then
 // builds the device list.
+MandelContext::MandelContext(void)
+{
     // Use the AVX implementations when the CPU reports AVX support, the
     // plain ones otherwise.
+    if (cpuInfo.hasAvx()) {
+        cpuGeneratorFloat = std::make_unique<CpuGeneratorAvxFloat>();
+        cpuGeneratorDouble = std::make_unique<CpuGeneratorAvxDouble>();
     }
+    else {
+        cpuGeneratorFloat = std::make_unique<CpuGeneratorFloat>();
+        cpuGeneratorDouble = std::make_unique<CpuGeneratorDouble>();
+    }
+
+    devices = createDevices();
+}
+
+
+// Enumerates the GPU devices of the available OpenCL platforms and tries to
+// build a single-precision generator for each of them, plus a double-precision
+// one where the device advertises cl_khr_fp64. A generator that fails to
+// build is left null; the device is still added to the returned list.
+std::vector<MandelDevice> MandelContext::createDevices(void)
+{
+    std::vector<MandelDevice> mandelDevices;
+
+    std::vector<cl::Platform> platforms;
+    cl::Platform::get(&platforms);
+    // NOTE(review): the second platform is skipped on purpose here, but the
+    // reason is not recorded - confirm whether this is still wanted. The size
+    // check is required because erasing begin() + 1 from a vector with fewer
+    // than two elements is undefined behaviour.
+    if (platforms.size() > 1) {
+        platforms.erase(platforms.begin() + 1);
+    }
+
+    for (auto& platform : platforms) {
+        std::string name = platform.getInfo<CL_PLATFORM_NAME>();
+        std::string profile = platform.getInfo<CL_PLATFORM_PROFILE>();
+        std::string platformExtensions = platform.getInfo<CL_PLATFORM_EXTENSIONS>();
+
+        printf("Platform extensions: %s\n", platformExtensions.c_str());
+        printf("Platform: %s, %s\n", name.c_str(), profile.c_str());
+
+        // Named clDevices so that it does not shadow the `devices` member.
+        std::vector<cl::Device> clDevices;
+        platform.getDevices(CL_DEVICE_TYPE_GPU, &clDevices);
+        for (auto& device : clDevices) {
+            std::string extensions = device.getInfo<CL_DEVICE_EXTENSIONS>();
+            const bool supportsDouble = extensions.find("cl_khr_fp64") != std::string::npos;
+
+            // Print the device's own extension string, not the platform's.
+            printf("Device extensions: %s\n", extensions.c_str());
+            // CL_DEVICE_MAX_CLOCK_FREQUENCY is a cl_uint, hence %u.
+            printf("clock: %u\n", static_cast<unsigned>(device.getInfo<CL_DEVICE_MAX_CLOCK_FREQUENCY>()));
+
+            MandelDevice md;
+            md.name = device.getInfo<CL_DEVICE_NAME>();
+            md.vendor = device.getInfo<CL_DEVICE_VENDOR>();
+
+            // The generator constructors throw the OpenCL build log as a
+            // std::string; a failed build only disables that generator.
+            try {
+                md.floatGenerator = std::make_unique<ClGeneratorFloat>(device);
+            }
+            catch (const std::string& err) {
+                printf("err: %s", err.c_str());
+            }
+
+            if (supportsDouble) {
+                try {
+                    md.doubleGenerator = std::make_unique<ClGeneratorDouble>(device);
+                }
+                catch (const std::string& err) {
+                    // Report the failure instead of swallowing it silently.
+                    printf("err: %s", err.c_str());
+                }
+            }
+            mandelDevices.push_back(std::move(md));
+        }
+    }
+    return mandelDevices;
+}
+
+
 // Returns the device name stored in this record. The reference stays valid
 // for as long as the MandelDevice itself.
+const std::string& MandelDevice::getName(void) const
+{
+    return name;
+}
+
+
 // Returns the generator to use when the caller has no preference; currently
 // this is the CPU double-precision generator.
+Generator& MandelContext::getDefaultGenerator(void)
+{
+    return getCpuGeneratorDouble();
+}
+
+
 // Returns the device list that was built when the context was constructed.
+const std::vector<MandelDevice>& MandelContext::getDevices(void)
+{
+    return devices;
+}
+
+
 // Returns the CPU float generator, which the constructor always creates.
+Generator& MandelContext::getCpuGeneratorFloat(void)
+{
+    return *cpuGeneratorFloat;
+}
+
+
 // Returns the CPU double generator, which the constructor always creates.
+Generator& MandelContext::getCpuGeneratorDouble(void)
+{
+    return *cpuGeneratorDouble;
+}