소스 검색

added libmandel

Nicolas Winkler 6 년 전
부모
커밋
e38ec9531f

+ 1 - 1
Almond.cpp

@@ -28,7 +28,7 @@ void Almond::on_pushButton_clicked()
         mi.bWidth = dialog.getWidth();
         mi.bHeight = dialog.getHeight();
         mi.view.adjustAspectRatio(mi.bWidth, mi.bHeight);
-        ClGenerator cpg;
+        CpuGenerator<double> cpg;
         auto bitmap = cpg.generate(mi);
         QImage img((unsigned char*)bitmap.pixels.get(), bitmap.width, bitmap.height, bitmap.width * 3, QImage::Format_RGB888);
         img.save(dialog.getPath());

+ 8 - 0
Almond.pro

@@ -116,3 +116,11 @@ DEPENDPATH += $$PWD/../libs/ffmpeg-4.1.1-win32-dev/include
 
 RESOURCES += \
     Almond.qrc
+
+unix|win32: LIBS += -L$$PWD/libmandel/ -lmandel
+
+INCLUDEPATH += $$PWD/libmandel/include
+DEPENDPATH += $$PWD/libmandel/include
+
+win32:!win32-g++: PRE_TARGETDEPS += $$PWD/libmandel/mandel.lib
+else:unix|win32-g++: PRE_TARGETDEPS += $$PWD/libmandel/libmandel.a

+ 7 - 3
Generators.cpp

@@ -54,7 +54,8 @@ ClGenerator::ClGenerator(void)
     std::string kcode;
  
 
-    if (device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>() == 4) {
+    // TODO check for overflow
+    if (false && device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>() == 4) {
         kcode =
             "__kernel void iterate(__global float* A, const int width, float xl, float yt, float pixelScaleX, float pixelScaleY, int max) {\n"
             "   int index = get_global_id(0) * 4;\n"
@@ -114,7 +115,9 @@ ClGenerator::ClGenerator(void)
             "       b = 2 * ab + cb;"
             "       n++;"
             "   }\n"
-            "   A[index] = ((float)n) + 1 - (a * a + b * b - 16) / (256 - 16);\n"
+                // N + 1 - log (log  |Z(N)|) / log 2
+            "   A[index] = ((float)n) + 1 - log(log(a * a + b * b) / 2) / log(2.0f);\n"
+//            "   A[index] = ((float)n) + 1 - (a * a + b * b - 16) / (256 - 16);\n"
     //        "   A[get_global_id(0)] = 5;"
             "}";
     }
@@ -154,7 +157,8 @@ Bitmap<float> ClGenerator::generateRaw(const MandelInfo& info)
     iterate.setArg(5, float(pixelScaleY));
     iterate.setArg(6, int(info.maxIter));
 
-    if (device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>() == 4) {
+    // TODO check for overflow
+    if (false && device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>() == 4) {
         queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight / 4));
     } else {
         queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));

+ 3 - 2
MandelWidget.cpp

@@ -77,8 +77,8 @@ void MandelView::adaptViewport(const MandelViewport& vp)
         hasToCalc = true;
         calc = std::async([this] () {
             do {
-                //CpuGenerator<float> cpg;
-                static ClGenerator cpg;
+                CpuGenerator<double> cpg;
+                //static ClGenerator cpg;
                 MandelInfo mi;
                 mi.bWidth = 1024;//ql.geometry().width();
                 mi.bHeight = 1024; //ql.geometry().height();
@@ -257,6 +257,7 @@ void MandelWidget::mouseReleaseEvent(QMouseEvent* me)
     viewport.y += double(rect.top()) * viewport.height / full.height();
     viewport.width *= double(rect.width()) / full.width();
     viewport.height *= double(rect.height()) / full.height();
+    viewport.normalize();
     rubberbandDragging = false;
     emit needsUpdate(viewport);
 }

+ 24 - 1
QueueManager.cpp

@@ -10,13 +10,36 @@ void MandelViewport::adjustAspectRatio(double nwidth, double nheight)
             height = width / otherRatio;
 }
 
+
+/*!
+ * \brief Turns negative spans into positive ones without changing the
+ *        rectangle that the viewport covers.
+ */
+void MandelViewport::normalize(void)
+{
+    const bool mirroredHorizontally = width < 0;
+    const bool mirroredVertically = height < 0;
+
+    if (mirroredHorizontally) {
+        // move the origin to the opposite edge, then flip the sign of the span
+        x = x + width;
+        width = -width;
+    }
+
+    if (mirroredVertically) {
+        y = y + height;
+        height = -height;
+    }
+}
+
+
 MandelGenerator::~MandelGenerator(void) // out-of-line destructor; the body is empty, nothing is released here
 {
 }
 
+
 Bitmap<RGBColor> MandelGenerator::generate(const MandelInfo& mandelInfo) // turns the float result of generateRaw() into an RGB bitmap
 {
-    auto converter = [](float i) { return i < 0 ? RGBColor{ 0,0,0 } : RGBColor{ uint8_t(cos(i * 0.15f) * 127 + 127), uint8_t(sin(i * 0.01f) * 127 + 127), uint8_t(sin(i * 0.04f) * 127 + 127) }; };
+    auto converter = [max = mandelInfo.maxIter](float i) { // max is a by-value copy of the iteration limit
+        return i >= max ? // values at or above the iteration limit are painted black
+            RGBColor{ 0,0,0 } :
+            RGBColor{
+                uint8_t(cos(i * 0.15f) * 127 + 127), // each channel: a sinusoid of i mapped into 0..254
+                uint8_t(sin(i * 0.03f) * 127 + 127),
+                uint8_t(cos(i * 0.04f) * 127 + 127)
+            };
+    };
     return generateRaw(mandelInfo).map<RGBColor>(converter); // presumably map() applies converter to every raw value - confirm in Bitmap
 }
 

+ 5 - 0
QueueManager.h

@@ -26,6 +26,11 @@ struct MandelViewport
      *        the updated viewport contains all of the original one.
      */
     void adjustAspectRatio(double nwidth, double nheight);
+
+    /*!
+     * \brief make sure width and height are positive
+     */
+    void normalize(void);
 };
 
 struct MandelInfo

+ 1 - 1
benchmarkdialog.cpp

@@ -51,5 +51,5 @@ void BenchmarkDialog::on_run_clicked()
 
     ui.tableWidget->setItem(1, 0, new QTableWidgetItem(benchmarkResult(cpg, 1000, 5000)));
     ui.tableWidget->setItem(0, 0, new QTableWidgetItem(benchmarkResult(cpgf, 1000, 5000)));
-    ui.tableWidget->setItem(0, 1, new QTableWidgetItem(benchmarkResult(clg, 2000, 8000)));
+    ui.tableWidget->setItem(0, 1, new QTableWidgetItem(benchmarkResult(clg, 4000, 10000)));
 }

+ 25 - 0
libmandel/CMakeLists.txt

@@ -0,0 +1,25 @@
+# Build script for the static "mandel" library.
+cmake_minimum_required(VERSION 3.9)
+
+
+project(mandel VERSION 1.0.0 DESCRIPTION "library for mandelbrot calculations")
+
+# The library is linked against the imported OpenCL::OpenCL target below.
+find_package(OpenCL REQUIRED)
+
+# Each glob is its own, properly closed command; previously the first call
+# was missing its closing parenthesis and the second one had an extra one.
+FILE(GLOB MandelSources src/*.cpp)
+FILE(GLOB MandelHeaders include/*.h)
+
+
+include_directories(
+    "include"
+)
+
+# Enable AVX code generation for the AVX generator source file only.
+if (MSVC)
+    set_source_files_properties(src/CpuGeneratorsAVX.cpp PROPERTIES COMPILE_FLAGS /arch:AVX)
+else()
+    set_source_files_properties(src/CpuGeneratorsAVX.cpp PROPERTIES COMPILE_FLAGS -mavx)
+endif(MSVC)
+
+add_library(mandel STATIC ${MandelSources})
+
+
+target_link_libraries(mandel OpenCL::OpenCL)

+ 48 - 0
libmandel/include/ClGenerators.h

@@ -0,0 +1,48 @@
+#ifndef MANDEL_CLGENERATORS_H
+#define MANDEL_CLGENERATORS_H
+
+#include "Generators.h"
+
+#ifdef __APPLE__
+#include <OpenCL/cl.hpp>
+#else
+#include <CL/cl.hpp>
+#endif
+
+namespace mnd
+{
+    class ClGenerator;
+    class ClGeneratorFloat;
+}
+
+
+/*!
+ * \brief Base class for generators that run on an OpenCL device.
+ *
+ * Stores the OpenCL device, context, program and command queue.
+ * Derived classes provide the kernel source through getKernelCode().
+ */
+class mnd::ClGenerator : public Generator
+{
+protected:
+    cl::Device device;
+    cl::Context context;
+    cl::Program program;
+    cl::CommandQueue queue;
+public:
+    ClGenerator(void);
+    ~ClGenerator(void) override;
+
+    /*!
+     * \brief Implements Generator::generate.
+     * \param info description of the picture to be generated
+     * \param data output buffer receiving the float results
+     */
+    void generate(const MandelInfo& info, float* data) override;
+
+protected:
+    /*!
+     * \brief returns the OpenCL source code of the kernel to be used
+     */
+    virtual std::string getKernelCode(void) const = 0;
+};
+
+
+/*!
+ * \brief OpenCL generator whose kernel works with single-precision floats.
+ */
+class mnd::ClGeneratorFloat : public ClGenerator
+{
+public:
+    ClGeneratorFloat(void) = default;
+    ~ClGeneratorFloat(void) override = default;
+
+protected:
+    /*!
+     * \brief returns the source code of the float kernel
+     */
+    std::string getKernelCode(void) const override;
+};
+
+
+#endif // MANDEL_CLGENERATORS_H

+ 24 - 0
libmandel/include/CpuGeneratorsAVX.h

@@ -0,0 +1,24 @@
+#ifndef MANDEL_CPUGENERATORSAVX_H
+#define MANDEL_CPUGENERATORSAVX_H
+
+#include "Generators.h"
+
+namespace mnd
+{
+    class CpuGeneratorAvxFloat;
+    class CpuGeneratorAvxDouble;
+}
+
+/*!
+ * \brief CPU generator using AVX intrinsics on single-precision floats.
+ *
+ * Derives from Generator so that it can be used through the common
+ * interface and is destroyed through a virtual destructor.
+ */
+class mnd::CpuGeneratorAvxFloat : public Generator
+{
+public:
+    /*!
+     * \brief Implements Generator::generate.
+     * \param info description of the picture to be generated
+     * \param data output buffer receiving the float results
+     */
+    void generate(const MandelInfo& info, float* data) override;
+};
+
+/*!
+ * \brief CPU generator using AVX intrinsics on double-precision floats.
+ *
+ * Derives from Generator so that it can be used through the common
+ * interface and is destroyed through a virtual destructor.
+ */
+class mnd::CpuGeneratorAvxDouble : public Generator
+{
+public:
+    /*!
+     * \brief Implements Generator::generate.
+     * \param info description of the picture to be generated
+     * \param data output buffer receiving the float results
+     */
+    void generate(const MandelInfo& info, float* data) override;
+};
+
+#endif // MANDEL_CPUGENERATORSAVX_H

+ 30 - 0
libmandel/include/Generators.h

@@ -0,0 +1,30 @@
+#ifndef MANDEL_GENERATORS_H
+#define MANDEL_GENERATORS_H
+
+#include "MandelUtil.h"
+
+
+namespace mnd
+{
+    class Generator;
+}
+
+
+/*!
+ * \brief Abstract interface shared by all generators of the library.
+ *
+ * Generators can be moved but not copied.
+ */
+class mnd::Generator
+{
+public:
+    Generator() = default;
+
+    // moving is allowed ...
+    Generator(Generator&&) = default;
+    Generator& operator=(Generator&&) = default;
+
+    // ... copying is not
+    Generator(const Generator&) = delete;
+    Generator& operator=(const Generator&) = delete;
+
+    virtual ~Generator();
+
+    /*!
+     * \brief generates the picture described by \p info
+     * \param info viewport, bitmap size and iteration limit
+     * \param data output buffer; the implementations in this library
+     *             write bWidth * bHeight floats into it
+     */
+    virtual void generate(const MandelInfo& info, float* data) = 0;
+};
+
+
+#endif // MANDEL_GENERATORS_H

+ 33 - 0
libmandel/include/Mandel.h

@@ -0,0 +1,33 @@
+#ifndef MANDEL_MANDEL_H
+#define MANDEL_MANDEL_H
+
+#include <vector>
+#include <memory>
+
+#include "MandelUtil.h"
+#include "Generators.h"
+
+namespace mnd 
+{
+    class MandelContext;
+
+    struct MandelViewport;
+    struct MandelInfo;
+
+
+    extern MandelContext initializeContext(void);
+}
+
+
+class mnd::MandelContext // holds the Generator objects of one library context
+{
+private:
+    friend MandelContext initializeContext(void); // gives the factory function access to the private constructor
+
+    std::vector<std::unique_ptr<Generator>> generators; // generators are held through unique_ptr
+    MandelContext(void) = default; // private: only members and the friend above can create a context
+public:
+};
+
+
+#endif // MANDEL_MANDEL_H

+ 52 - 0
libmandel/include/MandelUtil.h

@@ -0,0 +1,52 @@
+#ifndef MANDEL_MANDELUTIL_H
+#define MANDEL_MANDELUTIL_H
+
+namespace mnd 
+{
+    struct MandelViewport;
+    struct MandelInfo;
+}
+
+
+struct mnd::MandelViewport
+{
+    /// real part of the top left corner (defaults to -2.1)
+    double x = -2.1;
+
+    /// imaginary part of the top left corner (defaults to -1.5)
+    double y = -1.5;
+
+    /// real-part span of the picture to be generated (defaults to 3)
+    double width = 3;
+
+    /// imaginary-part span of the picture to be generated (defaults to 3)
+    double height = 3;
+
+    /*!
+     * \brief adjusts the aspect ratio of the viewport to nwidth / nheight, making sure
+     *        the updated viewport contains all of the original one (x and y stay unchanged).
+     */
+    void adjustAspectRatio(double nwidth, double nheight);
+
+    /*!
+     * \brief makes negative width / height positive, moving x / y so the covered area stays the same
+     */
+    void normalize(void);
+};
+
+/*!
+ * \brief Describes one picture to be generated: the region of the complex
+ *        plane, the bitmap size and the iteration limit.
+ *
+ * All members have defined initial values, so a default-constructed
+ * MandelInfo never holds indeterminate values.
+ */
+struct mnd::MandelInfo
+{
+    /// viewport
+    MandelViewport view;
+
+    /// width of the bitmap to be generated
+    long bWidth = 0;
+
+    /// height of the bitmap to be generated
+    long bHeight = 0;
+    
+    /// maximum iterations
+    int maxIter = 0;
+};
+
+#endif // MANDEL_MANDELUTIL_H

+ 170 - 0
libmandel/src/ClGenerators.cpp

@@ -0,0 +1,170 @@
+#include "ClGenerators.h"
+
+#include <iostream>
+#include <iterator>
+
+
+using namespace cl;
+
+using mnd::ClGenerator;
+using mnd::ClGeneratorFloat;
+
+Platform getPlatform() {
+    /* Queries the available OpenCL platforms and hands back the first one.
+     * Prints a message and terminates the process if there is none.
+     */
+    std::vector<Platform> platforms;
+    Platform::get(&platforms);
+
+    if (platforms.empty()) {
+        std::cout << "No platforms found. Check OpenCL installation!\n";
+        exit(1);
+    }
+
+    return platforms.front();
+}
+
+
+Device getDevice(Platform platform, int i, bool display = false) {
+    /* Returns the GPU device specified by the index i on platform.
+    * If display is true, all GPU devices of the platform are listed first.
+    * Prints a message and terminates the process if the platform has no
+    * GPU device or if i is not a valid device index.
+    */
+    std::vector<Device> all_devices;
+    platform.getDevices(CL_DEVICE_TYPE_GPU, &all_devices);
+    if (all_devices.empty()) {
+        std::cout << "No devices found. Check OpenCL installation!\n";
+        exit(1);
+    }
+
+    if (display) {
+        for (::size_t j = 0; j < all_devices.size(); j++) {
+            printf("Device %d: %s\n", int(j), all_devices[j].getInfo<CL_DEVICE_NAME>().c_str());
+            // the queried value is unsigned (cl_uint); convert it to match the %d conversion
+            printf("preferred float width: %d\n", int(all_devices[j].getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>()));
+            printf("vendor: %s\n", all_devices[j].getInfo<CL_DEVICE_VENDOR>().c_str());
+        }
+    }
+
+    // an out-of-range index would read past the end of the vector
+    if (i < 0 || ::size_t(i) >= all_devices.size()) {
+        std::cout << "Invalid device index " << i << ": only "
+                  << all_devices.size() << " device(s) found.\n";
+        exit(1);
+    }
+    return all_devices[i];
+}
+
+
+/*
+ * Compiles kernelCode for the given device inside the given context.
+ * Prints the build log and terminates the process if compilation fails.
+ */
+static Program buildProgram(const Context& context, const Device& device, const std::string& kernelCode)
+{
+    Program::Sources sources;
+    sources.push_back({ kernelCode.c_str(), kernelCode.length() });
+
+    Program built{ context, sources };
+    if (built.build({ device }) != CL_SUCCESS) {
+        std::cout << "Error building: " << built.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << std::endl;
+        exit(1);
+    }
+    return built;
+}
+
+
+/*
+ * Selects the first GPU device of the first platform and creates the
+ * context and command queue for it.
+ *
+ * The kernel is deliberately NOT compiled here: getKernelCode() is pure
+ * virtual in this class and the override of the derived class cannot be
+ * reached while the base class is still being constructed. The program
+ * is compiled on the first call to generate() instead.
+ */
+ClGenerator::ClGenerator(void)
+{
+    Platform p = getPlatform();
+    device = getDevice(p, 0, true);
+    context = Context{ device };
+    queue = CommandQueue(context, device);
+}
+
+
+/*
+ * Submits and waits for all outstanding commands of the queue.
+ */
+ClGenerator::~ClGenerator(void)
+{
+    queue.flush();
+    queue.finish();
+}
+
+
+/*
+ * Runs the "iterate" kernel once per pixel and copies the results into data.
+ *
+ * info: viewport, bitmap size and iteration limit of the picture
+ * data: output buffer; bWidth * bHeight floats are written to it
+ */
+void ClGenerator::generate(const mnd::MandelInfo& info, float* data)
+{
+    // compile the kernel on first use (see the constructor for the reason)
+    if (program() == nullptr) {
+        program = buildProgram(context, device, getKernelCode());
+    }
+
+    ::size_t bufferSize = info.bWidth * info.bHeight * sizeof(float);
+    
+    Buffer buffer_A(context, CL_MEM_READ_WRITE, bufferSize);
+    // distance between two neighbouring pixels in the complex plane
+    float pixelScaleX = info.view.width / info.bWidth;
+    float pixelScaleY = info.view.height / info.bHeight;
+
+    Kernel iterate = Kernel(program, "iterate");
+    iterate.setArg(0, buffer_A);
+    iterate.setArg(1, int(info.bWidth));
+    iterate.setArg(2, float(info.view.x));
+    iterate.setArg(3, float(info.view.y));
+    iterate.setArg(4, float(pixelScaleX));
+    iterate.setArg(5, float(pixelScaleY));
+    iterate.setArg(6, int(info.maxIter));
+
+    // TODO check for overflow
+    // the float4 variant is currently disabled by the constant false
+    if (false && device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>() == 4) {
+        queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight / 4));
+    } else {
+        queue.enqueueNDRangeKernel(iterate, 0, NDRange(info.bWidth * info.bHeight));
+    }
+    // blocking read (CL_TRUE): returns once the results are in data
+    queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, bufferSize, data);
+}
+
+
+/*
+ * Returns the OpenCL C source of the "iterate" kernel for single-precision
+ * floats.
+ *
+ * The float4 variant is currently disabled by the constant false in the
+ * condition, so the scalar kernel is always returned. The scalar kernel
+ * writes a smoothed iteration count for points that escaped and exactly
+ * max for points that did not escape within max iterations.
+ */
+std::string ClGeneratorFloat::getKernelCode(void) const
+{
+    if (false && device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>() == 4) {
+        return
+            "__kernel void iterate(__global float* A, const int width, float xl, float yt, float pixelScaleX, float pixelScaleY, int max) {\n"
+            "   int index = get_global_id(0) * 4;\n"
+            "   int x = index % (width);\n"
+            "   int y = index / (width);\n"
+            "   float4 av = (float4)(x * pixelScaleX + xl, (x + 1) * pixelScaleX + xl, (x + 2) * pixelScaleX + xl, (x + 3) * pixelScaleX + xl);\n"
+    //                        "(x + 4) * pixelScale + xl, (x + 5) * pixelScale + xl, (x + 6) * pixelScale + xl, (x + 7) * pixelScale + xl);\n"
+            "   float4 bv = (float4)(y * pixelScaleY + yt);\n"
+            "   float4 ca = av;\n"
+            "   float4 cb = bv;\n"
+            ""
+            "   int4 counter = (int4) 1;"
+            "   float4 threshold = (float4) 16;"
+            "   int n = 0;\n"
+            "   while (n < max) {\n"
+            "       float4 aa = av * av;\n"
+            "       float4 bb = bv * bv;\n"
+            "       float4 ab = av * bv;\n"
+            "       av = aa - bb + ca;\n"
+            "       bv = 2 * ab + cb;\n"
+            "       counter += -(threshold > (aa + bb));\n"
+            "       if(all(threshold < (aa + bb))) break;\n"
+            "       //if (aa + bb > 16) break;\n"
+            "       n++;\n"
+            "   }\n\n"
+            "   A[index] = (float) counter[0];\n"
+            "   A[index + 1] = (float) counter[1];\n"
+            "   A[index + 2] = (float) counter[2];\n"
+            "   A[index + 3] = (float) counter[3];\n"
+    /*        "   A[index + 4] = (float) counter[4];\n"
+            "   A[index + 5] = (float) counter[5];\n"
+            "   A[index + 6] = (float) counter[6];\n"
+            "   A[index + 7] = (float) counter[7];\n"*/
+    //        "   A[get_global_id(0)] = 1;\n"
+            "}\n";
+    }
+    else {
+
+        return 
+    //        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
+            "__kernel void iterate(__global float* A, const int width, float xl, float yt, float pixelScaleX, float pixelScaleY, int max) {"
+            "   int index = get_global_id(0);\n"
+            "   int x = index % width;"
+            "   int y = index / width;"
+            "   float a = x * pixelScaleX + xl;"
+            "   float b = y * pixelScaleY + yt;"
+            "   float ca = a;"
+            "   float cb = b;"
+            ""
+            "   int n = 0;"
+            "   while (n < max) {"
+            "       float aa = a * a;"
+            "       float bb = b * b;"
+            "       float ab = a * b;"
+            "       if (aa + bb > 16) break;"
+            "       a = aa - bb + ca;"
+            "       b = 2 * ab + cb;"
+            "       n++;"
+            "   }\n"
+                // Points that never escaped have a * a + b * b <= 16, possibly
+                // below 1, where the nested logarithm is negative or NaN.
+                // They get exactly max instead.
+            "   if (n >= max) {\n"
+            "       A[index] = (float) max;\n"
+            "   } else {\n"
+                // N + 1 - log (log  |Z(N)|) / log 2
+            "       A[index] = ((float)n) + 1 - log(log(a * a + b * b) / 2) / log(2.0f);\n"
+            "   }\n"
+//            "   A[index] = ((float)n) + 1 - (a * a + b * b - 16) / (256 - 16);\n"
+    //        "   A[get_global_id(0)] = 5;"
+            "}";
+    }
+}

+ 131 - 0
libmandel/src/CpuGeneratorsAVX.cpp

@@ -0,0 +1,131 @@
+#include "CpuGeneratorsAVX.h"
+
+#include <immintrin.h>
+#include <omp.h>
+
+#include <memory>
+
+using mnd::CpuGeneratorAvxFloat;
+using mnd::CpuGeneratorAvxDouble;
+
+
+void CpuGeneratorAvxFloat::generate(const mnd::MandelInfo& info, float* data) // fills data with per-pixel iteration counts, 8 float lanes at a time
+{
+    using T = float;
+    const MandelViewport& view = info.view;
+    omp_set_num_threads(2 * omp_get_num_procs()); // request twice as many OpenMP threads as processors
+#pragma omp parallel for
+    for (long j = 0; j < info.bHeight; j++) { // one iteration per bitmap row, distributed by the pragma above
+        T y = T(view.y) + T(j) * T(view.height / info.bHeight); // imaginary coordinate of row j
+        long i = 0;
+        for (i; i < info.bWidth; i += 8) { // 8 horizontally adjacent pixels per step
+            __m256 xs = { // real coordinates of pixels i .. i + 7, computed in double and narrowed to float
+                float(view.x + double(i) * view.width / info.bWidth),
+                float(view.x + double(i + 1) * view.width / info.bWidth),
+                float(view.x + double(i + 2) * view.width / info.bWidth),
+                float(view.x + double(i + 3) * view.width / info.bWidth),
+                float(view.x + double(i + 4) * view.width / info.bWidth),
+                float(view.x + double(i + 5) * view.width / info.bWidth),
+                float(view.x + double(i + 6) * view.width / info.bWidth),
+                float(view.x + double(i + 7) * view.width / info.bWidth)
+            };
+
+            __m256 counter = {0, 0, 0, 0, 0, 0, 0, 0}; // per-lane iteration count
+            __m256 adder = {1, 1, 1, 1, 1, 1, 1, 1}; // per-lane increment; masked to 0 once a lane exceeds the threshold
+
+            __m256 threshold = {16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f}; // compared against a*a + b*b
+
+            __m256 ys = {y, y, y, y, y, y, y, y};
+            __m256 a = xs; // real part of z, starts at c
+            __m256 b = ys; // imaginary part of z, starts at c
+
+            for (int k = 0; k < info.maxIter; k++) {
+                __m256 aa = _mm256_mul_ps(a, a);
+                __m256 bb = _mm256_mul_ps(b, b);
+                __m256 abab = _mm256_mul_ps(a, b); abab = _mm256_add_ps(abab, abab); // 2 * a * b
+                a = _mm256_add_ps(_mm256_sub_ps(aa, bb), xs); // z = z * z + c, real part
+                b = _mm256_add_ps(abab, ys); // z = z * z + c, imaginary part
+                __m256i cmp = _mm256_castps_si256(_mm256_cmp_ps(_mm256_add_ps(aa, bb), threshold, _CMP_LE_OQ)); // all-ones in lanes where aa + bb <= 16 (values from before the update)
+                adder = _mm256_and_ps(adder, _mm256_castsi256_ps(cmp)); // lanes above the threshold stop counting
+                counter = _mm256_add_ps(counter, adder);
+                if (_mm256_testz_si256(cmp, cmp) != 0) { // cmp is all zero: every lane is above the threshold
+                    break;
+                }
+            }
+
+            auto alignVec = [](float* data) -> float* { // returns the first 32-byte aligned address at or after data
+                void* aligned = data;
+                ::size_t length = 64;
+                std::align(32, 8 * sizeof(float), aligned, length);
+                return static_cast<float*>(aligned);
+            };
+
+            float resData[16]; // twice the needed size, leaving room for the alignment adjustment
+            float* ftRes = alignVec(resData);
+
+            _mm256_store_ps(ftRes, counter); // aligned store of the 8 counts
+            for (int k = 0; k < 8 && i + k < info.bWidth; k++) // lanes past the right edge of the bitmap are skipped
+                data[i + k + j * info.bWidth] = ftRes[k] > 0 ? ftRes[k] : info.maxIter; // a count of 0 is stored as maxIter
+        }
+    }
+}
+
+
+void CpuGeneratorAvxDouble::generate(const mnd::MandelInfo& info, float* data) // fills data with per-pixel iteration counts, 4 double lanes at a time
+{
+    using T = double;
+    const MandelViewport& view = info.view;
+
+    omp_set_num_threads(2 * omp_get_num_procs()); // request twice as many OpenMP threads as processors
+#pragma omp parallel for
+    for (long j = 0; j < info.bHeight; j++) { // one iteration per bitmap row, distributed by the pragma above
+        T y = T(view.y) + T(j) * view.height / info.bHeight; // imaginary coordinate of row j
+        long i = 0;
+        for (i; i < info.bWidth; i += 4) { // 4 horizontally adjacent pixels per step
+            __m256d xs = { // real coordinates of pixels i .. i + 3
+                double(view.x) + double(i) * view.width / info.bWidth,
+                double(view.x) + double(i + 1) * view.width / info.bWidth,
+                double(view.x) + double(i + 2) * view.width / info.bWidth,
+                double(view.x) + double(i + 3) * view.width / info.bWidth
+            };
+
+            int itRes[4] = { 0, 0, 0, 0 }; // not used anywhere below
+
+            __m256d threshold = { 16.0, 16.0, 16.0, 16.0 }; // compared against a*a + b*b
+            __m256d counter = { 0, 0, 0, 0 }; // per-lane iteration count
+            __m256d adder = { 1, 1, 1, 1 }; // per-lane increment; masked to 0 once a lane exceeds the threshold
+
+            __m256d ys = { y, y, y, y };
+            __m256d a = xs; // real part of z, starts at c
+            __m256d b = ys; // imaginary part of z, starts at c
+
+            for (int k = 0; k < info.maxIter; k++) {
+                __m256d aa = _mm256_mul_pd(a, a);
+                __m256d bb = _mm256_mul_pd(b, b);
+                __m256d abab = _mm256_mul_pd(a, b); abab = _mm256_add_pd(abab, abab); // 2 * a * b
+                a = _mm256_add_pd(_mm256_sub_pd(aa, bb), xs); // z = z * z + c, real part
+                b = _mm256_add_pd(abab, ys); // z = z * z + c, imaginary part
+                __m256i cmp = _mm256_castpd_si256(_mm256_cmp_pd(_mm256_add_pd(aa, bb), threshold, _CMP_LE_OQ)); // all-ones in lanes where aa + bb <= 16 (values from before the update)
+                adder = _mm256_and_pd(adder, _mm256_castsi256_pd(cmp)); // lanes above the threshold stop counting
+                counter = _mm256_add_pd(counter, adder);
+                if (_mm256_testz_si256(cmp, cmp) != 0) { // cmp is all zero: every lane is above the threshold
+                    break;
+                }
+            }
+
+            auto alignVec = [](double* data) -> double* { // returns the first 32-byte aligned address at or after data
+                void* aligned = data;
+                ::size_t length = 64;
+                std::align(32, 4 * sizeof(double), aligned, length);
+                return static_cast<double*>(aligned);
+            };
+
+            double resData[8]; // twice the needed size, leaving room for the alignment adjustment
+            double* ftRes = alignVec(resData);
+            _mm256_store_pd(ftRes, counter); // aligned store of the 4 counts
+            for (int k = 0; k < 4 && i + k < info.bWidth; k++) // lanes past the right edge of the bitmap are skipped
+                data[i + k + j * info.bWidth] = ftRes[k] > 0 ? float(ftRes[k]) : info.maxIter; // a count of 0 is stored as maxIter
+        }
+    }
+}
+

+ 9 - 0
libmandel/src/Generators.cpp

@@ -0,0 +1,9 @@
+#include "Generators.h"
+
+using mnd::Generator;
+
+Generator::~Generator(void) // out-of-line definition of the virtual destructor; the base class has nothing to release
+{
+}
+
+

+ 26 - 0
libmandel/src/MandelUtil.cpp

@@ -0,0 +1,26 @@
+#include "MandelUtil.h"
+
+using mnd::MandelViewport;
+
+/*!
+ * \brief Enlarges either width or height so that width / height matches
+ *        nwidth / nheight. x and y are not modified.
+ */
+void MandelViewport::adjustAspectRatio(double nwidth, double nheight)
+{
+    const double targetRatio = nwidth / nheight;
+    const double widthForHeight = height * targetRatio;
+    const double heightForWidth = width / targetRatio;
+
+    if (width < widthForHeight) {
+        // too narrow for the requested ratio: widen
+        width = widthForHeight;
+        return;
+    }
+
+    if (height < heightForWidth) {
+        // too flat for the requested ratio: heighten
+        height = heightForWidth;
+    }
+}
+
+
+/*!
+ * \brief Turns negative spans into positive ones without changing the
+ *        rectangle that the viewport covers.
+ */
+void MandelViewport::normalize(void)
+{
+    const bool mirroredHorizontally = width < 0;
+    const bool mirroredVertically = height < 0;
+
+    if (mirroredHorizontally) {
+        // move the origin to the opposite edge, then flip the sign of the span
+        x = x + width;
+        width = -width;
+    }
+
+    if (mirroredVertically) {
+        y = y + height;
+        height = -height;
+    }
+}
+

+ 10 - 0
libmandel/src/mandel.cpp

@@ -0,0 +1,10 @@
+#include "Mandel.h"
+
+
+namespace mnd
+{
+    MandelContext initializeContext(void) // factory function: returns a default-constructed MandelContext
+    {
+        return MandelContext(); // no generators are added to the context here
+    }
+}