Nicolas Winkler преди 5 години
родител
ревизия
10745f0cac
променени са 5 файла, в които са добавени 16 реда и са изтрити 16 реда
  1. 3 2
      libmandel/include/ClGenerators.h
  2. 1 2
      libmandel/include/IterationGenerator.h
  3. 2 2
      libmandel/src/ClGenerators.cpp
  4. 9 9
      libmandel/src/CpuGeneratorsAVX.cpp
  5. 1 1
      libmandel/src/Mandel.cpp

+ 3 - 2
libmandel/include/ClGenerators.h

@@ -4,6 +4,7 @@
 #ifdef WITH_OPENCL
 
 #include "Generators.h"
+#include "OpenClCode.h"
 
 #ifdef __APPLE__
 #include <OpenCL/cl.hpp>
@@ -46,7 +47,7 @@ class mnd::ClGeneratorFloat : public ClGenerator
 {
     bool useVec;
 public:
-    ClGeneratorFloat(MandelDevice& device, const std::string& code);
+    ClGeneratorFloat(MandelDevice& device, const std::string& code = getFloat_cl());
     virtual ~ClGeneratorFloat(void) = default;
 
     virtual void generate(const MandelInfo& info, float* data) override;
@@ -70,7 +71,7 @@ protected:
 class mnd::ClGeneratorDouble : public ClGenerator
 {
 public:
-    ClGeneratorDouble(mnd::MandelDevice& device);
+    ClGeneratorDouble(mnd::MandelDevice& device, const std::string& source = getDouble_cl());
     virtual ~ClGeneratorDouble(void) = default;
 
     virtual void generate(const MandelInfo& info, float* data) override;

+ 1 - 2
libmandel/include/IterationGenerator.h

@@ -84,7 +84,7 @@ class mnd::CompiledClGenerator : public mnd::ClGeneratorFloat
 public:
     CompiledClGenerator(MandelDevice& device, const std::string& code);
     CompiledClGenerator(CompiledClGenerator&&) = default;
-    virtual void generate(const MandelInfo& info, float* data);
+    virtual void generate(const MandelInfo& info, float* data) override;
 };
 
 class mnd::CompiledClGeneratorDouble : public mnd::ClGeneratorDouble
@@ -92,7 +92,6 @@ class mnd::CompiledClGeneratorDouble : public mnd::ClGeneratorDouble
 public:
     CompiledClGeneratorDouble(MandelDevice& device, const std::string& code);
     CompiledClGeneratorDouble(CompiledClGeneratorDouble&&) = default;
-    virtual void generate(const MandelInfo& info, float* data);
 };
 #endif // WITH_OPENCL
 

+ 2 - 2
libmandel/src/ClGenerators.cpp

@@ -301,8 +301,8 @@ std::string ClGeneratorDoubleFloat::getKernelCode(bool smooth) const
 }
 
 
-ClGeneratorDouble::ClGeneratorDouble(mnd::MandelDevice& device) :
-    ClGenerator{ device, getDouble_cl(), mnd::getPrecision<double>() }
+ClGeneratorDouble::ClGeneratorDouble(mnd::MandelDevice& device, const std::string& source) :
+    ClGenerator{ device, source, mnd::getPrecision<double>() }
 {
     kernel = Kernel(program, "iterate");
 }

+ 9 - 9
libmandel/src/CpuGeneratorsAVX.cpp

@@ -49,17 +49,17 @@ void CpuGenerator<float, mnd::X86_AVX, parallel>::generate(const mnd::MandelInfo
             __m256 xs = _mm256_add_ps(_mm256_mul_ps(dpp, pixc), viewx);
             __m256 xs2 = _mm256_add_ps(_mm256_mul_ps(dpp, pixc2), viewx);
 
-            __m256 counter = { 0, 0, 0, 0, 0, 0, 0, 0 };
-            __m256 adder = { 1, 1, 1, 1, 1, 1, 1, 1 };
-            __m256 resultsa = { 0, 0, 0, 0, 0, 0, 0, 0 };
-            __m256 resultsb = { 0, 0, 0, 0, 0, 0, 0, 0 };
+            __m256 counter = _mm256_setzero_ps();
+            __m256 adder = _mm256_set1_ps(1);
+            __m256 resultsa = _mm256_setzero_ps();
+            __m256 resultsb = _mm256_setzero_ps();
 
-            __m256 counter2 = { 0, 0, 0, 0, 0, 0, 0, 0 };
-            __m256 adder2 = { 1, 1, 1, 1, 1, 1, 1, 1 };
-            __m256 resultsa2 = { 0, 0, 0, 0, 0, 0, 0, 0 };
-            __m256 resultsb2 = { 0, 0, 0, 0, 0, 0, 0, 0 };
+            __m256 counter2 = _mm256_setzero_ps();
+            __m256 adder2 = _mm256_set1_ps(1);
+            __m256 resultsa2 = _mm256_setzero_ps();
+            __m256 resultsb2 = _mm256_setzero_ps();
 
-            __m256 threshold = { 16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f };
+            __m256 threshold = _mm256_set1_ps(16);
 
             __m256 a = xs;
             __m256 a2 = xs2;

+ 1 - 1
libmandel/src/Mandel.cpp

@@ -278,7 +278,7 @@ std::vector<MandelDevice> MandelContext::createDevices(void)
             md.vendor = device.getInfo<CL_DEVICE_VENDOR>();
             //printf("    using opencl device: %s\n", md.name.c_str());
             try {
-                md.mandelGenerators.insert({ GeneratorType::FLOAT, std::make_unique<ClGeneratorFloat>(md, mnd::getFloat_cl()) });
+                md.mandelGenerators.insert({ GeneratorType::FLOAT, std::make_unique<ClGeneratorFloat>(md) });
                 md.mandelGenerators.insert({ GeneratorType::FIXED64, std::make_unique<ClGenerator64>(md) });
                 md.mandelGenerators.insert({ GeneratorType::DOUBLE_FLOAT, std::make_unique<ClGeneratorDoubleFloat>(md) });
             }