Pārlūkot izejas kodu

implemented sse2 version

Nicolas Winkler 6 gadi atpakaļ
vecāks
revīzija
2c649495e8

+ 7 - 0
libmandel/CMakeLists.txt

@@ -21,6 +21,13 @@ else()
     set_source_files_properties(src/CpuGeneratorsAVX.cpp PROPERTIES COMPILE_FLAGS -mavx)
 endif(MSVC)
 
+
+if (MSVC) # SSE2 flags belong on the SSE2 translation unit; setting COMPILE_FLAGS on CpuGeneratorsAVX.cpp again would replace the AVX flags assigned above
+    set_source_files_properties(src/CpuGeneratorsSSE2.cpp PROPERTIES COMPILE_FLAGS /arch:SSE2) # NOTE(review): assumes the SSE2 source is src/CpuGeneratorsSSE2.cpp - confirm the file name
+else()
+    set_source_files_properties(src/CpuGeneratorsSSE2.cpp PROPERTIES COMPILE_FLAGS -msse2)
+endif(MSVC)
+
 add_library(mandel STATIC ${MandelSources})
 
 

+ 34 - 0
libmandel/include/CpuGenerators.h

@@ -7,6 +7,12 @@ namespace mnd
 {
     class CpuGeneratorFloat;
     class CpuGeneratorDouble;
+
+    class CpuGeneratorSse2Float;
+    class CpuGeneratorSse2Double;
+
+    class CpuGeneratorAvxFloat;
+    class CpuGeneratorAvxDouble;
 }
 
 
@@ -23,4 +29,32 @@ public:
     virtual void generate(const MandelInfo& info, float* data);
 };
 
+
+class mnd::CpuGeneratorSse2Float : public Generator // NOTE(review): presumably the SSE2 single-precision generator (going by the name); the implementation is not part of this header
+{
+public:
+    virtual void generate(const MandelInfo& info, float* data); // same signature as the other CPU generators declared in this header
+};
+
+
+class mnd::CpuGeneratorSse2Double : public Generator // NOTE(review): presumably the SSE2 double-precision generator (going by the name); the implementation is not part of this header
+{
+public:
+    virtual void generate(const MandelInfo& info, float* data); // note: the buffer parameter is float* here as well, matching the Float variant
+};
+
+
+class mnd::CpuGeneratorAvxFloat : public Generator // declaration moved here from the removed CpuGeneratorsAVX.h; NOTE(review): presumably the AVX single-precision generator (going by the name)
+{
+public:
+    virtual void generate(const MandelInfo& info, float* data); // same signature as the other CPU generators declared in this header
+};
+
+
+class mnd::CpuGeneratorAvxDouble : public Generator // declaration moved here from the removed CpuGeneratorsAVX.h; NOTE(review): presumably the AVX double-precision generator (going by the name)
+{
+public:
+    virtual void generate(const MandelInfo& info, float* data); // note: the buffer parameter is float* here as well, matching the Float variant
+};
+
 #endif // MANDEL_CPUGENERATORS_H

+ 0 - 26
libmandel/include/CpuGeneratorsAVX.h

@@ -1,26 +0,0 @@
-#ifndef MANDEL_CPUGENERATORSAVX_H
-#define MANDEL_CPUGENERATORSAVX_H
-
-#include "Generators.h"
-
-namespace mnd
-{
-    class CpuGeneratorAvxFloat;
-    class CpuGeneratorAvxDouble;
-}
-
-
-class mnd::CpuGeneratorAvxFloat : public Generator
-{
-public:
-    virtual void generate(const MandelInfo& info, float* data);
-};
-
-
-class mnd::CpuGeneratorAvxDouble : public Generator
-{
-public:
-    virtual void generate(const MandelInfo& info, float* data);
-};
-
-#endif // MANDEL_CPUGENERATORSAVX_H

+ 2 - 0
libmandel/include/Hardware.h

@@ -14,6 +14,7 @@ class mnd::CpuInfo
     std::string brand;
 
     bool avx;
+    bool sse2;
 public:
     CpuInfo(void);
 
@@ -21,6 +22,7 @@ public:
     inline const std::string& getBrand(void) const { return brand; };
 
     inline bool hasAvx(void) const { return avx; };
+    inline bool hasSse2(void) const { return sse2; };
 };
 
 #endif // MANDEL_HARDWARE_H

+ 1 - 1
libmandel/src/CpuGeneratorsAVX.cpp

@@ -1,4 +1,4 @@
-#include "CpuGeneratorsAVX.h"
+#include "CpuGenerators.h"
 
 #include <immintrin.h>
 #include <omp.h>

+ 5 - 4
libmandel/src/Hardware.cpp

@@ -29,9 +29,9 @@ CpuInfo::CpuInfo(void)
     __cpuid(0x80000000, dat[0], dat[1], dat[2], dat[3]);
     nExtData = dat[0];
 #else
-    __cpuid(dat.data(), 0);
+    __cpuid((int*) dat.data(), 0);
     nData = dat[0];
-    __cpuid(dat.data(), 0x80000000);
+    __cpuid((int*) dat.data(), 0x80000000);
     nExtData = dat[0];
 #endif
 
@@ -39,7 +39,7 @@ CpuInfo::CpuInfo(void)
 #ifdef __GNUC__
         __get_cpuid(i, &dat[0], &dat[1], &dat[2], &dat[3]);
 #else
-        __cpuidex(dat.data(), i, 0);
+        __cpuidex((int*) dat.data(), i, 0);
 #endif
         cpuData.push_back(dat);
     }
@@ -48,7 +48,7 @@ CpuInfo::CpuInfo(void)
 #ifdef __GNUC__
         __get_cpuid(i, &dat[0], &dat[1], &dat[2], &dat[3]);
 #else
-        __cpuidex(dat.data(), i, 0);
+        __cpuidex((int*) dat.data(), i, 0);
 #endif
         extData.push_back(dat);
     }
@@ -84,5 +84,6 @@ CpuInfo::CpuInfo(void)
     }
 
     avx = ecx1[28];
+    sse2 = edx1[26];
 }
 

+ 4 - 1
libmandel/src/mandel.cpp

@@ -1,7 +1,6 @@
 #include "Mandel.h"
 
 #include "CpuGenerators.h"
-#include "CpuGeneratorsAVX.h"
 #include "ClGenerators.h"
 
 using mnd::MandelDevice;
@@ -48,6 +47,10 @@ MandelContext::MandelContext(void)
         cpuGeneratorFloat = std::make_unique<CpuGeneratorAvxFloat>();
         cpuGeneratorDouble = std::make_unique<CpuGeneratorAvxDouble>();
     }
+    else if (cpuInfo.hasSse2()) {
+        cpuGeneratorFloat = std::make_unique<CpuGeneratorSse2Float>();
+        cpuGeneratorDouble = std::make_unique<CpuGeneratorSse2Double>();
+    }
     else {
         cpuGeneratorFloat = std::make_unique<CpuGeneratorFloat>();
         cpuGeneratorDouble = std::make_unique<CpuGeneratorDouble>();