소스 검색

adding avx512

Nicolas Winkler 6 년 전
부모
커밋
a24722f3c1
3개의 변경된 파일, 31개의 추가 및 5개의 삭제
  1. 17 0
      libmandel/include/CpuGenerators.h
  2. 4 2
      libmandel/include/Hardware.h
  3. 10 3
      libmandel/src/Hardware.cpp

+ 17 - 0
libmandel/include/CpuGenerators.h

@@ -13,6 +13,9 @@ namespace mnd
 
     class CpuGeneratorAvxFloat;
     class CpuGeneratorAvxDouble;
+
+    class CpuGeneratorAvx512Float;
+    class CpuGeneratorAvx512Double;
 }
 
 
@@ -57,4 +60,18 @@ public:
     virtual void generate(const MandelInfo& info, float* data);
 };
 
+
+class mnd::CpuGeneratorAvx512Float : public Generator // Generator subclass declared in namespace mnd; the name suggests an AVX-512 single-precision backend (implementation not shown here - confirm).
+{
+public:
+    virtual void generate(const MandelInfo& info, float* data); // Declaration only; takes a read-only MandelInfo and a float buffer (presumably the output - verify against the definition).
+};
+
+
+class mnd::CpuGeneratorAvx512Double : public Generator // Generator subclass declared in namespace mnd; the name suggests an AVX-512 double-precision backend (implementation not shown here - confirm).
+{
+public:
+    virtual void generate(const MandelInfo& info, float* data); // NOTE(review): data is float* even though the class name says Double; same signature as the Float variant - confirm this is intended.
+};
+
 #endif // MANDEL_CPUGENERATORS_H

+ 4 - 2
libmandel/include/Hardware.h

@@ -13,16 +13,18 @@ class mnd::CpuInfo
     std::string vendor;
     std::string brand;
 
-    bool avx;
     bool sse2;
+    bool avx;
+    bool avx512;
 public:
     CpuInfo(void);
 
     inline const std::string& getVendor(void) const { return vendor; };
     inline const std::string& getBrand(void) const { return brand; };
 
-    inline bool hasAvx(void) const { return avx; };
     inline bool hasSse2(void) const { return sse2; };
+    inline bool hasAvx(void) const { return avx; };
+    inline bool hasAvx512(void) const { return avx512; };
 };
 
 #endif // MANDEL_HARDWARE_H

+ 10 - 3
libmandel/src/Hardware.cpp

@@ -75,15 +75,22 @@ CpuInfo::CpuInfo(void)
         this->brand.erase(this->brand.find_last_not_of(" \n\r\t") + 1);
     }
 
-    std::bitset<32> ecx1;
-    std::bitset<32> edx1;
+    std::bitset<32> ecx1 = 0;
+    std::bitset<32> edx1 = 0;
+    std::bitset<32> ebx7 = 0;
+    std::bitset<32> ecx7 = 0;
 
     if (nData >= 1) {
         ecx1 = cpuData[1][2];
         edx1 = cpuData[1][3];
     }
+    if (nData >= 7) {
+        ebx7 = cpuData[7][1];
+        ecx7 = cpuData[7][2];
+    }
 
-    avx = ecx1[28];
     sse2 = edx1[26];
+    avx = ecx1[28];
+    avx512 = ebx7[16];
 }