5 vuotta sitten · d1c37a91b4
--- a/choosegenerators.cpp
+++ b/choosegenerators.cpp
@@ -300,6 +300,7 @@ void ChooseGenerators::on_compile_clicked()
 
				     std::string expr = mnd::toString(*itf.expr);
			
 
				     printf("%s\n", expr.c_str()); fflush(stdout);
			
 
				     //chosenGenerator = std::make_unique<mnd::NaiveGenerator>(std::move(itf), mnd::getPrecision<double>());
			
 
				+    //return;
			
 
				     mnd::ir::Formula irform = mnd::expand(itf);
			
 
				     printf("%s\n", irform.toString().c_str()); fflush(stdout);
			
 
				     auto cg = std::make_unique<mnd::CompiledGenerator>(mnd::compile(irform));
			
--- a/libmandel/include/Arena.h
+++ b/libmandel/include/Arena.h
@@ -10,7 +10,17 @@ namespace mnd
 
				 {
			
 
				     namespace util
			
 
				     {
			
 
				-        template <typename T, int chunkSize = 32>
			
 
				+        //!
			
 
				+        //! \brief Arena-allocator for a generic type
			
 
				+        //!
			
 
				+        //! The arena allocator provides an allocate function to allocate
			
 
				+        //! and construct objects of type T. All allocated objects live as
			
 
				+        //! long as the Arena lives and are destructed in the inverse order.
			
 
				+        //! 
			
 
				+        //! \tparam T the type for the Arena to allocate
			
 
				+        //! \tparam chunkSize the Arena allocates objects in chunks of this size
			
 
				+        //!
			
 
				+        template <typename T, int chunkSize = 64>
			
 
				         class Arena
			
 
				         {
			
 
				             struct Chunk
			
@@ -35,8 +45,9 @@ namespace mnd
 
				             };
			
 
				 
			
 
				             std::vector<std::unique_ptr<Chunk>> chunks;
			
 
				-        public:
			
 
				 
			
 
				+            Chunk& lastChunk(void) { return *chunks[chunks.size() - 1]; }
			
 
				+        public:
			
 
				             Arena(void) = default;
			
 
				             Arena(const Arena&) = delete;
			
 
				             Arena(Arena&&) = default;
			
@@ -50,8 +61,10 @@ namespace mnd
 
				             Arena& operator=(const Arena&) = delete;
			
 
				             Arena& operator=(Arena&&) = default;
			
 
				 
			
 
				-            Chunk& lastChunk(void) { return *chunks[chunks.size() - 1]; }
			
 
				-
			
 
				+            //!
			
 
				+            //! \brief construct one object whose lifetime is managed by
			
 
				+            //!        the arena.
			
 
				+            //!
			
 
				             template<typename... Args>
			
 
				             T* allocate(Args&&... args)
			
 
				             {
			
--- a/libmandel/include/IterationIR.h
+++ b/libmandel/include/IterationIR.h
@@ -27,6 +27,8 @@ namespace mnd
 
				         struct Pow;
			
 
				         struct Cos;
			
 
				         struct Sin;
			
 
				+        struct Exp;
			
 
				+        struct Ln;
			
 
				 
			
 
				         using Node = std::variant<
			
 
				             Constant,
			
@@ -38,7 +40,9 @@ namespace mnd
 
				             Atan2,
			
 
				             Pow,
			
 
				             Cos,
			
 
				-            Sin
			
 
				+            Sin,
			
 
				+            Exp,
			
 
				+            Ln
			
 
				         >;
			
 
				 
			
 
				         struct Formula
			
@@ -144,4 +148,16 @@ struct mnd::ir::Sin : mnd::ir::UnaryOperation
 
				 };
			
 
				 
			
 
				 
			
 
				+struct mnd::ir::Exp : mnd::ir::UnaryOperation
			
 
				+{
			
 
				+    inline Exp(Node* value) : UnaryOperation{ value } {}
			
 
				+};
			
 
				+
			
 
				+
			
 
				+struct mnd::ir::Ln : mnd::ir::UnaryOperation
			
 
				+{
			
 
				+    inline Ln(Node* value) : UnaryOperation{ value } {}
			
 
				+};
			
 
				+
			
 
				+
			
 
				 #endif // MANDEL_ITERATIONIR_H
			
--- a/libmandel/src/IterationCompiler.cpp
+++ b/libmandel/src/IterationCompiler.cpp
@@ -172,6 +172,30 @@ namespace mnd
 
				             call->setRet(0, arg);
			
 
				             return arg;
			
 
				         }
			
 
				+
			
 
				+        Reg operator()(const ir::Exp& ex) {
			
 
				+            using namespace asmjit;
			
 
				+            auto a = visitNode(*ex.value);
			
 
				+
			
 
				+            auto arg = cc.newXmmSd();
			
 
				+            double(*expFunc)(double) = ::exp;
			
 
				+            auto call = cc.call(imm(expFunc), FuncSignatureT<double, double>(CallConv::kIdHostCDecl));
			
 
				+            call->setArg(0, a);
			
 
				+            call->setRet(0, arg);
			
 
				+            return arg;
			
 
				+        }
			
 
				+
			
 
				+        Reg operator()(const ir::Ln& l) {
			
 
				+            using namespace asmjit;
			
 
				+            auto a = visitNode(*l.value);
			
 
				+
			
 
				+            auto arg = cc.newXmmSd();
			
 
				+            double(*logFunc)(double) = ::log;
			
 
				+            auto call = cc.call(imm(logFunc), FuncSignatureT<double, double>(CallConv::kIdHostCDecl));
			
 
				+            call->setArg(0, a);
			
 
				+            call->setRet(0, arg);
			
 
				+            return arg;
			
 
				+        }
			
 
				     };
			
 
				 
			
 
				 
			
@@ -305,6 +329,14 @@ namespace mnd
 
				         std::string operator()(const ir::Sin& a) {
			
 
				             return "sin("s + visitNode(*a.value) + ")";
			
 
				         }
			
 
				+
			
 
				+        std::string operator()(const ir::Exp& a) {
			
 
				+            return "exp("s + visitNode(*a.value) + ")";
			
 
				+        }
			
 
				+
			
 
				+        std::string operator()(const ir::Ln& a) {
			
 
				+            return "ln("s + visitNode(*a.value) + ")";
			
 
				+        }
			
 
				     };
			
 
				 
			
 
				     std::string compileToOpenCl(const ir::Formula& formula)
			
--- a/libmandel/src/IterationIR.cpp
+++ b/libmandel/src/IterationIR.cpp
@@ -107,7 +107,7 @@ namespace mnd
 
				         NodePair operator() (const Pow& p)
			
 
				         {
			
 
				             auto [a, b] = std::visit(*this, *p.left);
			
 
				-            auto [c, unused] = std::visit(*this, *p.right);
			
 
				+            auto [c, d] = std::visit(*this, *p.right);
			
 
				 
			
 
				             auto half = arena.allocate(ir::Constant{ 0.5 });
			
 
				 
			
@@ -117,12 +117,23 @@ namespace mnd
 
				             auto absSq = arena.allocate(ir::Addition{ aa, bb });
			
 
				 
			
 
				             auto halfc = arena.allocate(ir::Multiplication{ c, half });
			
 
				+            auto darg = arena.allocate(ir::Multiplication{ d, arg });
			
 
				+            auto minusdarg = arena.allocate(ir::Negation{ darg });
			
 
				 
			
 
				-            auto newAbs = arena.allocate(ir::Pow{ absSq, halfc });
			
 
				-            auto newArg = arena.allocate(ir::Multiplication{ arg, c });
			
 
				+            auto abspowc = arena.allocate(ir::Pow{ absSq, halfc });
			
 
				+            auto expdarg = arena.allocate(ir::Exp{ minusdarg });
			
 
				+
			
 
				+            auto newAbs = arena.allocate(ir::Multiplication{ abspowc, expdarg });
			
 
				+            auto carg = arena.allocate(ir::Multiplication{ arg, c });
			
 
				+
			
 
				+            auto halfd = arena.allocate(ir::Multiplication{ d, half });
			
 
				+            auto lnabsSq = arena.allocate(ir::Ln{ absSq });
			
 
				+            auto halfdlnabsSq = arena.allocate(ir::Multiplication{ halfd, absSq });
			
 
				+            auto newArg = arena.allocate(ir::Addition{ halfdlnabsSq, carg });
			
 
				 
			
 
				             auto cosArg = arena.allocate(ir::Cos{ newArg });
			
 
				             auto sinArg = arena.allocate(ir::Sin{ newArg });
			
 
				+
			
 
				             auto newA = arena.allocate(ir::Multiplication{ cosArg, newAbs });
			
 
				             auto newB = arena.allocate(ir::Multiplication{ sinArg, newAbs });
			
 
				 
			
@@ -183,6 +194,14 @@ std::string mnd::ir::Formula::toString(void) const
 
				         std::string operator()(const ir::Sin& n) {
			
 
				             return "sin(" + std::visit(*this, *n.value) + ")";
			
 
				         }
			
 
				+
			
 
				+        std::string operator()(const ir::Exp& n) {
			
 
				+            return "exp(" + std::visit(*this, *n.value) + ")";
			
 
				+        }
			
 
				+
			
 
				+        std::string operator()(const ir::Ln& n) {
			
 
				+            return "ln(" + std::visit(*this, *n.value) + ")";
			
 
				+        }
			
 
				     };
			
 
				 
			
 
				     return std::string("a = ") + std::visit(ToStringVisitor{}, *this->newA) + 
			
--- a/libmandel/src/JuliaGenerators.cpp
+++ b/libmandel/src/JuliaGenerators.cpp
@@ -26,32 +26,32 @@ void mnd::JuliaGeneratorFloat::generate(const MandelInfo& info, float * data)
 
				 
			
 
				     const float dppf = float(view.width / info.bWidth);
			
 
				     const float viewxf = float(view.x);
			
 
				-    __m256 viewx = { viewxf, viewxf, viewxf, viewxf, viewxf, viewxf, viewxf, viewxf };
			
 
				-    __m256 dpp = { dppf, dppf, dppf, dppf, dppf, dppf, dppf, dppf };
			
 
				+    __m256 viewx = _mm256_set1_ps(viewxf);
			
 
				+    __m256 dpp = _mm256_set1_ps(dppf);
			
 
				 
			
 
				 
			
 
				     T juliaa = mnd::convert<T>(info.juliaX);
			
 
				     T juliab = mnd::convert<T>(info.juliaY);
			
 
				-    __m256 constA = { juliaa, juliaa, juliaa, juliaa, juliaa, juliaa, juliaa,juliaa };
			
 
				-    __m256 constB = { juliab, juliab, juliab, juliab, juliab, juliab, juliab, juliab};
			
 
				+    __m256 constA = _mm256_set1_ps(juliaa);
			
 
				+    __m256 constB = _mm256_set1_ps(juliab);
			
 
				 
			
 
				     omp_set_num_threads(omp_get_num_procs());
			
 
				 #pragma omp parallel for schedule(static, 1)
			
 
				     for (long j = 0; j < info.bHeight; j++) {
			
 
				         T y = T(view.y) + T(j) * T(view.height / info.bHeight);
			
 
				-        __m256 ys = {y, y, y, y, y, y, y, y};
			
 
				+        __m256 ys = _mm256_set1_ps(y);
			
 
				         long i = 0;
			
 
				         for (i; i < info.bWidth; i += 8) {
			
 
				             __m256 pixc = { float(i), float(i + 1), float(i + 2), float(i + 3), float(i + 4), float(i + 5), float(i + 6), float(i + 7) };
			
 
				 
			
 
				             __m256 xs = _mm256_add_ps(_mm256_mul_ps(dpp, pixc), viewx);
			
 
				 
			
 
				-            __m256 counter = { 0, 0, 0, 0, 0, 0, 0, 0 };
			
 
				-            __m256 adder = { 1, 1, 1, 1, 1, 1, 1, 1 };
			
 
				-            __m256 resultsa = { 0, 0, 0, 0, 0, 0, 0, 0 };
			
 
				-            __m256 resultsb = { 0, 0, 0, 0, 0, 0, 0, 0 };
			
 
				+            __m256 counter = _mm256_setzero_ps();
			
 
				+            __m256 adder = _mm256_set1_ps(1.0f);
			
 
				+            __m256 resultsa = _mm256_setzero_ps();
			
 
				+            __m256 resultsb = _mm256_setzero_ps();
			
 
				 
			
 
				-            __m256 threshold = { 16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f };
			
 
				+            __m256 threshold = _mm256_set1_ps(16.0f);
			
 
				 
			
 
				             __m256 a = xs;
			
 
				             __m256 b = ys;