Selaa lähdekoodia

implementing exponentiaiton

Nicolas Winkler 5 vuotta sitten
vanhempi
commit
d1c37a91b4

+ 1 - 0
choosegenerators.cpp

@@ -300,6 +300,7 @@ void ChooseGenerators::on_compile_clicked()
     std::string expr = mnd::toString(*itf.expr);
     printf("%s\n", expr.c_str()); fflush(stdout);
     //chosenGenerator = std::make_unique<mnd::NaiveGenerator>(std::move(itf), mnd::getPrecision<double>());
+    //return;
     mnd::ir::Formula irform = mnd::expand(itf);
     printf("%s\n", irform.toString().c_str()); fflush(stdout);
     auto cg = std::make_unique<mnd::CompiledGenerator>(mnd::compile(irform));

+ 17 - 4
libmandel/include/Arena.h

@@ -10,7 +10,17 @@ namespace mnd
 {
     namespace util
     {
-        template <typename T, int chunkSize = 32>
+        //!
+        //! \brief Arena-allocator for a generic type
+        //!
+        //! The arena allocator provides an allocate function to allocate
+        //! and construct objects of type T. All allocated objects live as
+        //! long as the Arena lives and are destructed in the inverse order.
+        //! 
+        //! \tparam T the type for the Arena to allocate
+        //! \tparam chunkSize the Arena allocates objects in chunks of this size
+        //!
+        template <typename T, int chunkSize = 64>
         class Arena
         {
             struct Chunk
@@ -35,8 +45,9 @@ namespace mnd
             };
 
             std::vector<std::unique_ptr<Chunk>> chunks;
-        public:
 
+            Chunk& lastChunk(void) { return *chunks[chunks.size() - 1]; }
+        public:
             Arena(void) = default;
             Arena(const Arena&) = delete;
             Arena(Arena&&) = default;
@@ -50,8 +61,10 @@ namespace mnd
             Arena& operator=(const Arena&) = delete;
             Arena& operator=(Arena&&) = default;
 
-            Chunk& lastChunk(void) { return *chunks[chunks.size() - 1]; }
-
+            //!
+            //! \brief construct one object whose lifetime is managed by
+            //!        the arena.
+            //!
             template<typename... Args>
             T* allocate(Args&&... args)
             {

+ 17 - 1
libmandel/include/IterationIR.h

@@ -27,6 +27,8 @@ namespace mnd
         struct Pow;
         struct Cos;
         struct Sin;
+        struct Exp;
+        struct Ln;
 
         using Node = std::variant<
             Constant,
@@ -38,7 +40,9 @@ namespace mnd
             Atan2,
             Pow,
             Cos,
-            Sin
+            Sin,
+            Exp,
+            Ln
         >;
 
         struct Formula
@@ -144,4 +148,16 @@ struct mnd::ir::Sin : mnd::ir::UnaryOperation
 };
 
 
+struct mnd::ir::Exp : mnd::ir::UnaryOperation
+{
+    inline Exp(Node* value) : UnaryOperation{ value } {}
+};
+
+
+struct mnd::ir::Ln : mnd::ir::UnaryOperation
+{
+    inline Ln(Node* value) : UnaryOperation{ value } {}
+};
+
+
 #endif // MANDEL_ITERATIONIR_H

+ 32 - 0
libmandel/src/IterationCompiler.cpp

@@ -172,6 +172,30 @@ namespace mnd
             call->setRet(0, arg);
             return arg;
         }
+
+        Reg operator()(const ir::Exp& ex) {
+            using namespace asmjit;
+            auto a = visitNode(*ex.value);
+
+            auto arg = cc.newXmmSd();
+            double(*expFunc)(double) = ::exp;
+            auto call = cc.call(imm(expFunc), FuncSignatureT<double, double>(CallConv::kIdHostCDecl));
+            call->setArg(0, a);
+            call->setRet(0, arg);
+            return arg;
+        }
+
+        Reg operator()(const ir::Ln& l) {
+            using namespace asmjit;
+            auto a = visitNode(*l.value);
+
+            auto arg = cc.newXmmSd();
+            double(*logFunc)(double) = ::log;
+            auto call = cc.call(imm(logFunc), FuncSignatureT<double, double>(CallConv::kIdHostCDecl));
+            call->setArg(0, a);
+            call->setRet(0, arg);
+            return arg;
+        }
     };
 
 
@@ -305,6 +329,14 @@ namespace mnd
         std::string operator()(const ir::Sin& a) {
             return "sin("s + visitNode(*a.value) + ")";
         }
+
+        std::string operator()(const ir::Exp& a) {
+            return "exp("s + visitNode(*a.value) + ")";
+        }
+
+        std::string operator()(const ir::Ln& a) {
+            return "ln("s + visitNode(*a.value) + ")";
+        }
     };
 
     std::string compileToOpenCl(const ir::Formula& formula)

+ 22 - 3
libmandel/src/IterationIR.cpp

@@ -107,7 +107,7 @@ namespace mnd
         NodePair operator() (const Pow& p)
         {
             auto [a, b] = std::visit(*this, *p.left);
-            auto [c, unused] = std::visit(*this, *p.right);
+            auto [c, d] = std::visit(*this, *p.right);
 
             auto half = arena.allocate(ir::Constant{ 0.5 });
 
@@ -117,12 +117,23 @@ namespace mnd
             auto absSq = arena.allocate(ir::Addition{ aa, bb });
 
             auto halfc = arena.allocate(ir::Multiplication{ c, half });
+            auto darg = arena.allocate(ir::Multiplication{ d, arg });
+            auto minusdarg = arena.allocate(ir::Negation{ darg });
 
-            auto newAbs = arena.allocate(ir::Pow{ absSq, halfc });
-            auto newArg = arena.allocate(ir::Multiplication{ arg, c });
+            auto abspowc = arena.allocate(ir::Pow{ absSq, halfc });
+            auto expdarg = arena.allocate(ir::Exp{ minusdarg });
+
+            auto newAbs = arena.allocate(ir::Multiplication{ abspowc, expdarg });
+            auto carg = arena.allocate(ir::Multiplication{ arg, c });
+
+            auto halfd = arena.allocate(ir::Multiplication{ d, half });
+            auto lnabsSq = arena.allocate(ir::Ln{ absSq });
+            auto halfdlnabsSq = arena.allocate(ir::Multiplication{ halfd, absSq });
+            auto newArg = arena.allocate(ir::Addition{ halfdlnabsSq, carg });
 
             auto cosArg = arena.allocate(ir::Cos{ newArg });
             auto sinArg = arena.allocate(ir::Sin{ newArg });
+
             auto newA = arena.allocate(ir::Multiplication{ cosArg, newAbs });
             auto newB = arena.allocate(ir::Multiplication{ sinArg, newAbs });
 
@@ -183,6 +194,14 @@ std::string mnd::ir::Formula::toString(void) const
         std::string operator()(const ir::Sin& n) {
             return "sin(" + std::visit(*this, *n.value) + ")";
         }
+
+        std::string operator()(const ir::Exp& n) {
+            return "exp(" + std::visit(*this, *n.value) + ")";
+        }
+
+        std::string operator()(const ir::Ln& n) {
+            return "ln(" + std::visit(*this, *n.value) + ")";
+        }
     };
 
     return std::string("a = ") + std::visit(ToStringVisitor{}, *this->newA) + 

+ 10 - 10
libmandel/src/JuliaGenerators.cpp

@@ -26,32 +26,32 @@ void mnd::JuliaGeneratorFloat::generate(const MandelInfo& info, float * data)
 
     const float dppf = float(view.width / info.bWidth);
     const float viewxf = float(view.x);
-    __m256 viewx = { viewxf, viewxf, viewxf, viewxf, viewxf, viewxf, viewxf, viewxf };
-    __m256 dpp = { dppf, dppf, dppf, dppf, dppf, dppf, dppf, dppf };
+    __m256 viewx = _mm256_set1_ps(viewxf);
+    __m256 dpp = _mm256_set1_ps(dppf);
 
 
     T juliaa = mnd::convert<T>(info.juliaX);
     T juliab = mnd::convert<T>(info.juliaY);
-    __m256 constA = { juliaa, juliaa, juliaa, juliaa, juliaa, juliaa, juliaa,juliaa };
-    __m256 constB = { juliab, juliab, juliab, juliab, juliab, juliab, juliab, juliab};
+    __m256 constA = _mm256_set1_ps(juliaa);
+    __m256 constB = _mm256_set1_ps(juliab);
 
     omp_set_num_threads(omp_get_num_procs());
 #pragma omp parallel for schedule(static, 1)
     for (long j = 0; j < info.bHeight; j++) {
         T y = T(view.y) + T(j) * T(view.height / info.bHeight);
-        __m256 ys = {y, y, y, y, y, y, y, y};
+        __m256 ys = _mm256_set1_ps(y);
         long i = 0;
         for (i; i < info.bWidth; i += 8) {
             __m256 pixc = { float(i), float(i + 1), float(i + 2), float(i + 3), float(i + 4), float(i + 5), float(i + 6), float(i + 7) };
 
             __m256 xs = _mm256_add_ps(_mm256_mul_ps(dpp, pixc), viewx);
 
-            __m256 counter = { 0, 0, 0, 0, 0, 0, 0, 0 };
-            __m256 adder = { 1, 1, 1, 1, 1, 1, 1, 1 };
-            __m256 resultsa = { 0, 0, 0, 0, 0, 0, 0, 0 };
-            __m256 resultsb = { 0, 0, 0, 0, 0, 0, 0, 0 };
+            __m256 counter = _mm256_setzero_ps();
+            __m256 adder = _mm256_set1_ps(1.0f);
+            __m256 resultsa = _mm256_setzero_ps();
+            __m256 resultsb = _mm256_setzero_ps();
 
-            __m256 threshold = { 16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f, 16.0f };
+            __m256 threshold = _mm256_set1_ps(16.0f);
 
             __m256 a = xs;
             __m256 b = ys;