1
0

asmjit_test_misc.h 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. // [AsmJit]
  2. // Machine Code Generation for C++.
  3. //
  4. // [License]
  5. // Zlib - See LICENSE.md file in the package.
  6. #ifndef _ASMJIT_TEST_MISC_H
  7. #define _ASMJIT_TEST_MISC_H
  8. #include "./asmjit.h"
  9. namespace asmtest {
  // Generate a typical alpha blend function using SSE2 instruction set. Used
  // for benchmarking and also in test86. The generated code should be stable
  // and fully functional.
  //
  // The function emitted into `cc` has the signature
  // `void(void* dst, const void* src, size_t count)` and uses the cdecl calling
  // convention reported by `cc.codeInfo()`. Pixels are 4 bytes wide (both
  // pointers advance by 4 per pixel). The emitted code is laid out as:
  //
  //   1. a head loop that processes single pixels until `dst` is 16-byte
  //      aligned,
  //   2. a main loop that processes 4 pixels (16 bytes) per iteration using
  //      aligned accesses to `dst`,
  //   3. a tail that re-enters the single-pixel loop for the remaining
  //      `count & 3` pixels.
  //
  // Per 16-bit channel the arithmetic is
  //   `((dst * ((~src >> 8) word of the pixel) + 0x0080) * 0x0101) >> 16`
  // combined with `src` - presumably a "source over" blend of premultiplied
  // 32-bit pixels with alpha in the top byte (TODO confirm pixel format).
  //
  // NOTE(review): statement order matters here - it determines the order in
  // which virtual registers/labels are created and instructions are emitted.
  static void generateAlphaBlend(asmjit::x86::Compiler& cc) {
    using namespace asmjit;
    using namespace asmjit::x86;

    // Pointer-sized virtual registers: destination/source cursors, the two
    // counters (`i` = remaining work, `j` = single-pixel loop counter) and a
    // temporary used only to address the constant pool.
    Gp dst = cc.newIntPtr("dst");
    Gp src = cc.newIntPtr("src");
    Gp i = cc.newIntPtr("i");
    Gp j = cc.newIntPtr("j");
    Gp t = cc.newIntPtr("t");

    // Loop-invariant vector registers: all zeros (used for byte -> word
    // unpacking) and the two constants loaded from the data pool.
    Xmm vzero = cc.newXmm("vzero");
    Xmm v0080 = cc.newXmm("v0080");
    Xmm v0101 = cc.newXmm("v0101");

    Label L_SmallLoop = cc.newLabel();
    Label L_SmallEnd = cc.newLabel();
    Label L_LargeLoop = cc.newLabel();
    Label L_LargeEnd = cc.newLabel();
    Label L_DataPool = cc.newLabel();

    cc.addFunc(FuncSignatureT<void, void*, const void*, size_t>(cc.codeInfo().cdeclCallConv()));

    // Bind the three function arguments; the pixel count arrives in `i`.
    cc.setArg(0, dst);
    cc.setArg(1, src);
    cc.setArg(2, i);

    // Load the constants emitted after the function body (see "Data" below).
    cc.lea(t, x86::ptr(L_DataPool));
    cc.xorps(vzero, vzero);
    cc.movaps(v0080, x86::ptr(t, 0));
    cc.movaps(v0101, x86::ptr(t, 16));

    // How many pixels have to be processed to make the loop aligned:
    // j = ((0 - dst) & 15) >> 2.
    cc.xor_(j, j);
    cc.sub(j, dst);
    cc.and_(j, 15);
    cc.shr(j, 2);
    // Zero flag comes from `shr`: nothing to do in the head loop when `dst`
    // needs no leading pixels.
    cc.jz(L_SmallEnd);

    // NOTE(review): if the count `i` is 0 while `dst` is unaligned, `j` becomes
    // 0 here and the loop below is still entered (its `dec`/`jnz` then does not
    // terminate at zero) - looks like callers must pass a non-zero count;
    // confirm.
    cc.cmp(j, i);
    cc.cmovg(j, i); // j = min(i, j).
    cc.sub(i, j); // i -= j.

    // Small loop: blends one pixel per iteration, `j` iterations. Used both
    // for the unaligned head and (via the jump at L_LargeEnd) for the tail.
    cc.bind(L_SmallLoop);
    {
      Xmm x0 = cc.newXmm("x0");
      Xmm y0 = cc.newXmm("y0");
      Xmm a0 = cc.newXmm("a0");

      // Load one 32-bit pixel from each buffer.
      cc.movd(y0, x86::ptr(src));
      cc.movd(x0, x86::ptr(dst));

      // a0 = (~src) >> 8 in every 16-bit lane, i.e. the inverted high byte of
      // each word.
      cc.pcmpeqb(a0, a0);
      cc.pxor(a0, y0);
      cc.psrlw(a0, 8);
      // Widen the destination bytes to 16-bit words.
      cc.punpcklbw(x0, vzero);
      // Replicate word 1 (the upper half of the pixel) into the four low words
      // so every channel is scaled by the same factor.
      cc.pshuflw(a0, a0, x86::Predicate::shuf(1, 1, 1, 1));
      // Widen the source bytes to 16-bit words.
      cc.punpcklbw(y0, vzero);

      // x0 = ((x0 * a0 + 0x0080) * 0x0101) >> 16, then add the source.
      cc.pmullw(x0, a0);
      cc.paddsw(x0, v0080);
      cc.pmulhuw(x0, v0101);
      cc.paddw(x0, y0);

      // Narrow back to bytes with unsigned saturation and store the pixel.
      cc.packuswb(x0, x0);
      cc.movd(x86::ptr(dst), x0);

      cc.add(dst, 4);
      cc.add(src, 4);
      cc.dec(j);
      cc.jnz(L_SmallLoop);
    }

    // Second section, prepare for an aligned loop.
    cc.bind(L_SmallEnd);
    // `mov` sits between `test` and `jz`; the branch still consumes the flags
    // set by `test` (exit when no pixels remain).
    cc.test(i, i);
    cc.mov(j, i);
    cc.jz(cc.func()->exitLabel());

    // j = remaining pixels modulo 4 (tail), i = number of 4-pixel groups.
    cc.and_(j, 3);
    cc.shr(i, 2);
    // Zero flag comes from `shr`: skip the main loop when there is no full
    // group of four pixels.
    cc.jz(L_LargeEnd);

    // Aligned loop: blends four pixels (16 bytes) per iteration.
    cc.bind(L_LargeLoop);
    {
      Xmm x0 = cc.newXmm("x0");
      Xmm x1 = cc.newXmm("x1");
      Xmm y0 = cc.newXmm("y0");
      Xmm a0 = cc.newXmm("a0");
      Xmm a1 = cc.newXmm("a1");

      // Source is loaded with an unaligned move, destination with an aligned
      // one (the head loop above is what aligns `dst`).
      cc.movups(y0, x86::ptr(src));
      cc.movaps(x0, x86::ptr(dst));

      // a0 = (~src) >> 8 in every 16-bit lane; x1 keeps a copy of the
      // destination for the upper two pixels.
      cc.pcmpeqb(a0, a0);
      cc.xorps(a0, y0);
      cc.movaps(x1, x0);
      cc.psrlw(a0, 8);

      // Interleaved: widen the low/high destination halves to words
      // (x0 = pixels 0-1, x1 = pixels 2-3) and duplicate the factor words
      // (a0 = low half, a1 = high half).
      cc.punpcklbw(x0, vzero);
      cc.movaps(a1, a0);
      cc.punpcklwd(a0, a0);
      cc.punpckhbw(x1, vzero);
      cc.punpckhwd(a1, a1);

      // Spread dwords 1 and 3 across each 64-bit half so all four channels of
      // a pixel share that pixel's factor (presumably - depends on the
      // argument order of Predicate::shuf, which is not visible here).
      cc.pshufd(a0, a0, x86::Predicate::shuf(3, 3, 1, 1));
      cc.pshufd(a1, a1, x86::Predicate::shuf(3, 3, 1, 1));

      // Same scaling as the small loop, applied to both halves.
      cc.pmullw(x0, a0);
      cc.pmullw(x1, a1);
      cc.paddsw(x0, v0080);
      cc.paddsw(x1, v0080);
      cc.pmulhuw(x0, v0101);
      cc.pmulhuw(x1, v0101);

      cc.add(src, 16);
      // Narrow both halves back into 16 bytes with unsigned saturation.
      cc.packuswb(x0, x1);
      // NOTE(review): unlike the small loop, the source is added after
      // packing, using a 16-bit add on packed bytes (`y0` was never unpacked
      // here) - confirm this is intended.
      cc.paddw(x0, y0);
      cc.movaps(x86::ptr(dst), x0);

      cc.add(dst, 16);
      cc.dec(i);
      cc.jnz(L_LargeLoop);
    }

    // Tail: run the small loop for the leftover `j` pixels, if any. `i` is 0
    // at this point, so when the small loop falls through to L_SmallEnd the
    // `test`/`jz` pair there leaves the function.
    cc.bind(L_LargeEnd);
    cc.test(j, j);
    cc.jnz(L_SmallLoop);

    cc.endFunc();

    // Data: two 16-byte constants, aligned to 16 so the `movaps` loads at the
    // top of the function are valid. Presumably each `fromI16` value is
    // replicated across all eight 16-bit lanes (TODO confirm against Data128).
    cc.align(kAlignData, 16);
    cc.bind(L_DataPool);
    cc.dxmm(Data128::fromI16(0x0080));
    cc.dxmm(Data128::fromI16(0x0101));
  }
  124. } // {asmtest}
  125. #endif // _ASMJIT_TEST_MISC_H